Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/arm/cortex-a7.md @ 111:04ced10e8804
gcc 7
author | kono |
---|---|
date | Fri, 27 Oct 2017 22:46:09 +0900 |
parents | |
children | 84e7813d76e9 |
comparison
equal
deleted
inserted
replaced
68:561a7518be6b | 111:04ced10e8804 |
---|---|
1 ;; ARM Cortex-A7 pipeline description | |
2 ;; Copyright (C) 2012-2017 Free Software Foundation, Inc. | |
3 ;; | |
4 ;; Contributed by ARM Ltd. | |
5 ;; Based on cortex-a5.md which was originally contributed by CodeSourcery. | |
6 ;; | |
7 ;; This file is part of GCC. | |
8 ;; | |
9 ;; GCC is free software; you can redistribute it and/or modify it | |
10 ;; under the terms of the GNU General Public License as published by | |
11 ;; the Free Software Foundation; either version 3, or (at your option) | |
12 ;; any later version. | |
13 ;; | |
14 ;; GCC is distributed in the hope that it will be useful, but | |
15 ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 ;; General Public License for more details. | |
18 ;; | |
19 ;; You should have received a copy of the GNU General Public License | |
20 ;; along with GCC; see the file COPYING3. If not see | |
21 ;; <http://www.gnu.org/licenses/>. | |
22 | |
23 (define_attr "cortex_a7_neon_type" | |
24 "neon_mul, neon_mla, neon_other" | |
25 (cond [ | |
26 (eq_attr "type" "neon_mul_b, neon_mul_b_q,\ | |
27 neon_mul_h, neon_mul_h_q,\ | |
28 neon_mul_s, neon_mul_s_q,\ | |
29 neon_mul_b_long, neon_mul_h_long,\ | |
30 neon_mul_s_long, neon_mul_h_scalar,\ | |
31 neon_mul_h_scalar_q, neon_mul_s_scalar,\ | |
32 neon_mul_s_scalar_q, neon_mul_h_scalar_long,\ | |
33 neon_mul_s_scalar_long,\ | |
34 neon_sat_mul_b, neon_sat_mul_b_q,\ | |
35 neon_sat_mul_h, neon_sat_mul_h_q,\ | |
36 neon_sat_mul_s, neon_sat_mul_s_q,\ | |
37 neon_sat_mul_b_long, neon_sat_mul_h_long,\ | |
38 neon_sat_mul_s_long,\ | |
39 neon_sat_mul_h_scalar, neon_sat_mul_h_scalar_q,\ | |
40 neon_sat_mul_s_scalar, neon_sat_mul_s_scalar_q,\ | |
41 neon_sat_mul_h_scalar_long,\ | |
42 neon_sat_mul_s_scalar_long,\ | |
43 neon_fp_mul_s, neon_fp_mul_s_q,\ | |
44 neon_fp_mul_s_scalar, neon_fp_mul_s_scalar_q") | |
45 (const_string "neon_mul") | |
46 (eq_attr "type" "neon_mla_b, neon_mla_b_q, neon_mla_h,\ | |
47 neon_mla_h_q, neon_mla_s, neon_mla_s_q,\ | |
48 neon_mla_b_long, neon_mla_h_long,\ | |
49 neon_mla_s_long,\ | |
50 neon_mla_h_scalar, neon_mla_h_scalar_q,\ | |
51 neon_mla_s_scalar, neon_mla_s_scalar_q,\ | |
52 neon_mla_h_scalar_long, neon_mla_s_scalar_long,\ | |
53 neon_sat_mla_b_long, neon_sat_mla_h_long,\ | |
54 neon_sat_mla_s_long,\ | |
55 neon_sat_mla_h_scalar_long,\ | |
56 neon_sat_mla_s_scalar_long,\ | |
57 neon_fp_mla_s, neon_fp_mla_s_q,\ | |
58 neon_fp_mla_s_scalar, neon_fp_mla_s_scalar_q") | |
59 (const_string "neon_mla")] | |
60 (const_string "neon_other"))) | |
61 | |
62 (define_automaton "cortex_a7") | |
63 | |
64 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
65 ;; Functional units. | |
66 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
67 | |
68 ;; The Cortex-A7 pipeline integer and vfp pipeline. | |
69 ;; The decode is the same for all instructions, so do not model it. | |
70 ;; We only model the first execution stage because | |
71 ;; instructions always advance one stage per cycle in order. | |
72 ;; We model all of the LS, Branch, ALU, MAC and FPU pipelines together. | |
73 | |
74 (define_cpu_unit "cortex_a7_ex1, cortex_a7_ex2" "cortex_a7") | |
75 | |
76 (define_reservation "cortex_a7_both" "cortex_a7_ex1+cortex_a7_ex2") | |
77 | |
78 (define_cpu_unit "cortex_a7_branch" "cortex_a7") | |
79 | |
80 ;; Cortex-A7 is in order and can dual-issue under limited circumstances. | |
81 ;; ex2 can be reserved only after ex1 is reserved. | |
82 | |
83 (final_presence_set "cortex_a7_ex2" "cortex_a7_ex1") | |
84 | |
85 ;; Pseudo-unit for blocking the multiply pipeline when a double-precision | |
86 ;; multiply is in progress. | |
87 | |
88 (define_cpu_unit "cortex_a7_fpmul_pipe" "cortex_a7") | |
89 | |
90 ;; The floating-point add pipeline (ex1/f1 stage), used to model the usage | |
91 ;; of the add pipeline by fmac instructions, etc. | |
92 | |
93 (define_cpu_unit "cortex_a7_fpadd_pipe" "cortex_a7") | |
94 | |
95 ;; Floating-point div/sqrt (long latency, out-of-order completion). | |
96 | |
97 (define_cpu_unit "cortex_a7_fp_div_sqrt" "cortex_a7") | |
98 | |
99 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
100 ;; Branches. | |
101 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
102 | |
103 ;; A direct branch can dual issue either as younger or older instruction, | |
104 ;; but branches cannot dual issue with branches. | |
105 ;; No latency as there is no result. | |
106 | |
107 (define_insn_reservation "cortex_a7_branch" 0 | |
108 (and (eq_attr "tune" "cortexa7") | |
109 (eq_attr "type" "branch")) | |
110 "(cortex_a7_ex2|cortex_a7_ex1)+cortex_a7_branch") | |
111 | |
112 ;; Call cannot dual-issue as an older instruction. It can dual-issue | |
113 ;; as a younger instruction, or single-issue. Call cannot dual-issue | |
114 ;; with another branch instruction. The result is available the next | |
115 ;; cycle. | |
116 (define_insn_reservation "cortex_a7_call" 1 | |
117 (and (eq_attr "tune" "cortexa7") | |
118 (eq_attr "type" "call")) | |
119 "(cortex_a7_ex2|cortex_a7_both)+cortex_a7_branch") | |
120 | |
121 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
122 ;; ALU instructions. | |
123 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
124 | |
125 ;; ALU instruction with an immediate operand can dual-issue. | |
126 (define_insn_reservation "cortex_a7_alu_imm" 2 | |
127 (and (eq_attr "tune" "cortexa7") | |
128 (ior (eq_attr "type" "adr,alu_imm,alus_imm,logic_imm,logics_imm,\ | |
129 mov_imm,mvn_imm,extend") | |
130 (and (eq_attr "type" "mov_reg,mov_shift,mov_shift_reg") | |
131 (not (eq_attr "length" "8"))))) | |
132 "cortex_a7_ex2|cortex_a7_ex1") | |
133 | |
134 ;; ALU instruction with register operands can dual-issue | |
135 ;; with a younger immediate-based instruction. | |
136 (define_insn_reservation "cortex_a7_alu_sreg" 2 | |
137 (and (eq_attr "tune" "cortexa7") | |
138 (eq_attr "type" "alu_sreg,alus_sreg,logic_reg,logics_reg,\ | |
139 adc_imm,adcs_imm,adc_reg,adcs_reg,\ | |
140 bfm,clz,rbit,rev,alu_dsp_reg,\ | |
141 shift_imm,shift_reg,mov_reg,mvn_reg")) | |
142 "cortex_a7_ex1") | |
143 | |
144 (define_insn_reservation "cortex_a7_alu_shift" 2 | |
145 (and (eq_attr "tune" "cortexa7") | |
146 (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ | |
147 logic_shift_imm,logics_shift_imm,\ | |
148 alu_shift_reg,alus_shift_reg,\ | |
149 logic_shift_reg,logics_shift_reg,\ | |
150 mov_shift,mov_shift_reg,\ | |
151 mvn_shift,mvn_shift_reg,\ | |
152 mrs,multiple,no_insn")) | |
153 "cortex_a7_ex1") | |
154 | |
155 ;; Forwarding path for unshifted operands. | |
156 (define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_sreg,cortex_a7_alu_shift" | |
157 "cortex_a7_alu_imm,cortex_a7_alu_sreg,cortex_a7_mul") | |
158 | |
159 (define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_sreg,cortex_a7_alu_shift" | |
160 "cortex_a7_store*" | |
161 "arm_no_early_store_addr_dep") | |
162 | |
163 (define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_sreg,cortex_a7_alu_shift" | |
164 "cortex_a7_alu_shift" | |
165 "arm_no_early_alu_shift_dep") | |
166 | |
167 ;; The multiplier pipeline can forward results from wr stage only so | |
168 ;; there's no need to specify bypasses. | |
169 ;; Multiply instructions cannot dual-issue. | |
170 | |
171 (define_insn_reservation "cortex_a7_mul" 2 | |
172 (and (eq_attr "tune" "cortexa7") | |
173 (ior (eq_attr "mul32" "yes") | |
174 (eq_attr "mul64" "yes"))) | |
175 "cortex_a7_both") | |
176 | |
177 ;; Forward the result of a multiply operation to the accumulator | |
178 ;; of the following multiply and accumulate instruction. | |
179 (define_bypass 1 "cortex_a7_mul" | |
180 "cortex_a7_mul" | |
181 "arm_mac_accumulator_is_result") | |
182 | |
183 ;; The latency depends on the operands, so we use an estimate here. | |
184 (define_insn_reservation "cortex_a7_idiv" 5 | |
185 (and (eq_attr "tune" "cortexa7") | |
186 (eq_attr "type" "udiv,sdiv")) | |
187 "cortex_a7_both*5") | |
188 | |
189 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
190 ;; Load/store instructions. | |
191 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
192 | |
193 ;; Address-generation happens in the issue stage. | |
194 ;; Double-word accesses can be issued in a single cycle, | |
195 ;; and occupy only one pipeline stage. | |
196 | |
197 (define_insn_reservation "cortex_a7_load1" 2 | |
198 (and (eq_attr "tune" "cortexa7") | |
199 (eq_attr "type" "load_byte,load_4")) | |
200 "cortex_a7_ex1") | |
201 | |
202 (define_insn_reservation "cortex_a7_store1" 0 | |
203 (and (eq_attr "tune" "cortexa7") | |
204 (eq_attr "type" "store_4")) | |
205 "cortex_a7_ex1") | |
206 | |
207 (define_insn_reservation "cortex_a7_load2" 2 | |
208 (and (eq_attr "tune" "cortexa7") | |
209 (eq_attr "type" "load_8")) | |
210 "cortex_a7_both") | |
211 | |
212 (define_insn_reservation "cortex_a7_store2" 0 | |
213 (and (eq_attr "tune" "cortexa7") | |
214 (eq_attr "type" "store_8")) | |
215 "cortex_a7_both") | |
216 | |
217 (define_insn_reservation "cortex_a7_load3" 3 | |
218 (and (eq_attr "tune" "cortexa7") | |
219 (eq_attr "type" "load_12")) | |
220 "cortex_a7_both, cortex_a7_ex1") | |
221 | |
222 (define_insn_reservation "cortex_a7_store3" 0 | |
223 (and (eq_attr "tune" "cortexa7") | |
224 (eq_attr "type" "store_16")) | |
225 "cortex_a7_both, cortex_a7_ex1") | |
226 | |
227 (define_insn_reservation "cortex_a7_load4" 3 | |
228 (and (eq_attr "tune" "cortexa7") | |
229 (eq_attr "type" "load_16")) | |
230 "cortex_a7_both, cortex_a7_both") | |
231 | |
232 (define_insn_reservation "cortex_a7_store4" 0 | |
233 (and (eq_attr "tune" "cortexa7") | |
234 (eq_attr "type" "store_12")) | |
235 "cortex_a7_both, cortex_a7_both") | |
236 | |
237 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
238 ;; Floating-point arithmetic. | |
239 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
240 ;; Neon integer, neon floating point, and single-precision floating | |
241 ;; point instructions of the same type have the same timing | |
242 ;; characteristics, but neon instructions cannot dual-issue. | |
243 | |
244 (define_insn_reservation "cortex_a7_fpalu" 4 | |
245 (and (eq_attr "tune" "cortexa7") | |
246 (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fmov,\ | |
247 f_cvt, f_cvtf2i, f_cvti2f, fcmps, fcmpd")) | |
248 "cortex_a7_ex1+cortex_a7_fpadd_pipe") | |
249 | |
250 ;; For fconsts and fconstd, 8-bit immediate data is passed directly from | |
251 ;; f1 to f3 (which I think reduces the latency by one cycle). | |
252 | |
253 (define_insn_reservation "cortex_a7_fconst" 3 | |
254 (and (eq_attr "tune" "cortexa7") | |
255 (eq_attr "type" "fconsts,fconstd")) | |
256 "cortex_a7_ex1+cortex_a7_fpadd_pipe") | |
257 | |
258 ;; We should try not to attempt to issue a single-precision multiplication in | |
259 ;; the middle of a double-precision multiplication operation (the usage of | |
260 ;; cortex_a7_fpmul_pipe). | |
261 | |
262 (define_insn_reservation "cortex_a7_fpmuls" 4 | |
263 (and (eq_attr "tune" "cortexa7") | |
264 (eq_attr "type" "fmuls")) | |
265 "cortex_a7_ex1+cortex_a7_fpmul_pipe") | |
266 | |
267 (define_insn_reservation "cortex_a7_neon_mul" 4 | |
268 (and (eq_attr "tune" "cortexa7") | |
269 (eq_attr "cortex_a7_neon_type" "neon_mul")) | |
270 "(cortex_a7_both+cortex_a7_fpmul_pipe)*2") | |
271 | |
272 (define_insn_reservation "cortex_a7_fpmacs" 8 | |
273 (and (eq_attr "tune" "cortexa7") | |
274 (eq_attr "type" "fmacs,ffmas")) | |
275 "cortex_a7_ex1+cortex_a7_fpmul_pipe") | |
276 | |
277 (define_insn_reservation "cortex_a7_neon_mla" 8 | |
278 (and (eq_attr "tune" "cortexa7") | |
279 (eq_attr "cortex_a7_neon_type" "neon_mla")) | |
280 "cortex_a7_both+cortex_a7_fpmul_pipe") | |
281 | |
282 (define_bypass 4 "cortex_a7_fpmacs,cortex_a7_neon_mla" | |
283 "cortex_a7_fpmacs,cortex_a7_neon_mla" | |
284 "arm_mac_accumulator_is_result") | |
285 | |
286 ;; Non-multiply instructions can issue between two cycles of a | |
287 ;; double-precision multiply. | |
288 | |
289 (define_insn_reservation "cortex_a7_fpmuld" 7 | |
290 (and (eq_attr "tune" "cortexa7") | |
291 (eq_attr "type" "fmuld")) | |
292 "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*3") | |
293 | |
294 (define_insn_reservation "cortex_a7_fpmacd" 11 | |
295 (and (eq_attr "tune" "cortexa7") | |
296 (eq_attr "type" "fmacd")) | |
297 "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*3") | |
298 | |
299 (define_insn_reservation "cortex_a7_fpfmad" 8 | |
300 (and (eq_attr "tune" "cortexa7") | |
301 (eq_attr "type" "ffmad")) | |
302 "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*4") | |
303 | |
304 (define_bypass 7 "cortex_a7_fpmacd" | |
305 "cortex_a7_fpmacd,cortex_a7_fpfmad" | |
306 "arm_mac_accumulator_is_result") | |
307 | |
308 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
309 ;; Floating-point divide/square root instructions. | |
310 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
311 | |
312 (define_insn_reservation "cortex_a7_fdivs" 16 | |
313 (and (eq_attr "tune" "cortexa7") | |
314 (eq_attr "type" "fdivs, fsqrts")) | |
315 "cortex_a7_ex1+cortex_a7_fp_div_sqrt, cortex_a7_fp_div_sqrt * 13") | |
316 | |
317 (define_insn_reservation "cortex_a7_fdivd" 31 | |
318 (and (eq_attr "tune" "cortexa7") | |
319 (eq_attr "type" "fdivd, fsqrtd")) | |
320 "cortex_a7_ex1+cortex_a7_fp_div_sqrt, cortex_a7_fp_div_sqrt * 28") | |
321 | |
322 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
323 ;; VFP to/from core transfers. | |
324 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
325 | |
326 ;; Core-to-VFP transfers. | |
327 | |
328 (define_insn_reservation "cortex_a7_r2f" 4 | |
329 (and (eq_attr "tune" "cortexa7") | |
330 (eq_attr "type" "f_mcr,f_mcrr")) | |
331 "cortex_a7_both") | |
332 | |
333 (define_insn_reservation "cortex_a7_f2r" 2 | |
334 (and (eq_attr "tune" "cortexa7") | |
335 (eq_attr "type" "f_mrc,f_mrrc")) | |
336 "cortex_a7_ex1") | |
337 | |
338 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
339 ;; VFP flag transfer. | |
340 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
341 | |
342 ;; Fixme: The flag forwarding from fmstat to the second instruction is | |
343 ;; not modeled at present. | |
344 | |
345 (define_insn_reservation "cortex_a7_f_flags" 4 | |
346 (and (eq_attr "tune" "cortexa7") | |
347 (eq_attr "type" "f_flag")) | |
348 "cortex_a7_ex1") | |
349 | |
350 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
351 ;; VFP load/store. | |
352 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
353 | |
354 (define_insn_reservation "cortex_a7_f_loads" 4 | |
355 (and (eq_attr "tune" "cortexa7") | |
356 (eq_attr "type" "f_loads")) | |
357 "cortex_a7_ex1") | |
358 | |
359 (define_insn_reservation "cortex_a7_f_loadd" 4 | |
360 (and (eq_attr "tune" "cortexa7") | |
361 (eq_attr "type" "f_loadd")) | |
362 "cortex_a7_both") | |
363 | |
364 (define_insn_reservation "cortex_a7_f_stores" 0 | |
365 (and (eq_attr "tune" "cortexa7") | |
366 (eq_attr "type" "f_stores")) | |
367 "cortex_a7_ex1") | |
368 | |
369 (define_insn_reservation "cortex_a7_f_stored" 0 | |
370 (and (eq_attr "tune" "cortexa7") | |
371 (eq_attr "type" "f_stored")) | |
372 "cortex_a7_both") | |
373 | |
374 ;; Load-to-use for floating-point values has a penalty of one cycle, | |
375 ;; i.e. a latency of two. | |
376 | |
377 (define_bypass 2 "cortex_a7_f_loads, cortex_a7_f_loadd" | |
378 "cortex_a7_fpalu,\ | |
379 cortex_a7_fpmuls,cortex_a7_fpmacs,\ | |
380 cortex_a7_fpmuld,cortex_a7_fpmacd, cortex_a7_fpfmad,\ | |
381 cortex_a7_fdivs, cortex_a7_fdivd,\ | |
382 cortex_a7_f2r") | |
383 | |
384 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
385 ;; NEON | |
386 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
387 | |
388 ;; Simple modeling for all neon instructions not covered earlier. | |
389 | |
390 (define_insn_reservation "cortex_a7_neon" 4 | |
391 (and (eq_attr "tune" "cortexa7") | |
392 (and (eq_attr "is_neon_type" "yes") | |
393 (eq_attr "cortex_a7_neon_type" "neon_other"))) | |
394 "cortex_a7_both*2") |