121
|
1 //=- ARMScheduleA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=//
|
|
2 //
|
|
3 // The LLVM Compiler Infrastructure
|
|
4 //
|
|
5 // This file is distributed under the University of Illinois Open Source
|
|
6 // License. See LICENSE.TXT for details.
|
|
7 //
|
|
8 //===----------------------------------------------------------------------===//
|
|
9 //
|
|
10 // This file defines the machine model for ARM Cortex-A57 to support
|
|
11 // instruction scheduling and other instruction cost heuristics.
|
|
12 //
|
|
13 //===----------------------------------------------------------------------===//
|
|
14
|
|
15 //===----------------------------------------------------------------------===//
|
|
16 // *** Common description and scheduling model parameters taken from AArch64 ***
|
|
17 // The Cortex-A57 is a traditional superscalar microprocessor with a
|
|
18 // conservative 3-wide in-order stage for decode and dispatch. Combined with the
|
|
19 // much wider out-of-order issue stage, this produced a need to carefully
|
|
20 // schedule micro-ops so that all three decoded each cycle are successfully
|
|
21 // issued as the reservation station(s) simply don't stay occupied for long.
|
|
22 // Therefore, IssueWidth is set to the narrower of the two at three, while still
|
|
23 // modeling the machine as out-of-order.
|
|
24
|
|
// True when the instruction defines CPSR (i.e. it is a flag-setting form).
def IsCPSRDefinedPred : SchedPredicate<[{TII->isCPSRDefined(*MI)}]>;

// True when the instruction both defines CPSR and is predicated.
def IsCPSRDefinedAndPredicatedPred :
  SchedPredicate<[{TII->isCPSRDefined(*MI) && TII->isPredicated(*MI)}]>;

// Selects the timings of Cortex-A57 revision r1p0 or later. The predicate is
// hard-wired to false, so variants guarded by it always fall through to the
// r0px timings.
def IsR1P0AndLaterPred : SchedPredicate<[{false}]>;
|
|
31
|
|
// Addrmode3 offset is a register (not an immediate). The three predicates
// differ only in the operand index passed to isAddrMode3OpImm: 1, 2 and 3.
def IsLdrAm3RegOffPred :
  SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 1)}]>;
def IsLdrAm3RegOffPredX2 :
  SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 2)}]>;
def IsLdrAm3RegOffPredX3 :
  SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 3)}]>;

// Addrmode3 offset is a "minus register". Again one predicate per operand
// index: 1, 2 and 3.
def IsLdrAm3NegRegOffPred :
  SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 1)}]>;
def IsLdrAm3NegRegOffPredX2 :
  SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 2)}]>;
def IsLdrAm3NegRegOffPredX3 :
  SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 3)}]>;
|
|
49
|
|
// Scaled register offset that is not "plus LSL2" (operand index 0, 1, 2).
def IsLdstsoScaledNotOptimalPredX0 :
  SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 0)}]>;
def IsLdstsoScaledNotOptimalPred :
  SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 1)}]>;
def IsLdstsoScaledNotOptimalPredX2 :
  SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 2)}]>;

// Any scaled register offset (operand index 1, 2).
def IsLdstsoScaledPred :
  SchedPredicate<[{TII->isLdstScaledReg(*MI, 1)}]>;
def IsLdstsoScaledPredX2 :
  SchedPredicate<[{TII->isLdstScaledReg(*MI, 2)}]>;

// "Minus register" offset as reported by isLdstSoMinusReg
// (operand index 0, 1, 2).
def IsLdstsoMinusRegPredX0 :
  SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 0)}]>;
def IsLdstsoMinusRegPred :
  SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 1)}]>;
def IsLdstsoMinusRegPredX2 :
  SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 2)}]>;
|
|
70
|
|
// Addrmode2 load with a scaled register offset (operand index 1).
def IsLdrAm2ScaledPred :
  SchedPredicate<[{TII->isAm2ScaledReg(*MI, 1)}]>;

// LDM whose base register also appears in the register list.
def IsLdmBaseRegInList :
  SchedPredicate<[{TII->isLDMBaseRegInList(*MI)}]>;
|
|
78
|
|
// Carrier for an ordered list of writes; the load-multiple variants below
// take slices of Writes. SchedModel is declared but left unset -- presumably
// so an enclosing "let SchedModel = ... in" can bind it (TODO confirm).
class A57WriteLMOpsListType<list<SchedWriteRes> writes> {
  list<SchedWriteRes> Writes = writes;
  SchedMachineModel SchedModel = ?;
}
|
|
83
|
|
// *** Common description and scheduling model parameters taken from AArch64
// *** (AArch64SchedA57.td)
def CortexA57Model : SchedMachineModel {
  let IssueWidth        = 3;   // 3-way decode and dispatch
  let MicroOpBufferSize = 128; // 128 micro-op re-order buffer
  let LoadLatency       = 4;   // Optimistic load latency
  let MispredictPenalty = 16;  // Fetch + Decode/Rename/Dispatch + Branch

  // Enable partial & runtime unrolling.
  let LoopMicroOpBufferSize = 16;

  let CompleteModel = 1;
}
|
|
96
|
|
97 //===----------------------------------------------------------------------===//
|
|
98 // Define each kind of processor resource and number available on Cortex-A57.
|
|
99 // Cortex A-57 has 8 pipelines that each has its own 8-entry queue where
|
|
100 // micro-ops wait for their operands and then issue out-of-order.
|
|
101
|
|
102 def A57UnitB : ProcResource<1>; // Type B micro-ops
|
|
103 def A57UnitI : ProcResource<2>; // Type I micro-ops
|
|
104 def A57UnitM : ProcResource<1>; // Type M micro-ops
|
|
105 def A57UnitL : ProcResource<1>; // Type L micro-ops
|
|
106 def A57UnitS : ProcResource<1>; // Type S micro-ops
|
|
107
|
|
108 def A57UnitX : ProcResource<1>; // Type X micro-ops (F1)
|
|
109 def A57UnitW : ProcResource<1>; // Type W micro-ops (F0)
|
|
110
|
|
111 let SchedModel = CortexA57Model in {
|
|
112 def A57UnitV : ProcResGroup<[A57UnitX, A57UnitW]>; // Type V micro-ops
|
|
113 }
|
|
114
|
|
115 let SchedModel = CortexA57Model in {
|
|
116
|
|
117 //===----------------------------------------------------------------------===//
|
|
118 // Define customized scheduler read/write types specific to the Cortex-A57.
|
|
119
|
|
120 include "ARMScheduleA57WriteRes.td"
|
|
121
|
|
// "CompleteModel = 1" requires every instruction to be covered, so pseudos
// and special instructions are mapped to WriteNoop.
def : InstRW<[WriteNoop], (instregex
  "(t)?BKPT$",
  "(t2)?CDP(2)?$",
  "(t2)?CLREX$",
  "CONSTPOOL_ENTRY$",
  "COPY_STRUCT_BYVAL_I32$",
  "(t2)?CPS[123]p$",
  "(t2)?DBG$",
  "(t2)?DMB$",
  "(t2)?DSB$",
  "ERET$",
  "(t2|t)?HINT$",
  "(t)?HLT$",
  "(t2)?HVC$",
  "(t2)?ISB$",
  "ITasm$",
  "(t2)?RFE(DA|DB|IA|IB)",
  "(t)?SETEND",
  "(t2)?SETPAN",
  "(t2)?SMC",
  "SPACE",
  "(t2)?SRS(DA|DB|IA|IB)",
  "SWP(B)?",
  "t?TRAP",
  "UDF$",
  "t2DCPS",
  "t2SG",
  "t2TT",
  "tCPS",
  "CMP_SWAP",
  "t?SVC",
  "t2IT",
  "CompilerBarrier")>;

// FP status/system register transfers.
def : InstRW<[WriteNoop], (instregex "VMRS", "VMSR", "FMSTAT")>;
|
|
132
|
|
// Specific memory instructions (two WriteNoop entries each).
def : InstRW<[WriteNoop, WriteNoop], (instregex
  "(t2)?LDA",
  "(t2)?LDC",
  "(t2)?STC",
  "(t2)?STL",
  "(t2)?LDREX",
  "(t2)?STREX",
  "MEMCPY")>;

// Coprocessor and system register moves.
def : InstRW<[WriteNoop, WriteNoop], (instregex
  "(t2)?MCR(2|R|R2)?$",
  "(t2)?MRC(2)?$",
  "(t2)?MRRC(2)?$",
  "(t2)?MRS(banked|sys|_AR|_M|sys_AR)?$",
  "(t2)?MSR(banked|i|_AR|_M)?$")>;
|
|
142
|
|
// Deprecated instructions.
def : InstRW<[WriteNoop], (instregex "FLDM", "FSTM")>;

// Pseudo instructions.
def : InstRW<[WriteNoop], (instregex
  "(t2)?ABS$",
  "(t)?ADJCALLSTACKDOWN$",
  "(t)?ADJCALLSTACKUP$",
  "(t2|t)?Int_eh_sjlj",
  "tLDRpci_pic",
  "t2SUBS_PC_LR",
  "JUMPTABLE",
  "tInt_WIN_eh_sjlj_longjmp",
  "VLD(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm",
  "VLD(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm",
  "VST(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm",
  "VST(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm",
  "WIN__CHKSTK",
  "WIN__DBZCHK")>;
|
|
156
|
|
// Miscellaneous
// -----------------------------------------------------------------------------

// COPY is modelled as a single-cycle I micro-op.
def : InstRW<[A57Write_1cyc_1I], (instrs COPY)>;

// --- 3.2 Branch Instructions ---
// B, BX, BL, BLX (imm, reg != LR, reg == LR), CBZ, CBNZ

def : InstRW<[A57Write_1cyc_1B], (instregex
  "(t2|t)?B$",
  "t?BX",
  "(t2|t)?Bcc$",
  "t?TAILJMP(d|r)",
  "TCRETURN(d|r)i",
  "tBfar",
  "tCBN?Z")>;

def : InstRW<[A57Write_1cyc_1B_1I], (instregex
  "t?BL$",
  "BL_pred$",
  "t?BLXi",
  "t?TPsoft")>;

def : InstRW<[A57Write_2cyc_1B_1I], (instregex "BLX", "tBLX(NS)?r")>;

// Branch pseudos.
def : InstRW<[A57Write_2cyc_1B_1I], (instregex "BCCi64", "BCCZi64")>;

def : InstRW<[A57Write_3cyc_1B_1I], (instregex
  "BR_JTadd",
  "t?BR_JTr",
  "t2BR_JT",
  "t2BXJ",
  "(t2)?TB(B|H)(_JT)?$",
  "tBRIND")>;

def : InstRW<[A57Write_6cyc_1B_1L], (instregex "BR_JTm")>;
|
|
175
|
|
// --- 3.3 Arithmetic and Logical Instructions ---
// ADD{S}, ADC{S}, ADR, AND{S}, BIC{S}, CMN, CMP, EOR{S}, ORN{S}, ORR{S},
// RSB{S}, RSC{S}, SUB{S}, SBC{S}, TEQ, TST

def : InstRW<[A57Write_1cyc_1I], (instregex "tADDframe")>;

// Shift by immediate. The predicated and the unpredicated variant currently
// select the same write (2 cycles on M).
def A57WriteALUsi : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [A57Write_2cyc_1M]>,
  SchedVar<NoSchedPred,      [A57Write_2cyc_1M]>
]>;

// Shift by register, conditional or unconditional.
// TODO: according to the documentation the conditional form uses I0/I1 while
// the unconditional form uses M. It is unclear why the more complex
// instruction would use the simpler pipeline; this may be an error in the
// documentation.
def A57WriteALUsr : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
  SchedVar<NoSchedPred,      [A57Write_2cyc_1M]>
]>;
def A57WriteALUSsr : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
  SchedVar<NoSchedPred,      [A57Write_2cyc_1M]>
]>;

// Read side of the shift-by-register forms; both variants use ReadDefault.
def A57ReadALUsr : SchedReadVariant<[
  SchedVar<IsPredicatedPred, [ReadDefault]>,
  SchedVar<NoSchedPred,      [ReadDefault]>
]>;

// Bind the generic ARM scheduling types to the Cortex-A57 variants above.
def : SchedAlias<WriteALUsi,  A57WriteALUsi>;
def : SchedAlias<WriteALUsr,  A57WriteALUsr>;
def : SchedAlias<WriteALUSsr, A57WriteALUSsr>;
def : SchedAlias<ReadALUsr,   A57ReadALUsr>;
|
|
207
|
|
// Compare with a shift-by-register operand: the predicated form uses I, the
// unpredicated form uses M (2 cycles either way).
def A57WriteCMPsr : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
  SchedVar<NoSchedPred,      [A57Write_2cyc_1M]>
]>;

// Bind the generic compare scheduling types.
def : SchedAlias<WriteCMP,   A57Write_1cyc_1I>;
def : SchedAlias<WriteCMPsi, A57Write_2cyc_1M>;
def : SchedAlias<WriteCMPsr, A57WriteCMPsr>;
|
|
215
|
|
// --- 3.4 Move and Shift Instructions ---
// Move, basic: MOV{S}, MOVW, MVN{S}
def : InstRW<[A57Write_1cyc_1I], (instregex
  "MOV(r|i|i16|r_TC)",
  "(t2)?MVN(CC)?(r|i)",
  "BMOVPCB_CALL",
  "BMOVPCRX_CALL",
  "MOVCC(r|i|i16|i32imm)",
  "tMOV",
  "tMVN")>;

// Move, shift by immediate, with or without setflags.
// (ASR, LSL, LSR, ROR, RRX) = MOVsi, MVN
// "setflags" is detected via isCPSRDefined: 2 cycles on M when flags are set,
// otherwise 1 cycle on I.
def A57WriteMOVsi : SchedWriteVariant<[
  SchedVar<IsCPSRDefinedPred, [A57Write_2cyc_1M]>,
  SchedVar<NoSchedPred,       [A57Write_1cyc_1I]>
]>;
def : InstRW<[A57WriteMOVsi], (instregex
  "MOV(CC)?si",
  "MVNsi",
  "ASRi",
  "(t2|t)ASRri",
  "LSRi",
  "(t2|t)LSRri",
  "LSLi",
  "(t2|t)LSLri",
  "RORi",
  "(t2|t)RORri",
  "(t2)?RRX",
  "t2MOV",
  "tROR")>;
|
|
233
|
|
// Move, shift by register: conditional or unconditional, with or without
// setflags. Variants are tried in order, so the combined
// "CPSR defined and predicated" case must be listed first.
def A57WriteMOVsr : SchedWriteVariant<[
  SchedVar<IsCPSRDefinedAndPredicatedPred, [A57Write_2cyc_1I]>,
  SchedVar<IsCPSRDefinedPred,              [A57Write_2cyc_1M]>,
  SchedVar<IsPredicatedPred,               [A57Write_2cyc_1I]>,
  SchedVar<NoSchedPred,                    [A57Write_1cyc_1I]>
]>;
def : InstRW<[A57WriteMOVsr], (instregex
  "MOV(CC)?sr",
  "MVNsr",
  "t2MVNs",
  "ASRr",
  "(t2|t)ASRrr",
  "LSRr",
  "(t2|t)LSRrr",
  "LSLr",
  "(t2|t)?LSLrr",
  "RORr",
  "(t2|t)RORrr")>;
|
|
244
|
|
// Move, top
// MOVT: A57Write_2cyc_1M for r0px, A57Write_1cyc_1I for r1p0 and later.
def A57WriteMOVT : SchedWriteVariant<[
  SchedVar<IsR1P0AndLaterPred, [A57Write_1cyc_1I]>,
  SchedVar<NoSchedPred,        [A57Write_2cyc_1M]>
]>;
def : InstRW<[A57WriteMOVT], (instregex "MOVTi16")>;

// Global-address pseudos, modelled as sequences: three I micro-ops for the
// PC-relative form, two I micro-ops followed by a load for the "_ldr" form.
def A57WriteI2pc :
  WriteSequence<[A57Write_1cyc_1I, A57Write_1cyc_1I, A57Write_1cyc_1I]>;
def A57WriteI2ld :
  WriteSequence<[A57Write_1cyc_1I, A57Write_1cyc_1I, A57Write_4cyc_1L]>;
def : InstRW<[A57WriteI2pc], (instregex "MOV_ga_pcrel")>;
def : InstRW<[A57WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>;

// +2cyc for branch forms
def : InstRW<[A57Write_3cyc_1I], (instregex "MOVPC(LR|RX)")>;
|
|
262
|
|
// --- 3.5 Divide and Multiply Instructions ---
// Divide: SDIV, UDIV
// The documented latency is 4-20 cycles; the maximum is used here.
def : SchedAlias<WriteDIV, A57Write_20cyc_1M>;

// Multiply: tMUL is not bound to the common WriteRes types, so it is mapped
// explicitly.
def : InstRW<[A57Write_3cyc_1M], (instregex "tMUL")>;
def : SchedAlias<WriteMUL16, A57Write_3cyc_1M>;
def : SchedAlias<WriteMUL32, A57Write_3cyc_1M>;
def : ReadAdvance<ReadMUL, 0>;
|
|
272
|
|
// Multiply accumulate: MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB,
// SMLAWT, SMLAD{X}, SMLSD{X}, SMMLA{R}, SMMLS{R}
// The multiply-accumulate pipelines support late forwarding of accumulate
// operands from similar micro-ops, which lets a typical sequence of
// multiply-accumulate micro-ops issue one per cycle (sched advance = 2).
def A57WriteMLA  : SchedWriteRes<[A57UnitM]> { let Latency = 3; }
def A57WriteMLAL : SchedWriteRes<[A57UnitM]> { let Latency = 4; }
def A57ReadMLA   : SchedReadAdvance<2, [A57WriteMLA, A57WriteMLAL]>;

def : SchedAlias<WriteMAC16, A57WriteMLA>;
def : SchedAlias<WriteMAC32, A57WriteMLA>;
def : SchedAlias<ReadMAC,    A57ReadMLA>;

def : SchedAlias<WriteMAC64Lo, A57WriteMLAL>;
def : SchedAlias<WriteMAC64Hi, A57WriteMLAL>;

// Multiply long: SMULL, UMULL
def : SchedAlias<WriteMUL64Lo, A57Write_4cyc_1M>;
def : SchedAlias<WriteMUL64Hi, A57Write_4cyc_1M>;
|
|
292
|
|
// --- 3.6 Saturating and Parallel Arithmetic Instructions ---
// Parallel arith
// SADD16, SADD8, SSUB16, SSUB8, UADD16, UADD8, USUB16, USUB8
// Conditional GE-setting instructions require three extra micro-ops and two
// additional cycles to conditionally update the GE field.
def A57WriteParArith : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [A57Write_4cyc_1I_1M]>,
  SchedVar<NoSchedPred,      [A57Write_2cyc_1I_1M]>
]>;
def : InstRW<[A57WriteParArith], (instregex
  "(t2)?SADD(16|8)",
  "(t2)?SSUB(16|8)",
  "(t2)?UADD(16|8)",
  "(t2)?USUB(16|8)")>;

// Parallel arith with exchange: SASX, SSAX, UASX, USAX
def A57WriteParArithExch : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [A57Write_5cyc_1I_1M]>,
  SchedVar<NoSchedPred,      [A57Write_3cyc_1I_1M]>
]>;
def : InstRW<[A57WriteParArithExch], (instregex
  "(t2)?SASX",
  "(t2)?SSAX",
  "(t2)?UASX",
  "(t2)?USAX")>;
|
|
313
|
|
// Parallel halving arith
// SHADD16, SHADD8, SHSUB16, SHSUB8, UHADD16, UHADD8, UHSUB16, UHSUB8
def : InstRW<[A57Write_2cyc_1M], (instregex
  "(t2)?SHADD(16|8)",
  "(t2)?SHSUB(16|8)",
  "(t2)?UHADD(16|8)",
  "(t2)?UHSUB(16|8)")>;

// Parallel halving arith with exchange
// SHASX, SHSAX, UHASX, UHSAX
def : InstRW<[A57Write_3cyc_1I_1M], (instregex
  "(t2)?SHASX",
  "(t2)?SHSAX",
  "(t2)?UHASX",
  "(t2)?UHSAX")>;

// Parallel saturating arith
// QADD16, QADD8, QSUB16, QSUB8, UQADD16, UQADD8, UQSUB16, UQSUB8
def : InstRW<[A57Write_2cyc_1M], (instregex
  "QADD(16|8)",
  "QSUB(16|8)",
  "UQADD(16|8)",
  "UQSUB(16|8)",
  "t2(U?)QADD",
  "t2(U?)QSUB")>;

// Parallel saturating arith with exchange
// QASX, QSAX, UQASX, UQSAX
def : InstRW<[A57Write_3cyc_1I_1M], (instregex
  "(t2)?QASX",
  "(t2)?QSAX",
  "(t2)?UQASX",
  "(t2)?UQSAX")>;

// Saturate: SSAT, SSAT16, USAT, USAT16
def : InstRW<[A57Write_2cyc_1M], (instregex
  "(t2)?SSAT(16)?",
  "(t2)?USAT(16)?")>;

// Saturating arith: QADD, QSUB
def : InstRW<[A57Write_2cyc_1M], (instregex "QADD$", "QSUB$")>;

// Saturating doubling arith: QDADD, QDSUB
def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?QDADD", "(t2)?QDSUB")>;
|
|
344
|
|
// --- 3.7 Miscellaneous Data-Processing Instructions ---
// Bit field extract: SBFX, UBFX
def : InstRW<[A57Write_1cyc_1I], (instregex "(t2)?SBFX", "(t2)?UBFX")>;

// Bit field insert/clear: BFI, BFC
def : InstRW<[A57Write_2cyc_1M], (instregex "(t2)?BFI", "(t2)?BFC")>;

// Select bytes: 2 cycles when predicated, 1 cycle otherwise.
def A57WriteSEL : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
  SchedVar<NoSchedPred,      [A57Write_1cyc_1I]>
]>;
def : InstRW<[A57WriteSEL], (instregex "(t2)?SEL")>;

// Sign/zero extend, normal: SXTB, SXTH, UXTB, UXTH
def : InstRW<[A57Write_1cyc_1I], (instregex
  "(t2|t)?SXT(B|H)$",
  "(t2|t)?UXT(B|H)$")>;

// Sign/zero extend and add, normal: SXTAB, SXTAH, UXTAB, UXTAH
def : InstRW<[A57Write_2cyc_1M], (instregex
  "(t2)?SXTA(B|H)$",
  "(t2)?UXTA(B|H)$")>;

// Sign/zero extend and add, parallel: SXTAB16, UXTAB16
def : InstRW<[A57Write_4cyc_1M], (instregex "(t2)?SXTAB16", "(t2)?UXTAB16")>;

// Sum of absolute differences: USAD8, USADA8
def : InstRW<[A57Write_3cyc_1M], (instregex "(t2)?USAD8", "(t2)?USADA8")>;
|
|
372
|
|
// --- 3.8 Load Instructions ---

// Load, immediate offset.
// LDR and LDRB have dedicated LDRi12 and LDRBi12 forms for the immediate case.
def : InstRW<[A57Write_4cyc_1L], (instregex
  "LDRi12",
  "LDRBi12",
  "LDRcp",
  "(t2|t)?LDRConstPool",
  "LDRLIT_ga_(pcrel|abs)",
  "PICLDR",
  "tLDR")>;

// Thumb2 loads without writeback.
def : InstRW<[A57Write_4cyc_1L], (instregex
  "t2LDRS?(B|H)?(pcrel|T|i8|i12|pci|pci_pic|s)?$")>;
|
|
383
|
|
// Addrmode3 loads. "Load, register offset, minus" needs +1 cycle and +1 I
// micro-op compared with the plain 4-cycle L form.
def A57WriteLdrAm3 : SchedWriteVariant<[
  SchedVar<IsLdrAm3NegRegOffPred, [A57Write_5cyc_1I_1L]>,
  SchedVar<NoSchedPred,           [A57Write_4cyc_1L]>
]>;
def : InstRW<[A57WriteLdrAm3], (instregex "LDR(H|SH|SB)$")>;

// LDRD: same rule, with the offset checked at operand index 2; one write per
// destination register.
def A57WriteLdrAm3X2 : SchedWriteVariant<[
  SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_5cyc_1I_1L]>,
  SchedVar<NoSchedPred,             [A57Write_4cyc_1L]>
]>;
def : InstRW<[A57WriteLdrAm3X2, A57WriteLdrAm3X2], (instregex "LDRD$")>;
def : InstRW<[A57Write_4cyc_1L, A57Write_4cyc_1L], (instregex "t2LDRDi8")>;
|
|
396
|
|
// Load, register offset: 5cyc "I, L" for a scaled offset that is not plus
// LSL2 or for a minus register; otherwise 4cyc "L".
def A57WriteLdrAmLDSTSO : SchedWriteVariant<[
  SchedVar<IsLdstsoScaledNotOptimalPred, [A57Write_5cyc_1I_1L]>,
  SchedVar<IsLdstsoMinusRegPred,         [A57Write_5cyc_1I_1L]>,
  SchedVar<NoSchedPred,                  [A57Write_4cyc_1L]>
]>;
def : InstRW<[A57WriteLdrAmLDSTSO], (instregex "LDRrs", "LDRBrs")>;
|
|
403
|
|
// Writes that model only the latency of the base-register writeback: they
// use no processor resource and contribute zero micro-ops.
def A57WrBackOne   : SchedWriteRes<[]> { let Latency = 1; let NumMicroOps = 0; }
def A57WrBackTwo   : SchedWriteRes<[]> { let Latency = 2; let NumMicroOps = 0; }
def A57WrBackThree : SchedWriteRes<[]> { let Latency = 3; let NumMicroOps = 0; }
|
|
416
|
|
// --- LDR pre-indexed ---
// Load, immediate pre-indexed: 4 cycles for the load result, 1 cycle for the
// base update.
def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackOne], (instregex
  "LDR_PRE_IMM",
  "LDRB_PRE_IMM",
  "t2LDRB_PRE")>;

// Load, register pre-indexed: 4 cycles for the load result, 2 cycles for the
// base update; the load result takes 5 cycles for a scaled offset that is not
// plus LSL2.
def A57WriteLdrAmLDSTSOPre : SchedWriteVariant<[
  SchedVar<IsLdstsoScaledNotOptimalPredX2, [A57Write_5cyc_1I_1L]>,
  SchedVar<NoSchedPred,                    [A57Write_4cyc_1L_1I]>
]>;
def : InstRW<[A57WriteLdrAmLDSTSOPre, A57WrBackTwo], (instregex
  "LDR_PRE_REG",
  "LDRB_PRE_REG")>;
|
|
430
|
|
// Addrmode3 pre-indexed loads: the base update takes 2 cycles for a register
// offset (operand index 2) and 1 cycle for an immediate offset.
def A57WriteLdrAm3PreWrBack : SchedWriteVariant<[
  SchedVar<IsLdrAm3RegOffPredX2, [A57WrBackTwo]>,
  SchedVar<NoSchedPred,          [A57WrBackOne]>
]>;
def : InstRW<[A57Write_4cyc_1L, A57WriteLdrAm3PreWrBack], (instregex
  "LDR(H|SH|SB)_PRE")>;

// Thumb2 pre-indexed loads: always a 1-cycle base update.
def : InstRW<[A57Write_4cyc_1L, A57WrBackOne], (instregex
  "t2LDR(H|SH|SB)?_PRE")>;
|
|
439
|
|
// LDRD pre-indexed: 5(2) cycles for a register offset, 4(1) cycles for an
// immediate offset (the number in parentheses is the base-update latency).
def A57WriteLdrDAm3Pre : SchedWriteVariant<[
  SchedVar<IsLdrAm3RegOffPredX3, [A57Write_5cyc_1I_1L]>,
  SchedVar<NoSchedPred,          [A57Write_4cyc_1L_1I]>
]>;
def A57WriteLdrDAm3PreWrBack : SchedWriteVariant<[
  SchedVar<IsLdrAm3RegOffPredX3, [A57WrBackTwo]>,
  SchedVar<NoSchedPred,          [A57WrBackOne]>
]>;
def : InstRW<[A57WriteLdrDAm3Pre, A57WriteLdrDAm3Pre,
              A57WriteLdrDAm3PreWrBack],
  (instregex "LDRD_PRE")>;
def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I,
              A57WrBackOne],
  (instregex "t2LDRD_PRE")>;
|
|
453
|
|
// --- LDR post-indexed ---
// Immediate post-indexed: 4-cycle load result, 1-cycle base update.
def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackOne], (instregex
  "LDR(T?)_POST_IMM",
  "LDRB(T?)_POST_IMM",
  "LDR(SB|H|SH)Ti",
  "t2LDRB_POST")>;

// Addrmode3 post-indexed: the base update takes 2 cycles for a register
// offset (operand index 1), 1 cycle otherwise.
def A57WriteLdrAm3PostWrBack : SchedWriteVariant<[
  SchedVar<IsLdrAm3RegOffPred, [A57WrBackTwo]>,
  SchedVar<NoSchedPred,        [A57WrBackOne]>
]>;
def : InstRW<[A57Write_4cyc_1L_1I, A57WriteLdrAm3PostWrBack], (instregex
  "LDR(H|SH|SB)_POST")>;
def : InstRW<[A57Write_4cyc_1L, A57WrBackOne], (instregex
  "t2LDR(H|SH|SB)?_POST")>;

// Register post-indexed: 4-cycle load result, 2-cycle base update.
def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex
  "LDR_POST_REG",
  "LDRB_POST_REG",
  "LDR(B?)T_POST$")>;
|
|
469
|
|
// LDRT/LDRBT register post-indexed:
//   4(3) "I0/I1, L, M" for a scaled register offset,
//   4(2) "I0/I1, L"    otherwise.
def A57WriteLdrTRegPost : SchedWriteVariant<[
  SchedVar<IsLdrAm2ScaledPred, [A57Write_4cyc_1I_1L_1M]>,
  SchedVar<NoSchedPred,        [A57Write_4cyc_1L_1I]>
]>;
def A57WriteLdrTRegPostWrBack : SchedWriteVariant<[
  SchedVar<IsLdrAm2ScaledPred, [A57WrBackThree]>,
  SchedVar<NoSchedPred,        [A57WrBackTwo]>
]>;
def : InstRW<[A57WriteLdrTRegPost, A57WriteLdrTRegPostWrBack], (instregex
  "LDRT_POST_REG",
  "LDRBT_POST_REG")>;

// LDRSBT/LDRHT/LDRSHT with register offset: 4-cycle result, 2-cycle base
// update.
def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR(SB|H|SH)Tr")>;
|
|
483
|
|
// LDRD post-indexed: 4(2) cycles for a register offset, 4(1) cycles for an
// immediate offset. The offset kind is checked at operand index 3.
def A57WriteLdrAm3PostWrBackX3 : SchedWriteVariant<[
  SchedVar<IsLdrAm3RegOffPredX3, [A57WrBackTwo]>,
  SchedVar<NoSchedPred,          [A57WrBackOne]>
]>;
def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I,
              A57WriteLdrAm3PostWrBackX3],
  (instregex "LDRD_POST")>;
def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I,
              A57WrBackOne],
  (instregex "t2LDRD_POST")>;
|
|
493
|
|
// --- Preload instructions ---
// Preload, immediate offset.
def : InstRW<[A57Write_4cyc_1L], (instregex
  "(t2)?PLDi12",
  "(t2)?PLDWi12",
  "t2PLDW?(i8|pci|s)",
  "(t2)?PLI")>;

// Preload, register offset:
//   5cyc "I0/I1, L" for a minus register or a scaled offset not plus LSL2,
//   4cyc "L"        otherwise.
def A57WritePLD : SchedWriteVariant<[
  SchedVar<IsLdstsoScaledNotOptimalPredX0, [A57Write_5cyc_1I_1L]>,
  SchedVar<IsLdstsoMinusRegPredX0,         [A57Write_5cyc_1I_1L]>,
  SchedVar<NoSchedPred,                    [A57Write_4cyc_1L]>
]>;
def : InstRW<[A57WritePLD], (instregex "PLDrs", "PLDWrs")>;
|
|
508
|
|
// --- Load multiple instructions ---
// A57LMAddrPred<N>, for N in 1..8, holds when
// (getLDMVariableDefsSize(MI) + 1) / 2 equals N.
foreach NumAddr = 1-8 in {
  def A57LMAddrPred#NumAddr :
    SchedPredicate<"(TII->getLDMVariableDefsSize(*MI)+1)/2 == "#NumAddr>;
}
|
|
514
|
|
// LDM without the base register in the list: two writes per step, starting at
// 3 cycles and growing by one cycle per step.
def A57LDMOpsListNoregin : A57WriteLMOpsListType<[
  A57Write_3cyc_1L,  A57Write_3cyc_1L,
  A57Write_4cyc_1L,  A57Write_4cyc_1L,
  A57Write_5cyc_1L,  A57Write_5cyc_1L,
  A57Write_6cyc_1L,  A57Write_6cyc_1L,
  A57Write_7cyc_1L,  A57Write_7cyc_1L,
  A57Write_8cyc_1L,  A57Write_8cyc_1L,
  A57Write_9cyc_1L,  A57Write_9cyc_1L,
  A57Write_10cyc_1L, A57Write_10cyc_1L
]>;

// Picks the prefix of the list matching the A57LMAddrPred<N> that holds; the
// full list is the fallback.
def A57WriteLDMnoreginlist : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, A57LDMOpsListNoregin.Writes[0-1]>,
  SchedVar<A57LMAddrPred2, A57LDMOpsListNoregin.Writes[0-3]>,
  SchedVar<A57LMAddrPred3, A57LDMOpsListNoregin.Writes[0-5]>,
  SchedVar<A57LMAddrPred4, A57LDMOpsListNoregin.Writes[0-7]>,
  SchedVar<A57LMAddrPred5, A57LDMOpsListNoregin.Writes[0-9]>,
  SchedVar<A57LMAddrPred6, A57LDMOpsListNoregin.Writes[0-11]>,
  SchedVar<A57LMAddrPred7, A57LDMOpsListNoregin.Writes[0-13]>,
  SchedVar<A57LMAddrPred8, A57LDMOpsListNoregin.Writes[0-15]>,
  SchedVar<NoSchedPred,    A57LDMOpsListNoregin.Writes[0-15]>
]> { let Variadic = 1; }
|
|
535
|
|
// LDM with the base register in the list: two writes per step, starting at
// 4 cycles, each also using an I micro-op.
def A57LDMOpsListRegin : A57WriteLMOpsListType<[
  A57Write_4cyc_1L_1I,  A57Write_4cyc_1L_1I,
  A57Write_5cyc_1L_1I,  A57Write_5cyc_1L_1I,
  A57Write_6cyc_1L_1I,  A57Write_6cyc_1L_1I,
  A57Write_7cyc_1L_1I,  A57Write_7cyc_1L_1I,
  A57Write_8cyc_1L_1I,  A57Write_8cyc_1L_1I,
  A57Write_9cyc_1L_1I,  A57Write_9cyc_1L_1I,
  A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I,
  A57Write_11cyc_1L_1I, A57Write_11cyc_1L_1I
]>;

// Picks the prefix of the list matching the A57LMAddrPred<N> that holds; the
// full list is the fallback.
def A57WriteLDMreginlist : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, A57LDMOpsListRegin.Writes[0-1]>,
  SchedVar<A57LMAddrPred2, A57LDMOpsListRegin.Writes[0-3]>,
  SchedVar<A57LMAddrPred3, A57LDMOpsListRegin.Writes[0-5]>,
  SchedVar<A57LMAddrPred4, A57LDMOpsListRegin.Writes[0-7]>,
  SchedVar<A57LMAddrPred5, A57LDMOpsListRegin.Writes[0-9]>,
  SchedVar<A57LMAddrPred6, A57LDMOpsListRegin.Writes[0-11]>,
  SchedVar<A57LMAddrPred7, A57LDMOpsListRegin.Writes[0-13]>,
  SchedVar<A57LMAddrPred8, A57LDMOpsListRegin.Writes[0-15]>,
  SchedVar<NoSchedPred,    A57LDMOpsListRegin.Writes[0-15]>
]> { let Variadic = 1; }
|
|
556
|
|
// LDM with base update: the first entry is the 1-cycle writeback, followed by
// two writes per step starting at 3 cycles.
def A57LDMOpsList_Upd : A57WriteLMOpsListType<[
  A57WrBackOne,
  A57Write_3cyc_1L_1I,  A57Write_3cyc_1L_1I,
  A57Write_4cyc_1L_1I,  A57Write_4cyc_1L_1I,
  A57Write_5cyc_1L_1I,  A57Write_5cyc_1L_1I,
  A57Write_6cyc_1L_1I,  A57Write_6cyc_1L_1I,
  A57Write_7cyc_1L_1I,  A57Write_7cyc_1L_1I,
  A57Write_8cyc_1L_1I,  A57Write_8cyc_1L_1I,
  A57Write_9cyc_1L_1I,  A57Write_9cyc_1L_1I,
  A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I
]>;

// Slices are one entry longer than in the non-updating variants because of
// the leading writeback entry.
def A57WriteLDM_Upd : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, A57LDMOpsList_Upd.Writes[0-2]>,
  SchedVar<A57LMAddrPred2, A57LDMOpsList_Upd.Writes[0-4]>,
  SchedVar<A57LMAddrPred3, A57LDMOpsList_Upd.Writes[0-6]>,
  SchedVar<A57LMAddrPred4, A57LDMOpsList_Upd.Writes[0-8]>,
  SchedVar<A57LMAddrPred5, A57LDMOpsList_Upd.Writes[0-10]>,
  SchedVar<A57LMAddrPred6, A57LDMOpsList_Upd.Writes[0-12]>,
  SchedVar<A57LMAddrPred7, A57LDMOpsList_Upd.Writes[0-14]>,
  SchedVar<A57LMAddrPred8, A57LDMOpsList_Upd.Writes[0-16]>,
  SchedVar<NoSchedPred,    A57LDMOpsList_Upd.Writes[0-16]>
]> { let Variadic = 1; }
|
|
578
|
|
// Top-level LDM variant: chooses between the "base register in list" and
// "base register not in list" variants.
def A57WriteLDM : SchedWriteVariant<[
  SchedVar<IsLdmBaseRegInList, [A57WriteLDMreginlist]>,
  SchedVar<NoSchedPred,        [A57WriteLDMnoreginlist]>
]> { let Variadic = 1; }

def : InstRW<[A57WriteLDM], (instregex "(t|t2|sys)?LDM(IA|DA|DB|IB)$")>;

// TODO: the documentation defines no writeback latency (implemented as 1 cyc).
def : InstRW<[A57WriteLDM_Upd], (instregex
  "(t|t2|sys)?LDM(IA_UPD|DA_UPD|DB_UPD|IB_UPD|IA_RET)",
  "tPOP")>;
|
|
589
|
|
// --- 3.9 Store Instructions ---

// Store, immediate offset.
def : InstRW<[A57Write_1cyc_1S], (instregex
  "STRi12",
  "STRBi12",
  "PICSTR",
  "t2STR(B?)(T|i12|i8|s)",
  "t2STRDi8",
  "t2STRH(i12|i8|s)",
  "tSTR")>;

// Store, register offset:
//   3cyc "I0/I1, S" for a minus register or a scaled offset not plus LSL2,
//   1cyc "S"        otherwise.
def A57WriteStrAmLDSTSO : SchedWriteVariant<[
  SchedVar<IsLdstsoScaledNotOptimalPred, [A57Write_3cyc_1I_1S]>,
  SchedVar<IsLdstsoMinusRegPred,         [A57Write_3cyc_1I_1S]>,
  SchedVar<NoSchedPred,                  [A57Write_1cyc_1S]>
]>;
def : InstRW<[A57WriteStrAmLDSTSO], (instregex "STRrs", "STRBrs")>;
|
|
605
|
|
// STRH, STRD: 3cyc "I0/I1, S" for a minus register; 1cyc "S" for an immediate
// or a plus register. STRH checks operand index 1, STRD operand index 2.
def A57WriteStrAm3 : SchedWriteVariant<[
  SchedVar<IsLdrAm3NegRegOffPred, [A57Write_3cyc_1I_1S]>,
  SchedVar<NoSchedPred,           [A57Write_1cyc_1S]>
]>;
def : InstRW<[A57WriteStrAm3], (instregex "STRH$")>;

def A57WriteStrAm3X2 : SchedWriteVariant<[
  SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_3cyc_1I_1S]>,
  SchedVar<NoSchedPred,             [A57Write_1cyc_1S]>
]>;
def : InstRW<[A57WriteStrAm3X2], (instregex "STRD$")>;
|
|
617
|
|
// Store, immediate pre-indexed: 1cyc "S, I0/I1" plus a 1-cycle writeback.
// The writeback write is listed first.
def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], (instregex
  "STR_PRE_IMM",
  "STRB_PRE_IMM",
  "STR(B)?(r|i)_preidx",
  "(t2)?STRH_(preidx|PRE)",
  "t2STR(B?)_(PRE|preidx)",
  "t2STRD_PRE")>;
|
|
622
|
|
// Store, register pre-indexed (writeback latency in parentheses):
//   1(1) "S, I0/I1" for plus register
//   3(2) "I0/I1, S" for minus register
//   1(2) "S, M"     for scaled plus LSL2
//   3(2) "I0/I1, S" for any other scaled offset
// Variants are tried in order, so the "not plus LSL2" and "minus register"
// cases are listed before the generic "scaled" case.
def A57WriteStrAmLDSTSOPre : SchedWriteVariant<[
  SchedVar<IsLdstsoScaledNotOptimalPredX2, [A57Write_3cyc_1I_1S]>,
  SchedVar<IsLdstsoMinusRegPredX2,         [A57Write_3cyc_1I_1S]>,
  SchedVar<IsLdstsoScaledPredX2,           [A57Write_1cyc_1S_1M]>,
  SchedVar<NoSchedPred,                    [A57Write_1cyc_1S_1I]>
]>;
def A57WriteStrAmLDSTSOPreWrBack : SchedWriteVariant<[
  SchedVar<IsLdstsoScaledPredX2,   [A57WrBackTwo]>,
  SchedVar<IsLdstsoMinusRegPredX2, [A57WrBackTwo]>,
  SchedVar<NoSchedPred,            [A57WrBackOne]>
]>;
def : InstRW<[A57WriteStrAmLDSTSOPreWrBack, A57WriteStrAmLDSTSOPre], (instregex
  "STR_PRE_REG",
  "STRB_PRE_REG")>;
|
|
641
|
|
// Pre-indexed STRH/STRD (STRH_PRE, STRD_PRE), writeback latency in
// parentheses:
//   1(1) "S, I0/I1" for an immediate or a plus register
//   3(2) "I0/I1, S" for a minus register
// STRH_PRE checks operand index 2, STRD_PRE operand index 3.
def A57WriteStrAm3PreX2 : SchedWriteVariant<[
  SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_3cyc_1I_1S]>,
  SchedVar<NoSchedPred,             [A57Write_1cyc_1S_1I]>
]>;
def A57WriteStrAm3PreWrBackX2 : SchedWriteVariant<[
  SchedVar<IsLdrAm3NegRegOffPredX2, [A57WrBackTwo]>,
  SchedVar<NoSchedPred,             [A57WrBackOne]>
]>;
def : InstRW<[A57WriteStrAm3PreWrBackX2, A57WriteStrAm3PreX2], (instregex
  "STRH_PRE")>;

def A57WriteStrAm3PreX3 : SchedWriteVariant<[
  SchedVar<IsLdrAm3NegRegOffPredX3, [A57Write_3cyc_1I_1S]>,
  SchedVar<NoSchedPred,             [A57Write_1cyc_1S_1I]>
]>;
def A57WriteStrAm3PreWrBackX3 : SchedWriteVariant<[
  SchedVar<IsLdrAm3NegRegOffPredX3, [A57WrBackTwo]>,
  SchedVar<NoSchedPred,             [A57WrBackOne]>
]>;
def : InstRW<[A57WriteStrAm3PreWrBackX3, A57WriteStrAm3PreX3], (instregex
  "STRD_PRE")>;
|
|
666
|
|
// Store, immediate post-indexed: 1cyc "S, I0/I1" with a 1-cycle writeback.
def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], (instregex
  "STR(T?)_POST_IMM",
  "STRB(T?)_POST_IMM",
  "t2STR(B?)_POST")>;

// STR/STRB register post-indexed, scaled or not: 1(2) "S, M".
def : InstRW<[A57WrBackTwo, A57Write_1cyc_1S_1M], (instregex
  "STR(T?)_POST_REG",
  "STRB(T?)_POST_REG",
  "STR(B?)T_POST$")>;

// Post-indexed STRH/STRD (STRH_POST, STRD_POST), STRHTi, STRHTr:
// 1(1) "S, I0/I1" for both register and immediate offsets.
def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], (instregex
  "(t2)?STR(H|D)_POST",
  "STRHT(i|r)",
  "t2STRHT")>;
|
|
678
|
|
679 // --- Store multiple instructions ---
|
|
680 // TODO: no writeback latency defined in documentation
|
|
// Write for store-multiple without base update. A57LMAddrPredN (N = 1..8)
// selects the N-cycle single-S write; if no predicate matches, the 2-cycle
// write is used.
def A57WriteSTM : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S]>,
  SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S]>,
  SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S]>,
  SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S]>,
  SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S]>,
  SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S]>,
  SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S]>,
  SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S]>,
  SchedVar<NoSchedPred,    [A57Write_2cyc_1S]>
]>;
|
|
// Write for store-multiple with base update: same N-cycle progression as
// A57WriteSTM, using the S+I writes. Falls back to the 2-cycle write.
def A57WriteSTM_Upd : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S_1I]>,
  SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S_1I]>,
  SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S_1I]>,
  SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S_1I]>,
  SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S_1I]>,
  SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S_1I]>,
  SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S_1I]>,
  SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S_1I]>,
  SchedVar<NoSchedPred,    [A57Write_2cyc_1S_1I]>
]>;
|
|
703
|
|
// STM without base update.
def : InstRW<[A57WriteSTM],
             (instregex "(t2|sys|t)?STM(IA|DA|DB|IB)$")>;
// STM with base update, and tPUSH.
def : InstRW<[A57WrBackOne, A57WriteSTM_Upd],
             (instregex "(t2|sys|t)?STM(IA_UPD|DA_UPD|DB_UPD|IB_UPD)",
                        "tPUSH")>;
|
|
707
|
|
708 // --- 3.10 FP Data Processing Instructions ---
|
|
// Generic FP ALU writes (single and double precision) share one 5-cycle V
// write.
def : SchedAlias<WriteFPALU32, A57Write_5cyc_1V>;
def : SchedAlias<WriteFPALU64, A57Write_5cyc_1V>;

// Scalar VABS is overridden with the 3-cycle V write.
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VABS(S|D|H)")>;
|
|
713
|
|
714 // fp compare - 3cyc F1 for unconditional, 6cyc "F0/F1, F1" for conditional
|
|
// Predicated compares take the 6-cycle V+X write, unpredicated ones the
// 3-cycle X write.
def A57WriteVcmp : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [A57Write_6cyc_1V_1X]>,
  SchedVar<NoSchedPred,      [A57Write_3cyc_1X]>
]>;

def : InstRW<[A57WriteVcmp],
             (instregex "VCMP(D|S|H|ZD|ZS|ZH)$",
                        "VCMPE(D|S|H|ZD|ZS|ZH)")>;
|
|
721
|
|
722 // fp convert
|
|
def : InstRW<[A57Write_5cyc_1V],
             (instregex "VCVT(A|N|P|M)(SH|UH|SS|US|SD|UD)",
                        "VCVT(BDH|THD|TDH)")>;

// Every other FP convert goes through the generic WriteFPCVT.
def : SchedAlias<WriteFPCVT, A57Write_5cyc_1V>;

// FP round to integral
def : InstRW<[A57Write_5cyc_1V],
             (instregex "VRINT(A|N|P|M|Z|R|X)(H|S|D)$")>;

// FP divide and FP square root: 17-cycle W write for 32-bit, 32-cycle W
// write for 64-bit.
def : SchedAlias<WriteFPDIV32,  A57Write_17cyc_1W>;
def : SchedAlias<WriteFPDIV64,  A57Write_32cyc_1W>;
def : SchedAlias<WriteFPSQRT32, A57Write_17cyc_1W>;
def : SchedAlias<WriteFPSQRT64, A57Write_32cyc_1W>;

// FP max/min
def : InstRW<[A57Write_5cyc_1V],
             (instregex "VMAX", "VMIN")>;
|
|
739
|
|
740 // FP multiply-accumulate pipelines support late forwarding of the result
|
|
741 // from FP multiply μops to the accumulate operands of an
|
|
742 // FP multiply-accumulate μop. The latter can potentially be issued 1 cycle
|
|
743 // after the FP multiply μop has been issued
|
|
744 // FP multiply, FZ
|
|
// FP multiply: one V-unit micro-op with a 5-cycle latency.
let Latency = 5 in
def A57WriteVMUL : SchedWriteRes<[A57UnitV]>;

// Both precisions use the same write; multiply operands get no read advance.
def : SchedAlias<WriteFPMUL32, A57WriteVMUL>;
def : SchedAlias<WriteFPMUL64, A57WriteVMUL>;
def : ReadAdvance<ReadFPMUL, 0>;
|
|
750
|
|
751 // FP multiply accumulate, FZ: 9cyc "F0/F1" or 4 cyc for sequenced accumulate
|
|
752 // VFMA, VFMS, VFNMA, VFNMS, VMLA, VMLS, VNMLA, VNMLS
|
|
let Latency = 9 in
def A57WriteVFMA : SchedWriteRes<[A57UnitV]>;
|
|
754
|
|
755 // VFMA takes 9 cyc for common case and 4 cyc for VFMA->VFMA chain (5 read adv.)
|
|
756 // VMUL takes 5 cyc for common case and 1 cyc for VMUL->VFMA chain (4 read adv.)
|
|
757 // Currently, there is no way to define different read advances for VFMA operand
|
|
758 // from VFMA or from VMUL, so there will be 5 read advance.
|
|
// Zero latency (instead of one) for VMUL->VFMA shouldn't break anything.
|
|
760 // The same situation with ASIMD VMUL/VFMA instructions
|
|
761 // def A57ReadVFMA : SchedRead;
|
|
762 // def : ReadAdvance<A57ReadVFMA, 5, [A57WriteVFMA]>;
|
|
763 // def : ReadAdvance<A57ReadVFMA, 4, [A57WriteVMUL]>;
|
|
// Single 5-cycle read advance applied to results of both A57WriteVFMA and
// A57WriteVMUL.
def A57ReadVFMA5 : SchedReadAdvance<5, [A57WriteVFMA, A57WriteVMUL]>;

// Route the generic FP multiply-accumulate writes and read to the A57 ones.
def : SchedAlias<WriteFPMAC32, A57WriteVFMA>;
def : SchedAlias<WriteFPMAC64, A57WriteVFMA>;
def : SchedAlias<ReadFPMAC,    A57ReadVFMA5>;

def : InstRW<[A57Write_3cyc_1V],
             (instregex "VNEG")>;
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VSEL")>;
|
|
772
|
|
773 // --- 3.11 FP Miscellaneous Instructions ---
|
|
774 // VMOV: 3cyc "F0/F1" for imm/reg
|
|
// Immediate form.
def : InstRW<[A57Write_3cyc_1V],
             (instregex "FCONST(D|S|H)")>;
// Register form, including the conditional (cc) variants.
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VMOV(D|S|H)(cc)?$")>;
|
|
777
|
|
778 // 5cyc L for FP transfer, vfp to core reg,
|
|
779 // 5cyc L for FP transfer, core reg to vfp
|
|
def : SchedAlias<WriteFPMOV, A57Write_5cyc_1L>;
// The common code declares VMOVRRS/VMOVRRD with a single WriteFPMOV although
// two are needed, so both writes are listed explicitly here.
def : InstRW<[A57Write_5cyc_1L, A57Write_5cyc_1L],
             (instregex "VMOV(RRS|RRD)")>;
|
|
783
|
|
784 // 8cyc "L,F0/F1" for FP transfer, core reg to upper or lower half of vfp D-reg
|
|
// The transfer is documented as 8cyc "L, F0/F1": the second micro-op runs on
// an FP/ASIMD pipe, which this model represents with the V unit (not the
// integer I unit). This matches the other core-to-vector transfers in this
// file (VDUP from a core register, VSETLN), which use A57Write_8cyc_1L_1V.
def : InstRW<[A57Write_8cyc_1L_1V], (instregex "VMOVDRR")>;
|
|
786
|
|
787 // --- 3.12 FP Load Instructions ---
|
|
// Single FP register loads.
def : InstRW<[A57Write_5cyc_1L],
             (instregex "VLDR(D|S|H)")>;

def : InstRW<[A57Write_5cyc_1L],
             (instregex "VLDMQIA$")>;
|
|
791
|
|
792 // FP load multiple (VLDM)
|
|
793
|
|
// Write list for unpredicated VLDM: consecutive pairs of entries share a
// latency, stepping from 5 up to 12 cycles on the L unit.
def A57VLDMOpsListUncond : A57WriteLMOpsListType<[
    A57Write_5cyc_1L,  A57Write_5cyc_1L,
    A57Write_6cyc_1L,  A57Write_6cyc_1L,
    A57Write_7cyc_1L,  A57Write_7cyc_1L,
    A57Write_8cyc_1L,  A57Write_8cyc_1L,
    A57Write_9cyc_1L,  A57Write_9cyc_1L,
    A57Write_10cyc_1L, A57Write_10cyc_1L,
    A57Write_11cyc_1L, A57Write_11cyc_1L,
    A57Write_12cyc_1L, A57Write_12cyc_1L]>;

// Variadic write: A57LMAddrPredN selects the first 2*N entries of the list;
// the fallback uses all 16.
let Variadic = 1 in
def A57WriteVLDMuncond : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, A57VLDMOpsListUncond.Writes[0-1]>,
  SchedVar<A57LMAddrPred2, A57VLDMOpsListUncond.Writes[0-3]>,
  SchedVar<A57LMAddrPred3, A57VLDMOpsListUncond.Writes[0-5]>,
  SchedVar<A57LMAddrPred4, A57VLDMOpsListUncond.Writes[0-7]>,
  SchedVar<A57LMAddrPred5, A57VLDMOpsListUncond.Writes[0-9]>,
  SchedVar<A57LMAddrPred6, A57VLDMOpsListUncond.Writes[0-11]>,
  SchedVar<A57LMAddrPred7, A57VLDMOpsListUncond.Writes[0-13]>,
  SchedVar<A57LMAddrPred8, A57VLDMOpsListUncond.Writes[0-15]>,
  SchedVar<NoSchedPred,    A57VLDMOpsListUncond.Writes[0-15]>
]>;
|
|
814
|
|
// Write list for predicated VLDM: latency rises by one cycle per entry, from
// 5 up to 20 cycles on the L unit.
def A57VLDMOpsListCond : A57WriteLMOpsListType<[
    A57Write_5cyc_1L,  A57Write_6cyc_1L,
    A57Write_7cyc_1L,  A57Write_8cyc_1L,
    A57Write_9cyc_1L,  A57Write_10cyc_1L,
    A57Write_11cyc_1L, A57Write_12cyc_1L,
    A57Write_13cyc_1L, A57Write_14cyc_1L,
    A57Write_15cyc_1L, A57Write_16cyc_1L,
    A57Write_17cyc_1L, A57Write_18cyc_1L,
    A57Write_19cyc_1L, A57Write_20cyc_1L]>;

// Variadic write: A57LMAddrPredN selects the first 2*N entries of the list;
// the fallback uses all 16.
let Variadic = 1 in
def A57WriteVLDMcond : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, A57VLDMOpsListCond.Writes[0-1]>,
  SchedVar<A57LMAddrPred2, A57VLDMOpsListCond.Writes[0-3]>,
  SchedVar<A57LMAddrPred3, A57VLDMOpsListCond.Writes[0-5]>,
  SchedVar<A57LMAddrPred4, A57VLDMOpsListCond.Writes[0-7]>,
  SchedVar<A57LMAddrPred5, A57VLDMOpsListCond.Writes[0-9]>,
  SchedVar<A57LMAddrPred6, A57VLDMOpsListCond.Writes[0-11]>,
  SchedVar<A57LMAddrPred7, A57VLDMOpsListCond.Writes[0-13]>,
  SchedVar<A57LMAddrPred8, A57VLDMOpsListCond.Writes[0-15]>,
  SchedVar<NoSchedPred,    A57VLDMOpsListCond.Writes[0-15]>
]>;
|
|
835
|
|
// Top-level VLDM write: predicated instructions use the "cond" variant,
// everything else the "uncond" one.
let Variadic = 1 in
def A57WriteVLDM : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [A57WriteVLDMcond]>,
  SchedVar<NoSchedPred,      [A57WriteVLDMuncond]>
]>;

def : InstRW<[A57WriteVLDM],
             (instregex "VLDM(DIA|SIA)$")>;
|
|
842
|
|
// Write list for unpredicated VLDM with base update: same paired 5..12-cycle
// progression as the non-update list, using the L+I writes.
def A57VLDMOpsListUncond_Upd : A57WriteLMOpsListType<[
    A57Write_5cyc_1L_1I,  A57Write_5cyc_1L_1I,
    A57Write_6cyc_1L_1I,  A57Write_6cyc_1L_1I,
    A57Write_7cyc_1L_1I,  A57Write_7cyc_1L_1I,
    A57Write_8cyc_1L_1I,  A57Write_8cyc_1L_1I,
    A57Write_9cyc_1L_1I,  A57Write_9cyc_1L_1I,
    A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I,
    A57Write_11cyc_1L_1I, A57Write_11cyc_1L_1I,
    A57Write_12cyc_1L_1I, A57Write_12cyc_1L_1I]>;

// Variadic write: A57LMAddrPredN selects the first 2*N entries of the list;
// the fallback uses all 16.
let Variadic = 1 in
def A57WriteVLDMuncond_UPD : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, A57VLDMOpsListUncond_Upd.Writes[0-1]>,
  SchedVar<A57LMAddrPred2, A57VLDMOpsListUncond_Upd.Writes[0-3]>,
  SchedVar<A57LMAddrPred3, A57VLDMOpsListUncond_Upd.Writes[0-5]>,
  SchedVar<A57LMAddrPred4, A57VLDMOpsListUncond_Upd.Writes[0-7]>,
  SchedVar<A57LMAddrPred5, A57VLDMOpsListUncond_Upd.Writes[0-9]>,
  SchedVar<A57LMAddrPred6, A57VLDMOpsListUncond_Upd.Writes[0-11]>,
  SchedVar<A57LMAddrPred7, A57VLDMOpsListUncond_Upd.Writes[0-13]>,
  SchedVar<A57LMAddrPred8, A57VLDMOpsListUncond_Upd.Writes[0-15]>,
  SchedVar<NoSchedPred,    A57VLDMOpsListUncond_Upd.Writes[0-15]>
]>;
|
|
863
|
|
// Write list for predicated VLDM with base update: one extra cycle per entry,
// from 5 up to 20 cycles, using the L+I writes.
def A57VLDMOpsListCond_Upd : A57WriteLMOpsListType<[
    A57Write_5cyc_1L_1I,  A57Write_6cyc_1L_1I,
    A57Write_7cyc_1L_1I,  A57Write_8cyc_1L_1I,
    A57Write_9cyc_1L_1I,  A57Write_10cyc_1L_1I,
    A57Write_11cyc_1L_1I, A57Write_12cyc_1L_1I,
    A57Write_13cyc_1L_1I, A57Write_14cyc_1L_1I,
    A57Write_15cyc_1L_1I, A57Write_16cyc_1L_1I,
    A57Write_17cyc_1L_1I, A57Write_18cyc_1L_1I,
    A57Write_19cyc_1L_1I, A57Write_20cyc_1L_1I]>;

// Variadic write: A57LMAddrPredN selects the first 2*N entries of the list;
// the fallback uses all 16.
let Variadic = 1 in
def A57WriteVLDMcond_UPD : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, A57VLDMOpsListCond_Upd.Writes[0-1]>,
  SchedVar<A57LMAddrPred2, A57VLDMOpsListCond_Upd.Writes[0-3]>,
  SchedVar<A57LMAddrPred3, A57VLDMOpsListCond_Upd.Writes[0-5]>,
  SchedVar<A57LMAddrPred4, A57VLDMOpsListCond_Upd.Writes[0-7]>,
  SchedVar<A57LMAddrPred5, A57VLDMOpsListCond_Upd.Writes[0-9]>,
  SchedVar<A57LMAddrPred6, A57VLDMOpsListCond_Upd.Writes[0-11]>,
  SchedVar<A57LMAddrPred7, A57VLDMOpsListCond_Upd.Writes[0-13]>,
  SchedVar<A57LMAddrPred8, A57VLDMOpsListCond_Upd.Writes[0-15]>,
  SchedVar<NoSchedPred,    A57VLDMOpsListCond_Upd.Writes[0-15]>
]>;
|
|
884
|
|
// Top-level write for VLDM with base update: predicated instructions use the
// "cond" variant, everything else the "uncond" one.
let Variadic = 1 in
def A57WriteVLDM_UPD : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [A57WriteVLDMcond_UPD]>,
  SchedVar<NoSchedPred,      [A57WriteVLDMuncond_UPD]>
]>;

def : InstRW<[A57WrBackOne, A57WriteVLDM_UPD],
             (instregex "VLDM(DIA_UPD|DDB_UPD|SIA_UPD|SDB_UPD)")>;
|
|
892
|
|
893 // --- 3.13 FP Store Instructions ---
|
|
// Single FP register stores.
def : InstRW<[A57Write_1cyc_1S],
             (instregex "VSTR(D|S|H)")>;

def : InstRW<[A57Write_2cyc_1S],
             (instregex "VSTMQIA$")>;
|
|
897
|
|
// VSTM of S registers, no base update: A57LMAddrPredN selects the N-cycle
// S write; the fallback is the 2-cycle write.
def A57WriteVSTMs : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S]>,
  SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S]>,
  SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S]>,
  SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S]>,
  SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S]>,
  SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S]>,
  SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S]>,
  SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S]>,
  SchedVar<NoSchedPred,    [A57Write_2cyc_1S]>
]>;
|
|
// VSTM of D registers, no base update: A57LMAddrPredN selects the
// 2*N-cycle S write; the fallback is the 4-cycle write.
def A57WriteVSTMd : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, [A57Write_2cyc_1S]>,
  SchedVar<A57LMAddrPred2, [A57Write_4cyc_1S]>,
  SchedVar<A57LMAddrPred3, [A57Write_6cyc_1S]>,
  SchedVar<A57LMAddrPred4, [A57Write_8cyc_1S]>,
  SchedVar<A57LMAddrPred5, [A57Write_10cyc_1S]>,
  SchedVar<A57LMAddrPred6, [A57Write_12cyc_1S]>,
  SchedVar<A57LMAddrPred7, [A57Write_14cyc_1S]>,
  SchedVar<A57LMAddrPred8, [A57Write_16cyc_1S]>,
  SchedVar<NoSchedPred,    [A57Write_4cyc_1S]>
]>;
|
|
// VSTM of S registers with base update: A57LMAddrPredN selects the N-cycle
// S+I write; the fallback is the 2-cycle write.
def A57WriteVSTMs_Upd : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S_1I]>,
  SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S_1I]>,
  SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S_1I]>,
  SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S_1I]>,
  SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S_1I]>,
  SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S_1I]>,
  SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S_1I]>,
  SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S_1I]>,
  SchedVar<NoSchedPred,    [A57Write_2cyc_1S_1I]>
]>;
|
|
// VSTM of D registers with base update: A57LMAddrPredN selects the
// 2*N-cycle S+I write.
// The fallback uses the 4-cycle write so that it agrees with the non-update
// D-register variant (A57WriteVSTMd), whose fallback is also the
// A57LMAddrPred2 entry. It was previously the 2-cycle write, which is the
// fallback of the S-register variants.
def A57WriteVSTMd_Upd : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, [A57Write_2cyc_1S_1I]>,
  SchedVar<A57LMAddrPred2, [A57Write_4cyc_1S_1I]>,
  SchedVar<A57LMAddrPred3, [A57Write_6cyc_1S_1I]>,
  SchedVar<A57LMAddrPred4, [A57Write_8cyc_1S_1I]>,
  SchedVar<A57LMAddrPred5, [A57Write_10cyc_1S_1I]>,
  SchedVar<A57LMAddrPred6, [A57Write_12cyc_1S_1I]>,
  SchedVar<A57LMAddrPred7, [A57Write_14cyc_1S_1I]>,
  SchedVar<A57LMAddrPred8, [A57Write_16cyc_1S_1I]>,
  SchedVar<NoSchedPred,    [A57Write_4cyc_1S_1I]>
]>;
|
|
942
|
|
// VSTM without base update.
def : InstRW<[A57WriteVSTMs],
             (instregex "VSTMSIA$")>;
def : InstRW<[A57WriteVSTMd],
             (instregex "VSTMDIA$")>;
// VSTM with base update: a write-back write plus the store write.
def : InstRW<[A57WrBackOne, A57WriteVSTMs_Upd],
             (instregex "VSTM(SIA_UPD|SDB_UPD)")>;
def : InstRW<[A57WrBackOne, A57WriteVSTMd_Upd],
             (instregex "VSTM(DIA_UPD|DDB_UPD)")>;
|
|
949
|
|
950 // --- 3.14 ASIMD Integer Instructions ---
|
|
951
|
|
952 // ASIMD absolute diff, 3cyc F0/F1 for integer VABD
|
|
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VABD(s|u)")>;

// ASIMD absolute diff accum: 4(1) F1 for D-form, 5(2) F1 for Q-form.
// Each form pairs an X-unit write with a 3-cycle read advance on its own
// result.
let Latency = 4 in
def A57WriteVABAD : SchedWriteRes<[A57UnitX]>;
def A57ReadVABAD  : SchedReadAdvance<3, [A57WriteVABAD]>;
def : InstRW<[A57WriteVABAD, A57ReadVABAD],
             (instregex "VABA(s|u)(v8i8|v4i16|v2i32)")>;

let Latency = 5 in
def A57WriteVABAQ : SchedWriteRes<[A57UnitX]>;
def A57ReadVABAQ  : SchedReadAdvance<3, [A57WriteVABAQ]>;
def : InstRW<[A57WriteVABAQ, A57ReadVABAQ],
             (instregex "VABA(s|u)(v16i8|v8i16|v4i32)")>;

// ASIMD absolute diff accum long: 4(1) F1 for VABAL
let Latency = 4 in
def A57WriteVABAL : SchedWriteRes<[A57UnitX]>;
def A57ReadVABAL  : SchedReadAdvance<3, [A57WriteVABAL]>;
def : InstRW<[A57WriteVABAL, A57ReadVABAL],
             (instregex "VABAL(s|u)")>;

// ASIMD absolute diff long: 3cyc F0/F1 for VABDL
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VABDL(s|u)")>;
|
|
972
|
|
973 // ASIMD arith, basic
|
134
|
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VADDv", "VADDL", "VADDW",
                        "VNEG(s8d|s16d|s32d|s8q|s16q|s32q|d|q)",
                        "VPADDi", "VPADDL", "VSUBv", "VSUBL", "VSUBW")>;

// ASIMD arith, complex
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VABS", "VADDHN", "VHADD", "VHSUB",
                        "VQABS", "VQADD", "VQNEG", "VQSUB",
                        "VRADDHN", "VRHADD", "VRSUBHN", "VSUBHN")>;

// ASIMD compare
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VCEQ", "VCGE", "VCGT", "VCLE", "VTST", "VCLT")>;

// ASIMD logical
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VAND", "VBIC", "VMVN", "VORR", "VORN", "VEOR")>;

// ASIMD max/min
def : InstRW<[A57Write_3cyc_1V],
             (instregex "(VMAX|VMIN)(s|u)",
                        "(VPMAX|VPMIN)(s8|s16|s32|u8|u16|u32)")>;
|
|
994
|
|
995 // ASIMD multiply, D-form: 5cyc F0 for r0px, 4cyc F0 for r1p0 and later
|
|
996 // Cortex-A57 r1p0 and later reduce the latency of ASIMD multiply
|
|
997 // and multiply-with-accumulate instructions relative to r0pX.
|
|
// IsR1P0AndLaterPred is hard-wired to false at the top of this file, so the
// NoSchedPred (r0px) entry is the one that is actually selected.
def A57WriteVMULD_VecInt : SchedWriteVariant<[
  SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
  SchedVar<NoSchedPred,        [A57Write_5cyc_1W]>
]>;
def : InstRW<[A57WriteVMULD_VecInt],
             (instregex "VMUL(v8i8|v4i16|v2i32|pd)",
                        "VMULsl(v4i16|v2i32)",
                        "VQDMULH(sl)?(v4i16|v2i32)",
                        "VQRDMULH(sl)?(v4i16|v2i32)")>;

// ASIMD multiply, Q-form: 6cyc F0 for r0px, 5cyc F0 for r1p0 and later
def A57WriteVMULQ_VecInt : SchedWriteVariant<[
  SchedVar<IsR1P0AndLaterPred, [A57Write_5cyc_1W]>,
  SchedVar<NoSchedPred,        [A57Write_6cyc_1W]>
]>;
def : InstRW<[A57WriteVMULQ_VecInt],
             (instregex "VMUL(v16i8|v8i16|v4i32|pq)",
                        "VMULsl(v8i16|v4i32)",
                        "VQDMULH(sl)?(v8i16|v4i32)",
                        "VQRDMULH(sl)?(v8i16|v4i32)")>;
|
|
1012
|
|
1013 // ASIMD multiply accumulate, D-form
|
|
1014 // 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 1cyc for accumulate sequence
|
|
1015 // (4 or 3 ReadAdvance)
|
|
// D-form write, with the read advance applied to results of that same write.
def A57WriteVMLAD_VecInt : SchedWriteVariant<[
  SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
  SchedVar<NoSchedPred,        [A57Write_5cyc_1W]>
]>;
def A57ReadVMLAD_VecInt : SchedReadVariant<[
  SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<3, [A57WriteVMLAD_VecInt]>]>,
  SchedVar<NoSchedPred,        [SchedReadAdvance<4, [A57WriteVMLAD_VecInt]>]>
]>;
def : InstRW<[A57WriteVMLAD_VecInt, A57ReadVMLAD_VecInt],
             (instregex "VMLA(sl)?(v8i8|v4i16|v2i32)",
                        "VMLS(sl)?(v8i8|v4i16|v2i32)")>;

// ASIMD multiply accumulate, Q-form
// 6cyc F0 for r0px, 5cyc F0 for r1p0 and later, 2cyc for accumulate sequence
// (4 or 3 ReadAdvance)
def A57WriteVMLAQ_VecInt : SchedWriteVariant<[
  SchedVar<IsR1P0AndLaterPred, [A57Write_5cyc_1W]>,
  SchedVar<NoSchedPred,        [A57Write_6cyc_1W]>
]>;
def A57ReadVMLAQ_VecInt : SchedReadVariant<[
  SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<3, [A57WriteVMLAQ_VecInt]>]>,
  SchedVar<NoSchedPred,        [SchedReadAdvance<4, [A57WriteVMLAQ_VecInt]>]>
]>;
def : InstRW<[A57WriteVMLAQ_VecInt, A57ReadVMLAQ_VecInt],
             (instregex "VMLA(sl)?(v16i8|v8i16|v4i32)",
                        "VMLS(sl)?(v16i8|v8i16|v4i32)")>;
|
|
1038
|
|
1039 // ASIMD multiply accumulate long
|
|
1040 // 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 1cyc for accumulate sequence
|
|
1041 // (4 or 3 ReadAdvance)
|
|
// Long-form write, with the read advance applied to results of that same
// write.
def A57WriteVMLAL_VecInt : SchedWriteVariant<[
  SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
  SchedVar<NoSchedPred,        [A57Write_5cyc_1W]>
]>;
def A57ReadVMLAL_VecInt : SchedReadVariant<[
  SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<3, [A57WriteVMLAL_VecInt]>]>,
  SchedVar<NoSchedPred,        [SchedReadAdvance<4, [A57WriteVMLAL_VecInt]>]>
]>;
def : InstRW<[A57WriteVMLAL_VecInt, A57ReadVMLAL_VecInt],
             (instregex "VMLAL(s|u)", "VMLSL(s|u)")>;
|
|
1051
|
|
1052 // ASIMD multiply accumulate saturating long
|
|
1053 // 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 2cyc for accumulate sequence
|
|
1054 // (3 or 2 ReadAdvance)
|
|
// Saturating long form: same write latencies as the plain long form, but the
// read advance is one cycle smaller (2 or 3).
def A57WriteVQDMLAL_VecInt : SchedWriteVariant<[
  SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
  SchedVar<NoSchedPred,        [A57Write_5cyc_1W]>
]>;
def A57ReadVQDMLAL_VecInt : SchedReadVariant<[
  SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<2, [A57WriteVQDMLAL_VecInt]>]>,
  SchedVar<NoSchedPred,        [SchedReadAdvance<3, [A57WriteVQDMLAL_VecInt]>]>
]>;
def : InstRW<[A57WriteVQDMLAL_VecInt, A57ReadVQDMLAL_VecInt],
             (instregex "VQDMLAL", "VQDMLSL")>;
|
|
1064
|
|
1065 // ASIMD multiply long
|
|
1066 // 5cyc F0 for r0px, 4cyc F0 for r1p0 and later
|
|
def A57WriteVMULL_VecInt : SchedWriteVariant<[
  SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
  SchedVar<NoSchedPred,        [A57Write_5cyc_1W]>
]>;
def : InstRW<[A57WriteVMULL_VecInt],
             (instregex "VMULL(s|u|p8|sls|slu)", "VQDMULL")>;
|
|
1072
|
|
1073 // ASIMD pairwise add and accumulate
|
|
1074 // 4cyc F1, 1cyc for accumulate sequence (3cyc ReadAdvance)
|
|
let Latency = 4 in
def A57WriteVPADAL : SchedWriteRes<[A57UnitX]>;
def A57ReadVPADAL  : SchedReadAdvance<3, [A57WriteVPADAL]>;
def : InstRW<[A57WriteVPADAL, A57ReadVPADAL],
             (instregex "VPADAL(s|u)")>;

// ASIMD shift accumulate
// 4cyc F1, 1cyc for accumulate sequence (3cyc ReadAdvance)
let Latency = 4 in
def A57WriteVSRA : SchedWriteRes<[A57UnitX]>;
def A57ReadVSRA  : SchedReadAdvance<3, [A57WriteVSRA]>;
def : InstRW<[A57WriteVSRA, A57ReadVSRA],
             (instregex "VSRA", "VRSRA")>;
|
|
1084
|
|
1085 // ASIMD shift by immed, basic
|
|
def : InstRW<[A57Write_3cyc_1X],
             (instregex "VMOVL", "VSHLi", "VSHLL", "VSHR(s|u)", "VSHRN")>;

// ASIMD shift by immed, complex
def : InstRW<[A57Write_4cyc_1X],
             (instregex "VQRSHRN", "VQRSHRUN", "VQSHL(si|ui|su)", "VQSHRN",
                        "VQSHRUN", "VRSHR(s|u)", "VRSHRN")>;

// ASIMD shift by immed and insert, basic, D-form
def : InstRW<[A57Write_4cyc_1X],
             (instregex "VSLI(v8i8|v4i16|v2i32|v1i64)",
                        "VSRI(v8i8|v4i16|v2i32|v1i64)")>;

// ASIMD shift by immed and insert, basic, Q-form
def : InstRW<[A57Write_5cyc_1X],
             (instregex "VSLI(v16i8|v8i16|v4i32|v2i64)",
                        "VSRI(v16i8|v8i16|v4i32|v2i64)")>;

// ASIMD shift by register, basic, D-form
def : InstRW<[A57Write_3cyc_1X],
             (instregex "VSHL(s|u)(v8i8|v4i16|v2i32|v1i64)")>;

// ASIMD shift by register, basic, Q-form
def : InstRW<[A57Write_4cyc_1X],
             (instregex "VSHL(s|u)(v16i8|v8i16|v4i32|v2i64)")>;

// ASIMD shift by register, complex, D-form
// VQRSHL, VQSHL, VRSHL
def : InstRW<[A57Write_4cyc_1X],
             (instregex "VQRSHL(s|u)(v8i8|v4i16|v2i32|v1i64)",
                        "VQSHL(s|u)(v8i8|v4i16|v2i32|v1i64)",
                        "VRSHL(s|u)(v8i8|v4i16|v2i32|v1i64)")>;

// ASIMD shift by register, complex, Q-form
def : InstRW<[A57Write_5cyc_1X],
             (instregex "VQRSHL(s|u)(v16i8|v8i16|v4i32|v2i64)",
                        "VQSHL(s|u)(v16i8|v8i16|v4i32|v2i64)",
                        "VRSHL(s|u)(v16i8|v8i16|v4i32|v2i64)")>;
|
|
1120
|
|
1121 // --- 3.15 ASIMD Floating-Point Instructions ---
|
|
1122 // ASIMD FP absolute value
|
|
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VABS(fd|fq|hd|hq)")>;

// ASIMD FP arith
def : InstRW<[A57Write_5cyc_1V],
             (instregex "VABD(fd|fq|hd|hq)",
                        "VADD(fd|fq|hd|hq)",
                        "VPADD(f|h)",
                        "VSUB(fd|fq|hd|hq)")>;

// ASIMD FP compare
def : InstRW<[A57Write_5cyc_1V],
             (instregex "VAC(GE|GT|LE|LT)",
                        "VC(EQ|GE|GT|LE)(fd|fq|hd|hq)")>;

// ASIMD FP convert, integer
def : InstRW<[A57Write_5cyc_1V],
             (instregex
  "VCVT(f2sd|f2ud|s2fd|u2fd|f2sq|f2uq|s2fq|u2fq|f2xsd|f2xud|xs2fd|xu2fd)",
  "VCVT(f2xsq|f2xuq|xs2fq|xu2fq)",
  "VCVT(AN|MN|NN|PN)(SDf|SQf|UDf|UQf|SDh|SQh|UDh|UQh)")>;

// ASIMD FP convert, half-precision: 8cyc F0/F1
def : InstRW<[A57Write_8cyc_1V],
             (instregex
  "VCVT(h2sd|h2ud|s2hd|u2hd|h2sq|h2uq|s2hq|u2hq|h2xsd|h2xud|xs2hd|xu2hd)",
  "VCVT(h2xsq|h2xuq|xs2hq|xu2hq)",
  "VCVT(f2h|h2f)")>;

// ASIMD FP max/min
def : InstRW<[A57Write_5cyc_1V],
             (instregex "(VMAX|VMIN)(fd|fq|hd|hq)",
                        "(VPMAX|VPMIN)(f|h)",
                        "VMAXNM", "VMINNM")>;
|
|
1148
|
|
1149 // ASIMD FP multiply
|
|
let Latency = 5 in
def A57WriteVMUL_VecFP : SchedWriteRes<[A57UnitV]>;
def : InstRW<[A57WriteVMUL_VecFP],
             (instregex "VMUL(sl)?(fd|fq|hd|hq)")>;

// ASIMD FP multiply accumulate: 9cyc F0/F1, 4cyc for accumulate sequence.
// The 5-cycle read advance applies to results of both the accumulate write
// and the multiply write.
let Latency = 9 in
def A57WriteVMLA_VecFP : SchedWriteRes<[A57UnitV]>;
def A57ReadVMLA_VecFP
    : SchedReadAdvance<5, [A57WriteVMLA_VecFP, A57WriteVMUL_VecFP]>;
def : InstRW<[A57WriteVMLA_VecFP, A57ReadVMLA_VecFP],
             (instregex "(VMLA|VMLS)(sl)?(fd|fq|hd|hq)",
                        "(VFMA|VFMS)(fd|fq|hd|hq)")>;

// ASIMD FP negate
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VNEG(fd|f32q|hd|hq)")>;

// ASIMD FP round to integral
def : InstRW<[A57Write_5cyc_1V],
             (instregex "VRINT(AN|MN|NN|PN|XN|ZN)(Df|Qf|Dh|Qh)")>;
|
|
1166
|
|
1167 // --- 3.16 ASIMD Miscellaneous Instructions ---
|
|
1168
|
|
1169 // ASIMD bitwise insert
|
|
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VBIF", "VBIT", "VBSL")>;

// ASIMD count
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VCLS", "VCLZ", "VCNT")>;

// ASIMD duplicate, core reg: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V],
             (instregex "VDUP(8|16|32)(d|q)")>;

// ASIMD duplicate, scalar: 3cyc "F0/F1"
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VDUPLN(8|16|32)(d|q)")>;

// ASIMD extract
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VEXT(d|q)(8|16|32|64)")>;

// ASIMD move, immed
def : InstRW<[A57Write_3cyc_1V],
             (instregex
  "VMOV(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v1i64|v2i64|v2f32|v4f32)",
  "VMOVQ0")>;

// ASIMD move, narrowing
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VMOVN")>;

// ASIMD move, saturating
def : InstRW<[A57Write_4cyc_1X],
             (instregex "VQMOVN")>;

// ASIMD reciprocal estimate
def : InstRW<[A57Write_5cyc_1V],
             (instregex "VRECPE", "VRSQRTE")>;

// ASIMD reciprocal step, FZ
def : InstRW<[A57Write_9cyc_1V],
             (instregex "VRECPS", "VRSQRTS")>;

// ASIMD reverse, swap, table lookup (1-2 reg)
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VREV", "VSWP", "VTB(L|X)(1|2)")>;

// ASIMD table lookup (3-4 reg)
def : InstRW<[A57Write_6cyc_1V],
             (instregex "VTBL(3|4)", "VTBX(3|4)")>;

// ASIMD transfer, scalar to core reg: 6cyc "L, I0/I1"
def : InstRW<[A57Write_6cyc_1L_1I],
             (instregex "VGETLN")>;

// ASIMD transfer, core reg to scalar: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V],
             (instregex "VSETLN")>;

// ASIMD transpose (two writes listed)
def : InstRW<[A57Write_3cyc_1V, A57Write_3cyc_1V],
             (instregex "VTRN")>;

// ASIMD unzip/zip, D-form
def : InstRW<[A57Write_3cyc_1V, A57Write_3cyc_1V],
             (instregex "VUZPd", "VZIPd")>;

// ASIMD unzip/zip, Q-form
def : InstRW<[A57Write_6cyc_1V, A57Write_6cyc_1V],
             (instregex "VUZPq", "VZIPq")>;
|
|
1223
|
|
1224 // --- 3.17 ASIMD Load Instructions ---
|
|
1225
|
|
// Overridden via InstRW for this processor.
|
|
// The generic NEON load/store writes get empty resource lists here; the
// per-instruction InstRW entries in this file supply the actual writes.
def : WriteRes<WriteVLD1, []>;
def : WriteRes<WriteVLD2, []>;
def : WriteRes<WriteVLD3, []>;
def : WriteRes<WriteVLD4, []>;

def : WriteRes<WriteVST1, []>;
def : WriteRes<WriteVST2, []>;
def : WriteRes<WriteVST3, []>;
def : WriteRes<WriteVST4, []>;
|
|
1235
|
|
1236 // 1-2 reg: 5cyc L, +I for writeback, 1 cyc wb latency
|
|
def : InstRW<[A57Write_5cyc_1L],
             (instregex "VLD1(d|q)(8|16|32|64)$")>;
def : InstRW<[A57Write_5cyc_1L_1I, A57WrBackOne],
             (instregex "VLD1(d|q)(8|16|32|64)wb")>;

// 3-4 reg: 6cyc L, +I for writeback, 1 cyc wb latency
def : InstRW<[A57Write_6cyc_1L],
             (instregex "VLD1(d|q)(8|16|32|64)(T|Q)$",
                        "VLD1d64(T|Q)Pseudo")>;

def : InstRW<[A57Write_6cyc_1L_1I, A57WrBackOne],
             (instregex "VLD1(d|q)(8|16|32|64)(T|Q)wb")>;

// ASIMD load, 1 element, one lane and all lanes: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V],
             (instregex "VLD1(LN|DUP)(d|q)(8|16|32)$",
                        "VLD1(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD1(LN|DUP)(d|q)(8|16|32)(wb|_UPD)",
                        "VLD1LNq(8|16|32)Pseudo_UPD")>;

// ASIMD load, 2 element, multiple, 2 reg: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V],
             (instregex "VLD2(d|q)(8|16|32)$",
                        "VLD2q(8|16|32)Pseudo$")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD2(d|q)(8|16|32)wb",
                        "VLD2q(8|16|32)PseudoWB")>;

// ASIMD load, 2 element, multiple, 4 reg: 9cyc "L, F0/F1"
def : InstRW<[A57Write_9cyc_1L_1V],
             (instregex "VLD2b(8|16|32)$")>;
def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD2b(8|16|32)wb")>;
|
|
1264
|
|
1265 // ASIMD load, 2 element, one lane and all lanes: 8cyc "L, F0/F1"
|
|
// 2 results
def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V],
             (instregex "VLD2(DUP|LN)(d|q)(8|16|32|8x2|16x2|32x2)$",
                        "VLD2LN(d|q)(8|16|32)Pseudo$")>;
// 2 results + wb result
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V, A57WrBackOne],
             (instregex "VLD2LN(d|q)(8|16|32)_UPD$")>;
// 1 result + wb result
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD2DUPd(8|16|32|8x2|16x2|32x2)wb",
                        "VLD2LN(d|q)(8|16|32)Pseudo_UPD")>;
|
|
1276
|
|
1277 // ASIMD load, 3 element, multiple, 3 reg: 9cyc "L, F0/F1"
|
|
1278 // 3 results
|
|
def : InstRW<[A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V],
             (instregex "VLD3(d|q)(8|16|32)$")>;
// 1 result
def : InstRW<[A57Write_9cyc_1L_1V],
             (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo$")>;
// 3 results + wb
def : InstRW<[A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57WrBackOne],
             (instregex "VLD3(d|q)(8|16|32)_UPD$")>;
// 1 result + wb
def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;

// ASIMD load, 3 element, one lane, size 32: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V],
             (instregex "VLD3LN(d|q)32$",
                        "VLD3LN(d|q)32Pseudo$")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I,
              A57WrBackOne],
             (instregex "VLD3LN(d|q)32_UPD")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD3LN(d|q)32Pseudo_UPD")>;

// ASIMD load, 3 element, one lane, size 8/16: 9cyc "L, F0/F1"
def : InstRW<[A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V],
             (instregex "VLD3LN(d|q)(8|16)$",
                        "VLD3LN(d|q)(8|16)Pseudo$")>;
def : InstRW<[A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57WrBackOne],
             (instregex "VLD3LN(d|q)(8|16)_UPD")>;
def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD3LN(d|q)(8|16)Pseudo_UPD")>;

// ASIMD load, 3 element, all lanes: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V],
             (instregex "VLD3DUP(d|q)(8|16|32)$",
                        "VLD3DUP(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I,
              A57WrBackOne],
             (instregex "VLD3DUP(d|q)(8|16|32)_UPD")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD3DUP(d|q)(8|16|32)Pseudo_UPD")>;
|
|
1321
|
|
// ASIMD load, 4 element, multiple, 4 reg: 9cyc "L, F0/F1"
def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V],
      (instregex "VLD4(d|q)(8|16|32)$")>;
// The Pseudo/oddPseudo forms get a single data write.
def : InstRW<[A57Write_9cyc_1L_1V],
      (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo$")>;
// _UPD forms use the _1I variant of each write, followed by A57WrBackOne.
def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, A57WrBackOne],
      (instregex "VLD4(d|q)(8|16|32)_UPD")>;
def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
      (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;

// ASIMD load, 4 element, one lane, size 32: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V],
      (instregex "VLD4LN(d|q)32$",
                 "VLD4LN(d|q)32Pseudo$")>;
// _UPD forms use the _1I variant of each write, followed by A57WrBackOne.
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I,
              A57WrBackOne],
      (instregex "VLD4LN(d|q)32_UPD")>;
// The pseudo _UPD form gets a single data write plus the writeback write.
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
      (instregex "VLD4LN(d|q)32Pseudo_UPD")>;

// ASIMD load, 4 element, one lane, size 8/16: 9cyc "L, F0/F1"
def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V],
      (instregex "VLD4LN(d|q)(8|16)$",
                 "VLD4LN(d|q)(8|16)Pseudo$")>;
// _UPD forms use the _1I variant of each write, followed by A57WrBackOne.
def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I,
              A57WrBackOne],
      (instregex "VLD4LN(d|q)(8|16)_UPD")>;
// The pseudo _UPD form gets a single data write plus the writeback write.
def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
      (instregex "VLD4LN(d|q)(8|16)Pseudo_UPD")>;

// ASIMD load, 4 element, all lanes: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V],
      (instregex "VLD4DUP(d|q)(8|16|32)$",
                 "VLD4DUP(d|q)(8|16|32)Pseudo$")>;
// _UPD forms use the _1I variant of each write, followed by A57WrBackOne.
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I,
              A57WrBackOne],
      (instregex "VLD4DUP(d|q)(8|16|32)_UPD")>;
// The pseudo _UPD form gets a single data write plus the writeback write.
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
      (instregex "VLD4DUP(d|q)(8|16|32)Pseudo_UPD")>;

// --- 3.18 ASIMD Store Instructions ---
//
// In this section every writeback form (wb / WB / _UPD) lists A57WrBackOne
// first and then the _1I variant of the corresponding store write.

// ASIMD store, 1 element, multiple, 1 reg: 1cyc S
def : InstRW<[A57Write_1cyc_1S], (instregex "VST1d(8|16|32|64)$")>;
def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I],
      (instregex "VST1d(8|16|32|64)wb")>;
// ASIMD store, 1 element, multiple, 2 reg: 2cyc S
def : InstRW<[A57Write_2cyc_1S], (instregex "VST1q(8|16|32|64)$")>;
def : InstRW<[A57WrBackOne, A57Write_2cyc_1S_1I],
      (instregex "VST1q(8|16|32|64)wb")>;
// ASIMD store, 1 element, multiple, 3 reg: 3cyc S
def : InstRW<[A57Write_3cyc_1S],
      (instregex "VST1d(8|16|32|64)T$", "VST1d64TPseudo$")>;
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1I],
      (instregex "VST1d(8|16|32|64)Twb", "VST1d64TPseudoWB")>;
// ASIMD store, 1 element, multiple, 4 reg: 4cyc S
def : InstRW<[A57Write_4cyc_1S],
      (instregex "VST1d(8|16|32|64)(Q|QPseudo)$")>;
def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1I],
      (instregex "VST1d(8|16|32|64)(Qwb|QPseudoWB)")>;
// ASIMD store, 1 element, one lane: 3cyc "F0/F1, S"
def : InstRW<[A57Write_3cyc_1S_1V],
      (instregex "VST1LNd(8|16|32)$", "VST1LNq(8|16|32)Pseudo$")>;
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
      (instregex "VST1LNd(8|16|32)_UPD", "VST1LNq(8|16|32)Pseudo_UPD")>;
// ASIMD store, 2 element, multiple, 2 reg: 3cyc "F0/F1, S"
def : InstRW<[A57Write_3cyc_1S_1V],
      (instregex "VST2(d|b)(8|16|32)$")>;
// Writeback forms list A57WrBackOne first, then the _1I store write.
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
      (instregex "VST2(b|d)(8|16|32)wb")>;
// ASIMD store, 2 element, multiple, 4 reg: 4cyc "F0/F1, S"
def : InstRW<[A57Write_4cyc_1S_1V],
      (instregex "VST2q(8|16|32)$", "VST2q(8|16|32)Pseudo$")>;
def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1V_1I],
      (instregex "VST2q(8|16|32)wb", "VST2q(8|16|32)PseudoWB")>;
// ASIMD store, 2 element, one lane: 3cyc "F0/F1, S"
def : InstRW<[A57Write_3cyc_1S_1V],
      (instregex "VST2LN(d|q)(8|16|32)$", "VST2LN(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
      (instregex "VST2LN(d|q)(8|16|32)_UPD",
                 "VST2LN(d|q)(8|16|32)Pseudo_UPD")>;
// ASIMD store, 3 element, multiple, 3 reg: modeled as 3cyc "F0/F1, S"
def : InstRW<[A57Write_3cyc_1S_1V],
      (instregex "VST3(d|q)(8|16|32)$", "VST3(d|q)(8|16|32)(oddP|P)seudo$")>;
// Writeback forms list A57WrBackOne first, then the _1I store write.
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
      (instregex "VST3(d|q)(8|16|32)_UPD",
                 "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
// ASIMD store, 3 element, one lane: modeled as 3cyc "F0/F1, S"
def : InstRW<[A57Write_3cyc_1S_1V],
      (instregex "VST3LN(d|q)(8|16|32)$", "VST3LN(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
      (instregex "VST3LN(d|q)(8|16|32)_UPD",
                 "VST3LN(d|q)(8|16|32)Pseudo_UPD")>;
// ASIMD store, 4 element, multiple, 4 reg: modeled as 4cyc "F0/F1, S"
def : InstRW<[A57Write_4cyc_1S_1V],
      (instregex "VST4(d|q)(8|16|32)$", "VST4(d|q)(8|16|32)(oddP|P)seudo$")>;
// Writeback forms list A57WrBackOne first, then the _1I store write.
def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1V_1I],
      (instregex "VST4(d|q)(8|16|32)_UPD",
                 "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
// ASIMD store, 4 element, one lane: modeled as 3cyc "F0/F1, S"
def : InstRW<[A57Write_3cyc_1S_1V],
      (instregex "VST4LN(d|q)(8|16|32)$", "VST4LN(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
      (instregex "VST4LN(d|q)(8|16|32)_UPD",
                 "VST4LN(d|q)(8|16|32)Pseudo_UPD")>;

// --- 3.19 Cryptography Extensions ---
// Crypto AES ops
// AESD, AESE, AESIMC, AESMC: 3cyc F0
def : InstRW<[A57Write_3cyc_1W], (instregex "^AES")>;
// Crypto polynomial (64x64) multiply long (VMULL.P64): 3cyc F0
def : InstRW<[A57Write_3cyc_1W], (instregex "^VMULLp64")>;
// Crypto SHA1 xor ops: 6cyc F0/F1
def : InstRW<[A57Write_6cyc_2V], (instregex "^SHA1SU0")>;
// Crypto SHA1 fast ops: 3cyc F0
def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA1(H|SU1)")>;
// Crypto SHA1 slow ops: 6cyc F0
def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA1[CMP]")>;
// Crypto SHA256 fast ops: 3cyc F0
def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA256SU0")>;
// Crypto SHA256 slow ops: 6cyc F0
def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA256(H|H2|SU1)")>;

// --- 3.20 CRC ---
// Covers both the CRC32* opcodes and their t2-prefixed counterparts.
def : InstRW<[A57Write_3cyc_1W], (instregex "^(t2)?CRC32")>;

// -----------------------------------------------------------------------------
// Common definitions
// WriteNoop uses no processor resources, has zero latency and issues zero
// micro-ops.
def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
def : SchedAlias<WriteALU, A57Write_1cyc_1I>;

// Branches and preload.
def : SchedAlias<WriteBr, A57Write_1cyc_1B>;
def : SchedAlias<WriteBrL, A57Write_1cyc_1B_1I>;
def : SchedAlias<WriteBrTbl, A57Write_1cyc_1B_1I>;
def : SchedAlias<WritePreLd, A57Write_4cyc_1L>;

// Loads, stores, and the ALU read, which is given an advance of 0 cycles.
def : SchedAlias<WriteLd, A57Write_4cyc_1L>;
def : SchedAlias<WriteST, A57Write_1cyc_1S>;
def : ReadAdvance<ReadALU, 0>;
1469
|
|
1470 } // SchedModel = CortexA57Model
|
|
1471
|