121
|
1 //=- X86ScheduleZnver1.td - X86 Znver1 Scheduling -------------*- tablegen -*-=//
|
|
2 //
|
|
3 // The LLVM Compiler Infrastructure
|
|
4 //
|
|
5 // This file is distributed under the University of Illinois Open Source
|
|
6 // License. See LICENSE.TXT for details.
|
|
7 //
|
|
8 //===----------------------------------------------------------------------===//
|
|
9 //
|
|
10 // This file defines the machine model for Znver1 to support instruction
|
|
11 // scheduling and other instruction cost heuristics.
|
|
12 //
|
|
13 //===----------------------------------------------------------------------===//
|
|
14
|
|
// Machine model record for Znver1.
def Znver1Model : SchedMachineModel {
  // Zen can decode 4 instructions per cycle.
  let IssueWidth = 4;

  // MicroOpBufferSize is chosen from the size of the reorder buffer.
  let MicroOpBufferSize = 192;

  let LoadLatency = 4;
  let MispredictPenalty = 17;
  let HighLatency = 25;
  let PostRAScheduler = 1;

  // FIXME: Not every instruction is covered by this model yet, so the
  // model is explicitly marked as incomplete.
  let CompleteModel = 0;
}
|
|
31
|
|
32 let SchedModel = Znver1Model in {
|
|
33
|
|
// Zen can issue micro-ops to 10 different units in one cycle.
// These are
//  * Four integer ALU units (ZnALU0, ZnALU1, ZnALU2, ZnALU3)
//  * Two AGU units (ZnAGU0, ZnAGU1)
//  * Four FPU units (ZnFPU0, ZnFPU1, ZnFPU2, ZnFPU3)
// The AGUs feed the load/store queues at two loads and one store per cycle.
|
|
40
|
|
// The four integer ALU units.
def ZnALU0 : ProcResource<1>;
def ZnALU1 : ProcResource<1>;
def ZnALU2 : ProcResource<1>;
def ZnALU3 : ProcResource<1>;

// The two AGU units.
def ZnAGU0 : ProcResource<1>;
def ZnAGU1 : ProcResource<1>;

// The four FPU units.
def ZnFPU0 : ProcResource<1>;
def ZnFPU1 : ProcResource<1>;
def ZnFPU2 : ProcResource<1>;
def ZnFPU3 : ProcResource<1>;

// FPU groups. The digits in each name list the member units; ZnFPU
// contains all four.
def ZnFPU    : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU2, ZnFPU3]>;
def ZnFPU013 : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU3]>;
def ZnFPU01  : ProcResGroup<[ZnFPU0, ZnFPU1]>;
def ZnFPU12  : ProcResGroup<[ZnFPU1, ZnFPU2]>;
def ZnFPU13  : ProcResGroup<[ZnFPU1, ZnFPU3]>;
def ZnFPU23  : ProcResGroup<[ZnFPU2, ZnFPU3]>;
def ZnFPU02  : ProcResGroup<[ZnFPU0, ZnFPU2]>;
def ZnFPU03  : ProcResGroup<[ZnFPU0, ZnFPU3]>;
|
|
66
|
|
// Unit groupings. A micro-op that can be issued to more than one unit is
// modelled with the group containing those units.

// ALU grouping: units 0 and 3.
def ZnALU03 : ProcResGroup<[ZnALU0, ZnALU3]>;

// All four ALUs; 56-entry (14x4 entries) integer scheduler.
def ZnALU : ProcResGroup<[ZnALU0, ZnALU1, ZnALU2, ZnALU3]> { let BufferSize = 56; }

// Both AGUs; 28-entry (14x2) buffer. AGUs can't be used for all ALU
// operations but are relevant for some instructions.
def ZnAGU : ProcResGroup<[ZnAGU0, ZnAGU1]> { let BufferSize = 28; }

// Integer multiplication is issued on ALU1.
def ZnMultiplier : ProcResource<1>;

// Integer division is issued on ALU2.
def ZnDivider : ProcResource<1>;

// Captures the 4-cycle load-to-use latency.
def : ReadAdvance<ReadAfterLd, 4>;
|
|
93
|
|
// Folded-load pair for the integer units.
// A folded load is an instruction that loads and also performs an operation,
// e.g. ADDPD xmm,[mem]. Instructions with folded loads are usually
// micro-fused, so they only appear as two micro-ops:
//   a. load and
//   b. addpd
multiclass ZnWriteResPair<X86FoldableSchedWrite SchedRW,
                          ProcResourceKind ExePort,
                          int Lat> {
  // Register variant: uses ExePort only, with latency Lat.
  def : WriteRes<SchedRW, [ExePort]> {
    let Latency = Lat;
  }

  // Memory variant: additionally uses ZnAGU and adds 4 cycles to the latency.
  def : WriteRes<SchedRW.Folded, [ZnAGU, ExePort]> {
    let Latency = !add(Lat, 4);
    let NumMicroOps = 2;
  }
}
|
|
114
|
|
// Folded-load pair for the floating point units.
multiclass ZnWriteResFpuPair<X86FoldableSchedWrite SchedRW,
                             ProcResourceKind ExePort,
                             int Lat> {
  // Register variant: uses ExePort only, with latency Lat.
  def : WriteRes<SchedRW, [ExePort]> {
    let Latency = Lat;
  }

  // Memory variant: additionally uses ZnAGU and adds 7 cycles to the latency.
  def : WriteRes<SchedRW.Folded, [ZnAGU, ExePort]> {
    let Latency = !add(Lat, 7);
  }
}
|
|
128
|
|
// WriteRMW is set for instructions with a memory write operation in codegen.
def : WriteRes<WriteRMW,   [ZnAGU]>;

def : WriteRes<WriteStore, [ZnAGU]>;
def : WriteRes<WriteMove,  [ZnALU]>;
def : WriteRes<WriteLoad,  [ZnAGU]> {
  let Latency = 8;
}

def : WriteRes<WriteZero,  []>;
def : WriteRes<WriteLEA,   [ZnALU]>;

// Integer ALU, shift and jump: register form and folded-load form.
defm : ZnWriteResPair<WriteALU,   ZnALU, 1>;
defm : ZnWriteResPair<WriteShift, ZnALU, 1>;
defm : ZnWriteResPair<WriteJump,  ZnALU, 1>;
|
|
142
|
|
// IDIV, register form: ZnALU2 for 1 cycle, ZnDivider for 41 cycles.
def : WriteRes<WriteIDiv, [ZnALU2, ZnDivider]> { let Latency = 41; let ResourceCycles = [1, 41]; }

// IDIV, folded-load form: additionally holds ZnAGU for 4 cycles.
def : WriteRes<WriteIDivLd, [ZnALU2, ZnAGU, ZnDivider]> { let Latency = 45; let ResourceCycles = [1, 4, 41]; }
|
|
153
|
|
// IMUL: issued on ZnALU1 together with the ZnMultiplier resource.
def : WriteRes<WriteIMulH, [ZnALU1, ZnMultiplier]> {
  let Latency = 4;
}
def : WriteRes<WriteIMul, [ZnALU1, ZnMultiplier]> {
  let Latency = 4;
}

// Folded-load form. The load also occupies an AGU, matching WriteIDivLd and
// the ZnWriteMul*Ld definitions in this file; latency is the register
// latency plus 4 cycles.
def : WriteRes<WriteIMulLd, [ZnAGU, ZnALU1, ZnMultiplier]> {
  let Latency = 8;
}
|
|
165
|
|
// Floating point operations: <write, execution resource, register latency>.
defm : ZnWriteResFpuPair<WriteFHAdd,     ZnFPU0,  3>;
defm : ZnWriteResFpuPair<WriteFAdd,      ZnFPU0,  3>;
defm : ZnWriteResFpuPair<WriteFBlend,    ZnFPU01, 1>;
defm : ZnWriteResFpuPair<WriteFVarBlend, ZnFPU01, 1>;
defm : ZnWriteResFpuPair<WriteVarBlend,  ZnFPU0,  1>;
defm : ZnWriteResFpuPair<WriteCvtI2F,    ZnFPU3,  5>;
defm : ZnWriteResFpuPair<WriteCvtF2F,    ZnFPU3,  5>;
defm : ZnWriteResFpuPair<WriteCvtF2I,    ZnFPU3,  5>;
defm : ZnWriteResFpuPair<WriteFDiv,      ZnFPU3,  15>;
defm : ZnWriteResFpuPair<WriteFShuffle,  ZnFPU12, 1>;
defm : ZnWriteResFpuPair<WriteFMul,      ZnFPU0,  5>;
defm : ZnWriteResFpuPair<WriteFRcp,      ZnFPU01, 5>;
defm : ZnWriteResFpuPair<WriteFRsqrt,    ZnFPU01, 5>;
defm : ZnWriteResFpuPair<WriteFSqrt,     ZnFPU3,  20>;
|
|
181
|
|
// Vector integer operations, which use the FPU units.
defm : ZnWriteResFpuPair<WriteVecShift,   ZnFPU,   1>;
defm : ZnWriteResFpuPair<WriteVecLogic,   ZnFPU,   1>;
defm : ZnWriteResFpuPair<WritePHAdd,      ZnFPU,   1>;
defm : ZnWriteResFpuPair<WriteVecALU,     ZnFPU,   1>;
defm : ZnWriteResFpuPair<WriteVecIMul,    ZnFPU0,  4>;
defm : ZnWriteResFpuPair<WriteShuffle,    ZnFPU,   1>;
defm : ZnWriteResFpuPair<WriteBlend,      ZnFPU01, 1>;
defm : ZnWriteResFpuPair<WriteShuffle256, ZnFPU,   2>;

// Vector shift operations.
defm : ZnWriteResFpuPair<WriteVarVecShift, ZnFPU12, 1>;

// AES instructions.
defm : ZnWriteResFpuPair<WriteAESDecEnc, ZnFPU01, 4>;
defm : ZnWriteResFpuPair<WriteAESIMC,    ZnFPU01, 4>;
defm : ZnWriteResFpuPair<WriteAESKeyGen, ZnFPU01, 4>;

def : WriteRes<WriteFence, [ZnAGU]>;
def : WriteRes<WriteNop,   []>;

// Instructions given latency=100 are microcoded; the long latency is used
// to block the entire pipeline.
defm : ZnWriteResFpuPair<WriteFShuffle256, ZnFPU, 100>;
|
|
206
|
|
// Microcoded instructions: no execution resources, latency 100.
def : WriteRes<WriteMicrocoded,  []> { let Latency = 100; }
def : WriteRes<WriteSystem,      []> { let Latency = 100; }
def : WriteRes<WriteMPSAD,       []> { let Latency = 100; }
def : WriteRes<WriteMPSADLd,     []> { let Latency = 100; }
def : WriteRes<WriteCLMul,       []> { let Latency = 100; }
def : WriteRes<WriteCLMulLd,     []> { let Latency = 100; }
def : WriteRes<WritePCmpIStrM,   []> { let Latency = 100; }
def : WriteRes<WritePCmpIStrMLd, []> { let Latency = 100; }
def : WriteRes<WritePCmpEStrI,   []> { let Latency = 100; }
def : WriteRes<WritePCmpEStrILd, []> { let Latency = 100; }
def : WriteRes<WritePCmpEStrM,   []> { let Latency = 100; }
def : WriteRes<WritePCmpEStrMLd, []> { let Latency = 100; }
def : WriteRes<WritePCmpIStrI,   []> { let Latency = 100; }
def : WriteRes<WritePCmpIStrILd, []> { let Latency = 100; }
|
|
224
|
|
//=== Regex based itineraries ===//
// Notation:
// - r: register.
// - m: memory.
// - i: immediate.
// - mm: 64 bit mmx register.
// - x: 128 bit xmm register.
// - (x)mm: mmx or xmm register.
// - y: 256 bit ymm register.
// - v: any vector register.
|
|
235
|
|
//=== Integer Instructions ===//
//-- Move instructions --//

// MOV r16,m.
def : InstRW<[WriteALULd, ReadAfterLd],
             (instregex "MOV16rm")>;

// MOVSX, MOVZX r,m.
def : InstRW<[WriteLoad],
             (instregex "MOV(S|Z)X32rm(8|16)")>;

// CMOVcc r,r.
def : InstRW<[WriteALU],
             (instregex "CMOV(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)(16|32|64)rr")>;

// CMOVcc r,m.
def : InstRW<[WriteALULd, ReadAfterLd],
             (instregex "CMOV(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)(16|32|64)rm")>;
|
|
253
|
|
// XCHG r,r: two micro-ops, holding ZnALU for two cycles.
def ZnWriteXCHG : SchedWriteRes<[ZnALU]> { let NumMicroOps = 2; let ResourceCycles = [2]; }
def : InstRW<[ZnWriteXCHG],
             (instregex "XCHG(8|16|32|64)rr", "XCHG(16|32|64)ar")>;

// XCHG r,m.
def ZnWriteXCHGrm : SchedWriteRes<[ZnAGU, ZnALU]> { let Latency = 5; let NumMicroOps = 2; }
def : InstRW<[ZnWriteXCHGrm, ReadAfterLd],
             (instregex "XCHG(8|16|32|64)rm")>;

// XLAT is treated as microcoded.
def : InstRW<[WriteMicrocoded], (instregex "XLAT")>;
|
|
271
|
|
// POP16.
def ZnWritePop16r : SchedWriteRes<[ZnAGU]> { let Latency = 5; let NumMicroOps = 2; }
def : InstRW<[ZnWritePop16r],   (instregex "POP16rmm")>;
def : InstRW<[WriteMicrocoded], (instregex "POPF(16|32)")>;
def : InstRW<[WriteMicrocoded], (instregex "POPA(16|32)")>;

// PUSH.
// The register form keeps the default values; only the memory form is
// overridden here.
def ZnWritePUSH : SchedWriteRes<[ZnAGU]> { let Latency = 4; }
def : InstRW<[ZnWritePUSH], (instregex "PUSH(16|32)rmm")>;

// PUSHF.
def : InstRW<[WriteMicrocoded], (instregex "PUSHF(16|32)")>;

// PUSHA.
def ZnWritePushA : SchedWriteRes<[ZnAGU]> { let Latency = 8; }
def : InstRW<[ZnWritePushA], (instregex "PUSHA(16|32)")>;
|
|
299
|
|
// LAHF.
def : InstRW<[WriteMicrocoded], (instregex "LAHF")>;

// SAHF.
def ZnWriteSAHF : SchedWriteRes<[ZnALU]> { let Latency = 2; let NumMicroOps = 2; }
def : InstRW<[ZnWriteSAHF], (instregex "SAHF")>;

// BSWAP: holds ZnALU for four cycles.
def ZnWriteBSwap : SchedWriteRes<[ZnALU]> { let ResourceCycles = [4]; }
def : InstRW<[ZnWriteBSwap], (instregex "BSWAP")>;

// MOVBE.
def ZnWriteMOVBE : SchedWriteRes<[ZnAGU, ZnALU]> { let Latency = 5; }
// r,m.
def : InstRW<[ZnWriteMOVBE, ReadAfterLd], (instregex "MOVBE(16|32|64)rm")>;
// m,r.
def : InstRW<[ZnWriteMOVBE], (instregex "MOVBE(16|32|64)mr")>;
|
|
325
|
|
//-- Arithmetic instructions --//

// ADD SUB: m,r/i.
def : InstRW<[WriteALULd],
             (instregex "(ADD|SUB)(8|16|32|64)m(r|i)",
                        "(ADD|SUB)(8|16|32|64)mi8",
                        "(ADD|SUB)64mi32")>;

// ADC SBB: r,r/i.
def : InstRW<[WriteALU],
             (instregex "(ADC|SBB)(8|16|32|64)r(r|i)",
                        "(ADC|SBB)(16|32|64)ri8",
                        "(ADC|SBB)64ri32",
                        "(ADC|SBB)(8|16|32|64)rr_REV")>;

// ADC SBB: r,m.
def : InstRW<[WriteALULd, ReadAfterLd],
             (instregex "(ADC|SBB)(8|16|32|64)rm")>;

// ADC SBB: m,r/i.
def : InstRW<[WriteALULd],
             (instregex "(ADC|SBB)(8|16|32|64)m(r|i)",
                        "(ADC|SBB)(16|32|64)mi8",
                        "(ADC|SBB)64mi32")>;

// INC DEC NOT NEG: m.
def : InstRW<[WriteALULd],
             (instregex "(INC|DEC|NOT|NEG)(8|16|32|64)m",
                        "(INC|DEC)64(16|32)m")>;
|
|
356
|
|
// MUL IMUL.
// Register forms use ZnALU1 and ZnMultiplier; memory forms add ZnAGU.

// r16.
def ZnWriteMul16 : SchedWriteRes<[ZnALU1, ZnMultiplier]> { let Latency = 3; }
def : InstRW<[ZnWriteMul16], (instregex "IMUL16r", "MUL16r")>;

// m16.
def ZnWriteMul16Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> { let Latency = 8; }
def : InstRW<[ZnWriteMul16Ld, ReadAfterLd], (instregex "IMUL16m", "MUL16m")>;

// r32.
def ZnWriteMul32 : SchedWriteRes<[ZnALU1, ZnMultiplier]> { let Latency = 3; }
def : InstRW<[ZnWriteMul32], (instregex "IMUL32r", "MUL32r")>;

// m32.
def ZnWriteMul32Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> { let Latency = 8; }
def : InstRW<[ZnWriteMul32Ld, ReadAfterLd], (instregex "IMUL32m", "MUL32m")>;

// r64.
def ZnWriteMul64 : SchedWriteRes<[ZnALU1, ZnMultiplier]> { let Latency = 4; let NumMicroOps = 2; }
def : InstRW<[ZnWriteMul64], (instregex "IMUL64r", "MUL64r")>;

// m64.
def ZnWriteMul64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> { let Latency = 9; let NumMicroOps = 2; }
def : InstRW<[ZnWriteMul64Ld, ReadAfterLd], (instregex "IMUL64m", "MUL64m")>;

// r16,r16.
def ZnWriteMul16rri : SchedWriteRes<[ZnALU1, ZnMultiplier]> { let Latency = 3; }
def : InstRW<[ZnWriteMul16rri], (instregex "IMUL16rri", "IMUL16rri8")>;

// r16,m16.
def ZnWriteMul16rmi : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> { let Latency = 8; }
def : InstRW<[ZnWriteMul16rmi, ReadAfterLd], (instregex "IMUL16rmi", "IMUL16rmi8")>;
|
|
407
|
|
// MULX.

// r32,r32,r32.
def ZnWriteMulX32 : SchedWriteRes<[ZnALU1, ZnMultiplier]> { let Latency = 3; let ResourceCycles = [1, 2]; }
def : InstRW<[ZnWriteMulX32], (instregex "MULX32rr")>;

// r32,r32,m32.
def ZnWriteMulX32Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> { let Latency = 8; let ResourceCycles = [1, 2, 2]; }
def : InstRW<[ZnWriteMulX32Ld, ReadAfterLd], (instregex "MULX32rm")>;

// r64,r64,r64.
// NOTE(review): unlike the other multiply writes here, this one does not
// list ZnMultiplier - confirm whether that is intentional.
def ZnWriteMulX64 : SchedWriteRes<[ZnALU1]> { let Latency = 3; }
def : InstRW<[ZnWriteMulX64], (instregex "MULX64rr")>;

// r64,r64,m64.
def ZnWriteMulX64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> { let Latency = 8; }
def : InstRW<[ZnWriteMulX64Ld, ReadAfterLd], (instregex "MULX64rm")>;
|
|
434
|
|
// DIV, IDIV: register forms, all on ZnALU2 plus ZnDivider.

// r8.
def ZnWriteDiv8 : SchedWriteRes<[ZnALU2, ZnDivider]> { let Latency = 15; }
def : InstRW<[ZnWriteDiv8], (instregex "DIV8r", "IDIV8r")>;

// r16.
def ZnWriteDiv16 : SchedWriteRes<[ZnALU2, ZnDivider]> { let Latency = 17; let NumMicroOps = 2; }
def : InstRW<[ZnWriteDiv16], (instregex "DIV16r", "IDIV16r")>;

// r32.
def ZnWriteDiv32 : SchedWriteRes<[ZnALU2, ZnDivider]> { let Latency = 25; let NumMicroOps = 2; }
def : InstRW<[ZnWriteDiv32], (instregex "DIV32r", "IDIV32r")>;

// r64.
def ZnWriteDiv64 : SchedWriteRes<[ZnALU2, ZnDivider]> { let Latency = 41; let NumMicroOps = 2; }
def : InstRW<[ZnWriteDiv64], (instregex "DIV64r", "IDIV64r")>;
|
|
462
|
|
//-- Control transfer instructions --//

// J(E|R)CXZ.
def ZnWriteJCXZ : SchedWriteRes<[ZnALU03]>;
def : InstRW<[ZnWriteJCXZ],
             (instregex "JCXZ", "JECXZ_(32|64)", "JRCXZ")>;

// INTO.
def : InstRW<[WriteMicrocoded], (instregex "INTO")>;

// LOOP.
def ZnWriteLOOP : SchedWriteRes<[ZnALU03]>;
def : InstRW<[ZnWriteLOOP], (instregex "LOOP")>;

// LOOP(N)E, LOOP(N)Z.
def ZnWriteLOOPE : SchedWriteRes<[ZnALU03]>;
def : InstRW<[ZnWriteLOOPE],
             (instregex "LOOPE", "LOOPNE", "LOOPZ", "LOOPNZ")>;

// CALL r.
def ZnWriteCALLr : SchedWriteRes<[ZnAGU, ZnALU03]>;
def : InstRW<[ZnWriteCALLr], (instregex "CALL(16|32)r")>;

// CALL m is treated as microcoded.
def : InstRW<[WriteMicrocoded], (instregex "CALL(16|32)m")>;

// RET.
def ZnWriteRET : SchedWriteRes<[ZnALU03]> { let NumMicroOps = 2; }
def : InstRW<[ZnWriteRET],
             (instregex "RET(L|Q|W)", "LRET(L|Q|W)", "IRET(D|Q)", "RETF")>;
|
|
494
|
|
//-- Logic instructions --//

// AND OR XOR: m,r/i.
def : InstRW<[WriteALULd],
             (instregex "(AND|OR|XOR)(8|16|32|64)m(r|i)",
                        "(AND|OR|XOR)(8|16|32|64)mi8",
                        "(AND|OR|XOR)64mi32")>;

// ANDN.
// r,r.
def : InstRW<[WriteALU], (instregex "ANDN(32|64)rr")>;
// r,m.
def : InstRW<[WriteALULd, ReadAfterLd], (instregex "ANDN(32|64)rm")>;

// ALU latency variants: 2- and 3-cycle register forms, and their load
// forms which add ZnAGU and 4 cycles.
def ZnWriteALULat2   : SchedWriteRes<[ZnALU]>        { let Latency = 2; }
def ZnWriteALULat2Ld : SchedWriteRes<[ZnAGU, ZnALU]> { let Latency = 6; }

def ZnWriteALULat3   : SchedWriteRes<[ZnALU]>        { let Latency = 3; }
def ZnWriteALULat3Ld : SchedWriteRes<[ZnAGU, ZnALU]> { let Latency = 7; }

// BSF BSR.
// r,r.
def : InstRW<[ZnWriteALULat3], (instregex "BS(R|F)(16|32|64)rr")>;
// r,m.
def : InstRW<[ZnWriteALULat3Ld, ReadAfterLd], (instregex "BS(R|F)(16|32|64)rm")>;

// BT.
// r,r/i.
def : InstRW<[WriteShift], (instregex "BT(16|32|64)r(r|i8)")>;
// m,r and m,i.
def : InstRW<[WriteShiftLd], (instregex "BT(16|32|64)mr")>;
def : InstRW<[WriteShiftLd], (instregex "BT(16|32|64)mi8")>;
|
|
536
|
|
// BTR BTS BTC.
// r,r/i.
def ZnWriteBTRSC : SchedWriteRes<[ZnALU]> { let Latency = 2; let NumMicroOps = 2; }
def : InstRW<[ZnWriteBTRSC], (instregex "BT(R|S|C)(16|32|64)r(r|i8)")>;

// m,r/i.
def ZnWriteBTRSCm : SchedWriteRes<[ZnAGU, ZnALU]> { let Latency = 6; let NumMicroOps = 2; }
def : InstRW<[ZnWriteBTRSCm], (instregex "BT(R|S|C)(16|32|64)m(r|i8)")>;

// BLSI BLSMSK BLSR.
// r,r.
def : InstRW<[ZnWriteALULat2], (instregex "BLS(I|MSK|R)(32|64)rr")>;
// r,m.
def : InstRW<[ZnWriteALULat2Ld, ReadAfterLd], (instregex "BLS(I|MSK|R)(32|64)rm")>;

// BEXTR.
// r,r,r.
def : InstRW<[WriteALU], (instregex "BEXTR(32|64)rr")>;
// r,m,r.
def : InstRW<[WriteALULd, ReadAfterLd], (instregex "BEXTR(32|64)rm")>;

// BZHI.
// r,r,r.
def : InstRW<[WriteALU], (instregex "BZHI(32|64)rr")>;
// r,m,r.
def : InstRW<[WriteALULd, ReadAfterLd], (instregex "BZHI(32|64)rm")>;

// CLD STD.
def : InstRW<[WriteALU], (instregex "STD", "CLD")>;

// PDEP PEXT: both register and memory forms are treated as microcoded.
// r,r,r.
def : InstRW<[WriteMicrocoded], (instregex "PDEP(32|64)rr", "PEXT(32|64)rr")>;
// r,m,r.
def : InstRW<[WriteMicrocoded], (instregex "PDEP(32|64)rm", "PEXT(32|64)rm")>;
|
|
580
|
|
// ROR ROL.
def : InstRW<[WriteShift], (instregex "RO(R|L)(8|16|32|64)r1")>;

// RCR RCL.
// r,1.
def : InstRW<[WriteShift], (instregex "RC(R|L)(8|16|32|64)r1")>;
// m,1.
def : InstRW<[WriteMicrocoded], (instregex "RC(R|L)(8|16|32|64)m1")>;
// r,i / r,cl.
def : InstRW<[WriteShift], (instregex "RC(R|L)(8|16|32|64)r(i|CL)")>;
// m,i / m,cl.
def : InstRW<[WriteMicrocoded], (instregex "RC(R|L)(8|16|32|64)m(i|CL)")>;

// SHR SHL SAR.
// m,i.
def : InstRW<[WriteShiftLd], (instregex "S(A|H)(R|L)(8|16|32|64)m(i|1)")>;

// SHRD SHLD.
// r,r.
def : InstRW<[WriteShift], (instregex "SH(R|L)D(16|32|64)rri8")>;
// m,r.
def : InstRW<[WriteShiftLd], (instregex "SH(R|L)D(16|32|64)mri8")>;
// SHLD r,r,cl.
def : InstRW<[WriteMicrocoded], (instregex "SHLD(16|32|64)rrCL")>;
// SHRD r,r,cl.
def : InstRW<[WriteMicrocoded], (instregex "SHRD(16|32|64)rrCL")>;
// m,r,cl.
def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)mrCL")>;

// SETcc.
// r.
def : InstRW<[WriteShift],
             (instregex "SET(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)r")>;
// m.
def : InstRW<[WriteShift],
             (instregex "SET(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)m")>;

// LZCNT TZCNT.
// r,r.
def : InstRW<[ZnWriteALULat2], (instregex "(LZCNT|TZCNT)(16|32|64)rr")>;
// r,m.
def : InstRW<[ZnWriteALULat2Ld, ReadAfterLd], (instregex "(LZCNT|TZCNT)(16|32|64)rm")>;
|
|
630
|
|
//-- Misc instructions --//

// CMPXCHG.
def ZnWriteCMPXCHG : SchedWriteRes<[ZnAGU, ZnALU]> { let Latency = 8; let NumMicroOps = 5; }
def : InstRW<[ZnWriteCMPXCHG], (instregex "CMPXCHG(8|16|32|64)rm")>;

// CMPXCHG8B.
def ZnWriteCMPXCHG8B : SchedWriteRes<[ZnAGU, ZnALU]> { let NumMicroOps = 18; }
def : InstRW<[ZnWriteCMPXCHG8B], (instregex "CMPXCHG8B")>;

// CMPXCHG16B.
def : InstRW<[WriteMicrocoded], (instregex "CMPXCHG16B")>;

// LEAVE.
def ZnWriteLEAVE : SchedWriteRes<[ZnALU, ZnAGU]> { let Latency = 8; let NumMicroOps = 2; }
def : InstRW<[ZnWriteLEAVE], (instregex "LEAVE")>;

// The remaining misc instructions are all treated as microcoded.
// PAUSE.
def : InstRW<[WriteMicrocoded], (instregex "PAUSE")>;
// RDTSC.
def : InstRW<[WriteMicrocoded], (instregex "RDTSC")>;
// RDPMC.
def : InstRW<[WriteMicrocoded], (instregex "RDPMC")>;
// RDRAND.
def : InstRW<[WriteMicrocoded], (instregex "RDRAND(16|32|64)r")>;
// XGETBV.
def : InstRW<[WriteMicrocoded], (instregex "XGETBV")>;
|
|
668
|
|
//-- String instructions --//
// Every entry in this section is treated as microcoded.

// CMPS.
def : InstRW<[WriteMicrocoded], (instregex "CMPS(B|L|Q|W)")>;
// LODSB/W.
def : InstRW<[WriteMicrocoded], (instregex "LODS(B|W)")>;
// LODSD/Q.
def : InstRW<[WriteMicrocoded], (instregex "LODS(L|Q)")>;
// MOVS.
def : InstRW<[WriteMicrocoded], (instregex "MOVS(B|L|Q|W)")>;
// SCAS.
def : InstRW<[WriteMicrocoded], (instregex "SCAS(B|W|L|Q)")>;
// STOS.
def : InstRW<[WriteMicrocoded], (instregex "STOS(B|L|Q|W)")>;
// XADD.
def : InstRW<[WriteMicrocoded], (instregex "XADD(8|16|32|64)rm")>;
|
|
690
|
|
//=== Floating Point x87 Instructions ===//
//-- Move instructions --//

// Shared writes for the register forms of FLD and FST(P).
def ZnWriteFLDr : SchedWriteRes<[ZnFPU13]>;
def ZnWriteSTr  : SchedWriteRes<[ZnFPU23]> { let Latency = 5; let NumMicroOps = 2; }

// LD_F.
// r.
def : InstRW<[ZnWriteFLDr], (instregex "LD_Frr")>;
// m.
def ZnWriteLD_F80m : SchedWriteRes<[ZnAGU, ZnFPU13]> { let NumMicroOps = 2; }
def : InstRW<[ZnWriteLD_F80m], (instregex "LD_F80m")>;

// FBLD.
def : InstRW<[WriteMicrocoded], (instregex "FBLDm")>;

// FST(P).
// r.
def : InstRW<[ZnWriteSTr], (instregex "ST_(F|FP)rr")>;
// m80.
def ZnWriteST_FP80m : SchedWriteRes<[ZnAGU, ZnFPU23]> { let Latency = 5; }
def : InstRW<[ZnWriteST_FP80m], (instregex "ST_FP80m")>;

// FBSTP.
// m80.
def : InstRW<[WriteMicrocoded], (instregex "FBSTPm")>;

// FXCHG.
def ZnWriteFXCH : SchedWriteRes<[ZnFPU]>;
def : InstRW<[ZnWriteFXCH], (instregex "XCH_F")>;

// FILD.
def ZnWriteFILD : SchedWriteRes<[ZnAGU, ZnFPU3]> { let Latency = 11; let NumMicroOps = 2; }
def : InstRW<[ZnWriteFILD], (instregex "ILD_F(16|32|64)m")>;

// FIST(P) FISTTP.
def ZnWriteFIST : SchedWriteRes<[ZnAGU, ZnFPU23]> { let Latency = 12; }
def : InstRW<[ZnWriteFIST], (instregex "IST_(F|FP)(16|32)m")>;
|
|
745
|
|
// Writes that use ZnAGU together with an FPU resource.
def ZnWriteFPU13 : SchedWriteRes<[ZnAGU, ZnFPU13]> { let Latency = 8; }
def ZnWriteFPU3  : SchedWriteRes<[ZnAGU, ZnFPU3]>  { let Latency = 11; }

// FLDZ.
def : InstRW<[ZnWriteFPU13], (instregex "LD_F0")>;

// FLD1.
def : InstRW<[ZnWriteFPU3], (instregex "LD_F1")>;
|
|
759
|
|
// FLDPI FLDL2E etc.
// Each pattern is a separate list element. Adjacent string literals with no
// comma between them are concatenated by TableGen into a single pattern, so
// the commas are required for FLDL2(T|E) and FLDL(G|N)2 to be matched.
def : InstRW<[ZnWriteFPU3], (instregex "FLDPI", "FLDL2(T|E)", "FLDL(G|N)2")>;
|
|
762
|
|
// x87 conditional moves.
def : InstRW<[WriteMicrocoded],
             (instregex "CMOV(B|BE|P|NB|NBE|NE|NP)_F")>;

// FNSTSW.
// AX.
def : InstRW<[WriteMicrocoded], (instregex "FNSTSW16r")>;
// m16.
def : InstRW<[WriteMicrocoded], (instregex "FNSTSWm")>;

// FLDCW.
def : InstRW<[WriteMicrocoded], (instregex "FLDCW16m")>;

// FNSTCW.
def : InstRW<[WriteMicrocoded], (instregex "FNSTCW16m")>;

// FINCSTP FDECSTP.
def : InstRW<[ZnWriteFPU3], (instregex "FINCSTP", "FDECSTP")>;

// FFREE.
def : InstRW<[ZnWriteFPU3], (instregex "FFREE")>;

// FNSAVE.
def : InstRW<[WriteMicrocoded], (instregex "FSAVEm")>;

// FRSTOR.
def : InstRW<[WriteMicrocoded], (instregex "FRSTORm")>;
|
|
789
|
|
//-- Arithmetic instructions --//

// Helper writes on ZnFPU3 and ZnFPU0; the "Ld" forms add ZnAGU.
def ZnWriteFPU3Lat2   : SchedWriteRes<[ZnFPU3]>        { let Latency = 2; }
def ZnWriteFPU3Lat2Ld : SchedWriteRes<[ZnAGU, ZnFPU3]> { let Latency = 9; }
def ZnWriteFPU3Lat1   : SchedWriteRes<[ZnFPU3]>;
def ZnWriteFPU0Lat1   : SchedWriteRes<[ZnFPU0]>;
def ZnWriteFPU0Lat1Ld : SchedWriteRes<[ZnAGU, ZnFPU0]> { let Latency = 8; }

// FABS.
def : InstRW<[ZnWriteFPU3Lat2], (instregex "ABS_F")>;

// FCHS.
def : InstRW<[ZnWriteFPU3Lat1], (instregex "CHS_F")>;

// FCOM(P) FUCOM(P).
// r.
def : InstRW<[ZnWriteFPU0Lat1],
             (instregex "COM_FST0r", "COMP_FST0r", "UCOM_Fr", "UCOM_FPr")>;
// m.
def : InstRW<[ZnWriteFPU0Lat1Ld],
             (instregex "FCOM(32|64)m", "FCOMP(32|64)m")>;

// FCOMPP FUCOMPP.
// r.
def : InstRW<[ZnWriteFPU0Lat1], (instregex "FCOMPP", "UCOM_FPPr")>;

// FCOMI(P) FUCOMI(P).
// The opcode names matched below are the register ("r") forms.
def ZnWriteFPU02 : SchedWriteRes<[ZnAGU, ZnFPU02]> { let Latency = 9; }
def : InstRW<[ZnWriteFPU02],
             (instregex "COM_FIr", "COM_FIPr", "UCOM_FIr", "UCOM_FIPr")>;

// FICOM(P).
def ZnWriteFPU03 : SchedWriteRes<[ZnAGU, ZnFPU03]> {
  let Latency = 12;
  let NumMicroOps = 2;
  let ResourceCycles = [1,3];
}
def : InstRW<[ZnWriteFPU03], (instregex "FICOM(16|32)m", "FICOMP(16|32)m")>;

// FTST.
def : InstRW<[ZnWriteFPU0Lat1], (instregex "TST_F")>;

// FXAM.
def : InstRW<[ZnWriteFPU3Lat1], (instregex "FXAM")>;
|
|
850
|
|
// FPREM.
def : InstRW<[WriteMicrocoded], (instregex "FPREM")>;
// FPREM1.
def : InstRW<[WriteMicrocoded], (instregex "FPREM1")>;
// FRNDINT.
def : InstRW<[WriteMicrocoded], (instregex "FRNDINT")>;
// FSCALE.
def : InstRW<[WriteMicrocoded], (instregex "FSCALE")>;
// FXTRACT.
def : InstRW<[WriteMicrocoded], (instregex "FXTRACT")>;

// FNOP.
def : InstRW<[ZnWriteFPU0Lat1], (instregex "FNOP")>;
// WAIT.
def : InstRW<[ZnWriteFPU0Lat1], (instregex "WAIT")>;

// FNCLEX.
def : InstRW<[WriteMicrocoded], (instregex "FNCLEX")>;
// FNINIT.
def : InstRW<[WriteMicrocoded], (instregex "FNINIT")>;
|
|
877
|
|
//=== Integer MMX and XMM Instructions ===//
//-- Move instructions --//

// Moves from GPR to FPR incur a penalty.
def ZnWriteFPU2 : SchedWriteRes<[ZnFPU2]> { let Latency = 3; }

// Moves to the ALU side do not incur the penalty.
def ZnWriteToALU2 : SchedWriteRes<[ZnFPU2]> { let Latency = 2; }

// Any FPU unit; the Y variant is two micro-ops with latency 2.
def ZnWriteFPU  : SchedWriteRes<[ZnFPU]>;
def ZnWriteFPUY : SchedWriteRes<[ZnFPU]> { let NumMicroOps = 2; let Latency = 2; }

// MOVD.
// r32/64 <- (x)mm.
def : InstRW<[ZnWriteToALU2],
             (instregex "MMX_MOVD64grr", "MMX_MOVD64from64rr",
                        "VMOVPDI2DIrr", "MOVPDI2DIrr")>;
// (x)mm <- r32/64.
def : InstRW<[ZnWriteFPU2],
             (instregex "MMX_MOVD64rr", "MMX_MOVD64to64rr",
                        "VMOVDI2PDIrr", "MOVDI2PDIrr")>;

// MOVQ.
// r64 <- (x)mm.
def : InstRW<[ZnWriteToALU2], (instregex "VMOVPQIto64rr")>;
// (x)mm <- r64.
def : InstRW<[ZnWriteFPU2], (instregex "VMOV64toPQIrr", "VMOVZQI2PQIrr")>;
// (x)mm <- (x)mm.
def : InstRW<[ZnWriteFPU], (instregex "MMX_MOVQ64rr")>;

// (V)MOVDQA/U.
// x <- x.
def : InstRW<[ZnWriteFPU],
             (instregex "MOVDQ(A|U)rr", "VMOVDQ(A|U)rr",
                        "MOVDQ(A|U)rr_REV", "VMOVDQ(A|U)rr_REV")>;
// y <- y.
def : InstRW<[ZnWriteFPUY],
             (instregex "VMOVDQ(A|U)Yrr", "VMOVDQ(A|U)Yrr_REV")>;

// MOVDQ2Q.
def : InstRW<[ZnWriteFPU], (instregex "MMX_MOVDQ2Qrr")>;

// MOVQ2DQ.
def : InstRW<[ZnWriteFPU], (instregex "MMX_MOVQ2DQrr")>;
|
|
929
|
|
// Writes on the ZnFPU12 group: plain, two micro-op (Y), and memory form.
def ZnWriteFPU12  : SchedWriteRes<[ZnFPU12]>;
def ZnWriteFPU12Y : SchedWriteRes<[ZnFPU12]> { let NumMicroOps = 2; }
def ZnWriteFPU12m : SchedWriteRes<[ZnAGU, ZnFPU12]>;

// PACKSSWB/DW.
// mm <- mm.
def : InstRW<[ZnWriteFPU12],
             (instregex "MMX_PACKSSDWirr", "MMX_PACKSSWBirr", "MMX_PACKUSWBirr")>;
// mm <- m.
def : InstRW<[ZnWriteFPU12m],
             (instregex "MMX_PACKSSDWirm", "MMX_PACKSSWBirm", "MMX_PACKUSWBirm")>;

// VPMOVSX/ZX BW BD BQ DW DQ.
// y <- x.
def : InstRW<[ZnWriteFPU12Y], (instregex "VPMOV(SX|ZX)(BW|BQ|DW|DQ)Yrr")>;
|
|
946
|
|
// Writes on the ZnFPU013 resource group.
// Register forms.
def ZnWriteFPU013 : SchedWriteRes<[ZnFPU013]>;
def ZnWriteFPU013Y : SchedWriteRes<[ZnFPU013]> {
  let Latency = 2;
}

// Forms that also consume ZnAGU: two micro-ops each.
// ZnWriteFPU013m and ZnWriteFPU013Ld carry identical parameters.
def ZnWriteFPU013m : SchedWriteRes<[ZnAGU, ZnFPU013]> {
  let NumMicroOps = 2;
  let Latency = 8;
}
def ZnWriteFPU013Ld : SchedWriteRes<[ZnAGU, ZnFPU013]> {
  let NumMicroOps = 2;
  let Latency = 8;
}
// 256-bit load form: one extra cycle of latency over ZnWriteFPU013Ld.
def ZnWriteFPU013LdY : SchedWriteRes<[ZnAGU, ZnFPU013]> {
  let NumMicroOps = 2;
  let Latency = 9;
}
|
|
963
|
|
// PBLENDW.
// Register sources: x,x,i / v,v,v,i.
def : InstRW<[ZnWriteFPU013],
             (instregex "(V?)PBLENDWrri")>;
// Register sources, ymm.
def : InstRW<[ZnWriteFPU013Y],
             (instregex "(V?)PBLENDWYrri")>;

// Memory source: x,m,i / v,v,m,i.
def : InstRW<[ZnWriteFPU013Ld],
             (instregex "(V?)PBLENDWrmi")>;
// Memory source, ymm: y,m,i.
def : InstRW<[ZnWriteFPU013LdY],
             (instregex "(V?)PBLENDWYrmi")>;
|
|
974
|
|
// Writes on the ZnFPU01 resource group.
def ZnWriteFPU01 : SchedWriteRes<[ZnFPU01]>;
// Same resource, counted as two micro-ops.
def ZnWriteFPU01Y : SchedWriteRes<[ZnFPU01]> {
  let NumMicroOps = 2;
}

// Memory-operand forms: ZnAGU for one cycle plus ZnFPU01 for two (xmm) or
// three (ymm) cycles.
def ZnWriteFPU01Op2 : SchedWriteRes<[ZnAGU, ZnFPU01]> {
  let Latency = 8;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 2];
}
def ZnWriteFPU01Op2Y : SchedWriteRes<[ZnAGU, ZnFPU01]> {
  let Latency = 9;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 3];
}

// VPBLENDD.
// v,v,v,i.
def : InstRW<[ZnWriteFPU01],
             (instregex "VPBLENDDrri")>;
// ymm.
def : InstRW<[ZnWriteFPU01Y],
             (instregex "VPBLENDDYrri")>;

// v,v,m,i.
def : InstRW<[ZnWriteFPU01Op2],
             (instregex "VPBLENDDrmi")>;
// ymm, memory source.
def : InstRW<[ZnWriteFPU01Op2Y],
             (instregex "VPBLENDDYrmi")>;
|
|
999
|
|
// MASKMOVQ.
def : InstRW<[WriteMicrocoded],
             (instregex "MMX_MASKMOVQ(64)?")>;

// MASKMOVDQU.
def : InstRW<[WriteMicrocoded],
             (instregex "(V?)MASKMOVDQU(64)?")>;

// VPMASKMOVQ, load forms.
// xmm.
def : InstRW<[ZnWriteFPU01Op2],
             (instregex "VPMASKMOVQrm")>;
// ymm.
def : InstRW<[ZnWriteFPU01Op2Y],
             (instregex "VPMASKMOVQYrm")>;

// VPMASKMOVD, load forms (xmm and ymm).
def : InstRW<[WriteMicrocoded],
             (instregex "VPMASKMOVD(Y?)rm")>;

// VPMASKMOVD/Q, store forms: m, v,v.
def : InstRW<[WriteMicrocoded],
             (instregex "VPMASKMOV(D|Q)(Y?)mr")>;
|
|
1015
|
|
// PMOVMSKB.
// Both writes use ZnFPU2. The non-Y write is counted as two micro-ops;
// the Y write has a two-cycle latency.
let NumMicroOps = 2 in
def ZnWritePMOVMSKB : SchedWriteRes<[ZnFPU2]>;
let Latency = 2 in
def ZnWritePMOVMSKBY : SchedWriteRes<[ZnFPU2]>;

def : InstRW<[ZnWritePMOVMSKB],
             (instregex "(V|MMX_)?PMOVMSKBrr")>;
def : InstRW<[ZnWritePMOVMSKBY],
             (instregex "(V|MMX_)?PMOVMSKBYrr")>;
|
|
1025
|
|
// PEXTR B/W/D/Q.

// Register destination: r32,x,i.
// ZnFPU12 is held for one cycle and ZnFPU2 for two.
def ZnWritePEXTRr : SchedWriteRes<[ZnFPU12, ZnFPU2]> {
  let ResourceCycles = [1, 2];
  let Latency = 2;
}
def : InstRW<[ZnWritePEXTRr],
             (instregex "PEXTR(B|W|D|Q)rr",
                        "MMX_PEXTRWirri")>;

// Memory destination: m8,x,i.
// Adds ZnAGU; resource cycles are listed in the order [ZnAGU, ZnFPU12, ZnFPU2].
def ZnWritePEXTRm : SchedWriteRes<[ZnAGU, ZnFPU12, ZnFPU2]> {
  let ResourceCycles = [1, 2, 3];
  let NumMicroOps = 2;
  let Latency = 5;
}
def : InstRW<[ZnWritePEXTRm],
             (instregex "PEXTR(B|W|D|Q)mr")>;
|
|
1041
|
|
// VPBROADCAST B/W from memory.

// x, m8/16: ZnAGU for one cycle, ZnFPU12 for two.
def ZnWriteVPBROADCAST128Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> {
  let NumMicroOps = 2;
  let Latency = 8;
  let ResourceCycles = [1, 2];
}
def : InstRW<[ZnWriteVPBROADCAST128Ld],
             (instregex "VPBROADCAST(B|W)rm")>;

// y, m8/16: same latency and micro-op count, but on ZnFPU1.
def ZnWriteVPBROADCAST256Ld : SchedWriteRes<[ZnAGU, ZnFPU1]> {
  let NumMicroOps = 2;
  let Latency = 8;
  let ResourceCycles = [1, 2];
}
def : InstRW<[ZnWriteVPBROADCAST256Ld],
             (instregex "VPBROADCAST(B|W)Yrm")>;

// VPGATHER (all index/element width combinations, xmm and ymm).
def : InstRW<[WriteMicrocoded],
             (instregex "VPGATHER(Q|D)(Q|D)(Y?)rm")>;
|
|
1063
|
|
1064 //-- Arithmetic instructions --//
|
|
1065
|
|
// PHADD|PHSUB (S) W/D.
// Integer horizontal add/subtract, MMX and (V)SSE/AVX forms; every pattern
// below is mapped to WriteMicrocoded.
def : InstRW<[WriteMicrocoded],
             (instregex "MMX_PHADD(W?)r(r|m)64",
                        "MMX_PHADDSWr(r|m)64",
                        "MMX_PHSUB(W|D)r(r|m)64",
                        "MMX_PHSUBSWrr64",
                        "(V?)PH(ADD|SUB)(W|D)(Y?)r(r|m)",
                        "(V?)PH(ADD|SUB)SWr(r|m)(256)?")>;
|
|
1074
|
|
1075
|
|
// PCMPGTQ.

// Register forms (xmm and ymm) share one write on ZnFPU03.
def ZnWritePCMPGTQr : SchedWriteRes<[ZnFPU03]>;
def : InstRW<[ZnWritePCMPGTQr],
             (instregex "(V?)PCMPGTQ(Y?)rr")>;

// Memory forms.
// x <- x,m.
let Latency = 8 in
def ZnWritePCMPGTQm : SchedWriteRes<[ZnAGU, ZnFPU03]>;
// ymm: two micro-ops, ZnFPU03 held for two cycles.
def ZnWritePCMPGTQYm : SchedWriteRes<[ZnAGU, ZnFPU03]> {
  let NumMicroOps = 2;
  let Latency = 8;
  let ResourceCycles = [1,2];
}
def : InstRW<[ZnWritePCMPGTQm],
             (instregex "(V?)PCMPGTQrm")>;
def : InstRW<[ZnWritePCMPGTQYm],
             (instregex "(V?)PCMPGTQYrm")>;
|
|
1092
|
|
// PMULLD. All forms execute on ZnFPU0.

// Register forms.
// x,x.
def ZnWritePMULLDr : SchedWriteRes<[ZnFPU0]> {
  let Latency = 4;
}
// ymm: one more cycle of latency, ZnFPU0 held for two cycles.
def ZnWritePMULLDYr : SchedWriteRes<[ZnFPU0]> {
  let ResourceCycles = [2];
  let Latency = 5;
}
def : InstRW<[ZnWritePMULLDr],
             (instregex "(V?)PMULLDrr")>;
def : InstRW<[ZnWritePMULLDYr],
             (instregex "(V?)PMULLDYrr")>;

// Memory forms (ZnAGU added, two micro-ops).
// x,m.
def ZnWritePMULLDm : SchedWriteRes<[ZnAGU, ZnFPU0]> {
  let NumMicroOps = 2;
  let Latency = 11;
}
// y,m.
def ZnWritePMULLDYm : SchedWriteRes<[ZnAGU, ZnFPU0]> {
  let NumMicroOps = 2;
  let Latency = 12;
  let ResourceCycles = [1, 2];
}
def : InstRW<[ZnWritePMULLDm],
             (instregex "(V?)PMULLDrm")>;
def : InstRW<[ZnWritePMULLDYm],
             (instregex "(V?)PMULLDYrm")>;
|
|
1119
|
|
1120 //-- Logic instructions --//
|
|
1121
|
|
// PTEST.

// v,v: ZnFPU12 held for two cycles.
let ResourceCycles = [2] in
def ZnWritePTESTr : SchedWriteRes<[ZnFPU12]>;
def : InstRW<[ZnWritePTESTr],
             (instregex "(V?)PTEST(Y?)rr")>;

// v,m: adds ZnAGU for one cycle; two micro-ops.
def ZnWritePTESTm : SchedWriteRes<[ZnAGU, ZnFPU12]> {
  let ResourceCycles = [1, 2];
  let NumMicroOps = 2;
  let Latency = 8;
}
def : InstRW<[ZnWritePTESTm],
             (instregex "(V?)PTEST(Y?)rm")>;
|
|
1136
|
|
// Packed shifts. All writes execute on ZnFPU2; the Ld variants add ZnAGU.
def ZnWritePShift    : SchedWriteRes<[ZnFPU2]>;
def ZnWritePShiftY   : SchedWriteRes<[ZnFPU2]>        { let Latency = 2; }
def ZnWritePShiftLd  : SchedWriteRes<[ZnAGU,ZnFPU2]>  { let Latency = 8; }
def ZnWritePShiftYLd : SchedWriteRes<[ZnAGU, ZnFPU2]> { let Latency = 9; }

// PSLL,PSRL,PSRA W/D/Q.
// Register shift count: x,x / v,v,x.
def : InstRW<[ZnWritePShift],
             (instregex "(V?)PS(LL|RL|RA)(W|D|Q)rr")>;
def : InstRW<[ZnWritePShiftY],
             (instregex "(V?)PS(LL|RL|RA)(W|D|Q)Yrr")>;

// Memory shift count.
def : InstRW<[ZnWritePShiftLd],
             (instregex "(V?)PS(LL|RL|RA)(W|D|Q)rm")>;
def : InstRW<[ZnWritePShiftYLd],
             (instregex "(V?)PS(LL|RL|RA)(W|D|Q)Yrm")>;

// PSLL,PSRL DQ (immediate byte shifts).
def : InstRW<[ZnWritePShift],
             (instregex "(V?)PS(R|L)LDQri")>;
def : InstRW<[ZnWritePShiftY],
             (instregex "(V?)PS(R|L)LDQYri")>;
|
|
1158
|
|
1159 //=== Floating Point XMM and YMM Instructions ===//
|
|
1160 //-- Move instructions --//
|
|
1161
|
|
// MOVMSKP S/D.
// r32 <- x,y. Default latency, executes on ZnFPU2.
def ZnWriteMOVMSKPr : SchedWriteRes<[ZnFPU2]>;
def : InstRW<[ZnWriteMOVMSKPr],
             (instregex "(V?)MOVMSKP(S|D)(Y?)rr")>;

// VPERM2F128, register and memory source.
def : InstRW<[WriteMicrocoded],
             (instregex "VPERM2F128rr")>;
def : InstRW<[WriteMicrocoded],
             (instregex "VPERM2F128rm")>;
|
|
1170
|
|
// BLENDVP S/D.
// NOTE(review): these writes are named "FPU01" but are defined on the
// ZnFPU013 resource group - confirm which is intended.
def ZnWriteFPU01Lat3 : SchedWriteRes<[ZnFPU013]> {
  let Latency = 3;
}
// Memory form: ZnAGU for one cycle, ZnFPU013 for two; two micro-ops.
def ZnWriteFPU01Lat3Ld : SchedWriteRes<[ZnAGU, ZnFPU013]> {
  let NumMicroOps = 2;
  let Latency = 11;
  let ResourceCycles = [1, 2];
}
def : InstRW<[ZnWriteFPU01Lat3],
             (instregex "BLENDVP(S|D)rr0")>;
def : InstRW<[ZnWriteFPU01Lat3Ld, ReadAfterLd],
             (instregex "BLENDVP(S|D)rm0")>;

// VBROADCASTF128.
// ZnAGU plus ZnFPU13, two micro-ops, latency 8.
def ZnWriteBROADCAST : SchedWriteRes<[ZnAGU, ZnFPU13]> {
  let Latency = 8;
  let NumMicroOps = 2;
}
def : InstRW<[ZnWriteBROADCAST],
             (instregex "VBROADCASTF128")>;
|
|
1189
|
|
// EXTRACTPS.

// r32,x,i: ZnFPU12 for one cycle, ZnFPU2 for two; two micro-ops.
def ZnWriteEXTRACTPSr : SchedWriteRes<[ZnFPU12, ZnFPU2]> {
  let ResourceCycles = [1, 2];
  let NumMicroOps = 2;
  let Latency = 2;
}
def : InstRW<[ZnWriteEXTRACTPSr],
             (instregex "(V?)EXTRACTPSrr")>;

// m32,x,i.
// Resource cycles are listed in the order [ZnAGU, ZnFPU12, ZnFPU2].
// NOTE(review): ZnAGU is charged 5 cycles here, whereas the other
// AGU-using writes in this file charge it 1 - confirm this is intended.
def ZnWriteEXTRACTPSm : SchedWriteRes<[ZnAGU,ZnFPU12, ZnFPU2]> {
  let ResourceCycles = [5, 1, 2];
  let NumMicroOps = 2;
  let Latency = 5;
}
def : InstRW<[ZnWriteEXTRACTPSm],
             (instregex "(V?)EXTRACTPSmr")>;
|
|
1206
|
|
// VEXTRACTF128.
// x,y,i.
def : InstRW<[ZnWriteFPU013],
             (instregex "VEXTRACTF128rr")>;
// m128,y,i.
def : InstRW<[ZnWriteFPU013m],
             (instregex "VEXTRACTF128mr")>;

// VINSERTF128.
// Register source: ZnFPU013 held for two cycles, latency 2.
def ZnWriteVINSERT128r: SchedWriteRes<[ZnFPU013]> {
  let ResourceCycles = [2];
  let Latency = 2;
}
// Memory source: ZnAGU added, two micro-ops, latency 9.
def ZnWriteVINSERT128Ld: SchedWriteRes<[ZnAGU,ZnFPU013]> {
  let ResourceCycles = [1, 2];
  let NumMicroOps = 2;
  let Latency = 9;
}
// y,y,x,i.
def : InstRW<[ZnWriteVINSERT128r],
             (instregex "VINSERTF128rr")>;
// y,y,m128,i.
def : InstRW<[ZnWriteVINSERT128Ld],
             (instregex "VINSERTF128rm")>;
|
|
1227
|
|
// VMASKMOVP S/D. All writes use ZnAGU and ZnFPU01.

// Load, x,x,m.
let Latency = 8 in
def ZnWriteVMASKMOVPLd : SchedWriteRes<[ZnAGU, ZnFPU01]>;
// Load, y,y,m: two micro-ops, ZnFPU01 held for two cycles.
def ZnWriteVMASKMOVPLdY : SchedWriteRes<[ZnAGU, ZnFPU01]> {
  let NumMicroOps = 2;
  let Latency = 8;
  let ResourceCycles = [1, 2];
}
// Store, 128-bit.
let Latency = 4 in
def ZnWriteVMASKMOVPm : SchedWriteRes<[ZnAGU, ZnFPU01]>;

def : InstRW<[ZnWriteVMASKMOVPLd],
             (instregex "VMASKMOVP(S|D)rm")>;
def : InstRW<[ZnWriteVMASKMOVPLdY],
             (instregex "VMASKMOVP(S|D)Yrm")>;
def : InstRW<[ZnWriteVMASKMOVPm],
             (instregex "VMASKMOVP(S|D)mr")>;

// Store, m256,y,y: two micro-ops, ZnFPU01 held for two cycles.
def ZnWriteVMASKMOVPYmr : SchedWriteRes<[ZnAGU,ZnFPU01]> {
  let NumMicroOps = 2;
  let Latency = 5;
  let ResourceCycles = [1, 2];
}
def : InstRW<[ZnWriteVMASKMOVPYmr],
             (instregex "VMASKMOVP(S|D)Ymr")>;
|
|
1253
|
|
// Floating-point gathers. Every form is mapped to WriteMicrocoded; the
// mappings are kept as one InstRW per instruction.

// VGATHERDPS.
def : InstRW<[WriteMicrocoded], (instregex "VGATHERDPSrm")>;   // x.
def : InstRW<[WriteMicrocoded], (instregex "VGATHERDPSYrm")>;  // y.

// VGATHERQPS.
def : InstRW<[WriteMicrocoded], (instregex "VGATHERQPSrm")>;   // x.
def : InstRW<[WriteMicrocoded], (instregex "VGATHERQPSYrm")>;  // y.

// VGATHERDPD.
def : InstRW<[WriteMicrocoded], (instregex "VGATHERDPDrm")>;   // x.
def : InstRW<[WriteMicrocoded], (instregex "VGATHERDPDYrm")>;  // y.

// VGATHERQPD.
def : InstRW<[WriteMicrocoded], (instregex "VGATHERQPDrm")>;   // x.
def : InstRW<[WriteMicrocoded], (instregex "VGATHERQPDYrm")>;  // y.
|
|
1280
|
|
1281 //-- Conversion instructions --//
|
|
// CVTPD2PS.

// x,x: ZnFPU3, latency 4.
let Latency = 4 in
def ZnWriteCVTPD2PSr: SchedWriteRes<[ZnFPU3]>;
def : InstRW<[ZnWriteCVTPD2PSr],
             (instregex "(V?)CVTPD2PSrr")>;

// x,m128: ZnAGU for one cycle, ZnFPU03 for two; two micro-ops.
def ZnWriteCVTPD2PSLd: SchedWriteRes<[ZnAGU,ZnFPU03]> {
  let ResourceCycles = [1,2];
  let NumMicroOps = 2;
  let Latency = 11;
}
def : InstRW<[ZnWriteCVTPD2PSLd],
             (instregex "(V?)CVTPD2PS(X?)rm")>;

// x,y: ZnFPU3, latency 5.
let Latency = 5 in
def ZnWriteCVTPD2PSYr : SchedWriteRes<[ZnFPU3]>;
def : InstRW<[ZnWriteCVTPD2PSYr],
             (instregex "(V?)CVTPD2PSYrr")>;

// x,m256: ZnAGU plus ZnFPU3, latency 11.
let Latency = 11 in
def ZnWriteCVTPD2PSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]>;
def : InstRW<[ZnWriteCVTPD2PSYLd],
             (instregex "(V?)CVTPD2PSYrm")>;
|
|
1308
|
|
// CVTSD2SS.
// Reuses the CVTPD2PS writes.
// x,x.
def : InstRW<[ZnWriteCVTPD2PSr],
             (instregex "(Int_)?(V)?CVTSD2SSrr")>;
// x,m64.
def : InstRW<[ZnWriteCVTPD2PSLd],
             (instregex "(Int_)?(V)?CVTSD2SSrm")>;
|
|
1316
|
|
// CVTPS2PD.

// x,x: ZnFPU3, latency 3.
let Latency = 3 in
def ZnWriteCVTPS2PDr : SchedWriteRes<[ZnFPU3]>;
def : InstRW<[ZnWriteCVTPS2PDr],
             (instregex "(V?)CVTPS2PDrr")>;

// x,m64 and y,m128: ZnAGU plus ZnFPU3, two micro-ops, latency 10.
def ZnWriteCVTPS2PDLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
  let NumMicroOps = 2;
  let Latency = 10;
}
def : InstRW<[ZnWriteCVTPS2PDLd],
             (instregex "(V?)CVTPS2PD(Y?)rm")>;

// y,x: same parameters as the xmm register form, kept as its own write.
let Latency = 3 in
def ZnWriteVCVTPS2PDY : SchedWriteRes<[ZnFPU3]>;
def : InstRW<[ZnWriteVCVTPS2PDY],
             (instregex "VCVTPS2PDYrr")>;
|
|
1337
|
|
// CVTSS2SD.

// x,x: ZnFPU3, latency 4.
let Latency = 4 in
def ZnWriteCVTSS2SDr : SchedWriteRes<[ZnFPU3]>;
def : InstRW<[ZnWriteCVTSS2SDr],
             (instregex "(Int_)?(V?)CVTSS2SDrr")>;

// x,m32: ZnAGU for one cycle, ZnFPU3 for two; two micro-ops.
def ZnWriteCVTSS2SDLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
  let ResourceCycles = [1, 2];
  let NumMicroOps = 2;
  let Latency = 11;
}
def : InstRW<[ZnWriteCVTSS2SDLd],
             (instregex "(Int_)?(V?)CVTSS2SDrm")>;
|
|
1352
|
|
// CVTDQ2PD.
// ZnFPU12 plus ZnFPU3, latency 5.
def ZnWriteCVTDQ2PDr: SchedWriteRes<[ZnFPU12,ZnFPU3]> {
  let Latency = 5;
}
// x,x.
def : InstRW<[ZnWriteCVTDQ2PDr], (instregex "(V)?CVTDQ2PDrr")>;

// Same as xmm
// y,x.
def : InstRW<[ZnWriteCVTDQ2PDr], (instregex "VCVTDQ2PDYrr")>;

// CVT(T)PD2DQ.
// ZnFPU12 plus ZnFPU3, latency 5 (same parameters as ZnWriteCVTDQ2PDr).
def ZnWriteCVTPD2DQr: SchedWriteRes<[ZnFPU12, ZnFPU3]> {
  let Latency = 5;
}
// x,x.
// Uses the PD2DQ write defined directly above, matching the ymm form below.
// This mapping previously named ZnWriteCVTDQ2PDr; both writes carry the
// same resources and latency, so the modelled numbers are unchanged.
def : InstRW<[ZnWriteCVTPD2DQr], (instregex "(V?)CVT(T?)PD2DQrr")>;

// Memory source: adds ZnAGU, two micro-ops, latency 12.
def ZnWriteCVTPD2DQLd: SchedWriteRes<[ZnAGU,ZnFPU12,ZnFPU3]> {
  let Latency = 12;
  let NumMicroOps = 2;
}
// x,m128.
def : InstRW<[ZnWriteCVTPD2DQLd], (instregex "(V?)CVT(T?)PD2DQrm")>;
// same as xmm handling
// x,y.
def : InstRW<[ZnWriteCVTPD2DQr], (instregex "VCVT(T?)PD2DQYrr")>;
// x,m256.
def : InstRW<[ZnWriteCVTPD2DQLd], (instregex "VCVT(T?)PD2DQYrm")>;
def : InstRW<[ZnWriteCVTPD2DQLd], (instregex "VCVT(T?)PD2DQ(64)?rm")>;
|
|
1383
|
|
// MMX <-> XMM conversions.

// ZnFPU3, latency 4.
let Latency = 4 in
def ZnWriteCVTPS2PIr: SchedWriteRes<[ZnFPU3]>;

// CVT(T)PS2PI.
// mm,x.
def : InstRW<[ZnWriteCVTPS2PIr],
             (instregex "MMX_CVT(T?)PS2PIirr")>;

// CVTPI2PD.
// x,mm. Reuses the CVTPS2PD register write.
def : InstRW<[ZnWriteCVTPS2PDr],
             (instregex "MMX_CVT(T?)PI2PDirr")>;

// CVT(T)PD2PI.
// mm,x.
def : InstRW<[ZnWriteCVTPS2PIr],
             (instregex "MMX_CVT(T?)PD2PIirr")>;
|
|
1398
|
|
// CVTSI2SS.
// x,r32. ZnFPU3, latency 5.
let Latency = 5 in
def ZnWriteCVSTSI2SSr: SchedWriteRes<[ZnFPU3]>;
def : InstRW<[ZnWriteCVSTSI2SSr],
             (instregex "(Int_)?(V?)CVT(T?)SI2SS(64)?rr")>;

// CVT(T)SS2SI.
// Reuses the CVTPD2DQ writes.
// r32,x.
def : InstRW<[ZnWriteCVTPD2DQr],
             (instregex "(Int_)?(V?)CVT(T?)SS2SI(64)?rr")>;
// r32,m32.
def : InstRW<[ZnWriteCVTPD2DQLd],
             (instregex "(Int_)?(V?)CVT(T?)SS2SI(64)?rm")>;
|
|
1413
|
|
// CVTSI2SD.
// ZnFPU013 plus ZnFPU3, latency 5.
def ZnWriteCVSTSI2SDr: SchedWriteRes<[ZnFPU013, ZnFPU3]> {
  let Latency = 5;
}
// x,r32/64.
// The pattern must name CVTSI2SD. It previously matched CVTSI2SS, which
// duplicated the CVTSI2SS mapping earlier in this file and left the
// CVTSI2SD register forms without this write.
def : InstRW<[ZnWriteCVSTSI2SDr],
             (instregex "(Int_)?(V?)CVTSI2SD(64)?rr")>;
|
|
1420
|
|
1421
|
|
// CVT(T)SD2SI.
// Register source: ZnFPU3 plus ZnFPU2, latency 5.
let Latency = 5 in
def ZnWriteCVSTSI2SIr: SchedWriteRes<[ZnFPU3, ZnFPU2]>;
// Memory source: adds ZnAGU, latency 12.
let Latency = 12 in
def ZnWriteCVSTSI2SILd: SchedWriteRes<[ZnAGU, ZnFPU3, ZnFPU2]>;

// r32/64
def : InstRW<[ZnWriteCVSTSI2SIr],
             (instregex "(Int_)?CVT(T?)SD2SI(64)?rr")>;
// r32,m32.
def : InstRW<[ZnWriteCVSTSI2SILd],
             (instregex "(Int_)?CVT(T?)SD2SI(64)?rm")>;

// VCVT(T)SD2SI.
// NOTE(review): the two V-prefixed writes below are defined but the VEX
// mappings that follow use the non-V writes (which also consume ZnFPU2).
// Confirm which resource set is intended before switching.
let Latency = 5 in
def ZnWriteVCVSTSI2SIr: SchedWriteRes<[ZnFPU3]>;
let Latency = 12 in
def ZnWriteVCVSTSI2SILd: SchedWriteRes<[ZnFPU3, ZnAGU]>;

// r32/64
def : InstRW<[ZnWriteCVSTSI2SIr],
             (instregex "(Int_)?VCVT(T?)SD2SI(64)?rr")>;
// r32,m32.
def : InstRW<[ZnWriteCVSTSI2SILd],
             (instregex "(Int_)?VCVT(T?)SD2SI(64)?rm")>;
|
|
1446
|
|
// Half-precision conversions; all forms are mapped to WriteMicrocoded.

// VCVTPS2PH.
def : InstRW<[WriteMicrocoded], (instregex "VCVTPS2PH(Y?)rr")>;  // x,v,i.
def : InstRW<[WriteMicrocoded], (instregex "VCVTPS2PH(Y?)mr")>;  // m,v,i.

// VCVTPH2PS.
def : InstRW<[WriteMicrocoded], (instregex "VCVTPH2PS(Y?)rr")>;  // v,x.
def : InstRW<[WriteMicrocoded], (instregex "VCVTPH2PS(Y?)rm")>;  // v,m.
|
|
1458
|
|
1459 //-- SSE4A instructions --//
|
|
// EXTRQ: ZnFPU12 plus ZnFPU2, latency 2.
let Latency = 2 in
def ZnWriteEXTRQ: SchedWriteRes<[ZnFPU12, ZnFPU2]>;
def : InstRW<[ZnWriteEXTRQ],
             (instregex "EXTRQ")>;

// INSERTQ: ZnFPU03 plus ZnFPU1, latency 4.
let Latency = 4 in
def ZnWriteINSERTQ: SchedWriteRes<[ZnFPU03,ZnFPU1]>;
def : InstRW<[ZnWriteINSERTQ],
             (instregex "INSERTQ")>;

// MOVNTSS/MOVNTSD: ZnAGU plus ZnFPU2, latency 8.
let Latency = 8 in
def ZnWriteMOVNT: SchedWriteRes<[ZnAGU,ZnFPU2]>;
def : InstRW<[ZnWriteMOVNT],
             (instregex "MOVNTS(S|D)")>;
|
|
1477
|
|
1478 //-- SHA instructions --//
|
|
// SHA256MSG2: mapped to WriteMicrocoded.
def : InstRW<[WriteMicrocoded],
             (instregex "SHA256MSG2(Y?)r(r|m)")>;

// SHA1MSG1, SHA256MSG1.
// x,x: ZnFPU12 held for two cycles, latency 2.
def ZnWriteSHA1MSG1r : SchedWriteRes<[ZnFPU12]> {
  let ResourceCycles = [2];
  let Latency = 2;
}
def : InstRW<[ZnWriteSHA1MSG1r],
             (instregex "SHA(1|256)MSG1rr")>;
// x,m: adds ZnAGU for one cycle, latency 9.
def ZnWriteSHA1MSG1Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> {
  let ResourceCycles = [1,2];
  let Latency = 9;
}
def : InstRW<[ZnWriteSHA1MSG1Ld],
             (instregex "SHA(1|256)MSG1rm")>;

// SHA1MSG2.
// x,x: ZnFPU12 with default parameters.
def ZnWriteSHA1MSG2r : SchedWriteRes<[ZnFPU12]>;
def : InstRW<[ZnWriteSHA1MSG2r],
             (instregex "SHA1MSG2rr")>;
// x,m: adds ZnAGU, latency 8.
let Latency = 8 in
def ZnWriteSHA1MSG2Ld : SchedWriteRes<[ZnAGU, ZnFPU12]>;
def : InstRW<[ZnWriteSHA1MSG2Ld],
             (instregex "SHA1MSG2rm")>;
|
|
1505
|
|
// Remaining SHA instructions. All execute on ZnFPU1; the memory forms add
// ZnAGU.

// SHA1NEXTE.
// x,x.
def ZnWriteSHA1NEXTEr : SchedWriteRes<[ZnFPU1]>;
def : InstRW<[ZnWriteSHA1NEXTEr],
             (instregex "SHA1NEXTErr")>;
// x,m.
let Latency = 8 in
def ZnWriteSHA1NEXTELd : SchedWriteRes<[ZnAGU, ZnFPU1]>;
def : InstRW<[ZnWriteSHA1NEXTELd],
             (instregex "SHA1NEXTErm")>;

// SHA1RNDS4.
// x,x.
let Latency = 6 in
def ZnWriteSHA1RNDS4r : SchedWriteRes<[ZnFPU1]>;
def : InstRW<[ZnWriteSHA1RNDS4r],
             (instregex "SHA1RNDS4rr")>;
// x,m.
let Latency = 13 in
def ZnWriteSHA1RNDS4Ld : SchedWriteRes<[ZnAGU, ZnFPU1]>;
def : InstRW<[ZnWriteSHA1RNDS4Ld],
             (instregex "SHA1RNDS4rm")>;

// SHA256RNDS2.
// x,x.
let Latency = 4 in
def ZnWriteSHA256RNDS2r : SchedWriteRes<[ZnFPU1]>;
def : InstRW<[ZnWriteSHA256RNDS2r],
             (instregex "SHA256RNDS2rr")>;
// x,m.
let Latency = 11 in
def ZnWriteSHA256RNDS2Ld : SchedWriteRes<[ZnAGU, ZnFPU1]>;
def : InstRW<[ZnWriteSHA256RNDS2Ld],
             (instregex "SHA256RNDS2rm")>;
|
|
1539
|
|
1540 //-- Arithmetic instructions --//
|
|
1541
|
|
// HADD, HSUB PS/PD: mapped to WriteMicrocoded.
def : InstRW<[WriteMicrocoded],
             (instregex "(V?)H(ADD|SUB)P(S|D)(Y?)r(r|m)")>;

// MUL SS/SD PS/PD. All forms execute on ZnFPU01.

// Register forms.
// x,x / v,v,v.
let Latency = 3 in
def ZnWriteMULr : SchedWriteRes<[ZnFPU01]>;
// ymm.
let Latency = 4 in
def ZnWriteMULYr : SchedWriteRes<[ZnFPU01]>;
def : InstRW<[ZnWriteMULr],
             (instregex "(V?)MUL(P|S)(S|D)rr")>;
def : InstRW<[ZnWriteMULYr],
             (instregex "(V?)MUL(P|S)(S|D)Yrr")>;

// Memory forms (ZnAGU added, two micro-ops).
// x,m / v,v,m.
def ZnWriteMULLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
  let NumMicroOps = 2;
  let Latency = 10;
}
def : InstRW<[ZnWriteMULLd],
             (instregex "(V?)MUL(P|S)(S|D)rm")>;

// ymm.
def ZnWriteMULYLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
  let NumMicroOps = 2;
  let Latency = 11;
}
def : InstRW<[ZnWriteMULYLd],
             (instregex "(V?)MUL(P|S)(S|D)Yrm")>;
|
|
1570
|
|
// 256-bit divides. Each write holds ZnFPU3 for as many cycles as its latency.

// VDIVPS.
// y,y,y.
def ZnWriteVDIVPSYr : SchedWriteRes<[ZnFPU3]> {
  let ResourceCycles = [12];
  let Latency = 12;
}
def : InstRW<[ZnWriteVDIVPSYr],
             (instregex "VDIVPSYrr")>;

// y,y,m256.
def ZnWriteVDIVPSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
  let ResourceCycles = [1, 19];
  let NumMicroOps = 2;
  let Latency = 19;
}
def : InstRW<[ZnWriteVDIVPSYLd],
             (instregex "VDIVPSYrm")>;

// VDIVPD.
// y,y,y.
def ZnWriteVDIVPDY : SchedWriteRes<[ZnFPU3]> {
  let ResourceCycles = [15];
  let Latency = 15;
}
def : InstRW<[ZnWriteVDIVPDY],
             (instregex "VDIVPDYrr")>;

// y,y,m256.
def ZnWriteVDIVPDYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
  let ResourceCycles = [1,22];
  let NumMicroOps = 2;
  let Latency = 22;
}
def : InstRW<[ZnWriteVDIVPDYLd],
             (instregex "VDIVPDYrm")>;
|
|
1602
|
|
// VRCPPS (256-bit).
// y,y: ZnFPU01, latency 5.
let Latency = 5 in
def ZnWriteVRCPPSr : SchedWriteRes<[ZnFPU01]>;
def : InstRW<[ZnWriteVRCPPSr],
             (instregex "VRCPPSYr(_Int)?")>;

// y,m256: adds ZnAGU, three micro-ops, latency 12.
def ZnWriteVRCPPSLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
  let NumMicroOps = 3;
  let Latency = 12;
}
def : InstRW<[ZnWriteVRCPPSLd],
             (instregex "VRCPPSYm(_Int)?")>;

// ROUND SS/SD PS/PD, register source.
// v,v,i: ZnFPU3, latency 4.
let Latency = 4 in
def ZnWriteROUNDr : SchedWriteRes<[ZnFPU3]>;
def : InstRW<[ZnWriteROUNDr],
             (instregex "(V?)ROUND(Y?)(S|P)(S|D)r(_Int)?")>;
|
|
1623
|
|
// VFMADD family (FMA3 r132/r213/r231 forms and FMA4 "4rr"/"4rm"/"4mr" forms).

// v,v,v: ZnFPU03, latency 5.
let Latency = 5 in
def ZnWriteFMADDr : SchedWriteRes<[ZnFPU03]>;
def : InstRW<[ZnWriteFMADDr],
             (instregex
               "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)r(Y)?",
               "VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)r",
               "VF(N?)M(ADD|SUB)S(S|D)4rr(_REV|_Int)?",
               "VF(N?)M(ADD|SUB)P(S|D)4rr(Y)?(_REV)?")>;

// v,v,m: adds ZnAGU, two micro-ops, latency 12.
def ZnWriteFMADDm : SchedWriteRes<[ZnAGU, ZnFPU03]> {
  let NumMicroOps = 2;
  let Latency = 12;
}
def : InstRW<[ZnWriteFMADDm],
             (instregex
               "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)m(Y)?",
               "VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)m",
               "VF(N?)M(ADD|SUB)S(S|D)4(rm|mr)(_Int)?",
               "VF(N?)M(ADD|SUB)P(S|D)4(rm|mr)(Y)?")>;

// ROUND SS/SD PS/PD, memory source.
// v,m,i: ZnAGU plus ZnFPU3, two micro-ops, latency 11.
def ZnWriteROUNDm : SchedWriteRes<[ZnAGU, ZnFPU3]> {
  let NumMicroOps = 2;
  let Latency = 11;
}
def : InstRW<[ZnWriteROUNDm],
             (instregex "(V?)ROUND(Y?)(S|P)(S|D)m(_Int)?")>;
|
|
1654
|
|
// Dot products; all forms are mapped to WriteMicrocoded.

// DPPS.
def : InstRW<[WriteMicrocoded], (instregex "(V?)DPPS(Y?)rri")>;  // x,x,i / v,v,v,i.
def : InstRW<[WriteMicrocoded], (instregex "(V?)DPPS(Y?)rmi")>;  // x,m,i / v,v,m,i.

// DPPD.
def : InstRW<[WriteMicrocoded], (instregex "(V?)DPPDrri")>;      // x,x,i.
def : InstRW<[WriteMicrocoded], (instregex "(V?)DPPDrmi")>;      // x,m,i.
|
|
1668
|
|
// 256-bit square roots. Each write holds ZnFPU3 for as many cycles as its
// latency.

// VSQRTPS.
// y,y.
def ZnWriteVSQRTPSYr : SchedWriteRes<[ZnFPU3]> {
  let ResourceCycles = [28];
  let Latency = 28;
}
def : InstRW<[ZnWriteVSQRTPSYr],
             (instregex "VSQRTPSYr")>;

// y,m256.
def ZnWriteVSQRTPSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
  let NumMicroOps = 2;
  let Latency = 35;
  let ResourceCycles = [1,35];
}
def : InstRW<[ZnWriteVSQRTPSYLd],
             (instregex "VSQRTPSYm")>;

// VSQRTPD.
// y,y.
def ZnWriteVSQRTPDYr : SchedWriteRes<[ZnFPU3]> {
  let ResourceCycles = [40];
  let Latency = 40;
}
def : InstRW<[ZnWriteVSQRTPDYr],
             (instregex "VSQRTPDYr")>;

// y,m256.
def ZnWriteVSQRTPDYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
  let ResourceCycles = [1,47];
  let NumMicroOps = 2;
  let Latency = 47;
}
def : InstRW<[ZnWriteVSQRTPDYLd],
             (instregex "VSQRTPDYm")>;
|
|
1700
|
|
// RSQRTSS, register source.
// x,x: ZnFPU02, latency 5.
let Latency = 5 in
def ZnWriteRSQRTSSr : SchedWriteRes<[ZnFPU02]>;
def : InstRW<[ZnWriteRSQRTSSr],
             (instregex "(V?)RSQRTSS(Y?)r(_Int)?")>;

// RSQRTPS, register source.
// x,x: ZnFPU01, latency 5.
// NOTE(review): the optional "(Y?)" lets this pattern also match
// VRSQRTPSYr, which has its own mapping further down - confirm which
// mapping is meant to apply to the 256-bit form.
let Latency = 5 in
def ZnWriteRSQRTPSr : SchedWriteRes<[ZnFPU01]>;
def : InstRW<[ZnWriteRSQRTPSr],
             (instregex "(V?)RSQRTPS(Y?)r(_Int)?")>;

// RSQRTSS, memory source.
// x,m128: ZnAGU for one cycle, ZnFPU02 for two; two micro-ops.
def ZnWriteRSQRTSSLd: SchedWriteRes<[ZnAGU, ZnFPU02]> {
  let ResourceCycles = [1,2];
  let NumMicroOps = 2;
  let Latency = 12;
}
def : InstRW<[ZnWriteRSQRTSSLd],
             (instregex "(V?)RSQRTSSm(_Int)?")>;

// RSQRTPS, memory source: ZnAGU plus ZnFPU01, two micro-ops.
def ZnWriteRSQRTPSLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
  let NumMicroOps = 2;
  let Latency = 12;
}
def : InstRW<[ZnWriteRSQRTPSLd],
             (instregex "(V?)RSQRTPSm(_Int)?")>;

// RSQRTPS 256.
// y,y: two micro-ops, ZnFPU01 held for two cycles.
def ZnWriteRSQRTPSYr : SchedWriteRes<[ZnFPU01]> {
  let ResourceCycles = [2];
  let NumMicroOps = 2;
  let Latency = 5;
}
def : InstRW<[ZnWriteRSQRTPSYr],
             (instregex "VRSQRTPSYr(_Int)?")>;

// y,m256.
def ZnWriteRSQRTPSYLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
  let NumMicroOps = 2;
  let Latency = 12;
}
def : InstRW<[ZnWriteRSQRTPSYLd],
             (instregex "VRSQRTPSYm(_Int)?")>;
|
|
1746
|
|
1747 //-- Logic instructions --//
|
|
1748
|
|
// AND, ANDN, OR, XOR PS/PD.
// Register sources (x,x / v,v,v) use the generic WriteVecLogic class.
def : InstRW<[WriteVecLogic],
             (instregex "(V?)(AND|ANDN|OR|XOR)P(S|D)(Y?)rr")>;
// Memory source (x,m / v,v,m) uses WriteVecLogicLd.
def : InstRW<[WriteVecLogicLd],
             (instregex "(V?)(AND|ANDN|OR|XOR)P(S|D)(Y?)rm")>;

//-- Other instructions --//
// Each of the following is mapped to WriteMicrocoded.

// VZEROUPPER.
def : InstRW<[WriteMicrocoded],
             (instregex "VZEROUPPER")>;

// VZEROALL.
def : InstRW<[WriteMicrocoded],
             (instregex "VZEROALL")>;

// LDMXCSR.
def : InstRW<[WriteMicrocoded],
             (instregex "(V)?LDMXCSR")>;

// STMXCSR.
def : InstRW<[WriteMicrocoded],
             (instregex "(V)?STMXCSR")>;
|
|
1769
|
|
1770 } // SchedModel
|