Mercurial > hg > CbC > CbC_llvm
comparison lib/Target/NVPTX/NVPTXInstrInfo.td @ 0:95c75e76d11b LLVM3.4
LLVM 3.4
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp> |
---|---|
date | Thu, 12 Dec 2013 13:56:28 +0900 |
parents | |
children | 54457678186b |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:95c75e76d11b |
---|---|
1 //===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===// | |
2 // | |
3 // The LLVM Compiler Infrastructure | |
4 // | |
5 // This file is distributed under the University of Illinois Open Source | |
6 // License. See LICENSE.TXT for details. | |
7 // | |
8 //===----------------------------------------------------------------------===// | |
9 // | |
10 // This file describes the PTX instructions in TableGen format. | |
11 // | |
12 //===----------------------------------------------------------------------===// | |
13 | |
14 include "NVPTXInstrFormats.td" | |
15 | |
16 // A NOP instruction | |
17 def NOP : NVPTXInst<(outs), (ins), "", []>; | |
18 | |
19 // List of vector specific properties | |
20 def isVecLD : VecInstTypeEnum<1>; | |
21 def isVecST : VecInstTypeEnum<2>; | |
22 def isVecBuild : VecInstTypeEnum<3>; | |
23 def isVecShuffle : VecInstTypeEnum<4>; | |
24 def isVecExtract : VecInstTypeEnum<5>; | |
25 def isVecInsert : VecInstTypeEnum<6>; | |
26 def isVecDest : VecInstTypeEnum<7>; | |
27 def isVecOther : VecInstTypeEnum<15>; | |
28 | |
29 //===----------------------------------------------------------------------===// | |
30 // NVPTX Operand Definitions. | |
31 //===----------------------------------------------------------------------===// | |
32 | |
33 def brtarget : Operand<OtherVT>; | |
34 | |
35 // CVT conversion modes | |
36 // These must match the enum in NVPTX.h | |
37 def CvtNONE : PatLeaf<(i32 0x0)>; | |
38 def CvtRNI : PatLeaf<(i32 0x1)>; | |
39 def CvtRZI : PatLeaf<(i32 0x2)>; | |
40 def CvtRMI : PatLeaf<(i32 0x3)>; | |
41 def CvtRPI : PatLeaf<(i32 0x4)>; | |
42 def CvtRN : PatLeaf<(i32 0x5)>; | |
43 def CvtRZ : PatLeaf<(i32 0x6)>; | |
44 def CvtRM : PatLeaf<(i32 0x7)>; | |
45 def CvtRP : PatLeaf<(i32 0x8)>; | |
46 | |
47 def CvtNONE_FTZ : PatLeaf<(i32 0x10)>; | |
48 def CvtRNI_FTZ : PatLeaf<(i32 0x11)>; | |
49 def CvtRZI_FTZ : PatLeaf<(i32 0x12)>; | |
50 def CvtRMI_FTZ : PatLeaf<(i32 0x13)>; | |
51 def CvtRPI_FTZ : PatLeaf<(i32 0x14)>; | |
52 def CvtRN_FTZ : PatLeaf<(i32 0x15)>; | |
53 def CvtRZ_FTZ : PatLeaf<(i32 0x16)>; | |
54 def CvtRM_FTZ : PatLeaf<(i32 0x17)>; | |
55 def CvtRP_FTZ : PatLeaf<(i32 0x18)>; | |
56 | |
57 def CvtSAT : PatLeaf<(i32 0x20)>; | |
58 def CvtSAT_FTZ : PatLeaf<(i32 0x30)>; | |
59 | |
60 def CvtMode : Operand<i32> { | |
61 let PrintMethod = "printCvtMode"; | |
62 } | |
63 | |
64 // Compare modes | |
65 // These must match the enum in NVPTX.h | |
66 def CmpEQ : PatLeaf<(i32 0)>; | |
67 def CmpNE : PatLeaf<(i32 1)>; | |
68 def CmpLT : PatLeaf<(i32 2)>; | |
69 def CmpLE : PatLeaf<(i32 3)>; | |
70 def CmpGT : PatLeaf<(i32 4)>; | |
71 def CmpGE : PatLeaf<(i32 5)>; | |
72 def CmpLO : PatLeaf<(i32 6)>; | |
73 def CmpLS : PatLeaf<(i32 7)>; | |
74 def CmpHI : PatLeaf<(i32 8)>; | |
75 def CmpHS : PatLeaf<(i32 9)>; | |
76 def CmpEQU : PatLeaf<(i32 10)>; | |
77 def CmpNEU : PatLeaf<(i32 11)>; | |
78 def CmpLTU : PatLeaf<(i32 12)>; | |
79 def CmpLEU : PatLeaf<(i32 13)>; | |
80 def CmpGTU : PatLeaf<(i32 14)>; | |
81 def CmpGEU : PatLeaf<(i32 15)>; | |
82 def CmpNUM : PatLeaf<(i32 16)>; | |
83 def CmpNAN : PatLeaf<(i32 17)>; | |
84 | |
85 def CmpEQ_FTZ : PatLeaf<(i32 0x100)>; | |
86 def CmpNE_FTZ : PatLeaf<(i32 0x101)>; | |
87 def CmpLT_FTZ : PatLeaf<(i32 0x102)>; | |
88 def CmpLE_FTZ : PatLeaf<(i32 0x103)>; | |
89 def CmpGT_FTZ : PatLeaf<(i32 0x104)>; | |
90 def CmpGE_FTZ : PatLeaf<(i32 0x105)>; | |
91 def CmpLO_FTZ : PatLeaf<(i32 0x106)>; | |
92 def CmpLS_FTZ : PatLeaf<(i32 0x107)>; | |
93 def CmpHI_FTZ : PatLeaf<(i32 0x108)>; | |
94 def CmpHS_FTZ : PatLeaf<(i32 0x109)>; | |
95 def CmpEQU_FTZ : PatLeaf<(i32 0x10A)>; | |
96 def CmpNEU_FTZ : PatLeaf<(i32 0x10B)>; | |
97 def CmpLTU_FTZ : PatLeaf<(i32 0x10C)>; | |
98 def CmpLEU_FTZ : PatLeaf<(i32 0x10D)>; | |
99 def CmpGTU_FTZ : PatLeaf<(i32 0x10E)>; | |
100 def CmpGEU_FTZ : PatLeaf<(i32 0x10F)>; | |
101 def CmpNUM_FTZ : PatLeaf<(i32 0x110)>; | |
102 def CmpNAN_FTZ : PatLeaf<(i32 0x111)>; | |
103 | |
104 def CmpMode : Operand<i32> { | |
105 let PrintMethod = "printCmpMode"; | |
106 } | |
107 | |
108 def F32ConstZero : Operand<f32>, PatLeaf<(f32 fpimm)>, SDNodeXForm<fpimm, [{ | |
109 return CurDAG->getTargetConstantFP(0.0, MVT::f32); | |
110 }]>; | |
111 def F32ConstOne : Operand<f32>, PatLeaf<(f32 fpimm)>, SDNodeXForm<fpimm, [{ | |
112 return CurDAG->getTargetConstantFP(1.0, MVT::f32); | |
113 }]>; | |
114 | |
115 //===----------------------------------------------------------------------===// | |
116 // NVPTX Instruction Predicate Definitions | |
117 //===----------------------------------------------------------------------===// | |
118 | |
119 | |
120 def hasAtomRedG32 : Predicate<"Subtarget.hasAtomRedG32()">; | |
121 def hasAtomRedS32 : Predicate<"Subtarget.hasAtomRedS32()">; | |
122 def hasAtomRedGen32 : Predicate<"Subtarget.hasAtomRedGen32()">; | |
123 def useAtomRedG32forGen32 : | |
124 Predicate<"!Subtarget.hasAtomRedGen32() && Subtarget.hasAtomRedG32()">; | |
125 def hasBrkPt : Predicate<"Subtarget.hasBrkPt()">; | |
126 def hasAtomRedG64 : Predicate<"Subtarget.hasAtomRedG64()">; | |
127 def hasAtomRedS64 : Predicate<"Subtarget.hasAtomRedS64()">; | |
128 def hasAtomRedGen64 : Predicate<"Subtarget.hasAtomRedGen64()">; | |
129 def useAtomRedG64forGen64 : | |
130 Predicate<"!Subtarget.hasAtomRedGen64() && Subtarget.hasAtomRedG64()">; | |
131 def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">; | |
132 def hasVote : Predicate<"Subtarget.hasVote()">; | |
133 def hasDouble : Predicate<"Subtarget.hasDouble()">; | |
134 def reqPTX20 : Predicate<"Subtarget.reqPTX20()">; | |
135 def hasLDG : Predicate<"Subtarget.hasLDG()">; | |
136 def hasLDU : Predicate<"Subtarget.hasLDU()">; | |
137 def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">; | |
138 | |
139 def doF32FTZ : Predicate<"useF32FTZ()">; | |
140 def doNoF32FTZ : Predicate<"!useF32FTZ()">; | |
141 | |
142 def doFMAF32 : Predicate<"doFMAF32">; | |
143 def doFMAF32_ftz : Predicate<"(doFMAF32 && useF32FTZ())">; | |
144 def doFMAF32AGG : Predicate<"doFMAF32AGG">; | |
145 def doFMAF32AGG_ftz : Predicate<"(doFMAF32AGG && useF32FTZ())">; | |
146 def doFMAF64 : Predicate<"doFMAF64">; | |
147 def doFMAF64AGG : Predicate<"doFMAF64AGG">; | |
148 | |
149 def doMulWide : Predicate<"doMulWide">; | |
150 | |
151 def allowFMA : Predicate<"allowFMA">; | |
152 def allowFMA_ftz : Predicate<"(allowFMA && useF32FTZ())">; | |
153 | |
154 def do_DIVF32_APPROX : Predicate<"getDivF32Level()==0">; | |
155 def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">; | |
156 | |
157 def do_SQRTF32_APPROX : Predicate<"!usePrecSqrtF32()">; | |
158 def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">; | |
159 | |
160 def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">; | |
161 | |
162 def true : Predicate<"1">; | |
163 | |
164 | |
165 //===----------------------------------------------------------------------===// | |
166 // Some Common Instruction Class Templates | |
167 //===----------------------------------------------------------------------===// | |
168 | |
169 multiclass I3<string OpcStr, SDNode OpNode> { | |
170 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), | |
171 !strconcat(OpcStr, "64 \t$dst, $a, $b;"), | |
172 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, | |
173 Int64Regs:$b))]>; | |
174 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b), | |
175 !strconcat(OpcStr, "64 \t$dst, $a, $b;"), | |
176 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; | |
177 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), | |
178 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), | |
179 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, | |
180 Int32Regs:$b))]>; | |
181 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), | |
182 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), | |
183 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; | |
184 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), | |
185 !strconcat(OpcStr, "16 \t$dst, $a, $b;"), | |
186 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, | |
187 Int16Regs:$b))]>; | |
188 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b), | |
189 !strconcat(OpcStr, "16 \t$dst, $a, $b;"), | |
190 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>; | |
191 } | |
192 | |
193 multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> { | |
194 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, | |
195 Int32Regs:$b), | |
196 !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), | |
197 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, | |
198 Int32Regs:$b))]>; | |
199 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), | |
200 !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), | |
201 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; | |
202 } | |
203 | |
204 multiclass F3<string OpcStr, SDNode OpNode> { | |
205 def f64rr : NVPTXInst<(outs Float64Regs:$dst), | |
206 (ins Float64Regs:$a, Float64Regs:$b), | |
207 !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"), | |
208 [(set Float64Regs:$dst, | |
209 (OpNode Float64Regs:$a, Float64Regs:$b))]>, | |
210 Requires<[allowFMA]>; | |
211 def f64ri : NVPTXInst<(outs Float64Regs:$dst), | |
212 (ins Float64Regs:$a, f64imm:$b), | |
213 !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"), | |
214 [(set Float64Regs:$dst, | |
215 (OpNode Float64Regs:$a, fpimm:$b))]>, | |
216 Requires<[allowFMA]>; | |
217 def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst), | |
218 (ins Float32Regs:$a, Float32Regs:$b), | |
219 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), | |
220 [(set Float32Regs:$dst, | |
221 (OpNode Float32Regs:$a, Float32Regs:$b))]>, | |
222 Requires<[allowFMA_ftz]>; | |
223 def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst), | |
224 (ins Float32Regs:$a, f32imm:$b), | |
225 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), | |
226 [(set Float32Regs:$dst, | |
227 (OpNode Float32Regs:$a, fpimm:$b))]>, | |
228 Requires<[allowFMA_ftz]>; | |
229 def f32rr : NVPTXInst<(outs Float32Regs:$dst), | |
230 (ins Float32Regs:$a, Float32Regs:$b), | |
231 !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), | |
232 [(set Float32Regs:$dst, | |
233 (OpNode Float32Regs:$a, Float32Regs:$b))]>, | |
234 Requires<[allowFMA]>; | |
235 def f32ri : NVPTXInst<(outs Float32Regs:$dst), | |
236 (ins Float32Regs:$a, f32imm:$b), | |
237 !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), | |
238 [(set Float32Regs:$dst, | |
239 (OpNode Float32Regs:$a, fpimm:$b))]>, | |
240 Requires<[allowFMA]>; | |
241 } | |
242 | |
243 multiclass F3_rn<string OpcStr, SDNode OpNode> { | |
244 def f64rr : NVPTXInst<(outs Float64Regs:$dst), | |
245 (ins Float64Regs:$a, Float64Regs:$b), | |
246 !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"), | |
247 [(set Float64Regs:$dst, | |
248 (OpNode Float64Regs:$a, Float64Regs:$b))]>; | |
249 def f64ri : NVPTXInst<(outs Float64Regs:$dst), | |
250 (ins Float64Regs:$a, f64imm:$b), | |
251 !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"), | |
252 [(set Float64Regs:$dst, | |
253 (OpNode Float64Regs:$a, fpimm:$b))]>; | |
254 def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst), | |
255 (ins Float32Regs:$a, Float32Regs:$b), | |
256 !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"), | |
257 [(set Float32Regs:$dst, | |
258 (OpNode Float32Regs:$a, Float32Regs:$b))]>, | |
259 Requires<[doF32FTZ]>; | |
260 def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst), | |
261 (ins Float32Regs:$a, f32imm:$b), | |
262 !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"), | |
263 [(set Float32Regs:$dst, | |
264 (OpNode Float32Regs:$a, fpimm:$b))]>, | |
265 Requires<[doF32FTZ]>; | |
266 def f32rr : NVPTXInst<(outs Float32Regs:$dst), | |
267 (ins Float32Regs:$a, Float32Regs:$b), | |
268 !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"), | |
269 [(set Float32Regs:$dst, | |
270 (OpNode Float32Regs:$a, Float32Regs:$b))]>; | |
271 def f32ri : NVPTXInst<(outs Float32Regs:$dst), | |
272 (ins Float32Regs:$a, f32imm:$b), | |
273 !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"), | |
274 [(set Float32Regs:$dst, | |
275 (OpNode Float32Regs:$a, fpimm:$b))]>; | |
276 } | |
277 | |
278 multiclass F2<string OpcStr, SDNode OpNode> { | |
279 def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a), | |
280 !strconcat(OpcStr, ".f64 \t$dst, $a;"), | |
281 [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>; | |
282 def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a), | |
283 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"), | |
284 [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>, | |
285 Requires<[doF32FTZ]>; | |
286 def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a), | |
287 !strconcat(OpcStr, ".f32 \t$dst, $a;"), | |
288 [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>; | |
289 } | |
290 | |
291 //===----------------------------------------------------------------------===// | |
292 // NVPTX Instructions. | |
293 //===----------------------------------------------------------------------===// | |
294 | |
295 //----------------------------------- | |
296 // General Type Conversion | |
297 //----------------------------------- | |
298 | |
299 let neverHasSideEffects = 1 in { | |
300 // Generate a cvt to the given type from all possible types. | |
301 // Each instance takes a CvtMode immediate that defines the conversion mode to | |
302 // use. It can be CvtNONE to omit a conversion mode. | |
303 multiclass CVT_FROM_ALL<string FromName, RegisterClass RC> { | |
304 def _s16 : NVPTXInst<(outs RC:$dst), | |
305 (ins Int16Regs:$src, CvtMode:$mode), | |
306 !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", | |
307 FromName, ".s16\t$dst, $src;"), | |
308 []>; | |
309 def _u16 : NVPTXInst<(outs RC:$dst), | |
310 (ins Int16Regs:$src, CvtMode:$mode), | |
311 !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", | |
312 FromName, ".u16\t$dst, $src;"), | |
313 []>; | |
314 def _f16 : NVPTXInst<(outs RC:$dst), | |
315 (ins Int16Regs:$src, CvtMode:$mode), | |
316 !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", | |
317 FromName, ".f16\t$dst, $src;"), | |
318 []>; | |
319 def _s32 : NVPTXInst<(outs RC:$dst), | |
320 (ins Int32Regs:$src, CvtMode:$mode), | |
321 !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", | |
322 FromName, ".s32\t$dst, $src;"), | |
323 []>; | |
324 def _u32 : NVPTXInst<(outs RC:$dst), | |
325 (ins Int32Regs:$src, CvtMode:$mode), | |
326 !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", | |
327 FromName, ".u32\t$dst, $src;"), | |
328 []>; | |
329 def _s64 : NVPTXInst<(outs RC:$dst), | |
330 (ins Int64Regs:$src, CvtMode:$mode), | |
331 !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", | |
332 FromName, ".s64\t$dst, $src;"), | |
333 []>; | |
334 def _u64 : NVPTXInst<(outs RC:$dst), | |
335 (ins Int64Regs:$src, CvtMode:$mode), | |
336 !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", | |
337 FromName, ".u64\t$dst, $src;"), | |
338 []>; | |
339 def _f32 : NVPTXInst<(outs RC:$dst), | |
340 (ins Float32Regs:$src, CvtMode:$mode), | |
341 !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", | |
342 FromName, ".f32\t$dst, $src;"), | |
343 []>; | |
344 def _f64 : NVPTXInst<(outs RC:$dst), | |
345 (ins Float64Regs:$src, CvtMode:$mode), | |
346 !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", | |
347 FromName, ".f64\t$dst, $src;"), | |
348 []>; | |
349 } | |
350 | |
351 // Generate a cvt to all possible types. | |
352 defm CVT_s16 : CVT_FROM_ALL<"s16", Int16Regs>; | |
353 defm CVT_u16 : CVT_FROM_ALL<"u16", Int16Regs>; | |
354 defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>; | |
355 defm CVT_s32 : CVT_FROM_ALL<"s32", Int32Regs>; | |
356 defm CVT_u32 : CVT_FROM_ALL<"u32", Int32Regs>; | |
357 defm CVT_s64 : CVT_FROM_ALL<"s64", Int64Regs>; | |
358 defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>; | |
359 defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>; | |
360 defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>; | |
361 | |
362 // This set of cvt is different from the above: the source and | |
363 // destination operands use the same register class. | |
364 // | |
365 def CVT_INREG_s16_s8 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), | |
366 "cvt.s16.s8 \t$dst, $src;", []>; | |
367 def CVT_INREG_s32_s8 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), | |
368 "cvt.s32.s8 \t$dst, $src;", []>; | |
369 def CVT_INREG_s32_s16 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), | |
370 "cvt.s32.s16 \t$dst, $src;", []>; | |
371 def CVT_INREG_s64_s8 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), | |
372 "cvt.s64.s8 \t$dst, $src;", []>; | |
373 def CVT_INREG_s64_s16 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), | |
374 "cvt.s64.s16 \t$dst, $src;", []>; | |
375 def CVT_INREG_s64_s32 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), | |
376 "cvt.s64.s32 \t$dst, $src;", []>; | |
377 } | |
378 | |
379 //----------------------------------- | |
380 // Integer Arithmetic | |
381 //----------------------------------- | |
382 | |
383 multiclass ADD_SUB_i1<SDNode OpNode> { | |
384 def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b), | |
385 "xor.pred \t$dst, $a, $b;", | |
386 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>; | |
387 def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b), | |
388 "xor.pred \t$dst, $a, $b;", | |
389 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>; | |
390 } | |
391 | |
392 defm ADD_i1 : ADD_SUB_i1<add>; | |
393 defm SUB_i1 : ADD_SUB_i1<sub>; | |
394 | |
395 | |
396 defm ADD : I3<"add.s", add>; | |
397 defm SUB : I3<"sub.s", sub>; | |
398 | |
399 defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>; | |
400 defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>; | |
401 | |
402 defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>; | |
403 defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>; | |
404 | |
405 //mul.wide PTX instruction | |
406 def SInt32Const : PatLeaf<(imm), [{ | |
407 // Matches an integer immediate whose value is representable as a | |
408 // signed 32-bit integer (checked with APInt::isSignedIntN). | |
409 const APInt &ImmVal = N->getAPIntValue(); | |
410 return ImmVal.isSignedIntN(32); | |
411 }]>; | |
412 | |
413 def UInt32Const : PatLeaf<(imm), [{ | |
414 // Matches an integer immediate whose value is representable as an | |
415 // unsigned 32-bit integer (checked with APInt::isIntN). | |
416 const APInt &ImmVal = N->getAPIntValue(); | |
417 return ImmVal.isIntN(32); | |
418 }]>; | |
419 | |
420 def SInt16Const : PatLeaf<(imm), [{ | |
421 // Matches an integer immediate whose value is representable as a | |
422 // signed 16-bit integer (checked with APInt::isSignedIntN). | |
423 const APInt &ImmVal = N->getAPIntValue(); | |
424 return ImmVal.isSignedIntN(16); | |
425 }]>; | |
426 | |
427 def UInt16Const : PatLeaf<(imm), [{ | |
428 // Matches an integer immediate whose value is representable as an | |
429 // unsigned 16-bit integer (checked with APInt::isIntN). | |
430 const APInt &ImmVal = N->getAPIntValue(); | |
431 return ImmVal.isIntN(16); | |
432 }]>; | |
433 | |
434 def Int5Const : PatLeaf<(imm), [{ | |
435 // Shift amounts that may be rewritten as a 32-bit mul.wide multiplier. | |
436 // Accept 0 <= v < 31 only: SHL2MUL32 emits (1 << v) as an i32 constant, | |
437 // and for v == 31 that constant is 0x80000000, i.e. negative when read | |
438 // as a signed operand, so the sext/mul.wide.s32 pattern would be wrong. | |
439 const APInt &v = N->getAPIntValue(); | |
440 return v.sge(0) && v.slt(31); | |
441 }]>; | |
442 | |
443 def Int4Const : PatLeaf<(imm), [{ | |
444 // Shift amounts that may be rewritten as a 16-bit mul.wide multiplier. | |
445 // Accept 0 <= v < 15 only: SHL2MUL16 emits (1 << v) as an i16 constant, | |
446 // and for v == 15 that constant is 0x8000, i.e. negative when read | |
447 // as a signed operand, so the sext/mul.wide.s16 pattern would be wrong. | |
448 const APInt &v = N->getAPIntValue(); | |
449 return v.sge(0) && v.slt(15); | |
450 }]>; | |
451 | |
452 def SHL2MUL32 : SDNodeXForm<imm, [{ | |
453 // Rewrite a shift amount as the equivalent i32 multiplier (1 << amount). | |
454 APInt One(32, 1); | |
455 return CurDAG->getTargetConstant(One.shl(N->getAPIntValue()), MVT::i32); | |
456 }]>; | |
457 | |
458 def SHL2MUL16 : SDNodeXForm<imm, [{ | |
459 // Rewrite a shift amount as the equivalent i16 multiplier (1 << amount). | |
460 APInt One(16, 1); | |
461 return CurDAG->getTargetConstant(One.shl(N->getAPIntValue()), MVT::i16); | |
462 }]>; | |
463 | |
464 def MULWIDES64 : NVPTXInst<(outs Int64Regs:$dst), | |
465 (ins Int32Regs:$a, Int32Regs:$b), | |
466 "mul.wide.s32 \t$dst, $a, $b;", []>; | |
467 def MULWIDES64Imm : NVPTXInst<(outs Int64Regs:$dst), | |
468 (ins Int32Regs:$a, i64imm:$b), | |
469 "mul.wide.s32 \t$dst, $a, $b;", []>; | |
470 | |
471 def MULWIDEU64 : NVPTXInst<(outs Int64Regs:$dst), | |
472 (ins Int32Regs:$a, Int32Regs:$b), | |
473 "mul.wide.u32 \t$dst, $a, $b;", []>; | |
474 def MULWIDEU64Imm : NVPTXInst<(outs Int64Regs:$dst), | |
475 (ins Int32Regs:$a, i64imm:$b), | |
476 "mul.wide.u32 \t$dst, $a, $b;", []>; | |
477 | |
478 def MULWIDES32 : NVPTXInst<(outs Int32Regs:$dst), | |
479 (ins Int16Regs:$a, Int16Regs:$b), | |
480 "mul.wide.s16 \t$dst, $a, $b;", []>; | |
481 def MULWIDES32Imm : NVPTXInst<(outs Int32Regs:$dst), | |
482 (ins Int16Regs:$a, i32imm:$b), | |
483 "mul.wide.s16 \t$dst, $a, $b;", []>; | |
484 | |
485 def MULWIDEU32 : NVPTXInst<(outs Int32Regs:$dst), | |
486 (ins Int16Regs:$a, Int16Regs:$b), | |
487 "mul.wide.u16 \t$dst, $a, $b;", []>; | |
488 def MULWIDEU32Imm : NVPTXInst<(outs Int32Regs:$dst), | |
489 (ins Int16Regs:$a, i32imm:$b), | |
490 "mul.wide.u16 \t$dst, $a, $b;", []>; | |
491 | |
492 def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)), | |
493 (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>, | |
494 Requires<[doMulWide]>; | |
495 def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)), | |
496 (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>, | |
497 Requires<[doMulWide]>; | |
498 | |
499 def : Pat<(shl (sext Int16Regs:$a), (i16 Int4Const:$b)), | |
500 (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>, | |
501 Requires<[doMulWide]>; | |
502 def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)), | |
503 (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>, | |
504 Requires<[doMulWide]>; | |
505 | |
506 def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)), | |
507 (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>, | |
508 Requires<[doMulWide]>; | |
509 def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)), | |
510 (MULWIDES64Imm Int32Regs:$a, (i64 SInt32Const:$b))>, | |
511 Requires<[doMulWide]>; | |
512 | |
513 def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)), | |
514 (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>; | |
515 def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)), | |
516 (MULWIDEU64Imm Int32Regs:$a, (i64 UInt32Const:$b))>, | |
517 Requires<[doMulWide]>; | |
518 | |
519 def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)), | |
520 (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>; | |
521 def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)), | |
522 (MULWIDES32Imm Int16Regs:$a, (i32 SInt16Const:$b))>, | |
523 Requires<[doMulWide]>; | |
524 | |
525 def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)), | |
526 (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>; | |
527 def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)), | |
528 (MULWIDEU32Imm Int16Regs:$a, (i32 UInt16Const:$b))>, | |
529 Requires<[doMulWide]>; | |
530 | |
531 defm MULT : I3<"mul.lo.s", mul>; | |
532 | |
533 defm MULTHS : I3<"mul.hi.s", mulhs>; | |
534 defm MULTHU : I3<"mul.hi.u", mulhu>; | |
535 | |
536 defm SDIV : I3<"div.s", sdiv>; | |
537 defm UDIV : I3<"div.u", udiv>; | |
538 | |
539 defm SREM : I3<"rem.s", srem>; | |
540 // The ri version will not be selected as DAGCombiner::visitSREM will lower it. | |
541 defm UREM : I3<"rem.u", urem>; | |
542 // The ri version will not be selected as DAGCombiner::visitUREM will lower it. | |
543 | |
544 def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst), | |
545 (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c), | |
546 "mad.lo.s16 \t$dst, $a, $b, $c;", | |
547 [(set Int16Regs:$dst, (add | |
548 (mul Int16Regs:$a, Int16Regs:$b), Int16Regs:$c))]>; | |
549 def MAD16rri : NVPTXInst<(outs Int16Regs:$dst), | |
550 (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c), | |
551 "mad.lo.s16 \t$dst, $a, $b, $c;", | |
552 [(set Int16Regs:$dst, (add | |
553 (mul Int16Regs:$a, Int16Regs:$b), imm:$c))]>; | |
554 def MAD16rir : NVPTXInst<(outs Int16Regs:$dst), | |
555 (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c), | |
556 "mad.lo.s16 \t$dst, $a, $b, $c;", | |
557 [(set Int16Regs:$dst, (add | |
558 (mul Int16Regs:$a, imm:$b), Int16Regs:$c))]>; | |
559 def MAD16rii : NVPTXInst<(outs Int16Regs:$dst), | |
560 (ins Int16Regs:$a, i16imm:$b, i16imm:$c), | |
561 "mad.lo.s16 \t$dst, $a, $b, $c;", | |
562 [(set Int16Regs:$dst, (add (mul Int16Regs:$a, imm:$b), | |
563 imm:$c))]>; | |
564 | |
565 def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst), | |
566 (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c), | |
567 "mad.lo.s32 \t$dst, $a, $b, $c;", | |
568 [(set Int32Regs:$dst, (add | |
569 (mul Int32Regs:$a, Int32Regs:$b), Int32Regs:$c))]>; | |
570 def MAD32rri : NVPTXInst<(outs Int32Regs:$dst), | |
571 (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c), | |
572 "mad.lo.s32 \t$dst, $a, $b, $c;", | |
573 [(set Int32Regs:$dst, (add | |
574 (mul Int32Regs:$a, Int32Regs:$b), imm:$c))]>; | |
575 def MAD32rir : NVPTXInst<(outs Int32Regs:$dst), | |
576 (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c), | |
577 "mad.lo.s32 \t$dst, $a, $b, $c;", | |
578 [(set Int32Regs:$dst, (add | |
579 (mul Int32Regs:$a, imm:$b), Int32Regs:$c))]>; | |
580 def MAD32rii : NVPTXInst<(outs Int32Regs:$dst), | |
581 (ins Int32Regs:$a, i32imm:$b, i32imm:$c), | |
582 "mad.lo.s32 \t$dst, $a, $b, $c;", | |
583 [(set Int32Regs:$dst, (add | |
584 (mul Int32Regs:$a, imm:$b), imm:$c))]>; | |
585 | |
586 def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst), | |
587 (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c), | |
588 "mad.lo.s64 \t$dst, $a, $b, $c;", | |
589 [(set Int64Regs:$dst, (add | |
590 (mul Int64Regs:$a, Int64Regs:$b), Int64Regs:$c))]>; | |
591 def MAD64rri : NVPTXInst<(outs Int64Regs:$dst), | |
592 (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c), | |
593 "mad.lo.s64 \t$dst, $a, $b, $c;", | |
594 [(set Int64Regs:$dst, (add | |
595 (mul Int64Regs:$a, Int64Regs:$b), imm:$c))]>; | |
596 def MAD64rir : NVPTXInst<(outs Int64Regs:$dst), | |
597 (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c), | |
598 "mad.lo.s64 \t$dst, $a, $b, $c;", | |
599 [(set Int64Regs:$dst, (add | |
600 (mul Int64Regs:$a, imm:$b), Int64Regs:$c))]>; | |
601 def MAD64rii : NVPTXInst<(outs Int64Regs:$dst), | |
602 (ins Int64Regs:$a, i64imm:$b, i64imm:$c), | |
603 "mad.lo.s64 \t$dst, $a, $b, $c;", | |
604 [(set Int64Regs:$dst, (add | |
605 (mul Int64Regs:$a, imm:$b), imm:$c))]>; | |
606 | |
607 | |
608 def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), | |
609 "neg.s16 \t$dst, $src;", | |
610 [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>; | |
611 def INEG32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), | |
612 "neg.s32 \t$dst, $src;", | |
613 [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>; | |
614 def INEG64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), | |
615 "neg.s64 \t$dst, $src;", | |
616 [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>; | |
617 | |
618 //----------------------------------- | |
619 // Floating Point Arithmetic | |
620 //----------------------------------- | |
621 | |
622 // Constant 1.0f | |
623 def FloatConst1 : PatLeaf<(fpimm), [{ | |
624 const llvm::APFloat &Val = N->getValueAPF(); | |
625 // Reject anything that is not an IEEE single-precision immediate. | |
626 if (&Val.getSemantics() != &llvm::APFloat::IEEEsingle) return false; | |
627 return Val.convertToFloat() == 1.0f; | |
628 }]>; | |
629 // Constant (double)1.0 | |
630 def DoubleConst1 : PatLeaf<(fpimm), [{ | |
631 const llvm::APFloat &Val = N->getValueAPF(); | |
632 // Reject anything that is not an IEEE double-precision immediate. | |
633 if (&Val.getSemantics() != &llvm::APFloat::IEEEdouble) return false; | |
634 return Val.convertToDouble() == 1.0; | |
635 }]>; | |
636 | |
637 defm FADD : F3<"add", fadd>; | |
638 defm FSUB : F3<"sub", fsub>; | |
639 defm FMUL : F3<"mul", fmul>; | |
640 | |
641 defm FADD_rn : F3_rn<"add", fadd>; | |
642 defm FSUB_rn : F3_rn<"sub", fsub>; | |
643 defm FMUL_rn : F3_rn<"mul", fmul>; | |
644 | |
645 defm FABS : F2<"abs", fabs>; | |
646 defm FNEG : F2<"neg", fneg>; | |
647 defm FSQRT : F2<"sqrt.rn", fsqrt>; | |
648 | |
649 // | |
650 // F64 division | |
651 // | |
652 def FDIV641r : NVPTXInst<(outs Float64Regs:$dst), | |
653 (ins f64imm:$a, Float64Regs:$b), | |
654 "rcp.rn.f64 \t$dst, $b;", | |
655 [(set Float64Regs:$dst, | |
656 (fdiv DoubleConst1:$a, Float64Regs:$b))]>; | |
657 def FDIV64rr : NVPTXInst<(outs Float64Regs:$dst), | |
658 (ins Float64Regs:$a, Float64Regs:$b), | |
659 "div.rn.f64 \t$dst, $a, $b;", | |
660 [(set Float64Regs:$dst, | |
661 (fdiv Float64Regs:$a, Float64Regs:$b))]>; | |
662 def FDIV64ri : NVPTXInst<(outs Float64Regs:$dst), | |
663 (ins Float64Regs:$a, f64imm:$b), | |
664 "div.rn.f64 \t$dst, $a, $b;", | |
665 [(set Float64Regs:$dst, | |
666 (fdiv Float64Regs:$a, fpimm:$b))]>; | |
667 | |
668 // | |
669 // F32 Approximate reciprocal | |
670 // | |
671 def FDIV321r_ftz : NVPTXInst<(outs Float32Regs:$dst), | |
672 (ins f32imm:$a, Float32Regs:$b), | |
673 "rcp.approx.ftz.f32 \t$dst, $b;", | |
674 [(set Float32Regs:$dst, | |
675 (fdiv FloatConst1:$a, Float32Regs:$b))]>, | |
676 Requires<[do_DIVF32_APPROX, doF32FTZ]>; | |
677 def FDIV321r : NVPTXInst<(outs Float32Regs:$dst), | |
678 (ins f32imm:$a, Float32Regs:$b), | |
679 "rcp.approx.f32 \t$dst, $b;", | |
680 [(set Float32Regs:$dst, | |
681 (fdiv FloatConst1:$a, Float32Regs:$b))]>, | |
682 Requires<[do_DIVF32_APPROX]>; | |
683 // | |
684 // F32 Approximate division | |
685 // | |
686 def FDIV32approxrr_ftz : NVPTXInst<(outs Float32Regs:$dst), | |
687 (ins Float32Regs:$a, Float32Regs:$b), | |
688 "div.approx.ftz.f32 \t$dst, $a, $b;", | |
689 [(set Float32Regs:$dst, | |
690 (fdiv Float32Regs:$a, Float32Regs:$b))]>, | |
691 Requires<[do_DIVF32_APPROX, doF32FTZ]>; | |
692 def FDIV32approxrr : NVPTXInst<(outs Float32Regs:$dst), | |
693 (ins Float32Regs:$a, Float32Regs:$b), | |
694 "div.approx.f32 \t$dst, $a, $b;", | |
695 [(set Float32Regs:$dst, | |
696 (fdiv Float32Regs:$a, Float32Regs:$b))]>, | |
697 Requires<[do_DIVF32_APPROX]>; | |
698 // | |
699 // F32 Semi-accurate reciprocal | |
700 // | |
701 // rcp.approx gives the same result as div.full(1.0f, a) and is faster. | |
702 // | |
703 def FDIV321r_approx_ftz : NVPTXInst<(outs Float32Regs:$dst), | |
704 (ins f32imm:$a, Float32Regs:$b), | |
705 "rcp.approx.ftz.f32 \t$dst, $b;", | |
706 [(set Float32Regs:$dst, | |
707 (fdiv FloatConst1:$a, Float32Regs:$b))]>, | |
708 Requires<[do_DIVF32_FULL, doF32FTZ]>; | |
709 def FDIV321r_approx : NVPTXInst<(outs Float32Regs:$dst), | |
710 (ins f32imm:$a, Float32Regs:$b), | |
711 "rcp.approx.f32 \t$dst, $b;", | |
712 [(set Float32Regs:$dst, | |
713 (fdiv FloatConst1:$a, Float32Regs:$b))]>, | |
714 Requires<[do_DIVF32_FULL]>; | |
715 // | |
716 // F32 Semi-accurate division | |
717 // | |
718 def FDIV32rr_ftz : NVPTXInst<(outs Float32Regs:$dst), | |
719 (ins Float32Regs:$a, Float32Regs:$b), | |
720 "div.full.ftz.f32 \t$dst, $a, $b;", | |
721 [(set Float32Regs:$dst, | |
722 (fdiv Float32Regs:$a, Float32Regs:$b))]>, | |
723 Requires<[do_DIVF32_FULL, doF32FTZ]>; | |
724 def FDIV32ri_ftz : NVPTXInst<(outs Float32Regs:$dst), | |
725 (ins Float32Regs:$a, f32imm:$b), | |
726 "div.full.ftz.f32 \t$dst, $a, $b;", | |
727 [(set Float32Regs:$dst, | |
728 (fdiv Float32Regs:$a, fpimm:$b))]>, | |
729 Requires<[do_DIVF32_FULL, doF32FTZ]>; | |
730 def FDIV32rr : NVPTXInst<(outs Float32Regs:$dst), | |
731 (ins Float32Regs:$a, Float32Regs:$b), | |
732 "div.full.f32 \t$dst, $a, $b;", | |
733 [(set Float32Regs:$dst, | |
734 (fdiv Float32Regs:$a, Float32Regs:$b))]>, | |
735 Requires<[do_DIVF32_FULL]>; | |
736 def FDIV32ri : NVPTXInst<(outs Float32Regs:$dst), | |
737 (ins Float32Regs:$a, f32imm:$b), | |
738 "div.full.f32 \t$dst, $a, $b;", | |
739 [(set Float32Regs:$dst, | |
740 (fdiv Float32Regs:$a, fpimm:$b))]>, | |
741 Requires<[do_DIVF32_FULL]>; | |
742 // | |
743 // F32 Accurate reciprocal | |
744 // | |
// Both defs match an fdiv whose first operand is the FloatConst1 leaf and emit | |
// rcp.rn[.ftz].f32 on $b only ($a is not printed). They require reqPTX20; the | |
// _ftz variant additionally requires doF32FTZ. | |
745 def FDIV321r_prec_ftz : NVPTXInst<(outs Float32Regs:$dst), | |
746 (ins f32imm:$a, Float32Regs:$b), | |
747 "rcp.rn.ftz.f32 \t$dst, $b;", | |
748 [(set Float32Regs:$dst, | |
749 (fdiv FloatConst1:$a, Float32Regs:$b))]>, | |
750 Requires<[reqPTX20, doF32FTZ]>; | |
751 def FDIV321r_prec : NVPTXInst<(outs Float32Regs:$dst), | |
752 (ins f32imm:$a, Float32Regs:$b), | |
753 "rcp.rn.f32 \t$dst, $b;", | |
754 [(set Float32Regs:$dst, | |
755 (fdiv FloatConst1:$a, Float32Regs:$b))]>, | |
756 Requires<[reqPTX20]>; | |
757 // | |
758 // F32 Accurate division | |
759 // | |
// All four defs select fdiv to div.rn[.ftz].f32 and require reqPTX20. The rr | |
// forms take a register divisor, the ri forms an fpimm divisor; the _ftz | |
// variants additionally require doF32FTZ. The frem patterns later in this file | |
// reference the FDIV32*_prec* names. | |
760 def FDIV32rr_prec_ftz : NVPTXInst<(outs Float32Regs:$dst), | |
761 (ins Float32Regs:$a, Float32Regs:$b), | |
762 "div.rn.ftz.f32 \t$dst, $a, $b;", | |
763 [(set Float32Regs:$dst, | |
764 (fdiv Float32Regs:$a, Float32Regs:$b))]>, | |
765 Requires<[doF32FTZ, reqPTX20]>; | |
766 def FDIV32ri_prec_ftz : NVPTXInst<(outs Float32Regs:$dst), | |
767 (ins Float32Regs:$a, f32imm:$b), | |
768 "div.rn.ftz.f32 \t$dst, $a, $b;", | |
769 [(set Float32Regs:$dst, | |
770 (fdiv Float32Regs:$a, fpimm:$b))]>, | |
771 Requires<[doF32FTZ, reqPTX20]>; | |
772 def FDIV32rr_prec : NVPTXInst<(outs Float32Regs:$dst), | |
773 (ins Float32Regs:$a, Float32Regs:$b), | |
774 "div.rn.f32 \t$dst, $a, $b;", | |
775 [(set Float32Regs:$dst, | |
776 (fdiv Float32Regs:$a, Float32Regs:$b))]>, | |
777 Requires<[reqPTX20]>; | |
778 def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst), | |
779 (ins Float32Regs:$a, f32imm:$b), | |
780 "div.rn.f32 \t$dst, $a, $b;", | |
781 [(set Float32Regs:$dst, | |
782 (fdiv Float32Regs:$a, fpimm:$b))]>, | |
783 Requires<[reqPTX20]>; | |
784 | |
785 // | |
786 // F32 rsqrt | |
787 // | |
788 | |
// The instruction itself carries an empty pattern list, so it is never | |
// selected directly; it is only produced by the Pat below. | |
789 def RSQRTF32approx1r : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$b), | |
790 "rsqrt.approx.f32 \t$dst, $b;", []>; | |
791 | |
// Rewrites FloatConst1 / int_nvvm_sqrt_f($b) into a single rsqrt.approx.f32. | |
// Gated on do_DIVF32_FULL, do_SQRTF32_APPROX and doNoF32FTZ together. | |
792 def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$b)), | |
793 (RSQRTF32approx1r Float32Regs:$b)>, | |
794 Requires<[do_DIVF32_FULL, do_SQRTF32_APPROX, doNoF32FTZ]>; | |
795 | |
// Defines the f32 fused multiply-add instruction variants for one opcode. | |
//   OpcStr - opcode text placed in front of " \t$dst, $a, $b, $c;". | |
//   Pred   - predicate every variant is gated on via Requires<[Pred]>. | |
// Each variant matches fadd(fmul($a, $b), $c); the suffix names the operand | |
// kinds of $a/$b/$c (r = Float32Regs, i = fpimm). rrr2 matches the commuted | |
// fadd($c, fmul($a, $b)) form. | |
796 multiclass FPCONTRACT32<string OpcStr, Predicate Pred> { | |
797 def rrr : NVPTXInst<(outs Float32Regs:$dst), | |
798 (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c), | |
799 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), | |
800 [(set Float32Regs:$dst, (fadd | |
801 (fmul Float32Regs:$a, Float32Regs:$b), | |
802 Float32Regs:$c))]>, Requires<[Pred]>; | |
803 // This works around a weird bug in TableGen, which does not automatically | |
804 // generate the following permuted rule rrr2 from the above rrr. | |
805 // So we explicitly add it here. This happens to FMA32 only. | |
806 // See the comments at FMAD32 and FMA32 for more information. | |
807 def rrr2 : NVPTXInst<(outs Float32Regs:$dst), | |
808 (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c), | |
809 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), | |
810 [(set Float32Regs:$dst, (fadd Float32Regs:$c, | |
811 (fmul Float32Regs:$a, Float32Regs:$b)))]>, | |
812 Requires<[Pred]>; | |
813 def rri : NVPTXInst<(outs Float32Regs:$dst), | |
814 (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c), | |
815 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), | |
816 [(set Float32Regs:$dst, (fadd | |
817 (fmul Float32Regs:$a, Float32Regs:$b), fpimm:$c))]>, | |
818 Requires<[Pred]>; | |
819 def rir : NVPTXInst<(outs Float32Regs:$dst), | |
820 (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c), | |
821 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), | |
822 [(set Float32Regs:$dst, (fadd | |
823 (fmul Float32Regs:$a, fpimm:$b), Float32Regs:$c))]>, | |
824 Requires<[Pred]>; | |
825 def rii : NVPTXInst<(outs Float32Regs:$dst), | |
826 (ins Float32Regs:$a, f32imm:$b, f32imm:$c), | |
827 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), | |
828 [(set Float32Regs:$dst, (fadd | |
829 (fmul Float32Regs:$a, fpimm:$b), fpimm:$c))]>, | |
830 Requires<[Pred]>; | |
831 } | |
832 | |
// f64 counterpart of FPCONTRACT32: defines rrr, rri, rir and rii variants that | |
// match fadd(fmul($a, $b), $c) on Float64Regs / fpimm operands. | |
//   OpcStr - opcode text placed in front of " \t$dst, $a, $b, $c;". | |
//   Pred   - predicate every variant is gated on via Requires<[Pred]>. | |
// Unlike FPCONTRACT32 there is no explicit commuted rrr2 variant here. | |
833 multiclass FPCONTRACT64<string OpcStr, Predicate Pred> { | |
834 def rrr : NVPTXInst<(outs Float64Regs:$dst), | |
835 (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c), | |
836 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), | |
837 [(set Float64Regs:$dst, (fadd | |
838 (fmul Float64Regs:$a, Float64Regs:$b), | |
839 Float64Regs:$c))]>, Requires<[Pred]>; | |
840 def rri : NVPTXInst<(outs Float64Regs:$dst), | |
841 (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c), | |
842 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), | |
843 [(set Float64Regs:$dst, (fadd (fmul Float64Regs:$a, | |
844 Float64Regs:$b), fpimm:$c))]>, Requires<[Pred]>; | |
845 def rir : NVPTXInst<(outs Float64Regs:$dst), | |
846 (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c), | |
847 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), | |
848 [(set Float64Regs:$dst, (fadd | |
849 (fmul Float64Regs:$a, fpimm:$b), Float64Regs:$c))]>, | |
850 Requires<[Pred]>; | |
851 def rii : NVPTXInst<(outs Float64Regs:$dst), | |
852 (ins Float64Regs:$a, f64imm:$b, f64imm:$c), | |
853 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), | |
854 [(set Float64Regs:$dst, (fadd | |
855 (fmul Float64Regs:$a, fpimm:$b), fpimm:$c))]>, | |
856 Requires<[Pred]>; | |
857 } | |
858 | |
859 // For an unknown reason (most likely a bug in tablegen), tablegen does not | |
860 // automatically generate the rrr2 rule from | |
861 // the rrr rule (see FPCONTRACT32) for FMA32, though it does for FMAD32. | |
862 // If we reverse the order of the following two lines, then the rrr2 rule will | |
863 // be generated for FMA32, but not for rrr. | |
864 // Therefore, we manually write the rrr2 rule in FPCONTRACT32. | |
// Each defm prefixes the multiclass def names, yielding e.g. FMA32_ftzrrr, | |
// FMA32rrr and FMA64rrr. | |
865 defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doFMAF32_ftz>; | |
866 defm FMA32 : FPCONTRACT32<"fma.rn.f32", doFMAF32>; | |
867 defm FMA64 : FPCONTRACT64<"fma.rn.f64", doFMAF64>; | |
868 | |
869 // b*c-a => fmad(b, c, -a) | |
// Emits one anonymous f32 pattern that rewrites fsub(fmul($b, $c), $a) as | |
// Inst($b, $c, FNEGf32($a)), gated on Pred. | |
// NOTE(review): no instantiation of this multiclass is visible in this part | |
// of the file - confirm whether it is still used. | |
870 multiclass FPCONTRACT32_SUB_PAT_MAD<NVPTXInst Inst, Predicate Pred> { | |
871 def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a), | |
872 (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>, | |
873 Requires<[Pred]>; | |
874 } | |
875 | |
876 // a-b*c => fmad(-b,c, a) | |
877 // - legal because a-b*c <=> a+(-b*c) <=> a+(-b)*c | |
878 // b*c-a => fmad(b, c, -a) | |
879 // - legal because b*c-a <=> b*c+(-a) | |
// Emits both anonymous f32 subtraction patterns for the three-register | |
// instruction Inst, inserting an FNEGf32 on the negated operand. Both patterns | |
// are gated on Pred. | |
880 multiclass FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> { | |
881 def : Pat<(fsub Float32Regs:$a, (fmul Float32Regs:$b, Float32Regs:$c)), | |
882 (Inst (FNEGf32 Float32Regs:$b), Float32Regs:$c, Float32Regs:$a)>, | |
883 Requires<[Pred]>; | |
884 def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a), | |
885 (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>, | |
886 Requires<[Pred]>; | |
887 } | |
888 | |
889 // a-b*c => fmad(-b,c, a) | |
890 // b*c-a => fmad(b, c, -a) | |
// f64 counterpart of FPCONTRACT32_SUB_PAT: the same two rewrites on | |
// Float64Regs, using FNEGf64 for the negated operand and gated on Pred. | |
891 multiclass FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> { | |
892 def : Pat<(fsub Float64Regs:$a, (fmul Float64Regs:$b, Float64Regs:$c)), | |
893 (Inst (FNEGf64 Float64Regs:$b), Float64Regs:$c, Float64Regs:$a)>, | |
894 Requires<[Pred]>; | |
895 | |
896 def : Pat<(fsub (fmul Float64Regs:$b, Float64Regs:$c), Float64Regs:$a), | |
897 (Inst Float64Regs:$b, Float64Regs:$c, (FNEGf64 Float64Regs:$a))>, | |
898 Requires<[Pred]>; | |
899 } | |
900 | |
// Attach the fsub contraction patterns to the all-register FMA instructions | |
// (the rrr defs produced by the FMA32_ftz / FMA32 / FMA64 defm lines). Each | |
// set is gated on its own doFMA*AGG* predicate, defined outside this chunk. | |
901 defm FMAF32ext_ftz : FPCONTRACT32_SUB_PAT<FMA32_ftzrrr, doFMAF32AGG_ftz>; | |
902 defm FMAF32ext : FPCONTRACT32_SUB_PAT<FMA32rrr, doFMAF32AGG>; | |
903 defm FMAF64ext : FPCONTRACT64_SUB_PAT<FMA64rrr, doFMAF64AGG>; | |
904 | |
// f32 fsin / fcos are selected directly to the approximate PTX instructions | |
// sin.approx.f32 / cos.approx.f32. No predicate is attached to either def. | |
905 def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), | |
906 "sin.approx.f32 \t$dst, $src;", | |
907 [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>; | |
908 def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), | |
909 "cos.approx.f32 \t$dst, $src;", | |
910 [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>; | |
911 | |
912 // Lower (frem x, y) into (sub x, (mul (floor (div x, y)) y)) | |
913 // i.e. "poor man's fmod()" | |
// Every pattern below has the same shape: divide, round the quotient with a | |
// same-type CVT using the CvtRMI mode (CvtRMI_FTZ for the FTZ forms), multiply | |
// by y, and subtract from x. When y is an fpimm the divide and multiply use | |
// their ri forms, while the final subtract is always the rr form. | |
914 | |
915 // frem - f32 FTZ | |
// Only these two patterns carry a predicate (doF32FTZ). | |
916 def : Pat<(frem Float32Regs:$x, Float32Regs:$y), | |
917 (FSUBf32rr_ftz Float32Regs:$x, (FMULf32rr_ftz (CVT_f32_f32 | |
918 (FDIV32rr_prec_ftz Float32Regs:$x, Float32Regs:$y), CvtRMI_FTZ), | |
919 Float32Regs:$y))>, | |
920 Requires<[doF32FTZ]>; | |
921 def : Pat<(frem Float32Regs:$x, fpimm:$y), | |
922 (FSUBf32rr_ftz Float32Regs:$x, (FMULf32ri_ftz (CVT_f32_f32 | |
923 (FDIV32ri_prec_ftz Float32Regs:$x, fpimm:$y), CvtRMI_FTZ), | |
924 fpimm:$y))>, | |
925 Requires<[doF32FTZ]>; | |
926 | |
927 // frem - f32 | |
928 def : Pat<(frem Float32Regs:$x, Float32Regs:$y), | |
929 (FSUBf32rr Float32Regs:$x, (FMULf32rr (CVT_f32_f32 | |
930 (FDIV32rr_prec Float32Regs:$x, Float32Regs:$y), CvtRMI), | |
931 Float32Regs:$y))>; | |
932 def : Pat<(frem Float32Regs:$x, fpimm:$y), | |
933 (FSUBf32rr Float32Regs:$x, (FMULf32ri (CVT_f32_f32 | |
934 (FDIV32ri_prec Float32Regs:$x, fpimm:$y), CvtRMI), | |
935 fpimm:$y))>; | |
936 | |
937 // frem - f64 | |
938 def : Pat<(frem Float64Regs:$x, Float64Regs:$y), | |
939 (FSUBf64rr Float64Regs:$x, (FMULf64rr (CVT_f64_f64 | |
940 (FDIV64rr Float64Regs:$x, Float64Regs:$y), CvtRMI), | |
941 Float64Regs:$y))>; | |
942 def : Pat<(frem Float64Regs:$x, fpimm:$y), | |
943 (FSUBf64rr Float64Regs:$x, (FMULf64ri (CVT_f64_f64 | |
944 (FDIV64ri Float64Regs:$x, fpimm:$y), CvtRMI), | |
945 fpimm:$y))>; | |
946 | |
947 //----------------------------------- | |
948 // Logical Arithmetic | |
949 //----------------------------------- | |
950 | |
// Defines register-register (rr) and register-immediate (ri) forms of a | |
// bitwise operation for the four integer register classes. | |
//   OpcStr - PTX opcode text; ".pred", ".b16", ".b32" or ".b64" is appended. | |
//   OpNode - SDNode matched by each variant's selection pattern. | |
// Def names are b1rr/b1ri, b16rr/b16ri, b32rr/b32ri and b64rr/b64ri. | |
951 multiclass LOG_FORMAT<string OpcStr, SDNode OpNode> { | |
952 def b1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b), | |
953 !strconcat(OpcStr, ".pred \t$dst, $a, $b;"), | |
954 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>; | |
955 def b1ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b), | |
956 !strconcat(OpcStr, ".pred \t$dst, $a, $b;"), | |
957 [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>; | |
958 def b16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), | |
959 !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"), | |
960 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, | |
961 Int16Regs:$b))]>; | |
962 def b16ri: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b), | |
963 !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"), | |
964 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>; | |
965 def b32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), | |
966 !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"), | |
967 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, | |
968 Int32Regs:$b))]>; | |
969 def b32ri: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), | |
970 !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"), | |
971 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; | |
972 def b64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), | |
973 !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"), | |
974 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, | |
975 Int64Regs:$b))]>; | |
976 def b64ri: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b), | |
977 !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"), | |
978 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; | |
979 } | |
980 | |
// Instantiate the bitwise instruction families; the defm name prefixes the | |
// multiclass def names, e.g. ORb1rr, ANDb1rr, XORb1rr (used by later patterns). | |
981 defm OR : LOG_FORMAT<"or", or>; | |
982 defm AND : LOG_FORMAT<"and", and>; | |
983 defm XOR : LOG_FORMAT<"xor", xor>; | |
984 | |
// Bitwise complement for each integer register class. Each def matches the | |
// `not` node on its register class and prints not.pred / not.b16 / not.b32 / | |
// not.b64 respectively. | |
985 def NOT1: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src), | |
986 "not.pred \t$dst, $src;", | |
987 [(set Int1Regs:$dst, (not Int1Regs:$src))]>; | |
988 def NOT16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), | |
989 "not.b16 \t$dst, $src;", | |
990 [(set Int16Regs:$dst, (not Int16Regs:$src))]>; | |
991 def NOT32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), | |
992 "not.b32 \t$dst, $src;", | |
993 [(set Int32Regs:$dst, (not Int32Regs:$src))]>; | |
994 def NOT64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), | |
995 "not.b64 \t$dst, $src;", | |
996 [(set Int64Regs:$dst, (not Int64Regs:$src))]>; | |
997 | |
998 // For shifts, the second src operand must be a 32-bit value | |
// Defines the shift variants for one opcode. | |
//   OpcStr - opcode text up to the width; "64", "32" or "16" is appended. | |
//   OpNode - SDNode matched by each variant's selection pattern. | |
// The value operand is 64-, 32- or 16-bit; the shift amount is always either | |
// an Int32Regs register (rr) or an i32 immediate (ri). i32ii takes both | |
// operands as i32 immediates. | |
999 multiclass LSHIFT_FORMAT<string OpcStr, SDNode OpNode> { | |
1000 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, | |
1001 Int32Regs:$b), | |
1002 !strconcat(OpcStr, "64 \t$dst, $a, $b;"), | |
1003 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, | |
1004 Int32Regs:$b))]>; | |
1005 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b), | |
1006 !strconcat(OpcStr, "64 \t$dst, $a, $b;"), | |
1007 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, | |
1008 (i32 imm:$b)))]>; | |
1009 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, | |
1010 Int32Regs:$b), | |
1011 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), | |
1012 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, | |
1013 Int32Regs:$b))]>; | |
1014 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), | |
1015 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), | |
1016 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, | |
1017 (i32 imm:$b)))]>; | |
1018 def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b), | |
1019 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), | |
1020 [(set Int32Regs:$dst, (OpNode (i32 imm:$a), | |
1021 (i32 imm:$b)))]>; | |
1022 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, | |
1023 Int32Regs:$b), | |
1024 !strconcat(OpcStr, "16 \t$dst, $a, $b;"), | |
1025 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, | |
1026 Int32Regs:$b))]>; | |
1027 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b), | |
1028 !strconcat(OpcStr, "16 \t$dst, $a, $b;"), | |
1029 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, | |
1030 (i32 imm:$b)))]>; | |
1031 } | |
1032 | |
// Left shift: prints shl.b64 / shl.b32 / shl.b16. | |
1033 defm SHL : LSHIFT_FORMAT<"shl.b", shl>; | |
1034 | |
1035 // For shifts, the second src operand must be a 32-bit value | |
1036 // Need to add cvt for the 8-bits. | |
// Defines the right-shift variants for one opcode; the def list and operand | |
// kinds are the same as in LSHIFT_FORMAT. | |
//   OpcStr - opcode text up to the width; "64", "32" or "16" is appended. | |
//   OpNode - SDNode matched by each variant's selection pattern. | |
1037 multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode> { | |
1038 def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, | |
1039 Int32Regs:$b), | |
1040 !strconcat(OpcStr, "64 \t$dst, $a, $b;"), | |
1041 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, | |
1042 Int32Regs:$b))]>; | |
1043 def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b), | |
1044 !strconcat(OpcStr, "64 \t$dst, $a, $b;"), | |
1045 [(set Int64Regs:$dst, (OpNode Int64Regs:$a, | |
1046 (i32 imm:$b)))]>; | |
1047 def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, | |
1048 Int32Regs:$b), | |
1049 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), | |
1050 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, | |
1051 Int32Regs:$b))]>; | |
1052 def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), | |
1053 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), | |
1054 [(set Int32Regs:$dst, (OpNode Int32Regs:$a, | |
1055 (i32 imm:$b)))]>; | |
1056 def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b), | |
1057 !strconcat(OpcStr, "32 \t$dst, $a, $b;"), | |
1058 [(set Int32Regs:$dst, (OpNode (i32 imm:$a), | |
1059 (i32 imm:$b)))]>; | |
1060 def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, | |
1061 Int32Regs:$b), | |
1062 !strconcat(OpcStr, "16 \t$dst, $a, $b;"), | |
1063 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, | |
1064 Int32Regs:$b))]>; | |
1065 def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b), | |
1066 !strconcat(OpcStr, "16 \t$dst, $a, $b;"), | |
1067 [(set Int16Regs:$dst, (OpNode Int16Regs:$a, | |
1068 (i32 imm:$b)))]>; | |
1069 } | |
1070 | |
// sra prints the signed form (shr.s<width>), srl the unsigned form | |
// (shr.u<width>). | |
1071 defm SRA : RSHIFT_FORMAT<"shr.s", sra>; | |
1072 defm SRL : RSHIFT_FORMAT<"shr.u", srl>; | |
1073 | |
1074 // 32-bit software rotate by a pair of immediate shift amounts. | |
// Prints a braced PTX scope that declares two scratch .b32 registers, shifts | |
// $src left by $amt1 and right by $amt2, and adds the two parts into $dst. | |
// The def has no selection pattern of its own; the rotl/rotr immediate Pat | |
// records select it and supply the two amounts. | |
1075 def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst), | |
1076 (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2), | |
1077 !strconcat("{{\n\t", | |
1078 ".reg .b32 %lhs;\n\t", | |
1079 ".reg .b32 %rhs;\n\t", | |
1080 "shl.b32 \t%lhs, $src, $amt1;\n\t", | |
1081 "shr.b32 \t%rhs, $src, $amt2;\n\t", | |
1082 "add.u32 \t$dst, %lhs, %rhs;\n\t", | |
1083 "}}"), | |
1084 []>; | |
1085 | |
// Immediate transform: replaces an immediate N with the i32 target constant | |
// 32 - N (N is read with getZExtValue). | |
1086 def SUB_FRM_32 : SDNodeXForm<imm, [{ | |
1087 return CurDAG->getTargetConstant(32-N->getZExtValue(), MVT::i32); | |
1088 }]>; | |
1089 | |
// Rotate by an immediate: rotl passes (amt, 32-amt) as the left/right shift | |
// amounts of ROT32imm_sw, rotr passes (32-amt, amt). | |
1090 def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)), | |
1091 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>; | |
1092 def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)), | |
1093 (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>; | |
1094 | |
// 32-bit rotate-left by a register amount, expanded in software. | |
// The printed PTX scope declares three scratch .b32 registers, computes | |
// %amt2 = 32 - $amt, shifts $src left by $amt and right by %amt2, and adds the | |
// two parts into $dst. Selected directly for rotl on Int32Regs. | |
1095 def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, | |
1096 Int32Regs:$amt), | |
1097 !strconcat("{{\n\t", | |
1098 ".reg .b32 %lhs;\n\t", | |
1099 ".reg .b32 %rhs;\n\t", | |
1100 ".reg .b32 %amt2;\n\t", | |
1101 "shl.b32 \t%lhs, $src, $amt;\n\t", | |
1102 "sub.s32 \t%amt2, 32, $amt;\n\t", | |
1103 "shr.b32 \t%rhs, $src, %amt2;\n\t", | |
1104 "add.u32 \t$dst, %lhs, %rhs;\n\t", | |
1105 "}}"), | |
1106 [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>; | |
1107 | |
// 32-bit rotate-right by a register amount, expanded in software. | |
// The printed PTX scope declares three scratch .b32 registers, computes | |
// %amt2 = 32 - $amt, shifts $src right by $amt and left by %amt2, and adds the | |
// two parts into $dst. Selected directly for rotr on Int32Regs. | |
1108 def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, | |
1109 Int32Regs:$amt), | |
1110 !strconcat("{{\n\t", | |
1111 ".reg .b32 %lhs;\n\t", | |
1112 ".reg .b32 %rhs;\n\t", | |
1113 ".reg .b32 %amt2;\n\t", | |
1114 "shr.b32 \t%lhs, $src, $amt;\n\t", | |
1115 "sub.s32 \t%amt2, 32, $amt;\n\t", | |
1116 "shl.b32 \t%rhs, $src, %amt2;\n\t", | |
1117 "add.u32 \t$dst, %lhs, %rhs;\n\t", | |
1118 "}}"), | |
1119 [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>; | |
1120 | |
1121 // 64-bit software rotate by a pair of immediate shift amounts. | |
// Prints a braced PTX scope that declares two scratch .b64 registers, shifts | |
// $src left by $amt1 and right by $amt2, and adds the two parts into $dst. | |
// Both amounts are i32 immediates. The def has no selection pattern of its | |
// own; the 64-bit rotl/rotr immediate Pat records select it. | |
1122 def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, | |
1123 i32imm:$amt1, i32imm:$amt2), | |
1124 !strconcat("{{\n\t", | |
1125 ".reg .b64 %lhs;\n\t", | |
1126 ".reg .b64 %rhs;\n\t", | |
1127 "shl.b64 \t%lhs, $src, $amt1;\n\t", | |
1128 "shr.b64 \t%rhs, $src, $amt2;\n\t", | |
1129 "add.u64 \t$dst, %lhs, %rhs;\n\t", | |
1130 "}}"), | |
1131 []>; | |
1132 | |
// Immediate transform: replaces an immediate N with the i32 target constant | |
// 64 - N (N is read with getZExtValue). | |
1133 def SUB_FRM_64 : SDNodeXForm<imm, [{ | |
1134 return CurDAG->getTargetConstant(64-N->getZExtValue(), MVT::i32); | |
1135 }]>; | |
1136 | |
// 64-bit rotate by an i32 immediate: rotl passes (amt, 64-amt) as the | |
// left/right shift amounts of ROT64imm_sw, rotr passes (64-amt, amt). | |
1137 def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)), | |
1138 (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>; | |
1139 def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)), | |
1140 (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>; | |
1141 | |
// 64-bit rotate-left by a 32-bit register amount, expanded in software. | |
// The printed PTX scope declares two scratch .b64 registers and a .u32 | |
// %amt2 = 64 - $amt, shifts $src left by $amt and right by %amt2, and adds the | |
// two parts into $dst. Selected directly for rotl on Int64Regs. | |
1142 def ROTL64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, | |
1143 Int32Regs:$amt), | |
1144 !strconcat("{{\n\t", | |
1145 ".reg .b64 %lhs;\n\t", | |
1146 ".reg .b64 %rhs;\n\t", | |
1147 ".reg .u32 %amt2;\n\t", | |
1148 "shl.b64 \t%lhs, $src, $amt;\n\t", | |
1149 "sub.u32 \t%amt2, 64, $amt;\n\t", | |
1150 "shr.b64 \t%rhs, $src, %amt2;\n\t", | |
1151 "add.u64 \t$dst, %lhs, %rhs;\n\t", | |
1152 "}}"), | |
1153 [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>; | |
1154 | |
// 64-bit rotate-right by a 32-bit register amount, expanded in software. | |
// The printed PTX scope declares two scratch .b64 registers and a .u32 | |
// %amt2 = 64 - $amt, shifts $src right by $amt and left by %amt2, and adds the | |
// two parts into $dst. Selected directly for rotr on Int64Regs. | |
1155 def ROTR64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, | |
1156 Int32Regs:$amt), | |
1157 !strconcat("{{\n\t", | |
1158 ".reg .b64 %lhs;\n\t", | |
1159 ".reg .b64 %rhs;\n\t", | |
1160 ".reg .u32 %amt2;\n\t", | |
1161 "shr.b64 \t%lhs, $src, $amt;\n\t", | |
1162 "sub.u32 \t%amt2, 64, $amt;\n\t", | |
1163 "shl.b64 \t%rhs, $src, %amt2;\n\t", | |
1164 "add.u64 \t$dst, %lhs, %rhs;\n\t", | |
1165 "}}"), | |
1166 [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>; | |
1167 | |
1168 | |
1169 //----------------------------------- | |
1170 // General Comparison | |
1171 //----------------------------------- | |
1172 | |
1173 // General setp instructions | |
// Defines pattern-less compare-to-predicate instructions writing Int1Regs. | |
//   TypeStr - PTX type suffix printed after the comparison mode. | |
//   RC      - register class of the compared operands. | |
//   ImmCls  - immediate operand class used by the ri / ir forms. | |
// The CmpMode operand $cmp is printed twice, through its "base" and "ftz" | |
// operand modifiers. All pattern lists are empty, so these are only selected | |
// through separate Pat records. | |
1174 multiclass SETP<string TypeStr, RegisterClass RC, Operand ImmCls> { | |
1175 def rr : NVPTXInst<(outs Int1Regs:$dst), | |
1176 (ins RC:$a, RC:$b, CmpMode:$cmp), | |
1177 !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"), | |
1178 []>; | |
1179 def ri : NVPTXInst<(outs Int1Regs:$dst), | |
1180 (ins RC:$a, ImmCls:$b, CmpMode:$cmp), | |
1181 !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"), | |
1182 []>; | |
1183 def ir : NVPTXInst<(outs Int1Regs:$dst), | |
1184 (ins ImmCls:$a, RC:$b, CmpMode:$cmp), | |
1185 !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"), | |
1186 []>; | |
1187 } | |
1188 | |
// One rr/ri/ir family per PTX type suffix, e.g. SETP_s32rr, SETP_u64ri. | |
1189 defm SETP_b16 : SETP<"b16", Int16Regs, i16imm>; | |
1190 defm SETP_s16 : SETP<"s16", Int16Regs, i16imm>; | |
1191 defm SETP_u16 : SETP<"u16", Int16Regs, i16imm>; | |
1192 defm SETP_b32 : SETP<"b32", Int32Regs, i32imm>; | |
1193 defm SETP_s32 : SETP<"s32", Int32Regs, i32imm>; | |
1194 defm SETP_u32 : SETP<"u32", Int32Regs, i32imm>; | |
1195 defm SETP_b64 : SETP<"b64", Int64Regs, i64imm>; | |
1196 defm SETP_s64 : SETP<"s64", Int64Regs, i64imm>; | |
1197 defm SETP_u64 : SETP<"u64", Int64Regs, i64imm>; | |
1198 defm SETP_f32 : SETP<"f32", Float32Regs, f32imm>; | |
1199 defm SETP_f64 : SETP<"f64", Float64Regs, f64imm>; | |
1200 | |
1201 // General set instructions | |
// Defines pattern-less compare instructions whose result is an Int32Regs | |
// value rather than a predicate. | |
//   TypeStr - PTX type suffix printed after the comparison mode. | |
//   RC      - register class of the compared operands. | |
//   ImmCls  - immediate operand class used by the ri / ir forms. | |
// Unlike SETP, the CmpMode operand $cmp is printed once, without a modifier. | |
1202 multiclass SET<string TypeStr, RegisterClass RC, Operand ImmCls> { | |
1203 def rr : NVPTXInst<(outs Int32Regs:$dst), | |
1204 (ins RC:$a, RC:$b, CmpMode:$cmp), | |
1205 !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>; | |
1206 def ri : NVPTXInst<(outs Int32Regs:$dst), | |
1207 (ins RC:$a, ImmCls:$b, CmpMode:$cmp), | |
1208 !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>; | |
1209 def ir : NVPTXInst<(outs Int32Regs:$dst), | |
1210 (ins ImmCls:$a, RC:$b, CmpMode:$cmp), | |
1211 !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>; | |
1212 } | |
1213 | |
// One rr/ri/ir family per PTX type suffix, e.g. SET_s32rr, SET_u64ri. | |
1214 defm SET_b16 : SET<"b16", Int16Regs, i16imm>; | |
1215 defm SET_s16 : SET<"s16", Int16Regs, i16imm>; | |
1216 defm SET_u16 : SET<"u16", Int16Regs, i16imm>; | |
1217 defm SET_b32 : SET<"b32", Int32Regs, i32imm>; | |
1218 defm SET_s32 : SET<"s32", Int32Regs, i32imm>; | |
1219 defm SET_u32 : SET<"u32", Int32Regs, i32imm>; | |
1220 defm SET_b64 : SET<"b64", Int64Regs, i64imm>; | |
1221 defm SET_s64 : SET<"s64", Int64Regs, i64imm>; | |
1222 defm SET_u64 : SET<"u64", Int64Regs, i64imm>; | |
1223 defm SET_f32 : SET<"f32", Float32Regs, f32imm>; | |
1224 defm SET_f64 : SET<"f64", Float64Regs, f64imm>; | |
1225 | |
1226 //----------------------------------- | |
1227 // General Selection | |
1228 //----------------------------------- | |
1229 | |
1230 // General selp instructions | |
// Defines pattern-less select instructions: rr, ri, ir and ii forms, named | |
// after the kinds of $a and $b (r = RC register, i = ImmCls immediate). The | |
// predicate $p is always an Int1Regs register and is printed last. | |
//   TypeStr - PTX type suffix printed after "selp.". | |
1231 multiclass SELP<string TypeStr, RegisterClass RC, Operand ImmCls> { | |
1232 def rr : NVPTXInst<(outs RC:$dst), | |
1233 (ins RC:$a, RC:$b, Int1Regs:$p), | |
1234 !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>; | |
1235 def ri : NVPTXInst<(outs RC:$dst), | |
1236 (ins RC:$a, ImmCls:$b, Int1Regs:$p), | |
1237 !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>; | |
1238 def ir : NVPTXInst<(outs RC:$dst), | |
1239 (ins ImmCls:$a, RC:$b, Int1Regs:$p), | |
1240 !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>; | |
1241 def ii : NVPTXInst<(outs RC:$dst), | |
1242 (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p), | |
1243 !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>; | |
1244 } | |
1245 | |
// Same four selp forms as SELP, but each carries a selection pattern: | |
// (select $p, $a, $b) is matched with $a / $b as RC registers or ImmNode | |
// immediates, according to the rr / ri / ir / ii suffix. | |
//   TypeStr - PTX type suffix printed after "selp.". | |
//   ImmNode - SDNode used to match immediate operands (imm or fpimm at the | |
//             instantiation sites). | |
1246 multiclass SELP_PATTERN<string TypeStr, RegisterClass RC, Operand ImmCls, | |
1247 SDNode ImmNode> { | |
1248 def rr : NVPTXInst<(outs RC:$dst), | |
1249 (ins RC:$a, RC:$b, Int1Regs:$p), | |
1250 !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), | |
1251 [(set RC:$dst, (select Int1Regs:$p, RC:$a, RC:$b))]>; | |
1252 def ri : NVPTXInst<(outs RC:$dst), | |
1253 (ins RC:$a, ImmCls:$b, Int1Regs:$p), | |
1254 !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), | |
1255 [(set RC:$dst, (select Int1Regs:$p, RC:$a, ImmNode:$b))]>; | |
1256 def ir : NVPTXInst<(outs RC:$dst), | |
1257 (ins ImmCls:$a, RC:$b, Int1Regs:$p), | |
1258 !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), | |
1259 [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, RC:$b))]>; | |
1260 def ii : NVPTXInst<(outs RC:$dst), | |
1261 (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p), | |
1262 !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), | |
1263 [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, ImmNode:$b))]>; | |
1264 } | |
1265 | |
// Only the b16/b32/b64 and f32/f64 families carry select patterns | |
// (SELP_PATTERN); the s*/u* families are pattern-less (SELP) and are reached | |
// only when another pattern names them explicitly. | |
1266 defm SELP_b16 : SELP_PATTERN<"b16", Int16Regs, i16imm, imm>; | |
1267 defm SELP_s16 : SELP<"s16", Int16Regs, i16imm>; | |
1268 defm SELP_u16 : SELP<"u16", Int16Regs, i16imm>; | |
1269 defm SELP_b32 : SELP_PATTERN<"b32", Int32Regs, i32imm, imm>; | |
1270 defm SELP_s32 : SELP<"s32", Int32Regs, i32imm>; | |
1271 defm SELP_u32 : SELP<"u32", Int32Regs, i32imm>; | |
1272 defm SELP_b64 : SELP_PATTERN<"b64", Int64Regs, i64imm, imm>; | |
1273 defm SELP_s64 : SELP<"s64", Int64Regs, i64imm>; | |
1274 defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>; | |
1275 defm SELP_f32 : SELP_PATTERN<"f32", Float32Regs, f32imm, fpimm>; | |
1276 defm SELP_f64 : SELP_PATTERN<"f64", Float64Regs, f64imm, fpimm>; | |
1277 | |
1278 // Special select for predicate operands | |
// An i1 select is expanded into predicate logic: (p & a) | (!p & b). | |
1279 def : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)), | |
1280 (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a), | |
1281 (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>; | |
1282 | |
1283 //----------------------------------- | |
1284 // Data Movement (Load / Store, Move) | |
1285 //----------------------------------- | |
1286 | |
// Complex patterns for the 32-bit and 64-bit addressing forms. Each yields | |
// two operands, is rooted at frameindex, requests SDNPWantRoot, and is matched | |
// by the named C++ selector (SelectADDRri / SelectADDRri64). | |
1287 def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], | |
1288 [SDNPWantRoot]>; | |
1289 def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri64", [frameindex], | |
1290 [SDNPWantRoot]>; | |
1291 | |
// Memory operands made of two sub-operands, a register and an immediate | |
// (Int32Regs + i32imm, or Int64Regs + i64imm). Both are printed by | |
// printMemOperand. | |
1292 def MEMri : Operand<i32> { | |
1293 let PrintMethod = "printMemOperand"; | |
1294 let MIOperandInfo = (ops Int32Regs, i32imm); | |
1295 } | |
1296 def MEMri64 : Operand<i64> { | |
1297 let PrintMethod = "printMemOperand"; | |
1298 let MIOperandInfo = (ops Int64Regs, i64imm); | |
1299 } | |
1300 | |
// Pointer-typed operands (iPTR and iPTRAny), both printed with printOperand. | |
1301 def imem : Operand<iPTR> { | |
1302 let PrintMethod = "printOperand"; | |
1303 } | |
1304 | |
1305 def imemAny : Operand<iPTRAny> { | |
1306 let PrintMethod = "printOperand"; | |
1307 } | |
1308 | |
// i32 operand rendered by the dedicated printLdStCode print method. | |
1309 def LdStCode : Operand<i32> { | |
1310 let PrintMethod = "printLdStCode"; | |
1311 } | |
1312 | |
// NVPTXISD::Wrapper node: one result and one operand of the same type, with | |
// the result constrained to the pointer type. | |
1313 def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; | |
1314 def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>; | |
1315 | |
// Materialize a wrapped global address (Wrapper tglobaladdr) into a 32-bit or | |
// 64-bit integer register with mov.u32 / mov.u64. | |
1316 def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a), | |
1317 "mov.u32 \t$dst, $a;", | |
1318 [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>; | |
1319 | |
1320 def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a), | |
1321 "mov.u64 \t$dst, $a;", | |
1322 [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>; | |
1323 | |
1324 // Get pointer to local stack | |
// Both defs print a mov from the symbol "__local_depot" immediately followed | |
// by the i32 immediate $num. Neither has a selection pattern, so they must be | |
// created explicitly outside this file. | |
1325 def MOV_DEPOT_ADDR | |
1326 : NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num), | |
1327 "mov.u32 \t$d, __local_depot$num;", []>; | |
1328 def MOV_DEPOT_ADDR_64 | |
1329 : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num), | |
1330 "mov.u64 \t$d, __local_depot$num;", []>; | |
1331 | |
1332 | |
1333 // copyPhysreg is hard-coded in NVPTXInstrInfo.cpp | |
// Register-to-register moves for every register class. None carries a | |
// selection pattern; the enclosing let marks each one with IsSimpleMove = 1. | |
1334 let IsSimpleMove=1 in { | |
1335 def IMOV1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss), | |
1336 "mov.pred \t$dst, $sss;", []>; | |
1337 def IMOV16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss), | |
1338 "mov.u16 \t$dst, $sss;", []>; | |
1339 def IMOV32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss), | |
1340 "mov.u32 \t$dst, $sss;", []>; | |
1341 def IMOV64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss), | |
1342 "mov.u64 \t$dst, $sss;", []>; | |
1343 | |
1344 def FMOV32rr: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), | |
1345 "mov.f32 \t$dst, $src;", []>; | |
1346 def FMOV64rr: NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src), | |
1347 "mov.f64 \t$dst, $src;", []>; | |
1348 } | |
// Immediate materialization: each def matches a bare imm / fpimm of its type | |
// and prints a mov into the corresponding register class. | |
// NOTE(review): the 64-bit integer form is named IMOV64i, not IMOV64ri like | |
// its siblings; the name is left unchanged because other code may refer to it. | |
1349 def IMOV1ri: NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src), | |
1350 "mov.pred \t$dst, $src;", | |
1351 [(set Int1Regs:$dst, imm:$src)]>; | |
1352 def IMOV16ri: NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src), | |
1353 "mov.u16 \t$dst, $src;", | |
1354 [(set Int16Regs:$dst, imm:$src)]>; | |
1355 def IMOV32ri: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src), | |
1356 "mov.u32 \t$dst, $src;", | |
1357 [(set Int32Regs:$dst, imm:$src)]>; | |
1358 def IMOV64i: NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src), | |
1359 "mov.u64 \t$dst, $src;", | |
1360 [(set Int64Regs:$dst, imm:$src)]>; | |
1361 | |
1362 def FMOV32ri: NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src), | |
1363 "mov.f32 \t$dst, $src;", | |
1364 [(set Float32Regs:$dst, fpimm:$src)]>; | |
1365 def FMOV64ri: NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src), | |
1366 "mov.f64 \t$dst, $src;", | |
1367 [(set Float64Regs:$dst, fpimm:$src)]>; | |
1368 | |
// A wrapped external symbol of type i32 is materialized with IMOV32ri. | |
// NOTE(review): only the i32 form is defined in this chunk - confirm whether | |
// an i64 counterpart is needed elsewhere. | |
1369 def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>; | |
1370 | |
1371 //---- Copy Frame Index ---- | |
// Match the ADDRri / ADDRri64 complex patterns and print an add.u32 / add.u64 | |
// whose source operands come from the memory operand rendered with the "add" | |
// operand modifier. | |
1372 def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr), | |
1373 "add.u32 \t$dst, ${addr:add};", | |
1374 [(set Int32Regs:$dst, ADDRri:$addr)]>; | |
1375 def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr), | |
1376 "add.u64 \t$dst, ${addr:add};", | |
1377 [(set Int64Regs:$dst, ADDRri64:$addr)]>; | |
1378 | |
1379 //----------------------------------- | |
1380 // Comparison and Selection | |
1381 //----------------------------------- | |
1382 | |
// Generates the integer comparison patterns for one condition. | |
//   OpNode  - comparison PatFrag to match (e.g. setgt). | |
//   Mode    - comparison-mode leaf passed as the last instruction operand. | |
//   setp_*  - instructions used when the result type is i1. | |
//   set_*   - instructions used when the result type is i32. | |
// For each operand width (16/32/64) and each result type there are three | |
// patterns: register-register, register-immediate and immediate-register. | |
1383 multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode, | |
1384 Instruction setp_16rr, | |
1385 Instruction setp_16ri, | |
1386 Instruction setp_16ir, | |
1387 Instruction setp_32rr, | |
1388 Instruction setp_32ri, | |
1389 Instruction setp_32ir, | |
1390 Instruction setp_64rr, | |
1391 Instruction setp_64ri, | |
1392 Instruction setp_64ir, | |
1393 Instruction set_16rr, | |
1394 Instruction set_16ri, | |
1395 Instruction set_16ir, | |
1396 Instruction set_32rr, | |
1397 Instruction set_32ri, | |
1398 Instruction set_32ir, | |
1399 Instruction set_64rr, | |
1400 Instruction set_64ri, | |
1401 Instruction set_64ir> { | |
1402 // i16 -> pred | |
1403 def : Pat<(i1 (OpNode Int16Regs:$a, Int16Regs:$b)), | |
1404 (setp_16rr Int16Regs:$a, Int16Regs:$b, Mode)>; | |
1405 def : Pat<(i1 (OpNode Int16Regs:$a, imm:$b)), | |
1406 (setp_16ri Int16Regs:$a, imm:$b, Mode)>; | |
1407 def : Pat<(i1 (OpNode imm:$a, Int16Regs:$b)), | |
1408 (setp_16ir imm:$a, Int16Regs:$b, Mode)>; | |
1409 // i32 -> pred | |
1410 def : Pat<(i1 (OpNode Int32Regs:$a, Int32Regs:$b)), | |
1411 (setp_32rr Int32Regs:$a, Int32Regs:$b, Mode)>; | |
1412 def : Pat<(i1 (OpNode Int32Regs:$a, imm:$b)), | |
1413 (setp_32ri Int32Regs:$a, imm:$b, Mode)>; | |
1414 def : Pat<(i1 (OpNode imm:$a, Int32Regs:$b)), | |
1415 (setp_32ir imm:$a, Int32Regs:$b, Mode)>; | |
1416 // i64 -> pred | |
1417 def : Pat<(i1 (OpNode Int64Regs:$a, Int64Regs:$b)), | |
1418 (setp_64rr Int64Regs:$a, Int64Regs:$b, Mode)>; | |
1419 def : Pat<(i1 (OpNode Int64Regs:$a, imm:$b)), | |
1420 (setp_64ri Int64Regs:$a, imm:$b, Mode)>; | |
1421 def : Pat<(i1 (OpNode imm:$a, Int64Regs:$b)), | |
1422 (setp_64ir imm:$a, Int64Regs:$b, Mode)>; | |
1423 | |
1424 // i16 -> i32 | |
1425 def : Pat<(i32 (OpNode Int16Regs:$a, Int16Regs:$b)), | |
1426 (set_16rr Int16Regs:$a, Int16Regs:$b, Mode)>; | |
1427 def : Pat<(i32 (OpNode Int16Regs:$a, imm:$b)), | |
1428 (set_16ri Int16Regs:$a, imm:$b, Mode)>; | |
1429 def : Pat<(i32 (OpNode imm:$a, Int16Regs:$b)), | |
1430 (set_16ir imm:$a, Int16Regs:$b, Mode)>; | |
1431 // i32 -> i32 | |
1432 def : Pat<(i32 (OpNode Int32Regs:$a, Int32Regs:$b)), | |
1433 (set_32rr Int32Regs:$a, Int32Regs:$b, Mode)>; | |
1434 def : Pat<(i32 (OpNode Int32Regs:$a, imm:$b)), | |
1435 (set_32ri Int32Regs:$a, imm:$b, Mode)>; | |
1436 def : Pat<(i32 (OpNode imm:$a, Int32Regs:$b)), | |
1437 (set_32ir imm:$a, Int32Regs:$b, Mode)>; | |
1438 // i64 -> i32 | |
1439 def : Pat<(i32 (OpNode Int64Regs:$a, Int64Regs:$b)), | |
1440 (set_64rr Int64Regs:$a, Int64Regs:$b, Mode)>; | |
1441 def : Pat<(i32 (OpNode Int64Regs:$a, imm:$b)), | |
1442 (set_64ri Int64Regs:$a, imm:$b, Mode)>; | |
1443 def : Pat<(i32 (OpNode imm:$a, Int64Regs:$b)), | |
1444 (set_64ir imm:$a, Int64Regs:$b, Mode)>; | |
1445 } | |
1446 | |
// Convenience wrapper: ISET_FORMAT bound to the signed SETP_s* / SET_s* | |
// instruction families, so callers supply only OpNode and Mode. | |
1447 multiclass ISET_FORMAT_SIGNED<PatFrag OpNode, PatLeaf Mode> | |
1448 : ISET_FORMAT<OpNode, Mode, | |
1449 SETP_s16rr, SETP_s16ri, SETP_s16ir, | |
1450 SETP_s32rr, SETP_s32ri, SETP_s32ir, | |
1451 SETP_s64rr, SETP_s64ri, SETP_s64ir, | |
1452 SET_s16rr, SET_s16ri, SET_s16ir, | |
1453 SET_s32rr, SET_s32ri, SET_s32ir, | |
1454 SET_s64rr, SET_s64ri, SET_s64ir> { | |
1455 // TableGen doesn't like empty multiclasses | |
// so this anonymous PatLeaf exists only to give the body a record. | |
1456 def : PatLeaf<(i32 0)>; | |
1457 } | |
1458 | |
// Binds ISET_FORMAT to the unsigned (u16/u32/u64) SETP_* and SET_* instruction | |
// variants for the given compare node and comparison mode. | |
1459 multiclass ISET_FORMAT_UNSIGNED<PatFrag OpNode, PatLeaf Mode> | |
1460 : ISET_FORMAT<OpNode, Mode, | |
1461 SETP_u16rr, SETP_u16ri, SETP_u16ir, | |
1462 SETP_u32rr, SETP_u32ri, SETP_u32ir, | |
1463 SETP_u64rr, SETP_u64ri, SETP_u64ir, | |
1464 SET_u16rr, SET_u16ri, SET_u16ir, | |
1465 SET_u32rr, SET_u32ri, SET_u32ir, | |
1466 SET_u64rr, SET_u64ri, SET_u64ir> { | |
1467 // Placeholder record: TableGen doesn't like empty multiclasses. | |
1468 def : PatLeaf<(i32 0)>; | |
1469 } | |
1470 | |
// Integer compare patterns. A signed condition code and its unsigned | |
// counterpart share the same Cmp* mode; the signedness comes from the s/u | |
// instruction variants chosen by ISET_FORMAT_SIGNED / ISET_FORMAT_UNSIGNED. | |
1471 defm : ISET_FORMAT_SIGNED<setgt, CmpGT>; | |
1472 defm : ISET_FORMAT_UNSIGNED<setugt, CmpGT>; | |
1473 defm : ISET_FORMAT_SIGNED<setlt, CmpLT>; | |
1474 defm : ISET_FORMAT_UNSIGNED<setult, CmpLT>; | |
1475 defm : ISET_FORMAT_SIGNED<setge, CmpGE>; | |
1476 defm : ISET_FORMAT_UNSIGNED<setuge, CmpGE>; | |
1477 defm : ISET_FORMAT_SIGNED<setle, CmpLE>; | |
1478 defm : ISET_FORMAT_UNSIGNED<setule, CmpLE>; | |
1479 defm : ISET_FORMAT_SIGNED<seteq, CmpEQ>; | |
1480 defm : ISET_FORMAT_UNSIGNED<setueq, CmpEQ>; | |
1481 defm : ISET_FORMAT_SIGNED<setne, CmpNE>; | |
1482 defm : ISET_FORMAT_UNSIGNED<setune, CmpNE>; | |
1483 | |
1484 // i1 compares: (un)equal on predicates maps to XOR; equality to NOT of XOR. | |
1485 def : Pat<(setne Int1Regs:$a, Int1Regs:$b), | |
1486 (XORb1rr Int1Regs:$a, Int1Regs:$b)>; | |
1487 def : Pat<(setune Int1Regs:$a, Int1Regs:$b), | |
1488 (XORb1rr Int1Regs:$a, Int1Regs:$b)>; | |
1489 | |
1490 def : Pat<(seteq Int1Regs:$a, Int1Regs:$b), | |
1491 (NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>; | |
1492 def : Pat<(setueq Int1Regs:$a, Int1Regs:$b), | |
1493 (NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>; | |
1494 | |
1495 // i1 compare -> i32: select between -1 and 0 on the XOR of the operands. | |
// setne takes -1 when the XOR is set; seteq uses the swapped arms so it takes | |
// -1 when the XOR is clear (assuming SELP picks its first value operand when | |
// the predicate is true -- confirm against the SELP_u32ii definition). | |
1496 def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)), | |
1497 (SELP_u32ii -1, 0, (XORb1rr Int1Regs:$a, Int1Regs:$b))>; | |
1498 def : Pat<(i32 (seteq Int1Regs:$a, Int1Regs:$b)), | |
1499 (SELP_u32ii 0, -1, (XORb1rr Int1Regs:$a, Int1Regs:$b))>; | |
1500 | |
1501 | |
1502 | |
// Floating-point compare patterns for one condition code (OpNode). | |
// Every f32 pattern is emitted twice: first a variant guarded by | |
// Requires<[doF32FTZ]> that passes ModeFTZ, then an unguarded variant that | |
// passes Mode. f64 patterns have a single variant and always pass Mode. | |
// Results are produced either as a predicate (i1, SETP_*) or as an i32 (SET_*). | |
1503 multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> { | |
1504 // f32 -> pred | |
1505 def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)), | |
1506 (SETP_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>, | |
1507 Requires<[doF32FTZ]>; | |
1508 def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)), | |
1509 (SETP_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>; | |
1510 def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)), | |
1511 (SETP_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>, | |
1512 Requires<[doF32FTZ]>; | |
1513 def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)), | |
1514 (SETP_f32ri Float32Regs:$a, fpimm:$b, Mode)>; | |
1515 def : Pat<(i1 (OpNode fpimm:$a, Float32Regs:$b)), | |
1516 (SETP_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>, | |
1517 Requires<[doF32FTZ]>; | |
1518 def : Pat<(i1 (OpNode fpimm:$a, Float32Regs:$b)), | |
1519 (SETP_f32ir fpimm:$a, Float32Regs:$b, Mode)>; | |
1520 | |
1521 // f64 -> pred | |
1522 def : Pat<(i1 (OpNode Float64Regs:$a, Float64Regs:$b)), | |
1523 (SETP_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>; | |
1524 def : Pat<(i1 (OpNode Float64Regs:$a, fpimm:$b)), | |
1525 (SETP_f64ri Float64Regs:$a, fpimm:$b, Mode)>; | |
1526 def : Pat<(i1 (OpNode fpimm:$a, Float64Regs:$b)), | |
1527 (SETP_f64ir fpimm:$a, Float64Regs:$b, Mode)>; | |
1528 | |
1529 // f32 -> i32 | |
1530 def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)), | |
1531 (SET_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>, | |
1532 Requires<[doF32FTZ]>; | |
1533 def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)), | |
1534 (SET_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>; | |
1535 def : Pat<(i32 (OpNode Float32Regs:$a, fpimm:$b)), | |
1536 (SET_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>, | |
1537 Requires<[doF32FTZ]>; | |
1538 def : Pat<(i32 (OpNode Float32Regs:$a, fpimm:$b)), | |
1539 (SET_f32ri Float32Regs:$a, fpimm:$b, Mode)>; | |
1540 def : Pat<(i32 (OpNode fpimm:$a, Float32Regs:$b)), | |
1541 (SET_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>, | |
1542 Requires<[doF32FTZ]>; | |
1543 def : Pat<(i32 (OpNode fpimm:$a, Float32Regs:$b)), | |
1544 (SET_f32ir fpimm:$a, Float32Regs:$b, Mode)>; | |
1545 | |
1546 // f64 -> i32 | |
1547 def : Pat<(i32 (OpNode Float64Regs:$a, Float64Regs:$b)), | |
1548 (SET_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>; | |
1549 def : Pat<(i32 (OpNode Float64Regs:$a, fpimm:$b)), | |
1550 (SET_f64ri Float64Regs:$a, fpimm:$b, Mode)>; | |
1551 def : Pat<(i32 (OpNode fpimm:$a, Float64Regs:$b)), | |
1552 (SET_f64ir fpimm:$a, Float64Regs:$b, Mode)>; | |
1553 } | |
1554 | |
// Floating-point compare instantiations. The seto* nodes use the plain Cmp* | |
// modes, the setu* nodes use the Cmp*U modes, and seto / setuo use | |
// CmpNUM / CmpNAN. Each mode is paired with its _FTZ counterpart. | |
1555 defm FSetGT : FSET_FORMAT<setogt, CmpGT, CmpGT_FTZ>; | |
1556 defm FSetLT : FSET_FORMAT<setolt, CmpLT, CmpLT_FTZ>; | |
1557 defm FSetGE : FSET_FORMAT<setoge, CmpGE, CmpGE_FTZ>; | |
1558 defm FSetLE : FSET_FORMAT<setole, CmpLE, CmpLE_FTZ>; | |
1559 defm FSetEQ : FSET_FORMAT<setoeq, CmpEQ, CmpEQ_FTZ>; | |
1560 defm FSetNE : FSET_FORMAT<setone, CmpNE, CmpNE_FTZ>; | |
1561 | |
1562 defm FSetUGT : FSET_FORMAT<setugt, CmpGTU, CmpGTU_FTZ>; | |
1563 defm FSetULT : FSET_FORMAT<setult, CmpLTU, CmpLTU_FTZ>; | |
1564 defm FSetUGE : FSET_FORMAT<setuge, CmpGEU, CmpGEU_FTZ>; | |
1565 defm FSetULE : FSET_FORMAT<setule, CmpLEU, CmpLEU_FTZ>; | |
1566 defm FSetUEQ : FSET_FORMAT<setueq, CmpEQU, CmpEQU_FTZ>; | |
1567 defm FSetUNE : FSET_FORMAT<setune, CmpNEU, CmpNEU_FTZ>; | |
1568 | |
1569 defm FSetNUM : FSET_FORMAT<seto, CmpNUM, CmpNUM_FTZ>; | |
1570 defm FSetNAN : FSET_FORMAT<setuo, CmpNAN, CmpNAN_FTZ>; | |
1571 | |
1572 //def ld_param : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad, | |
1573 // [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; | |
1574 | |
// Type profiles for the NVPTX-specific SelectionDAG nodes defined below. | |
// SDTypeProfile<NumResults, NumOperands, [constraints]>; constraint indices | |
// count results first, then operands. | |
1575 def SDTDeclareParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, | |
1576 SDTCisInt<2>]>; | |
1577 def SDTDeclareScalarParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, | |
1578 SDTCisInt<1>, SDTCisInt<2>]>; | |
1579 def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>; | |
1580 def SDTLoadParamV2Profile : SDTypeProfile<2, 2, [SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisInt<3>]>; | |
1581 def SDTLoadParamV4Profile : SDTypeProfile<4, 2, [SDTCisInt<4>, SDTCisInt<5>]>; | |
1582 def SDTPrintCallProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>; | |
1583 def SDTPrintCallUniProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>; | |
1584 def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>; | |
1585 def SDTStoreParamV2Profile : SDTypeProfile<0, 4, [SDTCisInt<0>, SDTCisInt<1>]>; | |
1586 def SDTStoreParamV4Profile : SDTypeProfile<0, 6, [SDTCisInt<0>, SDTCisInt<1>]>; | |
1587 def SDTStoreParam32Profile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>; | |
1588 def SDTCallArgProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>; | |
1589 def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>; | |
1590 def SDTCallVoidProfile : SDTypeProfile<0, 1, []>; | |
1591 def SDTCallValProfile : SDTypeProfile<1, 0, []>; | |
1592 def SDTMoveParamProfile : SDTypeProfile<1, 1, []>; | |
1593 def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>; | |
1594 def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>; | |
1595 def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>; | |
1596 def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>; | |
1597 | |
// SelectionDAG node definitions. Each binds an NVPTXISD opcode name to one of | |
// the type profiles above and a property list. Most nodes are chained, glued | |
// on both sides and marked as having side effects; the LoadParam* nodes are | |
// marked SDNPMayLoad instead of SDNPSideEffect, the StoreRetval* and RETURN | |
// nodes carry no glue, and MoveParam has no properties at all. | |
1598 def DeclareParam : SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile, | |
1599 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; | |
1600 def DeclareScalarParam : SDNode<"NVPTXISD::DeclareScalarParam", | |
1601 SDTDeclareScalarParamProfile, | |
1602 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; | |
1603 def DeclareRetParam : SDNode<"NVPTXISD::DeclareRetParam", | |
1604 SDTDeclareParamProfile, | |
1605 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; | |
1606 def DeclareRet : SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile, | |
1607 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; | |
1608 def LoadParam : SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile, | |
1609 [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>; | |
1610 def LoadParamV2 : SDNode<"NVPTXISD::LoadParamV2", SDTLoadParamV2Profile, | |
1611 [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>; | |
1612 def LoadParamV4 : SDNode<"NVPTXISD::LoadParamV4", SDTLoadParamV4Profile, | |
1613 [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>; | |
1614 def PrintCall : SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile, | |
1615 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; | |
1616 def PrintCallUni : SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile, | |
1617 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; | |
1618 def StoreParam : SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile, | |
1619 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; | |
1620 def StoreParamV2 : SDNode<"NVPTXISD::StoreParamV2", SDTStoreParamV2Profile, | |
1621 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; | |
1622 def StoreParamV4 : SDNode<"NVPTXISD::StoreParamV4", SDTStoreParamV4Profile, | |
1623 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; | |
1624 def StoreParamU32 : SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile, | |
1625 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; | |
1626 def StoreParamS32 : SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile, | |
1627 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; | |
1628 def CallArgBegin : SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile, | |
1629 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; | |
1630 def CallArg : SDNode<"NVPTXISD::CallArg", SDTCallArgProfile, | |
1631 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; | |
1632 def LastCallArg : SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile, | |
1633 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; | |
1634 def CallArgEnd : SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile, | |
1635 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; | |
1636 def CallVoid : SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile, | |
1637 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; | |
1638 def Prototype : SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile, | |
1639 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; | |
1640 def CallVal : SDNode<"NVPTXISD::CallVal", SDTCallValProfile, | |
1641 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; | |
1642 def MoveParam : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, | |
1643 []>; | |
1644 def StoreRetval : SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile, | |
1645 [SDNPHasChain, SDNPSideEffect]>; | |
1646 def StoreRetvalV2 : SDNode<"NVPTXISD::StoreRetvalV2", SDTStoreRetvalV2Profile, | |
1647 [SDNPHasChain, SDNPSideEffect]>; | |
1648 def StoreRetvalV4 : SDNode<"NVPTXISD::StoreRetvalV4", SDTStoreRetvalV4Profile, | |
1649 [SDNPHasChain, SDNPSideEffect]>; | |
1650 def PseudoUseParam : SDNode<"NVPTXISD::PseudoUseParam", | |
1651 SDTPseudoUseParamProfile, | |
1652 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; | |
1653 def RETURNNode : SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile, | |
1654 [SDNPHasChain, SDNPSideEffect]>; | |
1655 | |
// Prints "ld.param<opstr>" loading $dst from [retval0+$b]. No selection | |
// pattern is attached. | |
1656 class LoadParamMemInst<NVPTXRegClass regclass, string opstr> | |
1657 : NVPTXInst<(outs regclass:$dst), | |
1658 (ins i32imm:$b), | |
1659 !strconcat("ld.param", opstr, "\t$dst, [retval0+$b];"), | |
1660 []>; | |
1661 | |
// Prints "mov<opstr>" copying retval$b into $dst; selected from | |
// (LoadParam 0, imm:$b). | |
1662 class LoadParamRegInst<NVPTXRegClass regclass, string opstr> | |
1663 : NVPTXInst<(outs regclass:$dst), | |
1664 (ins i32imm:$b), | |
1665 !strconcat("mov", opstr, "\t$dst, retval$b;"), | |
1666 [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>; | |
1667 | |
// Prints "ld.param.v2<opstr>" loading two registers from [retval0+$b]. | |
1668 class LoadParamV2MemInst<NVPTXRegClass regclass, string opstr> | |
1669 : NVPTXInst<(outs regclass:$dst, regclass:$dst2), | |
1670 (ins i32imm:$b), | |
1671 !strconcat("ld.param.v2", opstr, "\t{{$dst, $dst2}}, [retval0+$b];"), []>; | |
1672 | |
// Prints "ld.param.v4<opstr>" loading four registers from [retval0+$b]. | |
1673 class LoadParamV4MemInst<NVPTXRegClass regclass, string opstr> | |
1674 : NVPTXInst<(outs regclass:$dst, regclass:$dst2, | |
1675 regclass:$dst3, regclass:$dst4), | |
1676 (ins i32imm:$b), | |
1677 !strconcat("ld.param.v4", opstr, | |
1678 "\t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"), []>; | |
1679 | |
// Prints "st.param<opstr>" storing $val to [param$a+$b]. No selection | |
// pattern is attached. | |
1680 class StoreParamInst<NVPTXRegClass regclass, string opstr> | |
1681 : NVPTXInst<(outs), | |
1682 (ins regclass:$val, i32imm:$a, i32imm:$b), | |
1683 !strconcat("st.param", opstr, "\t[param$a+$b], $val;"), | |
1684 []>; | |
1685 | |
// Prints "st.param.v2<opstr>" storing {$val, $val2} to [param$a+$b]. | |
1686 class StoreParamV2Inst<NVPTXRegClass regclass, string opstr> | |
1687 : NVPTXInst<(outs), | |
1688 (ins regclass:$val, regclass:$val2, i32imm:$a, i32imm:$b), | |
1689 !strconcat("st.param.v2", opstr, | |
1690 "\t[param$a+$b], {{$val, $val2}};"), | |
1691 []>; | |
1692 | |
// Prints "st.param.v4<opstr>" storing four registers to [param$a+$b]. No | |
// selection pattern is attached. The operand names in the ins list must match | |
// the names used in the asm string ($val, $val2, $val3, $val4). | |
1693 class StoreParamV4Inst<NVPTXRegClass regclass, string opstr> : | |
1694 NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, regclass:$val3, | |
1695 regclass:$val4, i32imm:$a, i32imm:$b), | |
1696 !strconcat(!strconcat("st.param.v4", opstr), | |
1697 "\t[param$a+$b], {{$val, $val2, $val3, $val4}};"), | |
1698 []>; | |
1699 | |
// Prints "st.param<opstr>" storing $val to [func_retval0+$a]. No selection | |
// pattern is attached. | |
1700 class StoreRetvalInst<NVPTXRegClass regclass, string opstr> | |
1701 : NVPTXInst<(outs), | |
1702 (ins regclass:$val, i32imm:$a), | |
1703 !strconcat("st.param", opstr, "\t[func_retval0+$a], $val;"), | |
1704 []>; | |
1705 | |
// Prints "st.param.v2<opstr>" storing {$val, $val2} to [func_retval0+$a]. | |
1706 class StoreRetvalV2Inst<NVPTXRegClass regclass, string opstr> | |
1707 : NVPTXInst<(outs), | |
1708 (ins regclass:$val, regclass:$val2, i32imm:$a), | |
1709 !strconcat("st.param.v2", opstr, "\t[func_retval0+$a], {{$val, $val2}};"), | |
1710 []>; | |
1711 | |
// Prints "st.param.v4<opstr>" storing four registers to [func_retval0+$a]. | |
1712 class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr> | |
1713 : NVPTXInst<(outs), | |
1714 (ins regclass:$val, regclass:$val2, | |
1715 regclass:$val3, regclass:$val4, i32imm:$a), | |
1716 !strconcat("st.param.v4", opstr, | |
1717 "\t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"), | |
1718 []>; | |
1719 | |
// Call prefixes selected from (PrintCall N): each prints "call" followed by a | |
// parenthesised list of N retval names (retval0 .. retval<N-1>), for N = 1..8. | |
// N = 0 prints a bare "call ". None of these take operands. | |
1720 def PrintCallRetInst1 : NVPTXInst<(outs), (ins), | |
1721 "call (retval0), ", | |
1722 [(PrintCall (i32 1))]>; | |
1723 def PrintCallRetInst2 : NVPTXInst<(outs), (ins), | |
1724 "call (retval0, retval1), ", | |
1725 [(PrintCall (i32 2))]>; | |
1726 def PrintCallRetInst3 : NVPTXInst<(outs), (ins), | |
1727 "call (retval0, retval1, retval2), ", | |
1728 [(PrintCall (i32 3))]>; | |
1729 def PrintCallRetInst4 : NVPTXInst<(outs), (ins), | |
1730 "call (retval0, retval1, retval2, retval3), ", | |
1731 [(PrintCall (i32 4))]>; | |
1732 def PrintCallRetInst5 : NVPTXInst<(outs), (ins), | |
1733 "call (retval0, retval1, retval2, retval3, retval4), ", | |
1734 [(PrintCall (i32 5))]>; | |
1735 def PrintCallRetInst6 : NVPTXInst<(outs), (ins), | |
1736 "call (retval0, retval1, retval2, retval3, retval4, retval5), ", | |
1737 [(PrintCall (i32 6))]>; | |
1738 def PrintCallRetInst7 : NVPTXInst<(outs), (ins), | |
1739 "call (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ", | |
1740 [(PrintCall (i32 7))]>; | |
1741 def PrintCallRetInst8 : NVPTXInst<(outs), (ins), | |
1742 !strconcat("call (retval0, retval1, retval2, retval3, retval4", | |
1743 ", retval5, retval6, retval7), "), | |
1744 [(PrintCall (i32 8))]>; | |
1745 | |
1746 def PrintCallNoRetInst : NVPTXInst<(outs), (ins), "call ", | |
1747 [(PrintCall (i32 0))]>; | |
1748 | |
// "call.uni" counterparts of the PrintCall* instructions, selected from | |
// (PrintCallUni N) for N = 1..8, plus the N = 0 form that prints "call.uni ". | |
1749 def PrintCallUniRetInst1 : NVPTXInst<(outs), (ins), | |
1750 "call.uni (retval0), ", | |
1751 [(PrintCallUni (i32 1))]>; | |
1752 def PrintCallUniRetInst2 : NVPTXInst<(outs), (ins), | |
1753 "call.uni (retval0, retval1), ", | |
1754 [(PrintCallUni (i32 2))]>; | |
1755 def PrintCallUniRetInst3 : NVPTXInst<(outs), (ins), | |
1756 "call.uni (retval0, retval1, retval2), ", | |
1757 [(PrintCallUni (i32 3))]>; | |
1758 def PrintCallUniRetInst4 : NVPTXInst<(outs), (ins), | |
1759 "call.uni (retval0, retval1, retval2, retval3), ", | |
1760 [(PrintCallUni (i32 4))]>; | |
1761 def PrintCallUniRetInst5 : NVPTXInst<(outs), (ins), | |
1762 "call.uni (retval0, retval1, retval2, retval3, retval4), ", | |
1763 [(PrintCallUni (i32 5))]>; | |
1764 def PrintCallUniRetInst6 : NVPTXInst<(outs), (ins), | |
1765 "call.uni (retval0, retval1, retval2, retval3, retval4, retval5), ", | |
1766 [(PrintCallUni (i32 6))]>; | |
1767 def PrintCallUniRetInst7 : NVPTXInst<(outs), (ins), | |
1768 "call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ", | |
1769 [(PrintCallUni (i32 7))]>; | |
1770 def PrintCallUniRetInst8 : NVPTXInst<(outs), (ins), | |
1771 !strconcat("call.uni (retval0, retval1, retval2, retval3, retval4", | |
1772 ", retval5, retval6, retval7), "), | |
1773 [(PrintCallUni (i32 8))]>; | |
1774 | |
1775 def PrintCallUniNoRetInst : NVPTXInst<(outs), (ins), "call.uni ", | |
1776 [(PrintCallUni (i32 0))]>; | |
1777 | |
// ld.param instantiations (scalar, v2 and v4). The I8 variants print a .b8 | |
// type suffix but use the Int16Regs register class, like the I16 variants. | |
// There are no v4 records for 64-bit element types here. | |
1778 def LoadParamMemI64 : LoadParamMemInst<Int64Regs, ".b64">; | |
1779 def LoadParamMemI32 : LoadParamMemInst<Int32Regs, ".b32">; | |
1780 def LoadParamMemI16 : LoadParamMemInst<Int16Regs, ".b16">; | |
1781 def LoadParamMemI8 : LoadParamMemInst<Int16Regs, ".b8">; | |
1782 def LoadParamMemV2I64 : LoadParamV2MemInst<Int64Regs, ".b64">; | |
1783 def LoadParamMemV2I32 : LoadParamV2MemInst<Int32Regs, ".b32">; | |
1784 def LoadParamMemV2I16 : LoadParamV2MemInst<Int16Regs, ".b16">; | |
1785 def LoadParamMemV2I8 : LoadParamV2MemInst<Int16Regs, ".b8">; | |
1786 def LoadParamMemV4I32 : LoadParamV4MemInst<Int32Regs, ".b32">; | |
1787 def LoadParamMemV4I16 : LoadParamV4MemInst<Int16Regs, ".b16">; | |
1788 def LoadParamMemV4I8 : LoadParamV4MemInst<Int16Regs, ".b8">; | |
1789 def LoadParamMemF32 : LoadParamMemInst<Float32Regs, ".f32">; | |
1790 def LoadParamMemF64 : LoadParamMemInst<Float64Regs, ".f64">; | |
1791 def LoadParamMemV2F32 : LoadParamV2MemInst<Float32Regs, ".f32">; | |
1792 def LoadParamMemV2F64 : LoadParamV2MemInst<Float64Regs, ".f64">; | |
1793 def LoadParamMemV4F32 : LoadParamV4MemInst<Float32Regs, ".f32">; | |
1794 | |
// Scalar and v2 integer st.param instantiations. The I8 variants print a .b8 | |
// type suffix but take their value from Int16Regs. | |
1795 def StoreParamI64 : StoreParamInst<Int64Regs, ".b64">; | |
1796 def StoreParamI32 : StoreParamInst<Int32Regs, ".b32">; | |
1797 | |
1798 def StoreParamI16 : StoreParamInst<Int16Regs, ".b16">; | |
1799 def StoreParamI8 : StoreParamInst<Int16Regs, ".b8">; | |
1800 def StoreParamV2I64 : StoreParamV2Inst<Int64Regs, ".b64">; | |
1801 def StoreParamV2I32 : StoreParamV2Inst<Int32Regs, ".b32">; | |
1802 def StoreParamV2I16 : StoreParamV2Inst<Int16Regs, ".b16">; | |
1803 def StoreParamV2I8 : StoreParamV2Inst<Int16Regs, ".b8">; | |
1804 | |
1805 // FIXME: StoreParamV4Inst crashes llvm-tblgen :( | |
1806 //def StoreParamV4I32 : StoreParamV4Inst<Int32Regs, ".b32">; | |
// Hand-expanded 4-element store to [param$a+$b]. The mnemonic carries the .v4 | |
// vector qualifier, matching the other StoreParamV4* records. | |
1807 def StoreParamV4I32 : NVPTXInst<(outs), (ins Int32Regs:$val, Int32Regs:$val2, | |
1808 Int32Regs:$val3, Int32Regs:$val4, | |
1809 i32imm:$a, i32imm:$b), | |
1810 "st.param.v4.b32\t[param$a+$b], {{$val, $val2, $val3, $val4}};", | |
1811 []>; | |
1812 | |
// Hand-expanded 4-element stores of Int16Regs values to [param$a+$b], printed | |
// as st.param.v4.b16 and st.param.v4.b8 respectively. No selection patterns. | |
1813 def StoreParamV4I16 : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2, | |
1814 Int16Regs:$val3, Int16Regs:$val4, | |
1815 i32imm:$a, i32imm:$b), | |
1816 "st.param.v4.b16\t[param$a+$b], {{$val, $val2, $val3, $val4}};", | |
1817 []>; | |
1818 | |
1819 def StoreParamV4I8 : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2, | |
1820 Int16Regs:$val3, Int16Regs:$val4, | |
1821 i32imm:$a, i32imm:$b), | |
1822 "st.param.v4.b8\t[param$a+$b], {{$val, $val2, $val3, $val4}};", | |
1823 []>; | |
1824 | |
// Floating-point st.param instantiations. The v4 f32 form is written out by | |
// hand instead of using StoreParamV4Inst (see the FIXME below). | |
1825 def StoreParamF32 : StoreParamInst<Float32Regs, ".f32">; | |
1826 def StoreParamF64 : StoreParamInst<Float64Regs, ".f64">; | |
1827 def StoreParamV2F32 : StoreParamV2Inst<Float32Regs, ".f32">; | |
1828 def StoreParamV2F64 : StoreParamV2Inst<Float64Regs, ".f64">; | |
1829 // FIXME: StoreParamV4Inst crashes llvm-tblgen :( | |
1830 //def StoreParamV4F32 : StoreParamV4Inst<Float32Regs, ".f32">; | |
1831 def StoreParamV4F32 : NVPTXInst<(outs), | |
1832 (ins Float32Regs:$val, Float32Regs:$val2, | |
1833 Float32Regs:$val3, Float32Regs:$val4, | |
1834 i32imm:$a, i32imm:$b), | |
1835 "st.param.v4.f32\t[param$a+$b], {{$val, $val2, $val3, $val4}};", | |
1836 []>; | |
1837 | |
1838 | |
// Return-value store instantiations (scalar, v2 and v4), all writing to | |
// func_retval0. The I8 variants print .b8 but take Int16Regs values. | |
1839 def StoreRetvalI64 : StoreRetvalInst<Int64Regs, ".b64">; | |
1840 def StoreRetvalI32 : StoreRetvalInst<Int32Regs, ".b32">; | |
1841 def StoreRetvalI16 : StoreRetvalInst<Int16Regs, ".b16">; | |
1842 def StoreRetvalI8 : StoreRetvalInst<Int16Regs, ".b8">; | |
1843 def StoreRetvalV2I64 : StoreRetvalV2Inst<Int64Regs, ".b64">; | |
1844 def StoreRetvalV2I32 : StoreRetvalV2Inst<Int32Regs, ".b32">; | |
1845 def StoreRetvalV2I16 : StoreRetvalV2Inst<Int16Regs, ".b16">; | |
1846 def StoreRetvalV2I8 : StoreRetvalV2Inst<Int16Regs, ".b8">; | |
1847 def StoreRetvalV4I32 : StoreRetvalV4Inst<Int32Regs, ".b32">; | |
1848 def StoreRetvalV4I16 : StoreRetvalV4Inst<Int16Regs, ".b16">; | |
1849 def StoreRetvalV4I8 : StoreRetvalV4Inst<Int16Regs, ".b8">; | |
1850 | |
1851 def StoreRetvalF64 : StoreRetvalInst<Float64Regs, ".f64">; | |
1852 def StoreRetvalF32 : StoreRetvalInst<Float32Regs, ".f32">; | |
1853 def StoreRetvalV2F64 : StoreRetvalV2Inst<Float64Regs, ".f64">; | |
1854 def StoreRetvalV2F32 : StoreRetvalV2Inst<Float32Regs, ".f32">; | |
1855 def StoreRetvalV4F32 : StoreRetvalV4Inst<Float32Regs, ".f32">; | |
1856 | |
// Argument-list delimiters and return. CallArgEnd prints ");" for operand | |
// value 1 and a bare ")" for operand value 0. | |
1857 def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>; | |
1858 def CallArgEndInst1 : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>; | |
1859 def CallArgEndInst0 : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>; | |
1860 def RETURNInst : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>; | |
1861 | |
// Prints one register call argument followed by ", "; selected from | |
// (CallArg 0, reg). | |
1862 class CallArgInst<NVPTXRegClass regclass> : | |
1863 NVPTXInst<(outs), (ins regclass:$a), "$a, ", | |
1864 [(CallArg (i32 0), regclass:$a)]>; | |
1865 | |
// Prints the final register call argument with no trailing separator; | |
// selected from (LastCallArg 0, reg). | |
1866 class LastCallArgInst<NVPTXRegClass regclass> : | |
1867 NVPTXInst<(outs), (ins regclass:$a), "$a", | |
1868 [(LastCallArg (i32 0), regclass:$a)]>; | |
1869 | |
// Register call-argument instantiations for each register class. | |
1870 def CallArgI64 : CallArgInst<Int64Regs>; | |
1871 def CallArgI32 : CallArgInst<Int32Regs>; | |
1872 def CallArgI16 : CallArgInst<Int16Regs>; | |
1873 | |
1874 def CallArgF64 : CallArgInst<Float64Regs>; | |
1875 def CallArgF32 : CallArgInst<Float32Regs>; | |
1876 | |
1877 def LastCallArgI64 : LastCallArgInst<Int64Regs>; | |
1878 def LastCallArgI32 : LastCallArgInst<Int32Regs>; | |
1879 def LastCallArgI16 : LastCallArgInst<Int16Regs>; | |
1880 | |
1881 def LastCallArgF64 : LastCallArgInst<Float64Regs>; | |
1882 def LastCallArgF32 : LastCallArgInst<Float32Regs>; | |
1883 | |
// Immediate call arguments: first operand 0 prints the immediate itself. | |
1884 def CallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a, ", | |
1885 [(CallArg (i32 0), (i32 imm:$a))]>; | |
1886 def LastCallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a", | |
1887 [(LastCallArg (i32 0), (i32 imm:$a))]>; | |
1888 | |
// First operand 1 prints the immediate as a parameter name ("param<N>"). | |
1889 def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ", | |
1890 [(CallArg (i32 1), (i32 imm:$a))]>; | |
1891 def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a", | |
1892 [(LastCallArg (i32 1), (i32 imm:$a))]>; | |
1893 | |
// Callee operand of a call: a wrapped global address, or an address held in a | |
// 32-bit or 64-bit register. Each prints the operand followed by ", ". | |
1894 def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr), | |
1895 "$addr, ", | |
1896 [(CallVoid (Wrapper tglobaladdr:$addr))]>; | |
1897 def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr), | |
1898 "$addr, ", | |
1899 [(CallVoid Int32Regs:$addr)]>; | |
1900 def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr), | |
1901 "$addr, ", | |
1902 [(CallVoid Int64Regs:$addr)]>; | |
// Prints the ", prototype_<N>;" suffix, where N is the immediate operand. | |
1903 def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val), | |
1904 ", prototype_$val;", | |
1905 [(Prototype (i32 imm:$val))]>; | |
1906 | |
// Return-value declarations. The aggregate form prints an aligned .b8 array; | |
// the scalar forms are chosen by the first DeclareRet operand: 1 prints a | |
// ".param" declaration, 2 prints a ".reg" declaration. | |
1907 def DeclareRetMemInst : NVPTXInst<(outs), | |
1908 (ins i32imm:$align, i32imm:$size, i32imm:$num), | |
1909 ".param .align $align .b8 retval$num[$size];", | |
1910 [(DeclareRetParam (i32 imm:$align), (i32 imm:$size), (i32 imm:$num))]>; | |
1911 def DeclareRetScalarInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num), | |
1912 ".param .b$size retval$num;", | |
1913 [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>; | |
1914 def DeclareRetRegInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num), | |
1915 ".reg .b$size retval$num;", | |
1916 [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>; | |
1917 | |
// Outgoing-parameter declarations. The aggregate form prints an aligned .b8 | |
// array; the scalar forms are chosen by the last DeclareScalarParam operand: | |
// 0 prints a ".param" declaration, 1 prints a ".reg" declaration. | |
1918 def DeclareParamInst : NVPTXInst<(outs), | |
1919 (ins i32imm:$align, i32imm:$a, i32imm:$size), | |
1920 ".param .align $align .b8 param$a[$size];", | |
1921 [(DeclareParam (i32 imm:$align), (i32 imm:$a), (i32 imm:$size))]>; | |
1922 def DeclareScalarParamInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size), | |
1923 ".param .b$size param$a;", | |
1924 [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>; | |
1925 def DeclareScalarRegInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size), | |
1926 ".reg .b$size param$a;", | |
1927 [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>; | |
1928 | |
// Register-to-register "mov<asmstr>" selected from the MoveParam node. | |
1929 class MoveParamInst<NVPTXRegClass regclass, string asmstr> | |
1930 : NVPTXInst<(outs regclass:$dst), (ins regclass:$src), | |
1931 !strconcat("mov", asmstr, "\t$dst, $src;"), | |
1932 [(set regclass:$dst, (MoveParam regclass:$src))]>; | |
1933 | |
// MoveParam instantiations. The i16 form is written out by hand and prints a | |
// cvt instead of a mov. | |
// NOTE(review): it prints cvt.u16.u32 although $src is declared as Int16Regs | |
// -- confirm the source width is intended. | |
1934 def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">; | |
1935 def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">; | |
1936 def MoveParamI16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), | |
1937 "cvt.u16.u32\t$dst, $src;", | |
1938 [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>; | |
1939 def MoveParamF64 : MoveParamInst<Float64Regs, ".f64">; | |
1940 def MoveParamF32 : MoveParamInst<Float32Regs, ".f32">; | |
1941 | |
// Selected from the PseudoUseParam node; its asm string is only a "//" | |
// comment naming the register, so no PTX operation is printed for it. | |
1942 class PseudoUseParamInst<NVPTXRegClass regclass> : | |
1943 NVPTXInst<(outs), (ins regclass:$src), | |
1944 "// Pseudo use of $src", | |
1945 [(PseudoUseParam regclass:$src)]>; | |
1946 | |
1947 def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs>; | |
1948 def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs>; | |
1949 def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>; | |
1950 def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>; | |
1951 def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>; | |
1952 | |
1953 | |
1954 // | |
1955 // Load / Store Handling | |
1956 // | |
// Scalar load family for one destination register class. All records share | |
// the modifier operands ($isVol, $addsp, $Vec, $Sign, $fromWidth) and differ | |
// only in the address form: | |
//   _avar    imem address | |
//   _areg    Int32Regs address | |
//   _areg_64 Int64Regs address | |
//   _ari     Int32Regs address + i32 immediate offset | |
//   _ari_64  Int64Regs address + i32 immediate offset | |
//   _asi     imem address + i32 immediate offset | |
// None of the records carries a selection pattern (the pattern lists are empty). | |
1957 multiclass LD<NVPTXRegClass regclass> { | |
1958 def _avar : NVPTXInst<(outs regclass:$dst), | |
1959 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, | |
1960 i32imm:$fromWidth, imem:$addr), | |
1961 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", | |
1962 "$fromWidth \t$dst, [$addr];"), []>; | |
1963 def _areg : NVPTXInst<(outs regclass:$dst), | |
1964 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, | |
1965 i32imm:$fromWidth, Int32Regs:$addr), | |
1966 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", | |
1967 "$fromWidth \t$dst, [$addr];"), []>; | |
1968 def _areg_64 : NVPTXInst<(outs regclass:$dst), | |
1969 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, | |
1970 i32imm:$fromWidth, Int64Regs:$addr), | |
1971 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth", | |
1972 " \t$dst, [$addr];"), []>; | |
1973 def _ari : NVPTXInst<(outs regclass:$dst), | |
1974 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, | |
1975 i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), | |
1976 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", | |
1977 "$fromWidth \t$dst, [$addr+$offset];"), []>; | |
1978 def _ari_64 : NVPTXInst<(outs regclass:$dst), | |
1979 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, | |
1980 i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), | |
1981 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth", | |
1982 " \t$dst, [$addr+$offset];"), []>; | |
1983 def _asi : NVPTXInst<(outs regclass:$dst), | |
1984 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, | |
1985 i32imm:$fromWidth, imem:$addr, i32imm:$offset), | |
1986 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", | |
1987 "$fromWidth \t$dst, [$addr+$offset];"), []>; | |
1988 } | |
1989 | |
// Scalar load instantiations, all marked mayLoad with no other side effects. | |
// LD_i8 and LD_i16 both use the Int16Regs register class. | |
1990 let mayLoad=1, neverHasSideEffects=1 in { | |
1991 defm LD_i8 : LD<Int16Regs>; | |
1992 defm LD_i16 : LD<Int16Regs>; | |
1993 defm LD_i32 : LD<Int32Regs>; | |
1994 defm LD_i64 : LD<Int64Regs>; | |
1995 defm LD_f32 : LD<Float32Regs>; | |
1996 defm LD_f64 : LD<Float64Regs>; | |
1997 } | |
1998 | |
// Scalar store family for one source register class. All records take the | |
// stored value first, then the modifier operands ($isVol, $addsp, $Vec, | |
// $Sign, $toWidth), and differ only in the address form: | |
//   _avar    imem address | |
//   _areg    Int32Regs address | |
//   _areg_64 Int64Regs address | |
//   _ari     Int32Regs address + i32 immediate offset | |
//   _ari_64  Int64Regs address + i32 immediate offset | |
//   _asi     imem address + i32 immediate offset | |
// None of the records carries a selection pattern (the pattern lists are empty). | |
1999 multiclass ST<NVPTXRegClass regclass> { | |
2000 def _avar : NVPTXInst<(outs), | |
2001 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, | |
2002 LdStCode:$Sign, i32imm:$toWidth, imem:$addr), | |
2003 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", | |
2004 " \t[$addr], $src;"), []>; | |
2005 def _areg : NVPTXInst<(outs), | |
2006 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, | |
2007 LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr), | |
2008 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", | |
2009 " \t[$addr], $src;"), []>; | |
2010 def _areg_64 : NVPTXInst<(outs), | |
2011 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, | |
2012 LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr), | |
2013 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ", | |
2014 "\t[$addr], $src;"), []>; | |
2015 def _ari : NVPTXInst<(outs), | |
2016 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, | |
2017 LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset), | |
2018 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", | |
2019 " \t[$addr+$offset], $src;"), []>; | |
2020 def _ari_64 : NVPTXInst<(outs), | |
2021 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, | |
2022 LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset), | |
2023 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ", | |
2024 "\t[$addr+$offset], $src;"), []>; | |
2025 def _asi : NVPTXInst<(outs), | |
2026 (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, | |
2027 LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset), | |
2028 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", | |
2029 " \t[$addr+$offset], $src;"), []>; | |
2030 } | |
2031 | |
// Scalar store instantiations, all marked mayStore with no other side | |
// effects. ST_i8 and ST_i16 both use the Int16Regs register class. | |
2032 let mayStore=1, neverHasSideEffects=1 in { | |
2033 defm ST_i8 : ST<Int16Regs>; | |
2034 defm ST_i16 : ST<Int16Regs>; | |
2035 defm ST_i32 : ST<Int32Regs>; | |
2036 defm ST_i64 : ST<Int64Regs>; | |
2037 defm ST_f32 : ST<Float32Regs>; | |
2038 defm ST_f64 : ST<Float64Regs>; | |
2039 } | |
2040 | |
2041 // The following instructions are used only during and after vector | |
2042 // elementization. | |
2043 // Vector elementization happens at the machine-instruction level, so these | |
2044 // instructions never appear in the DAG. | |
// LD_VEC: vector load instructions for one register class. | |
// Records come in two widths (_v2 with two destinations, _v4 with four) and | |
// six addressing forms: | |
//   _avar    - imem address | |
//   _areg    - Int32Regs address | |
//   _areg_64 - Int64Regs address | |
//   _ari     - Int32Regs address plus i32imm offset | |
//   _ari_64  - Int64Regs address plus i32imm offset | |
//   _asi     - imem address plus i32imm offset | |
// Every record takes the four LdStCode modifiers ($isVol, $addsp, $Vec, $Sign) | |
// and $fromWidth, and has an empty pattern list. | |
2045 multiclass LD_VEC<NVPTXRegClass regclass> { | |
2046 def _v2_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), | |
2047 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, | |
2048 i32imm:$fromWidth, imem:$addr), | |
2049 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", | |
2050 "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>; | |
2051 def _v2_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), | |
2052 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, | |
2053 i32imm:$fromWidth, Int32Regs:$addr), | |
2054 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", | |
2055 "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>; | |
2056 def _v2_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), | |
2057 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, | |
2058 i32imm:$fromWidth, Int64Regs:$addr), | |
2059 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", | |
2060 "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>; | |
2061 def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), | |
2062 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, | |
2063 i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), | |
2064 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", | |
2065 "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>; | |
2066 def _v2_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), | |
2067 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, | |
2068 i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), | |
2069 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", | |
2070 "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>; | |
2071 def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), | |
2072 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, | |
2073 i32imm:$fromWidth, imem:$addr, i32imm:$offset), | |
2074 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", | |
2075 "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>; | |
2076 def _v4_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, | |
2077 regclass:$dst3, regclass:$dst4), | |
2078 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, | |
2079 i32imm:$fromWidth, imem:$addr), | |
2080 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", | |
2081 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>; | |
2082 def _v4_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, | |
2083 regclass:$dst4), | |
2084 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, | |
2085 i32imm:$fromWidth, Int32Regs:$addr), | |
2086 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", | |
2087 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>; | |
2088 def _v4_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, | |
2089 regclass:$dst3, regclass:$dst4), | |
2090 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, | |
2091 i32imm:$fromWidth, Int64Regs:$addr), | |
2092 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", | |
2093 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>; | |
2094 def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, | |
2095 regclass:$dst4), | |
2096 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, | |
2097 i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), | |
2098 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", | |
2099 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"), | |
2100 []>; | |
2101 def _v4_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, | |
2102 regclass:$dst3, regclass:$dst4), | |
2103 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, | |
2104 i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), | |
2105 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", | |
2106 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"), | |
2107 []>; | |
2108 def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, | |
2109 regclass:$dst4), | |
2110 (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, | |
2111 i32imm:$fromWidth, imem:$addr, i32imm:$offset), | |
2112 !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", | |
2113 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"), | |
2114 []>; | |
2115 } | |
// Vector loads: one LD_VEC instantiation per element type, all marked mayLoad | |
// with no other side effects. LDV_i8 and LDV_i16 both use Int16Regs. | |
2116 let mayLoad=1, neverHasSideEffects=1 in { | |
2117 defm LDV_i8 : LD_VEC<Int16Regs>; | |
2118 defm LDV_i16 : LD_VEC<Int16Regs>; | |
2119 defm LDV_i32 : LD_VEC<Int32Regs>; | |
2120 defm LDV_i64 : LD_VEC<Int64Regs>; | |
2121 defm LDV_f32 : LD_VEC<Float32Regs>; | |
2122 defm LDV_f64 : LD_VEC<Float64Regs>; | |
2123 } | |
2124 | |
// ST_VEC: vector store instructions for one register class. | |
// Records come in two widths (_v2 with two sources, _v4 with four) and six | |
// addressing forms: | |
//   _avar    - imem address | |
//   _areg    - Int32Regs address | |
//   _areg_64 - Int64Regs address | |
//   _ari     - Int32Regs address plus i32imm offset | |
//   _ari_64  - Int64Regs address plus i32imm offset | |
//   _asi     - imem address plus i32imm offset | |
// The source registers come first in the operand list, followed by the four | |
// LdStCode modifiers and the width immediate, which is named $fromWidth here | |
// as well. No record has outputs or an isel pattern. | |
2125 multiclass ST_VEC<NVPTXRegClass regclass> { | |
2126 def _v2_avar : NVPTXInst<(outs), | |
2127 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, | |
2128 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr), | |
2129 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", | |
2130 "$fromWidth \t[$addr], {{$src1, $src2}};"), []>; | |
2131 def _v2_areg : NVPTXInst<(outs), | |
2132 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, | |
2133 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr), | |
2134 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", | |
2135 "$fromWidth \t[$addr], {{$src1, $src2}};"), []>; | |
2136 def _v2_areg_64 : NVPTXInst<(outs), | |
2137 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, | |
2138 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr), | |
2139 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", | |
2140 "$fromWidth \t[$addr], {{$src1, $src2}};"), []>; | |
2141 def _v2_ari : NVPTXInst<(outs), | |
2142 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, | |
2143 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, | |
2144 i32imm:$offset), | |
2145 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", | |
2146 "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>; | |
2147 def _v2_ari_64 : NVPTXInst<(outs), | |
2148 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, | |
2149 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, | |
2150 i32imm:$offset), | |
2151 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", | |
2152 "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>; | |
2153 def _v2_asi : NVPTXInst<(outs), | |
2154 (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, | |
2155 LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, | |
2156 i32imm:$offset), | |
2157 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", | |
2158 "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>; | |
2159 def _v4_avar : NVPTXInst<(outs), | |
2160 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, | |
2161 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, | |
2162 i32imm:$fromWidth, imem:$addr), | |
2163 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", | |
2164 "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>; | |
2165 def _v4_areg : NVPTXInst<(outs), | |
2166 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, | |
2167 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, | |
2168 i32imm:$fromWidth, Int32Regs:$addr), | |
2169 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", | |
2170 "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>; | |
2171 def _v4_areg_64 : NVPTXInst<(outs), | |
2172 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, | |
2173 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, | |
2174 i32imm:$fromWidth, Int64Regs:$addr), | |
2175 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", | |
2176 "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>; | |
2177 def _v4_ari : NVPTXInst<(outs), | |
2178 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, | |
2179 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, | |
2180 i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), | |
2181 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", | |
2182 "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"), | |
2183 []>; | |
2184 def _v4_ari_64 : NVPTXInst<(outs), | |
2185 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, | |
2186 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, | |
2187 i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), | |
2188 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", | |
2189 "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"), | |
2190 []>; | |
2191 def _v4_asi : NVPTXInst<(outs), | |
2192 (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, | |
2193 LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, | |
2194 i32imm:$fromWidth, imem:$addr, i32imm:$offset), | |
2195 !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", | |
2196 "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"), | |
2197 []>; | |
2198 } | |
// Vector stores: one ST_VEC instantiation per element type, all marked | |
// mayStore with no other side effects. STV_i8 and STV_i16 both use Int16Regs. | |
2199 let mayStore=1, neverHasSideEffects=1 in { | |
2200 defm STV_i8 : ST_VEC<Int16Regs>; | |
2201 defm STV_i16 : ST_VEC<Int16Regs>; | |
2202 defm STV_i32 : ST_VEC<Int32Regs>; | |
2203 defm STV_i64 : ST_VEC<Int64Regs>; | |
2204 defm STV_f32 : ST_VEC<Float32Regs>; | |
2205 defm STV_f64 : ST_VEC<Float64Regs>; | |
2206 } | |
2207 | |
2208 | |
2209 //---- Conversion ---- | |
2210 | |
// F_BITCONVERT: selects a bitconvert between two register classes as a | |
// "mov.b<SzStr>" from $a to $d. | |
//   SzStr       - width suffix appended to "mov.b" (e.g. "32") | |
//   regclassIn  - register class of the source operand $a | |
//   regclassOut - register class of the destination operand $d | |
2211 class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn, | |
2212 NVPTXRegClass regclassOut> : | |
2213 NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a), | |
2214 !strconcat("mov.b", !strconcat(SzStr, " \t $d, $a;")), | |
2215 [(set regclassOut:$d, (bitconvert regclassIn:$a))]>; | |
2216 | |
// Integer <-> float bit moves for the 32-bit and 64-bit register classes. | |
2217 def BITCONVERT_32_I2F : F_BITCONVERT<"32", Int32Regs, Float32Regs>; | |
2218 def BITCONVERT_32_F2I : F_BITCONVERT<"32", Float32Regs, Int32Regs>; | |
2219 def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>; | |
2220 def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>; | |
2221 | |
2222 // NOTE: pred->fp are currently sub-optimal due to an issue in TableGen where | |
2223 // we cannot specify floating-point literals in isel patterns. Therefore, we | |
2224 // use an integer selp to select either 1 or 0 and then cvt to floating-point. | |
2225 | |
// Integer to floating-point conversions. i16/i32/i64 sources map directly onto | |
// a CVT_* instruction with the CvtRN mode. An i1 source is first turned into a | |
// 32-bit 1/0 value by SELP_u32ii and that value is then converted. | |
// NOTE(review): the signed i1 cases also select 1 (not -1) for a true | |
// predicate - confirm this is the intended result for sint_to_fp of i1. | |
2226 // sint -> f32 | |
2227 def : Pat<(f32 (sint_to_fp Int1Regs:$a)), | |
2228 (CVT_f32_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>; | |
2229 def : Pat<(f32 (sint_to_fp Int16Regs:$a)), | |
2230 (CVT_f32_s16 Int16Regs:$a, CvtRN)>; | |
2231 def : Pat<(f32 (sint_to_fp Int32Regs:$a)), | |
2232 (CVT_f32_s32 Int32Regs:$a, CvtRN)>; | |
2233 def : Pat<(f32 (sint_to_fp Int64Regs:$a)), | |
2234 (CVT_f32_s64 Int64Regs:$a, CvtRN)>; | |
2235 | |
2236 // uint -> f32 | |
2237 def : Pat<(f32 (uint_to_fp Int1Regs:$a)), | |
2238 (CVT_f32_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>; | |
2239 def : Pat<(f32 (uint_to_fp Int16Regs:$a)), | |
2240 (CVT_f32_u16 Int16Regs:$a, CvtRN)>; | |
2241 def : Pat<(f32 (uint_to_fp Int32Regs:$a)), | |
2242 (CVT_f32_u32 Int32Regs:$a, CvtRN)>; | |
2243 def : Pat<(f32 (uint_to_fp Int64Regs:$a)), | |
2244 (CVT_f32_u64 Int64Regs:$a, CvtRN)>; | |
2245 | |
2246 // sint -> f64 | |
2247 def : Pat<(f64 (sint_to_fp Int1Regs:$a)), | |
2248 (CVT_f64_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>; | |
2249 def : Pat<(f64 (sint_to_fp Int16Regs:$a)), | |
2250 (CVT_f64_s16 Int16Regs:$a, CvtRN)>; | |
2251 def : Pat<(f64 (sint_to_fp Int32Regs:$a)), | |
2252 (CVT_f64_s32 Int32Regs:$a, CvtRN)>; | |
2253 def : Pat<(f64 (sint_to_fp Int64Regs:$a)), | |
2254 (CVT_f64_s64 Int64Regs:$a, CvtRN)>; | |
2255 | |
2256 // uint -> f64 | |
2257 def : Pat<(f64 (uint_to_fp Int1Regs:$a)), | |
2258 (CVT_f64_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>; | |
2259 def : Pat<(f64 (uint_to_fp Int16Regs:$a)), | |
2260 (CVT_f64_u16 Int16Regs:$a, CvtRN)>; | |
2261 def : Pat<(f64 (uint_to_fp Int32Regs:$a)), | |
2262 (CVT_f64_u32 Int32Regs:$a, CvtRN)>; | |
2263 def : Pat<(f64 (uint_to_fp Int64Regs:$a)), | |
2264 (CVT_f64_u64 Int64Regs:$a, CvtRN)>; | |
2265 | |
2266 | |
// Floating-point to integer conversions, all using the CvtRZI mode. | |
// For f32 sources every i16/i32/i64 pattern is listed twice: first with | |
// CvtRZI_FTZ guarded by Requires<[doF32FTZ]>, then the unguarded CvtRZI form. | |
// f64 sources have only the CvtRZI form. | |
// i1 results are produced by moving the input bits to an integer register and | |
// comparing them with 0 using CmpEQ. | |
// NOTE(review): CmpEQ against 0 makes the predicate true for an all-zero bit | |
// pattern - confirm that this polarity is intended. | |
2267 // f32 -> sint | |
2268 def : Pat<(i1 (fp_to_sint Float32Regs:$a)), | |
2269 (SETP_b32ri (BITCONVERT_32_F2I Float32Regs:$a), 0, CmpEQ)>; | |
2270 def : Pat<(i16 (fp_to_sint Float32Regs:$a)), | |
2271 (CVT_s16_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; | |
2272 def : Pat<(i16 (fp_to_sint Float32Regs:$a)), | |
2273 (CVT_s16_f32 Float32Regs:$a, CvtRZI)>; | |
2274 def : Pat<(i32 (fp_to_sint Float32Regs:$a)), | |
2275 (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; | |
2276 def : Pat<(i32 (fp_to_sint Float32Regs:$a)), | |
2277 (CVT_s32_f32 Float32Regs:$a, CvtRZI)>; | |
2278 def : Pat<(i64 (fp_to_sint Float32Regs:$a)), | |
2279 (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; | |
2280 def : Pat<(i64 (fp_to_sint Float32Regs:$a)), | |
2281 (CVT_s64_f32 Float32Regs:$a, CvtRZI)>; | |
2282 | |
2283 // f32 -> uint | |
2284 def : Pat<(i1 (fp_to_uint Float32Regs:$a)), | |
2285 (SETP_b32ri (BITCONVERT_32_F2I Float32Regs:$a), 0, CmpEQ)>; | |
2286 def : Pat<(i16 (fp_to_uint Float32Regs:$a)), | |
2287 (CVT_u16_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; | |
2288 def : Pat<(i16 (fp_to_uint Float32Regs:$a)), | |
2289 (CVT_u16_f32 Float32Regs:$a, CvtRZI)>; | |
2290 def : Pat<(i32 (fp_to_uint Float32Regs:$a)), | |
2291 (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; | |
2292 def : Pat<(i32 (fp_to_uint Float32Regs:$a)), | |
2293 (CVT_u32_f32 Float32Regs:$a, CvtRZI)>; | |
2294 def : Pat<(i64 (fp_to_uint Float32Regs:$a)), | |
2295 (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; | |
2296 def : Pat<(i64 (fp_to_uint Float32Regs:$a)), | |
2297 (CVT_u64_f32 Float32Regs:$a, CvtRZI)>; | |
2298 | |
2299 // f64 -> sint | |
2300 def : Pat<(i1 (fp_to_sint Float64Regs:$a)), | |
2301 (SETP_b64ri (BITCONVERT_64_F2I Float64Regs:$a), 0, CmpEQ)>; | |
2302 def : Pat<(i16 (fp_to_sint Float64Regs:$a)), | |
2303 (CVT_s16_f64 Float64Regs:$a, CvtRZI)>; | |
2304 def : Pat<(i32 (fp_to_sint Float64Regs:$a)), | |
2305 (CVT_s32_f64 Float64Regs:$a, CvtRZI)>; | |
2306 def : Pat<(i64 (fp_to_sint Float64Regs:$a)), | |
2307 (CVT_s64_f64 Float64Regs:$a, CvtRZI)>; | |
2308 | |
2309 // f64 -> uint | |
2310 def : Pat<(i1 (fp_to_uint Float64Regs:$a)), | |
2311 (SETP_b64ri (BITCONVERT_64_F2I Float64Regs:$a), 0, CmpEQ)>; | |
2312 def : Pat<(i16 (fp_to_uint Float64Regs:$a)), | |
2313 (CVT_u16_f64 Float64Regs:$a, CvtRZI)>; | |
2314 def : Pat<(i32 (fp_to_uint Float64Regs:$a)), | |
2315 (CVT_u32_f64 Float64Regs:$a, CvtRZI)>; | |
2316 def : Pat<(i64 (fp_to_uint Float64Regs:$a)), | |
2317 (CVT_u64_f64 Float64Regs:$a, CvtRZI)>; | |
2318 | |
// Integer extensions. | |
// From i1: a SELP_*ii chooses between two immediates based on the predicate | |
// (-1/0 for sext, 1/0 for zext, -1/0 for anyext). | |
// From i16 and i32: a CVT_* with CvtNONE; sext uses the signed conversion, | |
// while zext and anyext both use the unsigned conversion. | |
2319 // sext i1 | |
2320 def : Pat<(i16 (sext Int1Regs:$a)), | |
2321 (SELP_s16ii -1, 0, Int1Regs:$a)>; | |
2322 def : Pat<(i32 (sext Int1Regs:$a)), | |
2323 (SELP_s32ii -1, 0, Int1Regs:$a)>; | |
2324 def : Pat<(i64 (sext Int1Regs:$a)), | |
2325 (SELP_s64ii -1, 0, Int1Regs:$a)>; | |
2326 | |
2327 // zext i1 | |
2328 def : Pat<(i16 (zext Int1Regs:$a)), | |
2329 (SELP_u16ii 1, 0, Int1Regs:$a)>; | |
2330 def : Pat<(i32 (zext Int1Regs:$a)), | |
2331 (SELP_u32ii 1, 0, Int1Regs:$a)>; | |
2332 def : Pat<(i64 (zext Int1Regs:$a)), | |
2333 (SELP_u64ii 1, 0, Int1Regs:$a)>; | |
2334 | |
2335 // anyext i1 | |
2336 def : Pat<(i16 (anyext Int1Regs:$a)), | |
2337 (SELP_u16ii -1, 0, Int1Regs:$a)>; | |
2338 def : Pat<(i32 (anyext Int1Regs:$a)), | |
2339 (SELP_u32ii -1, 0, Int1Regs:$a)>; | |
2340 def : Pat<(i64 (anyext Int1Regs:$a)), | |
2341 (SELP_u64ii -1, 0, Int1Regs:$a)>; | |
2342 | |
2343 // sext i16 | |
2344 def : Pat<(i32 (sext Int16Regs:$a)), | |
2345 (CVT_s32_s16 Int16Regs:$a, CvtNONE)>; | |
2346 def : Pat<(i64 (sext Int16Regs:$a)), | |
2347 (CVT_s64_s16 Int16Regs:$a, CvtNONE)>; | |
2348 | |
2349 // zext i16 | |
2350 def : Pat<(i32 (zext Int16Regs:$a)), | |
2351 (CVT_u32_u16 Int16Regs:$a, CvtNONE)>; | |
2352 def : Pat<(i64 (zext Int16Regs:$a)), | |
2353 (CVT_u64_u16 Int16Regs:$a, CvtNONE)>; | |
2354 | |
2355 // anyext i16 | |
2356 def : Pat<(i32 (anyext Int16Regs:$a)), | |
2357 (CVT_u32_u16 Int16Regs:$a, CvtNONE)>; | |
2358 def : Pat<(i64 (anyext Int16Regs:$a)), | |
2359 (CVT_u64_u16 Int16Regs:$a, CvtNONE)>; | |
2360 | |
2361 // sext i32 | |
2362 def : Pat<(i64 (sext Int32Regs:$a)), | |
2363 (CVT_s64_s32 Int32Regs:$a, CvtNONE)>; | |
2364 | |
2365 // zext i32 | |
2366 def : Pat<(i64 (zext Int32Regs:$a)), | |
2367 (CVT_u64_u32 Int32Regs:$a, CvtNONE)>; | |
2368 | |
2369 // anyext i32 | |
2370 def : Pat<(i64 (anyext Int32Regs:$a)), | |
2371 (CVT_u64_u32 Int32Regs:$a, CvtNONE)>; | |
2372 | |
2373 | |
// Integer truncation. Truncating to i32 or i16 uses an unsigned CVT_* with | |
// CvtNONE. Truncating to i1 masks the source with 1 (ANDb*ri) and sets the | |
// predicate when the masked value equals 1 (SETP_b*ri with CmpEQ). | |
2374 // truncate i64 | |
2375 def : Pat<(i32 (trunc Int64Regs:$a)), | |
2376 (CVT_u32_u64 Int64Regs:$a, CvtNONE)>; | |
2377 def : Pat<(i16 (trunc Int64Regs:$a)), | |
2378 (CVT_u16_u64 Int64Regs:$a, CvtNONE)>; | |
2379 def : Pat<(i1 (trunc Int64Regs:$a)), | |
2380 (SETP_b64ri (ANDb64ri Int64Regs:$a, 1), 1, CmpEQ)>; | |
2381 | |
2382 // truncate i32 | |
2383 def : Pat<(i16 (trunc Int32Regs:$a)), | |
2384 (CVT_u16_u32 Int32Regs:$a, CvtNONE)>; | |
2385 def : Pat<(i1 (trunc Int32Regs:$a)), | |
2386 (SETP_b32ri (ANDb32ri Int32Regs:$a, 1), 1, CmpEQ)>; | |
2387 | |
2388 // truncate i16 | |
2389 def : Pat<(i1 (trunc Int16Regs:$a)), | |
2390 (SETP_b16ri (ANDb16ri Int16Regs:$a, 1), 1, CmpEQ)>; | |
2391 | |
2392 // sext_inreg | |
// Each (register class, inner type) pair is mapped to the CVT_INREG_* | |
// instruction named after the register width and the inner width. | |
2393 def : Pat<(sext_inreg Int16Regs:$a, i8), (CVT_INREG_s16_s8 Int16Regs:$a)>; | |
2394 def : Pat<(sext_inreg Int32Regs:$a, i8), (CVT_INREG_s32_s8 Int32Regs:$a)>; | |
2395 def : Pat<(sext_inreg Int32Regs:$a, i16), (CVT_INREG_s32_s16 Int32Regs:$a)>; | |
2396 def : Pat<(sext_inreg Int64Regs:$a, i8), (CVT_INREG_s64_s8 Int64Regs:$a)>; | |
2397 def : Pat<(sext_inreg Int64Regs:$a, i16), (CVT_INREG_s64_s16 Int64Regs:$a)>; | |
2398 def : Pat<(sext_inreg Int64Regs:$a, i32), (CVT_INREG_s64_s32 Int64Regs:$a)>; | |
2399 | |
2400 | |
2401 // Select instructions with 32-bit predicates | |
// The condition arrives in an Int32Regs register. It is masked with 1 and | |
// compared with 1 (CmpEQ) to build the predicate that the SELP_*rr | |
// instruction for the value type then consumes. | |
2402 def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b), | |
2403 (SELP_b16rr Int16Regs:$a, Int16Regs:$b, | |
2404 (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; | |
2405 def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b), | |
2406 (SELP_b32rr Int32Regs:$a, Int32Regs:$b, | |
2407 (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; | |
2408 def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b), | |
2409 (SELP_b64rr Int64Regs:$a, Int64Regs:$b, | |
2410 (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; | |
2411 def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b), | |
2412 (SELP_f32rr Float32Regs:$a, Float32Regs:$b, | |
2413 (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; | |
2414 def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b), | |
2415 (SELP_f64rr Float64Regs:$a, Float64Regs:$b, | |
2416 (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; | |
2417 | |
2418 | |
// Pack/unpack moves. Each record is a mov.b32 or mov.b64 whose vector side is | |
// written as a brace-enclosed register list. None of them carries an isel | |
// pattern (the pattern list is empty). | |
2419 // pack a set of smaller int registers to a larger int register | |
2420 def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d), | |
2421 (ins Int16Regs:$s1, Int16Regs:$s2, | |
2422 Int16Regs:$s3, Int16Regs:$s4), | |
2423 "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};", | |
2424 []>; | |
2425 def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d), | |
2426 (ins Int16Regs:$s1, Int16Regs:$s2), | |
2427 "mov.b32\t$d, {{$s1, $s2}};", | |
2428 []>; | |
2429 def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d), | |
2430 (ins Int32Regs:$s1, Int32Regs:$s2), | |
2431 "mov.b64\t$d, {{$s1, $s2}};", | |
2432 []>; | |
2433 def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d), | |
2434 (ins Float32Regs:$s1, Float32Regs:$s2), | |
2435 "mov.b64\t$d, {{$s1, $s2}};", | |
2436 []>; | |
2437 | |
2438 // unpack a larger int register to a set of smaller int registers | |
2439 def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2, | |
2440 Int16Regs:$d3, Int16Regs:$d4), | |
2441 (ins Int64Regs:$s), | |
2442 "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;", | |
2443 []>; | |
2444 def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2), | |
2445 (ins Int32Regs:$s), | |
2446 "mov.b32\t{{$d1, $d2}}, $s;", | |
2447 []>; | |
2448 def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2), | |
2449 (ins Int64Regs:$s), | |
2450 "mov.b64\t{{$d1, $d2}}, $s;", | |
2451 []>; | |
2452 def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2), | |
2453 (ins Float64Regs:$s), | |
2454 "mov.b64\t{{$d1, $d2}}, $s;", | |
2455 []>; | |
2456 | |
2457 // Count leading zeros | |
// CLZr32 and CLZr64 both write their result to an Int32Regs register. The | |
// instructions have no pattern of their own; the Pat records below select | |
// them for both ctlz and ctlz_zero_undef. | |
2458 def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), | |
2459 "clz.b32\t$d, $a;", | |
2460 []>; | |
2461 def CLZr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), | |
2462 "clz.b64\t$d, $a;", | |
2463 []>; | |
2464 | |
2465 // 32-bit has a direct PTX instruction | |
2466 def : Pat<(ctlz Int32Regs:$a), | |
2467 (CLZr32 Int32Regs:$a)>; | |
2468 def : Pat<(ctlz_zero_undef Int32Regs:$a), | |
2469 (CLZr32 Int32Regs:$a)>; | |
2470 | |
2471 // For 64-bit, the result in PTX is actually 32-bit so we zero-extend | |
2472 // to 64-bit to match the LLVM semantics | |
2473 def : Pat<(ctlz Int64Regs:$a), | |
2474 (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>; | |
2475 def : Pat<(ctlz_zero_undef Int64Regs:$a), | |
2476 (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>; | |
2477 | |
2478 // For 16-bit, we zero-extend to 32-bit, then trunc the result back | |
2479 // to 16-bits (ctlz of a 16-bit value is guaranteed to require less | |
2480 // than 16 bits to store). We also need to subtract 16 because the | |
2481 // high-order 16 zeros were counted. | |
2482 def : Pat<(ctlz Int16Regs:$a), | |
2483 (SUBi16ri (CVT_u16_u32 (CLZr32 | |
2484 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), | |
2485 CvtNONE), 16)>; | |
2486 def : Pat<(ctlz_zero_undef Int16Regs:$a), | |
2487 (SUBi16ri (CVT_u16_u32 (CLZr32 | |
2488 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), | |
2489 CvtNONE), 16)>; | |
2490 | |
2491 // Population count | |
// POPCr32 and POPCr64 both write their result to an Int32Regs register. The | |
// instructions have no pattern of their own; the Pat records below select | |
// them for ctpop. | |
2492 def POPCr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), | |
2493 "popc.b32\t$d, $a;", | |
2494 []>; | |
2495 def POPCr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), | |
2496 "popc.b64\t$d, $a;", | |
2497 []>; | |
2498 | |
2499 // 32-bit has a direct PTX instruction | |
2500 def : Pat<(ctpop Int32Regs:$a), | |
2501 (POPCr32 Int32Regs:$a)>; | |
2502 | |
2503 // For 64-bit, the result in PTX is actually 32-bit so we zero-extend | |
2504 // to 64-bit to match the LLVM semantics | |
2505 def : Pat<(ctpop Int64Regs:$a), | |
2506 (CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>; | |
2507 | |
2508 // For 16-bit, we zero-extend to 32-bit, then trunc the result back | |
2509 // to 16-bits (ctpop of a 16-bit value is guaranteed to require less | |
2510 // than 16 bits to store) | |
2511 def : Pat<(ctpop Int16Regs:$a), | |
2512 (CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), | |
2513 CvtNONE)>; | |
2514 | |
// Conversions between f32 and f64. Each one is listed twice: first the *_FTZ | |
// mode guarded by Requires<[doF32FTZ]>, then the unguarded plain mode. | |
2515 // fround f64 -> f32 | |
2516 def : Pat<(f32 (fround Float64Regs:$a)), | |
2517 (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>; | |
2518 def : Pat<(f32 (fround Float64Regs:$a)), | |
2519 (CVT_f32_f64 Float64Regs:$a, CvtRN)>; | |
2520 | |
2521 // fextend f32 -> f64 | |
2522 def : Pat<(f64 (fextend Float32Regs:$a)), | |
2523 (CVT_f64_f32 Float32Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>; | |
2524 def : Pat<(f64 (fextend Float32Regs:$a)), | |
2525 (CVT_f64_f32 Float32Regs:$a, CvtNONE)>; | |
2526 | |
// DAG node for NVPTXISD::RET_FLAG; it is matched by the Return instruction. | |
2527 def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone, | |
2528 [SDNPHasChain, SDNPOptInGlue]>; | |
2529 | |
2530 //----------------------------------- | |
2531 // Control-flow | |
2532 //----------------------------------- | |
2533 | |
// All instructions in this group are terminators: | |
//   Return       - "ret;", selected from retflag | |
//   CBranch      - branch to $target when predicate $a holds ("@$a bra") | |
//   CBranchOther - branch on the negated predicate ("@!$a bra"); it has no | |
//                  pattern of its own | |
//   GOTO         - "bra.uni", selected from an unconditional br | |
2534 let isTerminator=1 in { | |
2535 let isReturn=1, isBarrier=1 in | |
2536 def Return : NVPTXInst<(outs), (ins), "ret;", [(retflag)]>; | |
2537 | |
2538 let isBranch=1 in | |
2539 def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target), | |
2540 "@$a bra \t$target;", | |
2541 [(brcond Int1Regs:$a, bb:$target)]>; | |
2542 let isBranch=1 in | |
2543 def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target), | |
2544 "@!$a bra \t$target;", | |
2545 []>; | |
2546 | |
2547 let isBranch=1, isBarrier=1 in | |
2548 def GOTO : NVPTXInst<(outs), (ins brtarget:$target), | |
2549 "bra.uni \t$target;", | |
2550 [(br bb:$target)]>; | |
2551 } | |
2552 | |
// brcond on a 32-bit condition: compare it with 0 (CmpNE) to obtain the | |
// predicate that CBranch expects. | |
2553 def : Pat<(brcond Int32Regs:$a, bb:$target), | |
2554 (CBranch (SETP_u32ri Int32Regs:$a, 0, CmpNE), bb:$target)>; | |
2555 | |
2556 // SelectionDAGBuilder::visitSwitchCase() will invert the condition of a | |
2557 // conditional branch if | |
2558 // the target block is the next block so that the code can fall through to the | |
2559 // target block. | |
2560 // The inversion is done by 'xor condition, 1', which will be translated to | |
2561 // (setne condition, -1). | |
2562 // Since ptx supports '@!pred bra target', we should use it. | |
2563 def : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target), | |
2564 (CBranchOther Int1Regs:$a, bb:$target)>; | |
2565 | |
2566 // Call | |
// Type profiles and DAG nodes for the call sequence markers. Start carries | |
// one i32 operand, end carries two. | |
2567 def SDT_NVPTXCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; | |
2568 def SDT_NVPTXCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, | |
2569 SDTCisVT<1, i32> ]>; | |
2570 | |
2571 def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart, | |
2572 [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; | |
2573 def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd, | |
2574 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, | |
2575 SDNPSideEffect]>; | |
2576 | |
// NVPTXISD::CALL node: no results, one i32 operand (the callee). | |
2577 def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; | |
2578 def call : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall, | |
2579 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; | |
2580 def calltarget : Operand<i32>; | |
// CALL has no pattern of its own; the Pat records below select it for | |
// global-address and external-symbol callees. | |
2581 let isCall=1 in { | |
2582 def CALL : NVPTXInst<(outs), (ins calltarget:$dst), | |
2583 "call \t$dst, (1);", []>; | |
2584 } | |
2585 | |
2586 def : Pat<(call tglobaladdr:$dst), | |
2587 (CALL tglobaladdr:$dst)>; | |
2588 def : Pat<(call texternalsym:$dst), | |
2589 (CALL texternalsym:$dst)>; | |
2590 | |
2591 // Pseudo instructions. | |
// Pseudo forwards all four arguments unchanged to NVPTXInst. | |
2592 class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern> | |
2593 : NVPTXInst<outs, ins, asmstr, pattern>; | |
2594 | |
2595 // @TODO: We use some tricks here to emit curly braces. Can we clean this up | |
2596 // a bit without TableGen modifications? | |
// Each asm string below contains one opening and one closing doubled brace, | |
// and one of the two sits after a "//" in the emitted text. Callseq_Start also | |
// declares temp_param_reg. Callseq_End prints only $amt1; $amt2 is matched by | |
// the pattern but does not appear in the string. | |
2597 def Callseq_Start : NVPTXInst<(outs), (ins i32imm:$amt), | |
2598 "// Callseq Start $amt\n\t{{\n\t.reg .b32 temp_param_reg;\n\t// <end>}}", | |
2599 [(callseq_start timm:$amt)]>; | |
2600 def Callseq_End : NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), | |
2601 "\n\t//{{\n\t}}// Callseq End $amt1", | |
2602 [(callseq_end timm:$amt1, timm:$amt2)]>; | |
2603 | |
2604 // trap instruction | |
2605 | |
// Selected from the generic trap node; takes no operands and prints "trap;". | |
2606 def trapinst : NVPTXInst<(outs), (ins), | |
2607 "trap;", | |
2608 [(trap)]>; | |
2609 | |
2610 // Call prototype wrapper | |
// NVPTXISD::CallPrototype node: no results, one integer operand. | |
2611 def SDTCallPrototype : SDTypeProfile<0, 1, [SDTCisInt<0>]>; | |
2612 def CallPrototype | |
2613 : SDNode<"NVPTXISD::CallPrototype", SDTCallPrototype, | |
2614 [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; | |
// Operand printed through the printProtoIdent print method. | |
2615 def ProtoIdent : Operand<i32> { | |
2616 let PrintMethod = "printProtoIdent"; | |
2617 } | |
// The asm string is just the $ident operand; the instruction is selected | |
// from CallPrototype applied to an i32 external symbol. | |
2618 def CALL_PROTOTYPE | |
2619 : NVPTXInst<(outs), (ins ProtoIdent:$ident), | |
2620 "$ident", [(CallPrototype (i32 texternalsym:$ident))]>; | |
2621 | |
2622 | |
2623 | |
2624 include "NVPTXIntrinsics.td" | |
2625 | |
2626 | |
2627 //----------------------------------- | |
2628 // Notes | |
2629 //----------------------------------- | |
2630 // BSWAP is currently expanded. The following would be more efficient: | |
2631 // - for < sm_20, use vector scalar mov, as Tesla supports native 16-bit registers | |
2632 // - for sm_20, use prmt (use vector scalar mov to get the pack and | |
2633 // unpack). sm_20 supports native 32-bit registers, but not native 16-bit | |
2634 // registers. | |