Mercurial > hg > CbC > CbC_llvm
comparison lib/Target/NVPTX/NVPTXInstrInfo.td @ 77:54457678186b LLVM3.6
LLVM 3.6
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 08 Sep 2014 22:06:00 +0900 |
parents | 95c75e76d11b |
children | 60c9769439b8 |
comparison
equal
deleted
inserted
replaced
34:e874dbf0ad9d | 77:54457678186b |
---|---|
137 def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">; | 137 def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">; |
138 | 138 |
139 def doF32FTZ : Predicate<"useF32FTZ()">; | 139 def doF32FTZ : Predicate<"useF32FTZ()">; |
140 def doNoF32FTZ : Predicate<"!useF32FTZ()">; | 140 def doNoF32FTZ : Predicate<"!useF32FTZ()">; |
141 | 141 |
142 def doFMAF32 : Predicate<"doFMAF32">; | |
143 def doFMAF32_ftz : Predicate<"(doFMAF32 && useF32FTZ())">; | |
144 def doFMAF32AGG : Predicate<"doFMAF32AGG">; | |
145 def doFMAF32AGG_ftz : Predicate<"(doFMAF32AGG && useF32FTZ())">; | |
146 def doFMAF64 : Predicate<"doFMAF64">; | |
147 def doFMAF64AGG : Predicate<"doFMAF64AGG">; | |
148 | |
149 def doMulWide : Predicate<"doMulWide">; | 142 def doMulWide : Predicate<"doMulWide">; |
150 | 143 |
151 def allowFMA : Predicate<"allowFMA">; | 144 def allowFMA : Predicate<"allowFMA()">; |
152 def allowFMA_ftz : Predicate<"(allowFMA && useF32FTZ())">; | 145 def noFMA : Predicate<"!allowFMA()">; |
153 | 146 |
154 def do_DIVF32_APPROX : Predicate<"getDivF32Level()==0">; | 147 def do_DIVF32_APPROX : Predicate<"getDivF32Level()==0">; |
155 def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">; | 148 def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">; |
156 | 149 |
157 def do_SQRTF32_APPROX : Predicate<"!usePrecSqrtF32()">; | 150 def do_SQRTF32_APPROX : Predicate<"!usePrecSqrtF32()">; |
158 def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">; | 151 def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">; |
159 | 152 |
160 def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">; | 153 def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">; |
154 def noHWROT32 : Predicate<"!Subtarget.hasHWROT32()">; | |
161 | 155 |
162 def true : Predicate<"1">; | 156 def true : Predicate<"1">; |
157 | |
158 def hasPTX31 : Predicate<"Subtarget.getPTXVersion() >= 31">; | |
163 | 159 |
164 | 160 |
165 //===----------------------------------------------------------------------===// | 161 //===----------------------------------------------------------------------===// |
166 // Some Common Instruction Class Templates | 162 // Some Common Instruction Class Templates |
167 //===----------------------------------------------------------------------===// | 163 //===----------------------------------------------------------------------===// |
217 def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst), | 213 def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst), |
218 (ins Float32Regs:$a, Float32Regs:$b), | 214 (ins Float32Regs:$a, Float32Regs:$b), |
219 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), | 215 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), |
220 [(set Float32Regs:$dst, | 216 [(set Float32Regs:$dst, |
221 (OpNode Float32Regs:$a, Float32Regs:$b))]>, | 217 (OpNode Float32Regs:$a, Float32Regs:$b))]>, |
222 Requires<[allowFMA_ftz]>; | 218 Requires<[allowFMA, doF32FTZ]>; |
223 def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst), | 219 def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst), |
224 (ins Float32Regs:$a, f32imm:$b), | 220 (ins Float32Regs:$a, f32imm:$b), |
225 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), | 221 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), |
226 [(set Float32Regs:$dst, | 222 [(set Float32Regs:$dst, |
227 (OpNode Float32Regs:$a, fpimm:$b))]>, | 223 (OpNode Float32Regs:$a, fpimm:$b))]>, |
228 Requires<[allowFMA_ftz]>; | 224 Requires<[allowFMA, doF32FTZ]>; |
229 def f32rr : NVPTXInst<(outs Float32Regs:$dst), | 225 def f32rr : NVPTXInst<(outs Float32Regs:$dst), |
230 (ins Float32Regs:$a, Float32Regs:$b), | 226 (ins Float32Regs:$a, Float32Regs:$b), |
231 !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), | 227 !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), |
232 [(set Float32Regs:$dst, | 228 [(set Float32Regs:$dst, |
233 (OpNode Float32Regs:$a, Float32Regs:$b))]>, | 229 (OpNode Float32Regs:$a, Float32Regs:$b))]>, |
243 multiclass F3_rn<string OpcStr, SDNode OpNode> { | 239 multiclass F3_rn<string OpcStr, SDNode OpNode> { |
244 def f64rr : NVPTXInst<(outs Float64Regs:$dst), | 240 def f64rr : NVPTXInst<(outs Float64Regs:$dst), |
245 (ins Float64Regs:$a, Float64Regs:$b), | 241 (ins Float64Regs:$a, Float64Regs:$b), |
246 !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"), | 242 !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"), |
247 [(set Float64Regs:$dst, | 243 [(set Float64Regs:$dst, |
248 (OpNode Float64Regs:$a, Float64Regs:$b))]>; | 244 (OpNode Float64Regs:$a, Float64Regs:$b))]>, |
245 Requires<[noFMA]>; | |
249 def f64ri : NVPTXInst<(outs Float64Regs:$dst), | 246 def f64ri : NVPTXInst<(outs Float64Regs:$dst), |
250 (ins Float64Regs:$a, f64imm:$b), | 247 (ins Float64Regs:$a, f64imm:$b), |
251 !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"), | 248 !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"), |
252 [(set Float64Regs:$dst, | 249 [(set Float64Regs:$dst, |
253 (OpNode Float64Regs:$a, fpimm:$b))]>; | 250 (OpNode Float64Regs:$a, fpimm:$b))]>, |
251 Requires<[noFMA]>; | |
254 def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst), | 252 def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst), |
255 (ins Float32Regs:$a, Float32Regs:$b), | 253 (ins Float32Regs:$a, Float32Regs:$b), |
256 !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"), | 254 !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"), |
257 [(set Float32Regs:$dst, | 255 [(set Float32Regs:$dst, |
258 (OpNode Float32Regs:$a, Float32Regs:$b))]>, | 256 (OpNode Float32Regs:$a, Float32Regs:$b))]>, |
259 Requires<[doF32FTZ]>; | 257 Requires<[noFMA, doF32FTZ]>; |
260 def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst), | 258 def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst), |
261 (ins Float32Regs:$a, f32imm:$b), | 259 (ins Float32Regs:$a, f32imm:$b), |
262 !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"), | 260 !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"), |
263 [(set Float32Regs:$dst, | 261 [(set Float32Regs:$dst, |
264 (OpNode Float32Regs:$a, fpimm:$b))]>, | 262 (OpNode Float32Regs:$a, fpimm:$b))]>, |
265 Requires<[doF32FTZ]>; | 263 Requires<[noFMA, doF32FTZ]>; |
266 def f32rr : NVPTXInst<(outs Float32Regs:$dst), | 264 def f32rr : NVPTXInst<(outs Float32Regs:$dst), |
267 (ins Float32Regs:$a, Float32Regs:$b), | 265 (ins Float32Regs:$a, Float32Regs:$b), |
268 !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"), | 266 !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"), |
269 [(set Float32Regs:$dst, | 267 [(set Float32Regs:$dst, |
270 (OpNode Float32Regs:$a, Float32Regs:$b))]>; | 268 (OpNode Float32Regs:$a, Float32Regs:$b))]>, |
269 Requires<[noFMA]>; | |
271 def f32ri : NVPTXInst<(outs Float32Regs:$dst), | 270 def f32ri : NVPTXInst<(outs Float32Regs:$dst), |
272 (ins Float32Regs:$a, f32imm:$b), | 271 (ins Float32Regs:$a, f32imm:$b), |
273 !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"), | 272 !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"), |
274 [(set Float32Regs:$dst, | 273 [(set Float32Regs:$dst, |
275 (OpNode Float32Regs:$a, fpimm:$b))]>; | 274 (OpNode Float32Regs:$a, fpimm:$b))]>, |
275 Requires<[noFMA]>; | |
276 } | 276 } |
277 | 277 |
278 multiclass F2<string OpcStr, SDNode OpNode> { | 278 multiclass F2<string OpcStr, SDNode OpNode> { |
279 def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a), | 279 def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a), |
280 !strconcat(OpcStr, ".f64 \t$dst, $a;"), | 280 !strconcat(OpcStr, ".f64 \t$dst, $a;"), |
459 const APInt &v = N->getAPIntValue(); | 459 const APInt &v = N->getAPIntValue(); |
460 APInt temp(16, 1); | 460 APInt temp(16, 1); |
461 return CurDAG->getTargetConstant(temp.shl(v), MVT::i16); | 461 return CurDAG->getTargetConstant(temp.shl(v), MVT::i16); |
462 }]>; | 462 }]>; |
463 | 463 |
464 def MULWIDES64 : NVPTXInst<(outs Int64Regs:$dst), | 464 def MULWIDES64 |
465 (ins Int32Regs:$a, Int32Regs:$b), | 465 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), |
466 "mul.wide.s32 \t$dst, $a, $b;", []>; | |
467 def MULWIDES64Imm | |
468 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b), | |
466 "mul.wide.s32 \t$dst, $a, $b;", []>; | 469 "mul.wide.s32 \t$dst, $a, $b;", []>; |
467 def MULWIDES64Imm : NVPTXInst<(outs Int64Regs:$dst), | 470 def MULWIDES64Imm64 |
468 (ins Int32Regs:$a, i64imm:$b), | 471 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b), |
469 "mul.wide.s32 \t$dst, $a, $b;", []>; | 472 "mul.wide.s32 \t$dst, $a, $b;", []>; |
470 | 473 |
471 def MULWIDEU64 : NVPTXInst<(outs Int64Regs:$dst), | 474 def MULWIDEU64 |
472 (ins Int32Regs:$a, Int32Regs:$b), | 475 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), |
476 "mul.wide.u32 \t$dst, $a, $b;", []>; | |
477 def MULWIDEU64Imm | |
478 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b), | |
473 "mul.wide.u32 \t$dst, $a, $b;", []>; | 479 "mul.wide.u32 \t$dst, $a, $b;", []>; |
474 def MULWIDEU64Imm : NVPTXInst<(outs Int64Regs:$dst), | 480 def MULWIDEU64Imm64 |
475 (ins Int32Regs:$a, i64imm:$b), | 481 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b), |
476 "mul.wide.u32 \t$dst, $a, $b;", []>; | 482 "mul.wide.u32 \t$dst, $a, $b;", []>; |
477 | 483 |
478 def MULWIDES32 : NVPTXInst<(outs Int32Regs:$dst), | 484 def MULWIDES32 |
479 (ins Int16Regs:$a, Int16Regs:$b), | 485 : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), |
480 "mul.wide.s16 \t$dst, $a, $b;", []>; | 486 "mul.wide.s16 \t$dst, $a, $b;", []>; |
481 def MULWIDES32Imm : NVPTXInst<(outs Int32Regs:$dst), | 487 def MULWIDES32Imm |
482 (ins Int16Regs:$a, i32imm:$b), | 488 : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b), |
489 "mul.wide.s16 \t$dst, $a, $b;", []>; | |
490 def MULWIDES32Imm32 | |
491 : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b), | |
483 "mul.wide.s16 \t$dst, $a, $b;", []>; | 492 "mul.wide.s16 \t$dst, $a, $b;", []>; |
484 | 493 |
485 def MULWIDEU32 : NVPTXInst<(outs Int32Regs:$dst), | 494 def MULWIDEU32 |
486 (ins Int16Regs:$a, Int16Regs:$b), | 495 : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), |
496 "mul.wide.u16 \t$dst, $a, $b;", []>; | |
497 def MULWIDEU32Imm | |
498 : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b), | |
487 "mul.wide.u16 \t$dst, $a, $b;", []>; | 499 "mul.wide.u16 \t$dst, $a, $b;", []>; |
488 def MULWIDEU32Imm : NVPTXInst<(outs Int32Regs:$dst), | 500 def MULWIDEU32Imm32 |
489 (ins Int16Regs:$a, i32imm:$b), | 501 : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b), |
490 "mul.wide.u16 \t$dst, $a, $b;", []>; | 502 "mul.wide.u16 \t$dst, $a, $b;", []>; |
491 | 503 |
492 def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)), | 504 def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)), |
493 (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>, | 505 (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>, |
494 Requires<[doMulWide]>; | 506 Requires<[doMulWide]>; |
495 def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)), | 507 def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)), |
505 | 517 |
506 def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)), | 518 def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)), |
507 (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>, | 519 (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>, |
508 Requires<[doMulWide]>; | 520 Requires<[doMulWide]>; |
509 def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)), | 521 def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)), |
510 (MULWIDES64Imm Int32Regs:$a, (i64 SInt32Const:$b))>, | 522 (MULWIDES64Imm64 Int32Regs:$a, (i64 SInt32Const:$b))>, |
511 Requires<[doMulWide]>; | 523 Requires<[doMulWide]>; |
512 | 524 |
513 def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)), | 525 def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)), |
514 (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>; | 526 (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, |
527 Requires<[doMulWide]>; | |
515 def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)), | 528 def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)), |
516 (MULWIDEU64Imm Int32Regs:$a, (i64 UInt32Const:$b))>, | 529 (MULWIDEU64Imm64 Int32Regs:$a, (i64 UInt32Const:$b))>, |
517 Requires<[doMulWide]>; | 530 Requires<[doMulWide]>; |
518 | 531 |
519 def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)), | 532 def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)), |
520 (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>; | 533 (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, |
534 Requires<[doMulWide]>; | |
521 def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)), | 535 def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)), |
522 (MULWIDES32Imm Int16Regs:$a, (i32 SInt16Const:$b))>, | 536 (MULWIDES32Imm32 Int16Regs:$a, (i32 SInt16Const:$b))>, |
523 Requires<[doMulWide]>; | 537 Requires<[doMulWide]>; |
524 | 538 |
525 def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)), | 539 def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)), |
526 (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>; | 540 (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, |
541 Requires<[doMulWide]>; | |
527 def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)), | 542 def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)), |
528 (MULWIDEU32Imm Int16Regs:$a, (i32 UInt16Const:$b))>, | 543 (MULWIDEU32Imm32 Int16Regs:$a, (i32 UInt16Const:$b))>, |
544 Requires<[doMulWide]>; | |
545 | |
546 | |
547 def SDTMulWide | |
548 : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>]>; | |
549 def mul_wide_signed | |
550 : SDNode<"NVPTXISD::MUL_WIDE_SIGNED", SDTMulWide>; | |
551 def mul_wide_unsigned | |
552 : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>; | |
553 | |
554 def : Pat<(i32 (mul_wide_signed Int16Regs:$a, Int16Regs:$b)), | |
555 (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, | |
556 Requires<[doMulWide]>; | |
557 def : Pat<(i32 (mul_wide_signed Int16Regs:$a, imm:$b)), | |
558 (MULWIDES32Imm Int16Regs:$a, imm:$b)>, | |
559 Requires<[doMulWide]>; | |
560 def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, Int16Regs:$b)), | |
561 (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, | |
562 Requires<[doMulWide]>; | |
563 def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)), | |
564 (MULWIDEU32Imm Int16Regs:$a, imm:$b)>, | |
565 Requires<[doMulWide]>; | |
566 | |
567 | |
568 def : Pat<(i64 (mul_wide_signed Int32Regs:$a, Int32Regs:$b)), | |
569 (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>, | |
570 Requires<[doMulWide]>; | |
571 def : Pat<(i64 (mul_wide_signed Int32Regs:$a, imm:$b)), | |
572 (MULWIDES64Imm Int32Regs:$a, imm:$b)>, | |
573 Requires<[doMulWide]>; | |
574 def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, Int32Regs:$b)), | |
575 (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, | |
576 Requires<[doMulWide]>; | |
577 def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, imm:$b)), | |
578 (MULWIDEU64Imm Int32Regs:$a, imm:$b)>, | |
529 Requires<[doMulWide]>; | 579 Requires<[doMulWide]>; |
530 | 580 |
531 defm MULT : I3<"mul.lo.s", mul>; | 581 defm MULT : I3<"mul.lo.s", mul>; |
532 | 582 |
533 defm MULTHS : I3<"mul.hi.s", mulhs>; | 583 defm MULTHS : I3<"mul.hi.s", mulhs>; |
539 defm SREM : I3<"rem.s", srem>; | 589 defm SREM : I3<"rem.s", srem>; |
540 // The ri version will not be selected as DAGCombiner::visitSREM will lower it. | 590 // The ri version will not be selected as DAGCombiner::visitSREM will lower it. |
541 defm UREM : I3<"rem.u", urem>; | 591 defm UREM : I3<"rem.u", urem>; |
542 // The ri version will not be selected as DAGCombiner::visitUREM will lower it. | 592 // The ri version will not be selected as DAGCombiner::visitUREM will lower it. |
543 | 593 |
594 def SDTIMAD | |
595 : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, | |
596 SDTCisInt<2>, SDTCisSameAs<0, 2>, | |
597 SDTCisSameAs<0, 3>]>; | |
598 def imad | |
599 : SDNode<"NVPTXISD::IMAD", SDTIMAD>; | |
600 | |
544 def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst), | 601 def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst), |
545 (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c), | 602 (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c), |
546 "mad.lo.s16 \t$dst, $a, $b, $c;", | 603 "mad.lo.s16 \t$dst, $a, $b, $c;", |
547 [(set Int16Regs:$dst, (add | 604 [(set Int16Regs:$dst, |
548 (mul Int16Regs:$a, Int16Regs:$b), Int16Regs:$c))]>; | 605 (imad Int16Regs:$a, Int16Regs:$b, Int16Regs:$c))]>; |
549 def MAD16rri : NVPTXInst<(outs Int16Regs:$dst), | 606 def MAD16rri : NVPTXInst<(outs Int16Regs:$dst), |
550 (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c), | 607 (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c), |
551 "mad.lo.s16 \t$dst, $a, $b, $c;", | 608 "mad.lo.s16 \t$dst, $a, $b, $c;", |
552 [(set Int16Regs:$dst, (add | 609 [(set Int16Regs:$dst, |
553 (mul Int16Regs:$a, Int16Regs:$b), imm:$c))]>; | 610 (imad Int16Regs:$a, Int16Regs:$b, imm:$c))]>; |
554 def MAD16rir : NVPTXInst<(outs Int16Regs:$dst), | 611 def MAD16rir : NVPTXInst<(outs Int16Regs:$dst), |
555 (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c), | 612 (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c), |
556 "mad.lo.s16 \t$dst, $a, $b, $c;", | 613 "mad.lo.s16 \t$dst, $a, $b, $c;", |
557 [(set Int16Regs:$dst, (add | 614 [(set Int16Regs:$dst, |
558 (mul Int16Regs:$a, imm:$b), Int16Regs:$c))]>; | 615 (imad Int16Regs:$a, imm:$b, Int16Regs:$c))]>; |
559 def MAD16rii : NVPTXInst<(outs Int16Regs:$dst), | 616 def MAD16rii : NVPTXInst<(outs Int16Regs:$dst), |
560 (ins Int16Regs:$a, i16imm:$b, i16imm:$c), | 617 (ins Int16Regs:$a, i16imm:$b, i16imm:$c), |
561 "mad.lo.s16 \t$dst, $a, $b, $c;", | 618 "mad.lo.s16 \t$dst, $a, $b, $c;", |
562 [(set Int16Regs:$dst, (add (mul Int16Regs:$a, imm:$b), | 619 [(set Int16Regs:$dst, |
563 imm:$c))]>; | 620 (imad Int16Regs:$a, imm:$b, imm:$c))]>; |
564 | 621 |
565 def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst), | 622 def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst), |
566 (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c), | 623 (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c), |
567 "mad.lo.s32 \t$dst, $a, $b, $c;", | 624 "mad.lo.s32 \t$dst, $a, $b, $c;", |
568 [(set Int32Regs:$dst, (add | 625 [(set Int32Regs:$dst, |
569 (mul Int32Regs:$a, Int32Regs:$b), Int32Regs:$c))]>; | 626 (imad Int32Regs:$a, Int32Regs:$b, Int32Regs:$c))]>; |
570 def MAD32rri : NVPTXInst<(outs Int32Regs:$dst), | 627 def MAD32rri : NVPTXInst<(outs Int32Regs:$dst), |
571 (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c), | 628 (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c), |
572 "mad.lo.s32 \t$dst, $a, $b, $c;", | 629 "mad.lo.s32 \t$dst, $a, $b, $c;", |
573 [(set Int32Regs:$dst, (add | 630 [(set Int32Regs:$dst, |
574 (mul Int32Regs:$a, Int32Regs:$b), imm:$c))]>; | 631 (imad Int32Regs:$a, Int32Regs:$b, imm:$c))]>; |
575 def MAD32rir : NVPTXInst<(outs Int32Regs:$dst), | 632 def MAD32rir : NVPTXInst<(outs Int32Regs:$dst), |
576 (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c), | 633 (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c), |
577 "mad.lo.s32 \t$dst, $a, $b, $c;", | 634 "mad.lo.s32 \t$dst, $a, $b, $c;", |
578 [(set Int32Regs:$dst, (add | 635 [(set Int32Regs:$dst, |
579 (mul Int32Regs:$a, imm:$b), Int32Regs:$c))]>; | 636 (imad Int32Regs:$a, imm:$b, Int32Regs:$c))]>; |
580 def MAD32rii : NVPTXInst<(outs Int32Regs:$dst), | 637 def MAD32rii : NVPTXInst<(outs Int32Regs:$dst), |
581 (ins Int32Regs:$a, i32imm:$b, i32imm:$c), | 638 (ins Int32Regs:$a, i32imm:$b, i32imm:$c), |
582 "mad.lo.s32 \t$dst, $a, $b, $c;", | 639 "mad.lo.s32 \t$dst, $a, $b, $c;", |
583 [(set Int32Regs:$dst, (add | 640 [(set Int32Regs:$dst, |
584 (mul Int32Regs:$a, imm:$b), imm:$c))]>; | 641 (imad Int32Regs:$a, imm:$b, imm:$c))]>; |
585 | 642 |
586 def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst), | 643 def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst), |
587 (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c), | 644 (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c), |
588 "mad.lo.s64 \t$dst, $a, $b, $c;", | 645 "mad.lo.s64 \t$dst, $a, $b, $c;", |
589 [(set Int64Regs:$dst, (add | 646 [(set Int64Regs:$dst, |
590 (mul Int64Regs:$a, Int64Regs:$b), Int64Regs:$c))]>; | 647 (imad Int64Regs:$a, Int64Regs:$b, Int64Regs:$c))]>; |
591 def MAD64rri : NVPTXInst<(outs Int64Regs:$dst), | 648 def MAD64rri : NVPTXInst<(outs Int64Regs:$dst), |
592 (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c), | 649 (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c), |
593 "mad.lo.s64 \t$dst, $a, $b, $c;", | 650 "mad.lo.s64 \t$dst, $a, $b, $c;", |
594 [(set Int64Regs:$dst, (add | 651 [(set Int64Regs:$dst, |
595 (mul Int64Regs:$a, Int64Regs:$b), imm:$c))]>; | 652 (imad Int64Regs:$a, Int64Regs:$b, imm:$c))]>; |
596 def MAD64rir : NVPTXInst<(outs Int64Regs:$dst), | 653 def MAD64rir : NVPTXInst<(outs Int64Regs:$dst), |
597 (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c), | 654 (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c), |
598 "mad.lo.s64 \t$dst, $a, $b, $c;", | 655 "mad.lo.s64 \t$dst, $a, $b, $c;", |
599 [(set Int64Regs:$dst, (add | 656 [(set Int64Regs:$dst, |
600 (mul Int64Regs:$a, imm:$b), Int64Regs:$c))]>; | 657 (imad Int64Regs:$a, imm:$b, Int64Regs:$c))]>; |
601 def MAD64rii : NVPTXInst<(outs Int64Regs:$dst), | 658 def MAD64rii : NVPTXInst<(outs Int64Regs:$dst), |
602 (ins Int64Regs:$a, i64imm:$b, i64imm:$c), | 659 (ins Int64Regs:$a, i64imm:$b, i64imm:$c), |
603 "mad.lo.s64 \t$dst, $a, $b, $c;", | 660 "mad.lo.s64 \t$dst, $a, $b, $c;", |
604 [(set Int64Regs:$dst, (add | 661 [(set Int64Regs:$dst, |
605 (mul Int64Regs:$a, imm:$b), imm:$c))]>; | 662 (imad Int64Regs:$a, imm:$b, imm:$c))]>; |
606 | |
607 | 663 |
608 def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), | 664 def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), |
609 "neg.s16 \t$dst, $src;", | 665 "neg.s16 \t$dst, $src;", |
610 [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>; | 666 [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>; |
611 def INEG32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), | 667 def INEG32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), |
687 (ins Float32Regs:$a, Float32Regs:$b), | 743 (ins Float32Regs:$a, Float32Regs:$b), |
688 "div.approx.ftz.f32 \t$dst, $a, $b;", | 744 "div.approx.ftz.f32 \t$dst, $a, $b;", |
689 [(set Float32Regs:$dst, | 745 [(set Float32Regs:$dst, |
690 (fdiv Float32Regs:$a, Float32Regs:$b))]>, | 746 (fdiv Float32Regs:$a, Float32Regs:$b))]>, |
691 Requires<[do_DIVF32_APPROX, doF32FTZ]>; | 747 Requires<[do_DIVF32_APPROX, doF32FTZ]>; |
748 def FDIV32approxri_ftz : NVPTXInst<(outs Float32Regs:$dst), | |
749 (ins Float32Regs:$a, f32imm:$b), | |
750 "div.approx.ftz.f32 \t$dst, $a, $b;", | |
751 [(set Float32Regs:$dst, | |
752 (fdiv Float32Regs:$a, fpimm:$b))]>, | |
753 Requires<[do_DIVF32_APPROX, doF32FTZ]>; | |
692 def FDIV32approxrr : NVPTXInst<(outs Float32Regs:$dst), | 754 def FDIV32approxrr : NVPTXInst<(outs Float32Regs:$dst), |
693 (ins Float32Regs:$a, Float32Regs:$b), | 755 (ins Float32Regs:$a, Float32Regs:$b), |
694 "div.approx.f32 \t$dst, $a, $b;", | 756 "div.approx.f32 \t$dst, $a, $b;", |
695 [(set Float32Regs:$dst, | 757 [(set Float32Regs:$dst, |
696 (fdiv Float32Regs:$a, Float32Regs:$b))]>, | 758 (fdiv Float32Regs:$a, Float32Regs:$b))]>, |
759 Requires<[do_DIVF32_APPROX]>; | |
760 def FDIV32approxri : NVPTXInst<(outs Float32Regs:$dst), | |
761 (ins Float32Regs:$a, f32imm:$b), | |
762 "div.approx.f32 \t$dst, $a, $b;", | |
763 [(set Float32Regs:$dst, | |
764 (fdiv Float32Regs:$a, fpimm:$b))]>, | |
697 Requires<[do_DIVF32_APPROX]>; | 765 Requires<[do_DIVF32_APPROX]>; |
698 // | 766 // |
699 // F32 Semi-accurate reciprocal | 767 // F32 Semi-accurate reciprocal |
700 // | 768 // |
701 // rcp.approx gives the same result as div.full(1.0f, a) and is faster. | 769 // rcp.approx gives the same result as div.full(1.0f, a) and is faster. |
795 | 863 |
796 multiclass FPCONTRACT32<string OpcStr, Predicate Pred> { | 864 multiclass FPCONTRACT32<string OpcStr, Predicate Pred> { |
797 def rrr : NVPTXInst<(outs Float32Regs:$dst), | 865 def rrr : NVPTXInst<(outs Float32Regs:$dst), |
798 (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c), | 866 (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c), |
799 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), | 867 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), |
800 [(set Float32Regs:$dst, (fadd | 868 [(set Float32Regs:$dst, |
801 (fmul Float32Regs:$a, Float32Regs:$b), | 869 (fma Float32Regs:$a, Float32Regs:$b, Float32Regs:$c))]>, |
802 Float32Regs:$c))]>, Requires<[Pred]>; | |
803 // This is to WAR a weird bug in Tablegen that does not automatically | |
804 // generate the following permutated rule rrr2 from the above rrr. | |
805 // So we explicitly add it here. This happens to FMA32 only. | |
806 // See the comments at FMAD32 and FMA32 for more information. | |
807 def rrr2 : NVPTXInst<(outs Float32Regs:$dst), | |
808 (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c), | |
809 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), | |
810 [(set Float32Regs:$dst, (fadd Float32Regs:$c, | |
811 (fmul Float32Regs:$a, Float32Regs:$b)))]>, | |
812 Requires<[Pred]>; | 870 Requires<[Pred]>; |
813 def rri : NVPTXInst<(outs Float32Regs:$dst), | 871 def rri : NVPTXInst<(outs Float32Regs:$dst), |
814 (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c), | 872 (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c), |
815 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), | 873 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), |
816 [(set Float32Regs:$dst, (fadd | 874 [(set Float32Regs:$dst, |
817 (fmul Float32Regs:$a, Float32Regs:$b), fpimm:$c))]>, | 875 (fma Float32Regs:$a, Float32Regs:$b, fpimm:$c))]>, |
818 Requires<[Pred]>; | 876 Requires<[Pred]>; |
819 def rir : NVPTXInst<(outs Float32Regs:$dst), | 877 def rir : NVPTXInst<(outs Float32Regs:$dst), |
820 (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c), | 878 (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c), |
821 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), | 879 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), |
822 [(set Float32Regs:$dst, (fadd | 880 [(set Float32Regs:$dst, |
823 (fmul Float32Regs:$a, fpimm:$b), Float32Regs:$c))]>, | 881 (fma Float32Regs:$a, fpimm:$b, Float32Regs:$c))]>, |
824 Requires<[Pred]>; | 882 Requires<[Pred]>; |
825 def rii : NVPTXInst<(outs Float32Regs:$dst), | 883 def rii : NVPTXInst<(outs Float32Regs:$dst), |
826 (ins Float32Regs:$a, f32imm:$b, f32imm:$c), | 884 (ins Float32Regs:$a, f32imm:$b, f32imm:$c), |
827 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), | 885 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), |
828 [(set Float32Regs:$dst, (fadd | 886 [(set Float32Regs:$dst, |
829 (fmul Float32Regs:$a, fpimm:$b), fpimm:$c))]>, | 887 (fma Float32Regs:$a, fpimm:$b, fpimm:$c))]>, |
830 Requires<[Pred]>; | 888 Requires<[Pred]>; |
831 } | 889 } |
832 | 890 |
833 multiclass FPCONTRACT64<string OpcStr, Predicate Pred> { | 891 multiclass FPCONTRACT64<string OpcStr, Predicate Pred> { |
834 def rrr : NVPTXInst<(outs Float64Regs:$dst), | 892 def rrr : NVPTXInst<(outs Float64Regs:$dst), |
835 (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c), | 893 (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c), |
836 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), | 894 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), |
837 [(set Float64Regs:$dst, (fadd | 895 [(set Float64Regs:$dst, |
838 (fmul Float64Regs:$a, Float64Regs:$b), | 896 (fma Float64Regs:$a, Float64Regs:$b, Float64Regs:$c))]>, |
839 Float64Regs:$c))]>, Requires<[Pred]>; | 897 Requires<[Pred]>; |
840 def rri : NVPTXInst<(outs Float64Regs:$dst), | 898 def rri : NVPTXInst<(outs Float64Regs:$dst), |
841 (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c), | 899 (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c), |
842 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), | 900 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), |
843 [(set Float64Regs:$dst, (fadd (fmul Float64Regs:$a, | 901 [(set Float64Regs:$dst, |
844 Float64Regs:$b), fpimm:$c))]>, Requires<[Pred]>; | 902 (fma Float64Regs:$a, Float64Regs:$b, fpimm:$c))]>, |
903 Requires<[Pred]>; | |
845 def rir : NVPTXInst<(outs Float64Regs:$dst), | 904 def rir : NVPTXInst<(outs Float64Regs:$dst), |
846 (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c), | 905 (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c), |
847 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), | 906 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), |
848 [(set Float64Regs:$dst, (fadd | 907 [(set Float64Regs:$dst, |
849 (fmul Float64Regs:$a, fpimm:$b), Float64Regs:$c))]>, | 908 (fma Float64Regs:$a, fpimm:$b, Float64Regs:$c))]>, |
850 Requires<[Pred]>; | 909 Requires<[Pred]>; |
851 def rii : NVPTXInst<(outs Float64Regs:$dst), | 910 def rii : NVPTXInst<(outs Float64Regs:$dst), |
852 (ins Float64Regs:$a, f64imm:$b, f64imm:$c), | 911 (ins Float64Regs:$a, f64imm:$b, f64imm:$c), |
853 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), | 912 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), |
854 [(set Float64Regs:$dst, (fadd | 913 [(set Float64Regs:$dst, |
855 (fmul Float64Regs:$a, fpimm:$b), fpimm:$c))]>, | 914 (fma Float64Regs:$a, fpimm:$b, fpimm:$c))]>, |
856 Requires<[Pred]>; | 915 Requires<[Pred]>; |
857 } | 916 } |
858 | 917 |
859 // Due to a unknown reason (most likely a bug in tablegen), tablegen does not | 918 defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doF32FTZ>; |
860 // automatically generate the rrr2 rule from | 919 defm FMA32 : FPCONTRACT32<"fma.rn.f32", true>; |
861 // the rrr rule (see FPCONTRACT32) for FMA32, though it does for FMAD32. | 920 defm FMA64 : FPCONTRACT64<"fma.rn.f64", true>; |
862 // If we reverse the order of the following two lines, then rrr2 rule will be | |
863 // generated for FMA32, but not for rrr. | |
864 // Therefore, we manually write the rrr2 rule in FPCONTRACT32. | |
865 defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doFMAF32_ftz>; | |
866 defm FMA32 : FPCONTRACT32<"fma.rn.f32", doFMAF32>; | |
867 defm FMA64 : FPCONTRACT64<"fma.rn.f64", doFMAF64>; | |
868 | |
869 // b*c-a => fmad(b, c, -a) | |
870 multiclass FPCONTRACT32_SUB_PAT_MAD<NVPTXInst Inst, Predicate Pred> { | |
871 def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a), | |
872 (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>, | |
873 Requires<[Pred]>; | |
874 } | |
875 | |
876 // a-b*c => fmad(-b,c, a) | |
877 // - legal because a-b*c <=> a+(-b*c) <=> a+(-b)*c | |
878 // b*c-a => fmad(b, c, -a) | |
879 // - legal because b*c-a <=> b*c+(-a) | |
880 multiclass FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> { | |
881 def : Pat<(fsub Float32Regs:$a, (fmul Float32Regs:$b, Float32Regs:$c)), | |
882 (Inst (FNEGf32 Float32Regs:$b), Float32Regs:$c, Float32Regs:$a)>, | |
883 Requires<[Pred]>; | |
884 def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a), | |
885 (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>, | |
886 Requires<[Pred]>; | |
887 } | |
888 | |
889 // a-b*c => fmad(-b,c, a) | |
890 // b*c-a => fmad(b, c, -a) | |
891 multiclass FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> { | |
892 def : Pat<(fsub Float64Regs:$a, (fmul Float64Regs:$b, Float64Regs:$c)), | |
893 (Inst (FNEGf64 Float64Regs:$b), Float64Regs:$c, Float64Regs:$a)>, | |
894 Requires<[Pred]>; | |
895 | |
896 def : Pat<(fsub (fmul Float64Regs:$b, Float64Regs:$c), Float64Regs:$a), | |
897 (Inst Float64Regs:$b, Float64Regs:$c, (FNEGf64 Float64Regs:$a))>, | |
898 Requires<[Pred]>; | |
899 } | |
900 | |
901 defm FMAF32ext_ftz : FPCONTRACT32_SUB_PAT<FMA32_ftzrrr, doFMAF32AGG_ftz>; | |
902 defm FMAF32ext : FPCONTRACT32_SUB_PAT<FMA32rrr, doFMAF32AGG>; | |
903 defm FMAF64ext : FPCONTRACT64_SUB_PAT<FMA64rrr, doFMAF64AGG>; | |
904 | 921 |
905 def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), | 922 def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), |
906 "sin.approx.f32 \t$dst, $src;", | 923 "sin.approx.f32 \t$dst, $src;", |
907 [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>; | 924 [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>; |
908 def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), | 925 def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), |
1069 } | 1086 } |
1070 | 1087 |
1071 defm SRA : RSHIFT_FORMAT<"shr.s", sra>; | 1088 defm SRA : RSHIFT_FORMAT<"shr.s", sra>; |
1072 defm SRL : RSHIFT_FORMAT<"shr.u", srl>; | 1089 defm SRL : RSHIFT_FORMAT<"shr.u", srl>; |
1073 | 1090 |
1091 // | |
1092 // Rotate: use ptx shf instruction if available. | |
1093 // | |
1094 | |
1095 // 32 bit r2 = rotl r1, n | |
1096 // => | |
1097 // r2 = shf.l r1, r1, n | |
1098 def ROTL32imm_hw : NVPTXInst<(outs Int32Regs:$dst), | |
1099 (ins Int32Regs:$src, i32imm:$amt), | |
1100 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", | |
1101 [(set Int32Regs:$dst, (rotl Int32Regs:$src, (i32 imm:$amt)))]>, | |
1102 Requires<[hasHWROT32]> ; | |
1103 | |
1104 def ROTL32reg_hw : NVPTXInst<(outs Int32Regs:$dst), | |
1105 (ins Int32Regs:$src, Int32Regs:$amt), | |
1106 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", | |
1107 [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>, | |
1108 Requires<[hasHWROT32]>; | |
1109 | |
1110 // 32 bit r2 = rotr r1, n | |
1111 // => | |
1112 // r2 = shf.r r1, r1, n | |
1113 def ROTR32imm_hw : NVPTXInst<(outs Int32Regs:$dst), | |
1114 (ins Int32Regs:$src, i32imm:$amt), | |
1115 "shf.r.wrap.b32 \t$dst, $src, $src, $amt;", | |
1116 [(set Int32Regs:$dst, (rotr Int32Regs:$src, (i32 imm:$amt)))]>, | |
1117 Requires<[hasHWROT32]>; | |
1118 | |
1119 def ROTR32reg_hw : NVPTXInst<(outs Int32Regs:$dst), | |
1120 (ins Int32Regs:$src, Int32Regs:$amt), | |
1121 "shf.r.wrap.b32 \t$dst, $src, $src, $amt;", | |
1122 [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>, | |
1123 Requires<[hasHWROT32]>; | |
1124 | |
1125 // | |
1126 // Rotate: if ptx shf instruction is not available, then use shift+add | |
1127 // | |
1074 // 32bit | 1128 // 32bit |
1075 def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst), | 1129 def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst), |
1076 (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2), | 1130 (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2), |
1077 !strconcat("{{\n\t", | 1131 !strconcat("{{\n\t", |
1078 !strconcat(".reg .b32 %lhs;\n\t", | 1132 !strconcat(".reg .b32 %lhs;\n\t", |
1086 def SUB_FRM_32 : SDNodeXForm<imm, [{ | 1140 def SUB_FRM_32 : SDNodeXForm<imm, [{ |
1087 return CurDAG->getTargetConstant(32-N->getZExtValue(), MVT::i32); | 1141 return CurDAG->getTargetConstant(32-N->getZExtValue(), MVT::i32); |
1088 }]>; | 1142 }]>; |
1089 | 1143 |
1090 def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)), | 1144 def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)), |
1091 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>; | 1145 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>, |
1146 Requires<[noHWROT32]>; | |
1092 def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)), | 1147 def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)), |
1093 (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>; | 1148 (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>, |
1149 Requires<[noHWROT32]>; | |
1094 | 1150 |
1095 def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, | 1151 def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, |
1096 Int32Regs:$amt), | 1152 Int32Regs:$amt), |
1097 !strconcat("{{\n\t", | 1153 !strconcat("{{\n\t", |
1098 !strconcat(".reg .b32 %lhs;\n\t", | 1154 !strconcat(".reg .b32 %lhs;\n\t", |
1101 !strconcat("shl.b32 \t%lhs, $src, $amt;\n\t", | 1157 !strconcat("shl.b32 \t%lhs, $src, $amt;\n\t", |
1102 !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t", | 1158 !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t", |
1103 !strconcat("shr.b32 \t%rhs, $src, %amt2;\n\t", | 1159 !strconcat("shr.b32 \t%rhs, $src, %amt2;\n\t", |
1104 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t", | 1160 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t", |
1105 !strconcat("}}", ""))))))))), | 1161 !strconcat("}}", ""))))))))), |
1106 [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>; | 1162 [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>, |
1163 Requires<[noHWROT32]>; | |
1107 | 1164 |
1108 def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, | 1165 def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, |
1109 Int32Regs:$amt), | 1166 Int32Regs:$amt), |
1110 !strconcat("{{\n\t", | 1167 !strconcat("{{\n\t", |
1111 !strconcat(".reg .b32 %lhs;\n\t", | 1168 !strconcat(".reg .b32 %lhs;\n\t", |
1114 !strconcat("shr.b32 \t%lhs, $src, $amt;\n\t", | 1171 !strconcat("shr.b32 \t%lhs, $src, $amt;\n\t", |
1115 !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t", | 1172 !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t", |
1116 !strconcat("shl.b32 \t%rhs, $src, %amt2;\n\t", | 1173 !strconcat("shl.b32 \t%rhs, $src, %amt2;\n\t", |
1117 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t", | 1174 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t", |
1118 !strconcat("}}", ""))))))))), | 1175 !strconcat("}}", ""))))))))), |
1119 [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>; | 1176 [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>, |
1177 Requires<[noHWROT32]>; | |
1120 | 1178 |
1121 // 64bit | 1179 // 64bit |
1122 def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, | 1180 def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, |
1123 i32imm:$amt1, i32imm:$amt2), | 1181 i32imm:$amt1, i32imm:$amt2), |
1124 !strconcat("{{\n\t", | 1182 !strconcat("{{\n\t", |
1163 !strconcat("shl.b64 \t%rhs, $src, %amt2;\n\t", | 1221 !strconcat("shl.b64 \t%rhs, $src, %amt2;\n\t", |
1164 !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t", | 1222 !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t", |
1165 !strconcat("}}", ""))))))))), | 1223 !strconcat("}}", ""))))))))), |
1166 [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>; | 1224 [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>; |
1167 | 1225 |
1226 // BFE - bit-field extract | |
1227 | |
1228 multiclass BFE<string TyStr, RegisterClass RC> { | |
1229 // BFE supports both 32-bit and 64-bit values, but the start and length | |
1230 // operands are always 32-bit | |
1231 def rrr | |
1232 : NVPTXInst<(outs RC:$d), | |
1233 (ins RC:$a, Int32Regs:$b, Int32Regs:$c), | |
1234 !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>; | |
1235 def rri | |
1236 : NVPTXInst<(outs RC:$d), | |
1237 (ins RC:$a, Int32Regs:$b, i32imm:$c), | |
1238 !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>; | |
1239 def rii | |
1240 : NVPTXInst<(outs RC:$d), | |
1241 (ins RC:$a, i32imm:$b, i32imm:$c), | |
1242 !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>; | |
1243 } | |
1244 | |
1245 defm BFE_S32 : BFE<"s32", Int32Regs>; | |
1246 defm BFE_U32 : BFE<"u32", Int32Regs>; | |
1247 defm BFE_S64 : BFE<"s64", Int64Regs>; | |
1248 defm BFE_U64 : BFE<"u64", Int64Regs>; | |
1168 | 1249 |
1169 //----------------------------------- | 1250 //----------------------------------- |
1170 // General Comparison | 1251 // General Comparison |
1171 //----------------------------------- | 1252 //----------------------------------- |
1172 | 1253 |
1277 | 1358 |
1278 // Special select for predicate operands | 1359 // Special select for predicate operands |
1279 def : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)), | 1360 def : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)), |
1280 (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a), | 1361 (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a), |
1281 (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>; | 1362 (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>; |
1363 | |
1364 // | |
1365 // Funnel shift in clamp mode | |
1366 // | |
1367 // - SDNodes are created so they can be used in the DAG code, | |
1368 // e.g. NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts) | |
1369 // | |
1370 def SDTIntShiftDOp: SDTypeProfile<1, 3, | |
1371 [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, | |
1372 SDTCisInt<0>, SDTCisInt<3>]>; | |
1373 def FUN_SHFL_CLAMP : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>; | |
1374 def FUN_SHFR_CLAMP : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>; | |
1375 | |
1376 def FUNSHFLCLAMP : NVPTXInst<(outs Int32Regs:$dst), | |
1377 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), | |
1378 "shf.l.clamp.b32 \t$dst, $lo, $hi, $amt;", | |
1379 [(set Int32Regs:$dst, | |
1380 (FUN_SHFL_CLAMP Int32Regs:$lo, | |
1381 Int32Regs:$hi, Int32Regs:$amt))]>; | |
1382 | |
1383 def FUNSHFRCLAMP : NVPTXInst<(outs Int32Regs:$dst), | |
1384 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), | |
1385 "shf.r.clamp.b32 \t$dst, $lo, $hi, $amt;", | |
1386 [(set Int32Regs:$dst, | |
1387 (FUN_SHFR_CLAMP Int32Regs:$lo, | |
1388 Int32Regs:$hi, Int32Regs:$amt))]>; | |
1282 | 1389 |
1283 //----------------------------------- | 1390 //----------------------------------- |
1284 // Data Movement (Load / Store, Move) | 1391 // Data Movement (Load / Store, Move) |
1285 //----------------------------------- | 1392 //----------------------------------- |
1286 | 1393 |
1805 // FIXME: StoreParamV4Inst crashes llvm-tblgen :( | 1912 // FIXME: StoreParamV4Inst crashes llvm-tblgen :( |
1806 //def StoreParamV4I32 : StoreParamV4Inst<Int32Regs, ".b32">; | 1913 //def StoreParamV4I32 : StoreParamV4Inst<Int32Regs, ".b32">; |
1807 def StoreParamV4I32 : NVPTXInst<(outs), (ins Int32Regs:$val, Int32Regs:$val2, | 1914 def StoreParamV4I32 : NVPTXInst<(outs), (ins Int32Regs:$val, Int32Regs:$val2, |
1808 Int32Regs:$val3, Int32Regs:$val4, | 1915 Int32Regs:$val3, Int32Regs:$val4, |
1809 i32imm:$a, i32imm:$b), | 1916 i32imm:$a, i32imm:$b), |
1810 "st.param.b32\t[param$a+$b], {{$val, $val2, $val3, $val4}};", | 1917 "st.param.v4.b32\t[param$a+$b], {{$val, $val2, $val3, $val4}};", |
1811 []>; | 1918 []>; |
1812 | 1919 |
1813 def StoreParamV4I16 : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2, | 1920 def StoreParamV4I16 : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2, |
1814 Int16Regs:$val3, Int16Regs:$val4, | 1921 Int16Regs:$val3, Int16Regs:$val4, |
1815 i32imm:$a, i32imm:$b), | 1922 i32imm:$a, i32imm:$b), |