comparison lib/Target/NVPTX/NVPTXInstrInfo.td @ 77:54457678186b LLVM3.6

LLVM 3.6
author Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date Mon, 08 Sep 2014 22:06:00 +0900
parents 95c75e76d11b
children 60c9769439b8
comparison
equal deleted inserted replaced
34:e874dbf0ad9d 77:54457678186b
137 def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">; 137 def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
138 138
139 def doF32FTZ : Predicate<"useF32FTZ()">; 139 def doF32FTZ : Predicate<"useF32FTZ()">;
140 def doNoF32FTZ : Predicate<"!useF32FTZ()">; 140 def doNoF32FTZ : Predicate<"!useF32FTZ()">;
141 141
142 def doFMAF32 : Predicate<"doFMAF32">;
143 def doFMAF32_ftz : Predicate<"(doFMAF32 && useF32FTZ())">;
144 def doFMAF32AGG : Predicate<"doFMAF32AGG">;
145 def doFMAF32AGG_ftz : Predicate<"(doFMAF32AGG && useF32FTZ())">;
146 def doFMAF64 : Predicate<"doFMAF64">;
147 def doFMAF64AGG : Predicate<"doFMAF64AGG">;
148
149 def doMulWide : Predicate<"doMulWide">; 142 def doMulWide : Predicate<"doMulWide">;
150 143
151 def allowFMA : Predicate<"allowFMA">; 144 def allowFMA : Predicate<"allowFMA()">;
152 def allowFMA_ftz : Predicate<"(allowFMA && useF32FTZ())">; 145 def noFMA : Predicate<"!allowFMA()">;
153 146
154 def do_DIVF32_APPROX : Predicate<"getDivF32Level()==0">; 147 def do_DIVF32_APPROX : Predicate<"getDivF32Level()==0">;
155 def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">; 148 def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">;
156 149
157 def do_SQRTF32_APPROX : Predicate<"!usePrecSqrtF32()">; 150 def do_SQRTF32_APPROX : Predicate<"!usePrecSqrtF32()">;
158 def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">; 151 def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">;
159 152
160 def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">; 153 def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
154 def noHWROT32 : Predicate<"!Subtarget.hasHWROT32()">;
161 155
162 def true : Predicate<"1">; 156 def true : Predicate<"1">;
157
158 def hasPTX31 : Predicate<"Subtarget.getPTXVersion() >= 31">;
163 159
164 160
165 //===----------------------------------------------------------------------===// 161 //===----------------------------------------------------------------------===//
166 // Some Common Instruction Class Templates 162 // Some Common Instruction Class Templates
167 //===----------------------------------------------------------------------===// 163 //===----------------------------------------------------------------------===//
217 def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst), 213 def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
218 (ins Float32Regs:$a, Float32Regs:$b), 214 (ins Float32Regs:$a, Float32Regs:$b),
219 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), 215 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
220 [(set Float32Regs:$dst, 216 [(set Float32Regs:$dst,
221 (OpNode Float32Regs:$a, Float32Regs:$b))]>, 217 (OpNode Float32Regs:$a, Float32Regs:$b))]>,
222 Requires<[allowFMA_ftz]>; 218 Requires<[allowFMA, doF32FTZ]>;
223 def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst), 219 def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
224 (ins Float32Regs:$a, f32imm:$b), 220 (ins Float32Regs:$a, f32imm:$b),
225 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), 221 !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
226 [(set Float32Regs:$dst, 222 [(set Float32Regs:$dst,
227 (OpNode Float32Regs:$a, fpimm:$b))]>, 223 (OpNode Float32Regs:$a, fpimm:$b))]>,
228 Requires<[allowFMA_ftz]>; 224 Requires<[allowFMA, doF32FTZ]>;
229 def f32rr : NVPTXInst<(outs Float32Regs:$dst), 225 def f32rr : NVPTXInst<(outs Float32Regs:$dst),
230 (ins Float32Regs:$a, Float32Regs:$b), 226 (ins Float32Regs:$a, Float32Regs:$b),
231 !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), 227 !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
232 [(set Float32Regs:$dst, 228 [(set Float32Regs:$dst,
233 (OpNode Float32Regs:$a, Float32Regs:$b))]>, 229 (OpNode Float32Regs:$a, Float32Regs:$b))]>,
243 multiclass F3_rn<string OpcStr, SDNode OpNode> { 239 multiclass F3_rn<string OpcStr, SDNode OpNode> {
244 def f64rr : NVPTXInst<(outs Float64Regs:$dst), 240 def f64rr : NVPTXInst<(outs Float64Regs:$dst),
245 (ins Float64Regs:$a, Float64Regs:$b), 241 (ins Float64Regs:$a, Float64Regs:$b),
246 !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"), 242 !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
247 [(set Float64Regs:$dst, 243 [(set Float64Regs:$dst,
248 (OpNode Float64Regs:$a, Float64Regs:$b))]>; 244 (OpNode Float64Regs:$a, Float64Regs:$b))]>,
245 Requires<[noFMA]>;
249 def f64ri : NVPTXInst<(outs Float64Regs:$dst), 246 def f64ri : NVPTXInst<(outs Float64Regs:$dst),
250 (ins Float64Regs:$a, f64imm:$b), 247 (ins Float64Regs:$a, f64imm:$b),
251 !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"), 248 !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
252 [(set Float64Regs:$dst, 249 [(set Float64Regs:$dst,
253 (OpNode Float64Regs:$a, fpimm:$b))]>; 250 (OpNode Float64Regs:$a, fpimm:$b))]>,
251 Requires<[noFMA]>;
254 def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst), 252 def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
255 (ins Float32Regs:$a, Float32Regs:$b), 253 (ins Float32Regs:$a, Float32Regs:$b),
256 !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"), 254 !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
257 [(set Float32Regs:$dst, 255 [(set Float32Regs:$dst,
258 (OpNode Float32Regs:$a, Float32Regs:$b))]>, 256 (OpNode Float32Regs:$a, Float32Regs:$b))]>,
259 Requires<[doF32FTZ]>; 257 Requires<[noFMA, doF32FTZ]>;
260 def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst), 258 def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
261 (ins Float32Regs:$a, f32imm:$b), 259 (ins Float32Regs:$a, f32imm:$b),
262 !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"), 260 !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
263 [(set Float32Regs:$dst, 261 [(set Float32Regs:$dst,
264 (OpNode Float32Regs:$a, fpimm:$b))]>, 262 (OpNode Float32Regs:$a, fpimm:$b))]>,
265 Requires<[doF32FTZ]>; 263 Requires<[noFMA, doF32FTZ]>;
266 def f32rr : NVPTXInst<(outs Float32Regs:$dst), 264 def f32rr : NVPTXInst<(outs Float32Regs:$dst),
267 (ins Float32Regs:$a, Float32Regs:$b), 265 (ins Float32Regs:$a, Float32Regs:$b),
268 !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"), 266 !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
269 [(set Float32Regs:$dst, 267 [(set Float32Regs:$dst,
270 (OpNode Float32Regs:$a, Float32Regs:$b))]>; 268 (OpNode Float32Regs:$a, Float32Regs:$b))]>,
269 Requires<[noFMA]>;
271 def f32ri : NVPTXInst<(outs Float32Regs:$dst), 270 def f32ri : NVPTXInst<(outs Float32Regs:$dst),
272 (ins Float32Regs:$a, f32imm:$b), 271 (ins Float32Regs:$a, f32imm:$b),
273 !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"), 272 !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
274 [(set Float32Regs:$dst, 273 [(set Float32Regs:$dst,
275 (OpNode Float32Regs:$a, fpimm:$b))]>; 274 (OpNode Float32Regs:$a, fpimm:$b))]>,
275 Requires<[noFMA]>;
276 } 276 }
277 277
278 multiclass F2<string OpcStr, SDNode OpNode> { 278 multiclass F2<string OpcStr, SDNode OpNode> {
279 def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a), 279 def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a),
280 !strconcat(OpcStr, ".f64 \t$dst, $a;"), 280 !strconcat(OpcStr, ".f64 \t$dst, $a;"),
459 const APInt &v = N->getAPIntValue(); 459 const APInt &v = N->getAPIntValue();
460 APInt temp(16, 1); 460 APInt temp(16, 1);
461 return CurDAG->getTargetConstant(temp.shl(v), MVT::i16); 461 return CurDAG->getTargetConstant(temp.shl(v), MVT::i16);
462 }]>; 462 }]>;
463 463
464 def MULWIDES64 : NVPTXInst<(outs Int64Regs:$dst), 464 def MULWIDES64
465 (ins Int32Regs:$a, Int32Regs:$b), 465 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
466 "mul.wide.s32 \t$dst, $a, $b;", []>;
467 def MULWIDES64Imm
468 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
466 "mul.wide.s32 \t$dst, $a, $b;", []>; 469 "mul.wide.s32 \t$dst, $a, $b;", []>;
467 def MULWIDES64Imm : NVPTXInst<(outs Int64Regs:$dst), 470 def MULWIDES64Imm64
468 (ins Int32Regs:$a, i64imm:$b), 471 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
469 "mul.wide.s32 \t$dst, $a, $b;", []>; 472 "mul.wide.s32 \t$dst, $a, $b;", []>;
470 473
471 def MULWIDEU64 : NVPTXInst<(outs Int64Regs:$dst), 474 def MULWIDEU64
472 (ins Int32Regs:$a, Int32Regs:$b), 475 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
476 "mul.wide.u32 \t$dst, $a, $b;", []>;
477 def MULWIDEU64Imm
478 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
473 "mul.wide.u32 \t$dst, $a, $b;", []>; 479 "mul.wide.u32 \t$dst, $a, $b;", []>;
474 def MULWIDEU64Imm : NVPTXInst<(outs Int64Regs:$dst), 480 def MULWIDEU64Imm64
475 (ins Int32Regs:$a, i64imm:$b), 481 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
476 "mul.wide.u32 \t$dst, $a, $b;", []>; 482 "mul.wide.u32 \t$dst, $a, $b;", []>;
477 483
478 def MULWIDES32 : NVPTXInst<(outs Int32Regs:$dst), 484 def MULWIDES32
479 (ins Int16Regs:$a, Int16Regs:$b), 485 : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
480 "mul.wide.s16 \t$dst, $a, $b;", []>; 486 "mul.wide.s16 \t$dst, $a, $b;", []>;
481 def MULWIDES32Imm : NVPTXInst<(outs Int32Regs:$dst), 487 def MULWIDES32Imm
482 (ins Int16Regs:$a, i32imm:$b), 488 : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
489 "mul.wide.s16 \t$dst, $a, $b;", []>;
490 def MULWIDES32Imm32
491 : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
483 "mul.wide.s16 \t$dst, $a, $b;", []>; 492 "mul.wide.s16 \t$dst, $a, $b;", []>;
484 493
485 def MULWIDEU32 : NVPTXInst<(outs Int32Regs:$dst), 494 def MULWIDEU32
486 (ins Int16Regs:$a, Int16Regs:$b), 495 : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
496 "mul.wide.u16 \t$dst, $a, $b;", []>;
497 def MULWIDEU32Imm
498 : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
487 "mul.wide.u16 \t$dst, $a, $b;", []>; 499 "mul.wide.u16 \t$dst, $a, $b;", []>;
488 def MULWIDEU32Imm : NVPTXInst<(outs Int32Regs:$dst), 500 def MULWIDEU32Imm32
489 (ins Int16Regs:$a, i32imm:$b), 501 : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
490 "mul.wide.u16 \t$dst, $a, $b;", []>; 502 "mul.wide.u16 \t$dst, $a, $b;", []>;
491 503
492 def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)), 504 def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)),
493 (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>, 505 (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
494 Requires<[doMulWide]>; 506 Requires<[doMulWide]>;
495 def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)), 507 def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)),
505 517
506 def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)), 518 def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)),
507 (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>, 519 (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
508 Requires<[doMulWide]>; 520 Requires<[doMulWide]>;
509 def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)), 521 def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)),
510 (MULWIDES64Imm Int32Regs:$a, (i64 SInt32Const:$b))>, 522 (MULWIDES64Imm64 Int32Regs:$a, (i64 SInt32Const:$b))>,
511 Requires<[doMulWide]>; 523 Requires<[doMulWide]>;
512 524
513 def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)), 525 def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)),
514 (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>; 526 (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
527 Requires<[doMulWide]>;
515 def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)), 528 def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)),
516 (MULWIDEU64Imm Int32Regs:$a, (i64 UInt32Const:$b))>, 529 (MULWIDEU64Imm64 Int32Regs:$a, (i64 UInt32Const:$b))>,
517 Requires<[doMulWide]>; 530 Requires<[doMulWide]>;
518 531
519 def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)), 532 def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)),
520 (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>; 533 (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,
534 Requires<[doMulWide]>;
521 def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)), 535 def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)),
522 (MULWIDES32Imm Int16Regs:$a, (i32 SInt16Const:$b))>, 536 (MULWIDES32Imm32 Int16Regs:$a, (i32 SInt16Const:$b))>,
523 Requires<[doMulWide]>; 537 Requires<[doMulWide]>;
524 538
525 def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)), 539 def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)),
526 (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>; 540 (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
541 Requires<[doMulWide]>;
527 def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)), 542 def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
528 (MULWIDEU32Imm Int16Regs:$a, (i32 UInt16Const:$b))>, 543 (MULWIDEU32Imm32 Int16Regs:$a, (i32 UInt16Const:$b))>,
544 Requires<[doMulWide]>;
545
546
547 def SDTMulWide
548 : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>]>;
549 def mul_wide_signed
550 : SDNode<"NVPTXISD::MUL_WIDE_SIGNED", SDTMulWide>;
551 def mul_wide_unsigned
552 : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>;
553
554 def : Pat<(i32 (mul_wide_signed Int16Regs:$a, Int16Regs:$b)),
555 (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,
556 Requires<[doMulWide]>;
557 def : Pat<(i32 (mul_wide_signed Int16Regs:$a, imm:$b)),
558 (MULWIDES32Imm Int16Regs:$a, imm:$b)>,
559 Requires<[doMulWide]>;
560 def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, Int16Regs:$b)),
561 (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
562 Requires<[doMulWide]>;
563 def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)),
564 (MULWIDEU32Imm Int16Regs:$a, imm:$b)>,
565 Requires<[doMulWide]>;
566
567
568 def : Pat<(i64 (mul_wide_signed Int32Regs:$a, Int32Regs:$b)),
569 (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
570 Requires<[doMulWide]>;
571 def : Pat<(i64 (mul_wide_signed Int32Regs:$a, imm:$b)),
572 (MULWIDES64Imm Int32Regs:$a, imm:$b)>,
573 Requires<[doMulWide]>;
574 def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, Int32Regs:$b)),
575 (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
576 Requires<[doMulWide]>;
577 def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, imm:$b)),
578 (MULWIDEU64Imm Int32Regs:$a, imm:$b)>,
529 Requires<[doMulWide]>; 579 Requires<[doMulWide]>;
530 580
531 defm MULT : I3<"mul.lo.s", mul>; 581 defm MULT : I3<"mul.lo.s", mul>;
532 582
533 defm MULTHS : I3<"mul.hi.s", mulhs>; 583 defm MULTHS : I3<"mul.hi.s", mulhs>;
539 defm SREM : I3<"rem.s", srem>; 589 defm SREM : I3<"rem.s", srem>;
540 // The ri version will not be selected as DAGCombiner::visitSREM will lower it. 590 // The ri version will not be selected as DAGCombiner::visitSREM will lower it.
541 defm UREM : I3<"rem.u", urem>; 591 defm UREM : I3<"rem.u", urem>;
542 // The ri version will not be selected as DAGCombiner::visitUREM will lower it. 592 // The ri version will not be selected as DAGCombiner::visitUREM will lower it.
543 593
594 def SDTIMAD
595 : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>,
596 SDTCisInt<2>, SDTCisSameAs<0, 2>,
597 SDTCisSameAs<0, 3>]>;
598 def imad
599 : SDNode<"NVPTXISD::IMAD", SDTIMAD>;
600
544 def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst), 601 def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst),
545 (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c), 602 (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),
546 "mad.lo.s16 \t$dst, $a, $b, $c;", 603 "mad.lo.s16 \t$dst, $a, $b, $c;",
547 [(set Int16Regs:$dst, (add 604 [(set Int16Regs:$dst,
548 (mul Int16Regs:$a, Int16Regs:$b), Int16Regs:$c))]>; 605 (imad Int16Regs:$a, Int16Regs:$b, Int16Regs:$c))]>;
549 def MAD16rri : NVPTXInst<(outs Int16Regs:$dst), 606 def MAD16rri : NVPTXInst<(outs Int16Regs:$dst),
550 (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c), 607 (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c),
551 "mad.lo.s16 \t$dst, $a, $b, $c;", 608 "mad.lo.s16 \t$dst, $a, $b, $c;",
552 [(set Int16Regs:$dst, (add 609 [(set Int16Regs:$dst,
553 (mul Int16Regs:$a, Int16Regs:$b), imm:$c))]>; 610 (imad Int16Regs:$a, Int16Regs:$b, imm:$c))]>;
554 def MAD16rir : NVPTXInst<(outs Int16Regs:$dst), 611 def MAD16rir : NVPTXInst<(outs Int16Regs:$dst),
555 (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c), 612 (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c),
556 "mad.lo.s16 \t$dst, $a, $b, $c;", 613 "mad.lo.s16 \t$dst, $a, $b, $c;",
557 [(set Int16Regs:$dst, (add 614 [(set Int16Regs:$dst,
558 (mul Int16Regs:$a, imm:$b), Int16Regs:$c))]>; 615 (imad Int16Regs:$a, imm:$b, Int16Regs:$c))]>;
559 def MAD16rii : NVPTXInst<(outs Int16Regs:$dst), 616 def MAD16rii : NVPTXInst<(outs Int16Regs:$dst),
560 (ins Int16Regs:$a, i16imm:$b, i16imm:$c), 617 (ins Int16Regs:$a, i16imm:$b, i16imm:$c),
561 "mad.lo.s16 \t$dst, $a, $b, $c;", 618 "mad.lo.s16 \t$dst, $a, $b, $c;",
562 [(set Int16Regs:$dst, (add (mul Int16Regs:$a, imm:$b), 619 [(set Int16Regs:$dst,
563 imm:$c))]>; 620 (imad Int16Regs:$a, imm:$b, imm:$c))]>;
564 621
565 def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst), 622 def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst),
566 (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c), 623 (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
567 "mad.lo.s32 \t$dst, $a, $b, $c;", 624 "mad.lo.s32 \t$dst, $a, $b, $c;",
568 [(set Int32Regs:$dst, (add 625 [(set Int32Regs:$dst,
569 (mul Int32Regs:$a, Int32Regs:$b), Int32Regs:$c))]>; 626 (imad Int32Regs:$a, Int32Regs:$b, Int32Regs:$c))]>;
570 def MAD32rri : NVPTXInst<(outs Int32Regs:$dst), 627 def MAD32rri : NVPTXInst<(outs Int32Regs:$dst),
571 (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c), 628 (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),
572 "mad.lo.s32 \t$dst, $a, $b, $c;", 629 "mad.lo.s32 \t$dst, $a, $b, $c;",
573 [(set Int32Regs:$dst, (add 630 [(set Int32Regs:$dst,
574 (mul Int32Regs:$a, Int32Regs:$b), imm:$c))]>; 631 (imad Int32Regs:$a, Int32Regs:$b, imm:$c))]>;
575 def MAD32rir : NVPTXInst<(outs Int32Regs:$dst), 632 def MAD32rir : NVPTXInst<(outs Int32Regs:$dst),
576 (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c), 633 (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
577 "mad.lo.s32 \t$dst, $a, $b, $c;", 634 "mad.lo.s32 \t$dst, $a, $b, $c;",
578 [(set Int32Regs:$dst, (add 635 [(set Int32Regs:$dst,
579 (mul Int32Regs:$a, imm:$b), Int32Regs:$c))]>; 636 (imad Int32Regs:$a, imm:$b, Int32Regs:$c))]>;
580 def MAD32rii : NVPTXInst<(outs Int32Regs:$dst), 637 def MAD32rii : NVPTXInst<(outs Int32Regs:$dst),
581 (ins Int32Regs:$a, i32imm:$b, i32imm:$c), 638 (ins Int32Regs:$a, i32imm:$b, i32imm:$c),
582 "mad.lo.s32 \t$dst, $a, $b, $c;", 639 "mad.lo.s32 \t$dst, $a, $b, $c;",
583 [(set Int32Regs:$dst, (add 640 [(set Int32Regs:$dst,
584 (mul Int32Regs:$a, imm:$b), imm:$c))]>; 641 (imad Int32Regs:$a, imm:$b, imm:$c))]>;
585 642
586 def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst), 643 def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst),
587 (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c), 644 (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
588 "mad.lo.s64 \t$dst, $a, $b, $c;", 645 "mad.lo.s64 \t$dst, $a, $b, $c;",
589 [(set Int64Regs:$dst, (add 646 [(set Int64Regs:$dst,
590 (mul Int64Regs:$a, Int64Regs:$b), Int64Regs:$c))]>; 647 (imad Int64Regs:$a, Int64Regs:$b, Int64Regs:$c))]>;
591 def MAD64rri : NVPTXInst<(outs Int64Regs:$dst), 648 def MAD64rri : NVPTXInst<(outs Int64Regs:$dst),
592 (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c), 649 (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),
593 "mad.lo.s64 \t$dst, $a, $b, $c;", 650 "mad.lo.s64 \t$dst, $a, $b, $c;",
594 [(set Int64Regs:$dst, (add 651 [(set Int64Regs:$dst,
595 (mul Int64Regs:$a, Int64Regs:$b), imm:$c))]>; 652 (imad Int64Regs:$a, Int64Regs:$b, imm:$c))]>;
596 def MAD64rir : NVPTXInst<(outs Int64Regs:$dst), 653 def MAD64rir : NVPTXInst<(outs Int64Regs:$dst),
597 (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c), 654 (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),
598 "mad.lo.s64 \t$dst, $a, $b, $c;", 655 "mad.lo.s64 \t$dst, $a, $b, $c;",
599 [(set Int64Regs:$dst, (add 656 [(set Int64Regs:$dst,
600 (mul Int64Regs:$a, imm:$b), Int64Regs:$c))]>; 657 (imad Int64Regs:$a, imm:$b, Int64Regs:$c))]>;
601 def MAD64rii : NVPTXInst<(outs Int64Regs:$dst), 658 def MAD64rii : NVPTXInst<(outs Int64Regs:$dst),
602 (ins Int64Regs:$a, i64imm:$b, i64imm:$c), 659 (ins Int64Regs:$a, i64imm:$b, i64imm:$c),
603 "mad.lo.s64 \t$dst, $a, $b, $c;", 660 "mad.lo.s64 \t$dst, $a, $b, $c;",
604 [(set Int64Regs:$dst, (add 661 [(set Int64Regs:$dst,
605 (mul Int64Regs:$a, imm:$b), imm:$c))]>; 662 (imad Int64Regs:$a, imm:$b, imm:$c))]>;
606
607 663
608 def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), 664 def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
609 "neg.s16 \t$dst, $src;", 665 "neg.s16 \t$dst, $src;",
610 [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>; 666 [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
611 def INEG32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), 667 def INEG32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
687 (ins Float32Regs:$a, Float32Regs:$b), 743 (ins Float32Regs:$a, Float32Regs:$b),
688 "div.approx.ftz.f32 \t$dst, $a, $b;", 744 "div.approx.ftz.f32 \t$dst, $a, $b;",
689 [(set Float32Regs:$dst, 745 [(set Float32Regs:$dst,
690 (fdiv Float32Regs:$a, Float32Regs:$b))]>, 746 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
691 Requires<[do_DIVF32_APPROX, doF32FTZ]>; 747 Requires<[do_DIVF32_APPROX, doF32FTZ]>;
748 def FDIV32approxri_ftz : NVPTXInst<(outs Float32Regs:$dst),
749 (ins Float32Regs:$a, f32imm:$b),
750 "div.approx.ftz.f32 \t$dst, $a, $b;",
751 [(set Float32Regs:$dst,
752 (fdiv Float32Regs:$a, fpimm:$b))]>,
753 Requires<[do_DIVF32_APPROX, doF32FTZ]>;
692 def FDIV32approxrr : NVPTXInst<(outs Float32Regs:$dst), 754 def FDIV32approxrr : NVPTXInst<(outs Float32Regs:$dst),
693 (ins Float32Regs:$a, Float32Regs:$b), 755 (ins Float32Regs:$a, Float32Regs:$b),
694 "div.approx.f32 \t$dst, $a, $b;", 756 "div.approx.f32 \t$dst, $a, $b;",
695 [(set Float32Regs:$dst, 757 [(set Float32Regs:$dst,
696 (fdiv Float32Regs:$a, Float32Regs:$b))]>, 758 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
759 Requires<[do_DIVF32_APPROX]>;
760 def FDIV32approxri : NVPTXInst<(outs Float32Regs:$dst),
761 (ins Float32Regs:$a, f32imm:$b),
762 "div.approx.f32 \t$dst, $a, $b;",
763 [(set Float32Regs:$dst,
764 (fdiv Float32Regs:$a, fpimm:$b))]>,
697 Requires<[do_DIVF32_APPROX]>; 765 Requires<[do_DIVF32_APPROX]>;
698 // 766 //
699 // F32 Semi-accurate reciprocal 767 // F32 Semi-accurate reciprocal
700 // 768 //
701 // rcp.approx gives the same result as div.full(1.0f, a) and is faster. 769 // rcp.approx gives the same result as div.full(1.0f, a) and is faster.
795 863
796 multiclass FPCONTRACT32<string OpcStr, Predicate Pred> { 864 multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
797 def rrr : NVPTXInst<(outs Float32Regs:$dst), 865 def rrr : NVPTXInst<(outs Float32Regs:$dst),
798 (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c), 866 (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
799 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), 867 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
800 [(set Float32Regs:$dst, (fadd 868 [(set Float32Regs:$dst,
801 (fmul Float32Regs:$a, Float32Regs:$b), 869 (fma Float32Regs:$a, Float32Regs:$b, Float32Regs:$c))]>,
802 Float32Regs:$c))]>, Requires<[Pred]>;
803 // This is to WAR a weird bug in Tablegen that does not automatically
804 // generate the following permutated rule rrr2 from the above rrr.
805 // So we explicitly add it here. This happens to FMA32 only.
806 // See the comments at FMAD32 and FMA32 for more information.
807 def rrr2 : NVPTXInst<(outs Float32Regs:$dst),
808 (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
809 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
810 [(set Float32Regs:$dst, (fadd Float32Regs:$c,
811 (fmul Float32Regs:$a, Float32Regs:$b)))]>,
812 Requires<[Pred]>; 870 Requires<[Pred]>;
813 def rri : NVPTXInst<(outs Float32Regs:$dst), 871 def rri : NVPTXInst<(outs Float32Regs:$dst),
814 (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c), 872 (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c),
815 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), 873 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
816 [(set Float32Regs:$dst, (fadd 874 [(set Float32Regs:$dst,
817 (fmul Float32Regs:$a, Float32Regs:$b), fpimm:$c))]>, 875 (fma Float32Regs:$a, Float32Regs:$b, fpimm:$c))]>,
818 Requires<[Pred]>; 876 Requires<[Pred]>;
819 def rir : NVPTXInst<(outs Float32Regs:$dst), 877 def rir : NVPTXInst<(outs Float32Regs:$dst),
820 (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c), 878 (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c),
821 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), 879 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
822 [(set Float32Regs:$dst, (fadd 880 [(set Float32Regs:$dst,
823 (fmul Float32Regs:$a, fpimm:$b), Float32Regs:$c))]>, 881 (fma Float32Regs:$a, fpimm:$b, Float32Regs:$c))]>,
824 Requires<[Pred]>; 882 Requires<[Pred]>;
825 def rii : NVPTXInst<(outs Float32Regs:$dst), 883 def rii : NVPTXInst<(outs Float32Regs:$dst),
826 (ins Float32Regs:$a, f32imm:$b, f32imm:$c), 884 (ins Float32Regs:$a, f32imm:$b, f32imm:$c),
827 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), 885 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
828 [(set Float32Regs:$dst, (fadd 886 [(set Float32Regs:$dst,
829 (fmul Float32Regs:$a, fpimm:$b), fpimm:$c))]>, 887 (fma Float32Regs:$a, fpimm:$b, fpimm:$c))]>,
830 Requires<[Pred]>; 888 Requires<[Pred]>;
831 } 889 }
832 890
833 multiclass FPCONTRACT64<string OpcStr, Predicate Pred> { 891 multiclass FPCONTRACT64<string OpcStr, Predicate Pred> {
834 def rrr : NVPTXInst<(outs Float64Regs:$dst), 892 def rrr : NVPTXInst<(outs Float64Regs:$dst),
835 (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c), 893 (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c),
836 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), 894 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
837 [(set Float64Regs:$dst, (fadd 895 [(set Float64Regs:$dst,
838 (fmul Float64Regs:$a, Float64Regs:$b), 896 (fma Float64Regs:$a, Float64Regs:$b, Float64Regs:$c))]>,
839 Float64Regs:$c))]>, Requires<[Pred]>; 897 Requires<[Pred]>;
840 def rri : NVPTXInst<(outs Float64Regs:$dst), 898 def rri : NVPTXInst<(outs Float64Regs:$dst),
841 (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c), 899 (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c),
842 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), 900 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
843 [(set Float64Regs:$dst, (fadd (fmul Float64Regs:$a, 901 [(set Float64Regs:$dst,
844 Float64Regs:$b), fpimm:$c))]>, Requires<[Pred]>; 902 (fma Float64Regs:$a, Float64Regs:$b, fpimm:$c))]>,
903 Requires<[Pred]>;
845 def rir : NVPTXInst<(outs Float64Regs:$dst), 904 def rir : NVPTXInst<(outs Float64Regs:$dst),
846 (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c), 905 (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c),
847 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), 906 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
848 [(set Float64Regs:$dst, (fadd 907 [(set Float64Regs:$dst,
849 (fmul Float64Regs:$a, fpimm:$b), Float64Regs:$c))]>, 908 (fma Float64Regs:$a, fpimm:$b, Float64Regs:$c))]>,
850 Requires<[Pred]>; 909 Requires<[Pred]>;
851 def rii : NVPTXInst<(outs Float64Regs:$dst), 910 def rii : NVPTXInst<(outs Float64Regs:$dst),
852 (ins Float64Regs:$a, f64imm:$b, f64imm:$c), 911 (ins Float64Regs:$a, f64imm:$b, f64imm:$c),
853 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), 912 !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
854 [(set Float64Regs:$dst, (fadd 913 [(set Float64Regs:$dst,
855 (fmul Float64Regs:$a, fpimm:$b), fpimm:$c))]>, 914 (fma Float64Regs:$a, fpimm:$b, fpimm:$c))]>,
856 Requires<[Pred]>; 915 Requires<[Pred]>;
857 } 916 }
858 917
859 // Due to a unknown reason (most likely a bug in tablegen), tablegen does not 918 defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doF32FTZ>;
860 // automatically generate the rrr2 rule from 919 defm FMA32 : FPCONTRACT32<"fma.rn.f32", true>;
861 // the rrr rule (see FPCONTRACT32) for FMA32, though it does for FMAD32. 920 defm FMA64 : FPCONTRACT64<"fma.rn.f64", true>;
862 // If we reverse the order of the following two lines, then rrr2 rule will be
863 // generated for FMA32, but not for rrr.
864 // Therefore, we manually write the rrr2 rule in FPCONTRACT32.
865 defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doFMAF32_ftz>;
866 defm FMA32 : FPCONTRACT32<"fma.rn.f32", doFMAF32>;
867 defm FMA64 : FPCONTRACT64<"fma.rn.f64", doFMAF64>;
868
869 // b*c-a => fmad(b, c, -a)
870 multiclass FPCONTRACT32_SUB_PAT_MAD<NVPTXInst Inst, Predicate Pred> {
871 def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
872 (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
873 Requires<[Pred]>;
874 }
875
876 // a-b*c => fmad(-b,c, a)
877 // - legal because a-b*c <=> a+(-b*c) <=> a+(-b)*c
878 // b*c-a => fmad(b, c, -a)
879 // - legal because b*c-a <=> b*c+(-a)
880 multiclass FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
881 def : Pat<(fsub Float32Regs:$a, (fmul Float32Regs:$b, Float32Regs:$c)),
882 (Inst (FNEGf32 Float32Regs:$b), Float32Regs:$c, Float32Regs:$a)>,
883 Requires<[Pred]>;
884 def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
885 (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
886 Requires<[Pred]>;
887 }
888
889 // a-b*c => fmad(-b,c, a)
890 // b*c-a => fmad(b, c, -a)
891 multiclass FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
892 def : Pat<(fsub Float64Regs:$a, (fmul Float64Regs:$b, Float64Regs:$c)),
893 (Inst (FNEGf64 Float64Regs:$b), Float64Regs:$c, Float64Regs:$a)>,
894 Requires<[Pred]>;
895
896 def : Pat<(fsub (fmul Float64Regs:$b, Float64Regs:$c), Float64Regs:$a),
897 (Inst Float64Regs:$b, Float64Regs:$c, (FNEGf64 Float64Regs:$a))>,
898 Requires<[Pred]>;
899 }
900
901 defm FMAF32ext_ftz : FPCONTRACT32_SUB_PAT<FMA32_ftzrrr, doFMAF32AGG_ftz>;
902 defm FMAF32ext : FPCONTRACT32_SUB_PAT<FMA32rrr, doFMAF32AGG>;
903 defm FMAF64ext : FPCONTRACT64_SUB_PAT<FMA64rrr, doFMAF64AGG>;
904 921
905 def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), 922 def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
906 "sin.approx.f32 \t$dst, $src;", 923 "sin.approx.f32 \t$dst, $src;",
907 [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>; 924 [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>;
908 def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), 925 def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
1069 } 1086 }
1070 1087
1071 defm SRA : RSHIFT_FORMAT<"shr.s", sra>; 1088 defm SRA : RSHIFT_FORMAT<"shr.s", sra>;
1072 defm SRL : RSHIFT_FORMAT<"shr.u", srl>; 1089 defm SRL : RSHIFT_FORMAT<"shr.u", srl>;
1073 1090
1091 //
1092 // Rotate: use ptx shf instruction if available.
1093 //
1094
1095 // 32 bit r2 = rotl r1, n
1096 // =>
1097 // r2 = shf.l r1, r1, n
1098 def ROTL32imm_hw : NVPTXInst<(outs Int32Regs:$dst),
1099 (ins Int32Regs:$src, i32imm:$amt),
1100 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
1101 [(set Int32Regs:$dst, (rotl Int32Regs:$src, (i32 imm:$amt)))]>,
1102 Requires<[hasHWROT32]> ;
1103
1104 def ROTL32reg_hw : NVPTXInst<(outs Int32Regs:$dst),
1105 (ins Int32Regs:$src, Int32Regs:$amt),
1106 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
1107 [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>,
1108 Requires<[hasHWROT32]>;
1109
1110 // 32 bit r2 = rotr r1, n
1111 // =>
1112 // r2 = shf.r r1, r1, n
1113 def ROTR32imm_hw : NVPTXInst<(outs Int32Regs:$dst),
1114 (ins Int32Regs:$src, i32imm:$amt),
1115 "shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
1116 [(set Int32Regs:$dst, (rotr Int32Regs:$src, (i32 imm:$amt)))]>,
1117 Requires<[hasHWROT32]>;
1118
1119 def ROTR32reg_hw : NVPTXInst<(outs Int32Regs:$dst),
1120 (ins Int32Regs:$src, Int32Regs:$amt),
1121 "shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
1122 [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>,
1123 Requires<[hasHWROT32]>;
1124
1125 //
1126 // Rotate: if ptx shf instruction is not available, then use shift+add
1127 //
1074 // 32bit 1128 // 32bit
1075 def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst), 1129 def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst),
1076 (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2), 1130 (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),
1077 !strconcat("{{\n\t", 1131 !strconcat("{{\n\t",
1078 !strconcat(".reg .b32 %lhs;\n\t", 1132 !strconcat(".reg .b32 %lhs;\n\t",
1086 def SUB_FRM_32 : SDNodeXForm<imm, [{ 1140 def SUB_FRM_32 : SDNodeXForm<imm, [{
1087 return CurDAG->getTargetConstant(32-N->getZExtValue(), MVT::i32); 1141 return CurDAG->getTargetConstant(32-N->getZExtValue(), MVT::i32);
1088 }]>; 1142 }]>;
1089 1143
1090 def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)), 1144 def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
1091 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>; 1145 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
1146 Requires<[noHWROT32]>;
1092 def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)), 1147 def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)),
1093 (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>; 1148 (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>,
1149 Requires<[noHWROT32]>;
1094 1150
1095 def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, 1151 def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
1096 Int32Regs:$amt), 1152 Int32Regs:$amt),
1097 !strconcat("{{\n\t", 1153 !strconcat("{{\n\t",
1098 !strconcat(".reg .b32 %lhs;\n\t", 1154 !strconcat(".reg .b32 %lhs;\n\t",
1101 !strconcat("shl.b32 \t%lhs, $src, $amt;\n\t", 1157 !strconcat("shl.b32 \t%lhs, $src, $amt;\n\t",
1102 !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t", 1158 !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
1103 !strconcat("shr.b32 \t%rhs, $src, %amt2;\n\t", 1159 !strconcat("shr.b32 \t%rhs, $src, %amt2;\n\t",
1104 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t", 1160 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
1105 !strconcat("}}", ""))))))))), 1161 !strconcat("}}", ""))))))))),
1106 [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>; 1162 [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>,
1163 Requires<[noHWROT32]>;
1107 1164
1108 def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, 1165 def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
1109 Int32Regs:$amt), 1166 Int32Regs:$amt),
1110 !strconcat("{{\n\t", 1167 !strconcat("{{\n\t",
1111 !strconcat(".reg .b32 %lhs;\n\t", 1168 !strconcat(".reg .b32 %lhs;\n\t",
1114 !strconcat("shr.b32 \t%lhs, $src, $amt;\n\t", 1171 !strconcat("shr.b32 \t%lhs, $src, $amt;\n\t",
1115 !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t", 1172 !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
1116 !strconcat("shl.b32 \t%rhs, $src, %amt2;\n\t", 1173 !strconcat("shl.b32 \t%rhs, $src, %amt2;\n\t",
1117 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t", 1174 !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
1118 !strconcat("}}", ""))))))))), 1175 !strconcat("}}", ""))))))))),
1119 [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>; 1176 [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>,
1177 Requires<[noHWROT32]>;
1120 1178
1121 // 64bit 1179 // 64bit
1122 def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, 1180 def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
1123 i32imm:$amt1, i32imm:$amt2), 1181 i32imm:$amt1, i32imm:$amt2),
1124 !strconcat("{{\n\t", 1182 !strconcat("{{\n\t",
1163 !strconcat("shl.b64 \t%rhs, $src, %amt2;\n\t", 1221 !strconcat("shl.b64 \t%rhs, $src, %amt2;\n\t",
1164 !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t", 1222 !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
1165 !strconcat("}}", ""))))))))), 1223 !strconcat("}}", ""))))))))),
1166 [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>; 1224 [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>;
1167 1225
1226 // BFE - bit-field extract
1227
1228 multiclass BFE<string TyStr, RegisterClass RC> {
1229 // BFE supports both 32-bit and 64-bit values, but the start and length
1230 // operands are always 32-bit
1231 def rrr
1232 : NVPTXInst<(outs RC:$d),
1233 (ins RC:$a, Int32Regs:$b, Int32Regs:$c),
1234 !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
1235 def rri
1236 : NVPTXInst<(outs RC:$d),
1237 (ins RC:$a, Int32Regs:$b, i32imm:$c),
1238 !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
1239 def rii
1240 : NVPTXInst<(outs RC:$d),
1241 (ins RC:$a, i32imm:$b, i32imm:$c),
1242 !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
1243 }
1244
1245 defm BFE_S32 : BFE<"s32", Int32Regs>;
1246 defm BFE_U32 : BFE<"u32", Int32Regs>;
1247 defm BFE_S64 : BFE<"s64", Int64Regs>;
1248 defm BFE_U64 : BFE<"u64", Int64Regs>;
1168 1249
1169 //----------------------------------- 1250 //-----------------------------------
1170 // General Comparison 1251 // General Comparison
1171 //----------------------------------- 1252 //-----------------------------------
1172 1253
1277 1358
1278 // Special select for predicate operands 1359 // Special select for predicate operands
1279 def : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)), 1360 def : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
1280 (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a), 1361 (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
1281 (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>; 1362 (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;
1363
1364 //
1365 // Funnel shift in clamp mode
1366 //
1367 // - SDNodes are created so they can be used in the DAG code,
1368 // e.g. NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts)
1369 //
1370 def SDTIntShiftDOp: SDTypeProfile<1, 3,
1371 [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
1372 SDTCisInt<0>, SDTCisInt<3>]>;
1373 def FUN_SHFL_CLAMP : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>;
1374 def FUN_SHFR_CLAMP : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>;
1375
1376 def FUNSHFLCLAMP : NVPTXInst<(outs Int32Regs:$dst),
1377 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
1378 "shf.l.clamp.b32 \t$dst, $lo, $hi, $amt;",
1379 [(set Int32Regs:$dst,
1380 (FUN_SHFL_CLAMP Int32Regs:$lo,
1381 Int32Regs:$hi, Int32Regs:$amt))]>;
1382
1383 def FUNSHFRCLAMP : NVPTXInst<(outs Int32Regs:$dst),
1384 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
1385 "shf.r.clamp.b32 \t$dst, $lo, $hi, $amt;",
1386 [(set Int32Regs:$dst,
1387 (FUN_SHFR_CLAMP Int32Regs:$lo,
1388 Int32Regs:$hi, Int32Regs:$amt))]>;
1282 1389
1283 //----------------------------------- 1390 //-----------------------------------
1284 // Data Movement (Load / Store, Move) 1391 // Data Movement (Load / Store, Move)
1285 //----------------------------------- 1392 //-----------------------------------
1286 1393
1805 // FIXME: StoreParamV4Inst crashes llvm-tblgen :( 1912 // FIXME: StoreParamV4Inst crashes llvm-tblgen :(
1806 //def StoreParamV4I32 : StoreParamV4Inst<Int32Regs, ".b32">; 1913 //def StoreParamV4I32 : StoreParamV4Inst<Int32Regs, ".b32">;
1807 def StoreParamV4I32 : NVPTXInst<(outs), (ins Int32Regs:$val, Int32Regs:$val2, 1914 def StoreParamV4I32 : NVPTXInst<(outs), (ins Int32Regs:$val, Int32Regs:$val2,
1808 Int32Regs:$val3, Int32Regs:$val4, 1915 Int32Regs:$val3, Int32Regs:$val4,
1809 i32imm:$a, i32imm:$b), 1916 i32imm:$a, i32imm:$b),
1810 "st.param.b32\t[param$a+$b], {{$val, $val2, $val3, $val4}};", 1917 "st.param.v4.b32\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
1811 []>; 1918 []>;
1812 1919
1813 def StoreParamV4I16 : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2, 1920 def StoreParamV4I16 : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2,
1814 Int16Regs:$val3, Int16Regs:$val4, 1921 Int16Regs:$val3, Int16Regs:$val4,
1815 i32imm:$a, i32imm:$b), 1922 i32imm:$a, i32imm:$b),