comparison of lib/Target/X86/X86FastISel.cpp @ 121:803732b1fca8 (CbC_llvm)
LLVM 5.0

author   | kono
date     | Fri, 27 Oct 2017 17:07:41 +0900 (2017-10-27)
parents  | 1172e4bd9c6f
children |
120:1172e4bd9c6f (old) | 121:803732b1fca8 (new)
---|---
108 | 108 |
109 bool X86SelectCmp(const Instruction *I); | 109 bool X86SelectCmp(const Instruction *I); |
110 | 110 |
111 bool X86SelectZExt(const Instruction *I); | 111 bool X86SelectZExt(const Instruction *I); |
112 | 112 |
113 bool X86SelectSExt(const Instruction *I); | |
114 | |
113 bool X86SelectBranch(const Instruction *I); | 115 bool X86SelectBranch(const Instruction *I); |
114 | 116 |
115 bool X86SelectShift(const Instruction *I); | 117 bool X86SelectShift(const Instruction *I); |
116 | 118 |
117 bool X86SelectDivRem(const Instruction *I); | 119 bool X86SelectDivRem(const Instruction *I); |
168 bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I, | 170 bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I, |
169 const Value *Cond); | 171 const Value *Cond); |
170 | 172 |
171 const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB, | 173 const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB, |
172 X86AddressMode &AM); | 174 X86AddressMode &AM); |
175 | |
176 unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode, | |
177 const TargetRegisterClass *RC, unsigned Op0, | |
178 bool Op0IsKill, unsigned Op1, bool Op1IsKill, | |
179 unsigned Op2, bool Op2IsKill, unsigned Op3, | |
180 bool Op3IsKill); | |
173 }; | 181 }; |
174 | 182 |
175 } // end anonymous namespace. | 183 } // end anonymous namespace. |
176 | |
177 static std::pair<X86::CondCode, bool> | |
178 getX86ConditionCode(CmpInst::Predicate Predicate) { | |
179 X86::CondCode CC = X86::COND_INVALID; | |
180 bool NeedSwap = false; | |
181 switch (Predicate) { | |
182 default: break; | |
183 // Floating-point Predicates | |
184 case CmpInst::FCMP_UEQ: CC = X86::COND_E; break; | |
185 case CmpInst::FCMP_OLT: NeedSwap = true; LLVM_FALLTHROUGH; | |
186 case CmpInst::FCMP_OGT: CC = X86::COND_A; break; | |
187 case CmpInst::FCMP_OLE: NeedSwap = true; LLVM_FALLTHROUGH; | |
188 case CmpInst::FCMP_OGE: CC = X86::COND_AE; break; | |
189 case CmpInst::FCMP_UGT: NeedSwap = true; LLVM_FALLTHROUGH; | |
190 case CmpInst::FCMP_ULT: CC = X86::COND_B; break; | |
191 case CmpInst::FCMP_UGE: NeedSwap = true; LLVM_FALLTHROUGH; | |
192 case CmpInst::FCMP_ULE: CC = X86::COND_BE; break; | |
193 case CmpInst::FCMP_ONE: CC = X86::COND_NE; break; | |
194 case CmpInst::FCMP_UNO: CC = X86::COND_P; break; | |
195 case CmpInst::FCMP_ORD: CC = X86::COND_NP; break; | |
196 case CmpInst::FCMP_OEQ: LLVM_FALLTHROUGH; | |
197 case CmpInst::FCMP_UNE: CC = X86::COND_INVALID; break; | |
198 | |
199 // Integer Predicates | |
200 case CmpInst::ICMP_EQ: CC = X86::COND_E; break; | |
201 case CmpInst::ICMP_NE: CC = X86::COND_NE; break; | |
202 case CmpInst::ICMP_UGT: CC = X86::COND_A; break; | |
203 case CmpInst::ICMP_UGE: CC = X86::COND_AE; break; | |
204 case CmpInst::ICMP_ULT: CC = X86::COND_B; break; | |
205 case CmpInst::ICMP_ULE: CC = X86::COND_BE; break; | |
206 case CmpInst::ICMP_SGT: CC = X86::COND_G; break; | |
207 case CmpInst::ICMP_SGE: CC = X86::COND_GE; break; | |
208 case CmpInst::ICMP_SLT: CC = X86::COND_L; break; | |
209 case CmpInst::ICMP_SLE: CC = X86::COND_LE; break; | |
210 } | |
211 | |
212 return std::make_pair(CC, NeedSwap); | |
213 } | |
214 | 184 |
215 static std::pair<unsigned, bool> | 185 static std::pair<unsigned, bool> |
216 getX86SSEConditionCode(CmpInst::Predicate Predicate) { | 186 getX86SSEConditionCode(CmpInst::Predicate Predicate) { |
217 unsigned CC; | 187 unsigned CC; |
218 bool NeedSwap = false; | 188 bool NeedSwap = false; |
238 case CmpInst::FCMP_ULE: NeedSwap = true; LLVM_FALLTHROUGH; | 208 case CmpInst::FCMP_ULE: NeedSwap = true; LLVM_FALLTHROUGH; |
239 case CmpInst::FCMP_UGE: CC = 5; break; | 209 case CmpInst::FCMP_UGE: CC = 5; break; |
240 case CmpInst::FCMP_ULT: NeedSwap = true; LLVM_FALLTHROUGH; | 210 case CmpInst::FCMP_ULT: NeedSwap = true; LLVM_FALLTHROUGH; |
241 case CmpInst::FCMP_UGT: CC = 6; break; | 211 case CmpInst::FCMP_UGT: CC = 6; break; |
242 case CmpInst::FCMP_ORD: CC = 7; break; | 212 case CmpInst::FCMP_ORD: CC = 7; break; |
243 case CmpInst::FCMP_UEQ: | 213 case CmpInst::FCMP_UEQ: CC = 8; break; |
244 case CmpInst::FCMP_ONE: CC = 8; break; | 214 case CmpInst::FCMP_ONE: CC = 12; break; |
245 } | 215 } |
246 | 216 |
247 return std::make_pair(CC, NeedSwap); | 217 return std::make_pair(CC, NeedSwap); |
248 } | 218 } |
249 | 219 |
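
A note on the table kept above: getX86SSEConditionCode maps an IR floating-point predicate onto the imm8 used by (V)CMPSS/(V)CMPSD, swapping operands for the predicates SSE cannot express directly; the new revision maps FCMP_UEQ to 8 and FCMP_ONE to 12, encodings that only exist with the AVX VCMP form (hence the later `CC > 7 && !Subtarget->hasAVX()` bail-out). A minimal standalone sketch of the same mapping, using a local predicate enum rather than CmpInst::Predicate, with the cases not shown in this hunk filled in from the standard SSE compare-predicate encodings:

```cpp
#include <cstdint>
#include <utility>

// Local stand-in for CmpInst::Predicate (FCMP_* only); not the LLVM enum.
enum class FPred { OEQ, OGT, OGE, OLT, OLE, ONE, ORD, UNO, UEQ, UGT, UGE, ULT, ULE, UNE };

// Returns {imm8 for (V)CMPSS/(V)CMPSD, operands-need-swap}. 0-7 are the legacy
// SSE predicates; 8 (EQ_UQ) and 12 (NEQ_OQ) require the AVX VCMP encoding.
std::pair<uint8_t, bool> sseConditionCode(FPred P) {
  switch (P) {
  case FPred::OEQ: return {0, false};  // EQ_OQ
  case FPred::OGT: return {1, true};   // swap, then LT_OS
  case FPred::OLT: return {1, false};  // LT_OS
  case FPred::OGE: return {2, true};   // swap, then LE_OS
  case FPred::OLE: return {2, false};  // LE_OS
  case FPred::UNO: return {3, false};  // UNORD_Q
  case FPred::UNE: return {4, false};  // NEQ_UQ
  case FPred::ULE: return {5, true};   // swap, then NLT_US
  case FPred::UGE: return {5, false};  // NLT_US
  case FPred::ULT: return {6, true};   // swap, then NLE_US
  case FPred::UGT: return {6, false};  // NLE_US
  case FPred::ORD: return {7, false};  // ORD_Q
  case FPred::UEQ: return {8, false};  // EQ_UQ, AVX only
  case FPred::ONE: return {12, false}; // NEQ_OQ, AVX only
  }
  return {0, false};
}
```
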
440 break; | 410 break; |
441 case MVT::v8f32: | 411 case MVT::v8f32: |
442 assert(HasAVX); | 412 assert(HasAVX); |
443 if (IsNonTemporal && Alignment >= 32 && HasAVX2) | 413 if (IsNonTemporal && Alignment >= 32 && HasAVX2) |
444 Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm; | 414 Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm; |
415 else if (IsNonTemporal && Alignment >= 16) | |
416 return false; // Force split for X86::VMOVNTDQArm | |
445 else if (Alignment >= 32) | 417 else if (Alignment >= 32) |
446 Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm; | 418 Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm; |
447 else | 419 else |
448 Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm; | 420 Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm; |
449 RC = &X86::VR256RegClass; | 421 RC = &X86::VR256RegClass; |
450 break; | 422 break; |
451 case MVT::v4f64: | 423 case MVT::v4f64: |
452 assert(HasAVX); | 424 assert(HasAVX); |
453 if (IsNonTemporal && Alignment >= 32 && HasAVX2) | 425 if (IsNonTemporal && Alignment >= 32 && HasAVX2) |
454 Opc = X86::VMOVNTDQAYrm; | 426 Opc = X86::VMOVNTDQAYrm; |
427 else if (IsNonTemporal && Alignment >= 16) | |
428 return false; // Force split for X86::VMOVNTDQArm | |
455 else if (Alignment >= 32) | 429 else if (Alignment >= 32) |
456 Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm; | 430 Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm; |
457 else | 431 else |
458 Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm; | 432 Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm; |
459 RC = &X86::VR256RegClass; | 433 RC = &X86::VR256RegClass; |
463 case MVT::v16i16: | 437 case MVT::v16i16: |
464 case MVT::v32i8: | 438 case MVT::v32i8: |
465 assert(HasAVX); | 439 assert(HasAVX); |
466 if (IsNonTemporal && Alignment >= 32 && HasAVX2) | 440 if (IsNonTemporal && Alignment >= 32 && HasAVX2) |
467 Opc = X86::VMOVNTDQAYrm; | 441 Opc = X86::VMOVNTDQAYrm; |
442 else if (IsNonTemporal && Alignment >= 16) | |
443 return false; // Force split for X86::VMOVNTDQArm | |
468 else if (Alignment >= 32) | 444 else if (Alignment >= 32) |
469 Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm; | 445 Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm; |
470 else | 446 else |
471 Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm; | 447 Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm; |
472 RC = &X86::VR256RegClass; | 448 RC = &X86::VR256RegClass; |
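
The branch added in these hunks ("else if (IsNonTemporal && Alignment >= 16) return false") rejects a 256-bit non-temporal load that is only 16-byte aligned, so selection falls back to the DAG path, which per the comment splits it into two 128-bit VMOVNTDQA loads. A toy model of the resulting decision ladder for the v8f32 case, with illustrative opcode strings rather than the real X86:: enum values:

```cpp
#include <optional>
#include <string>

// Toy model of the 256-bit load-opcode choice above. The bool flags mirror the
// subtarget checks in the hunk; the strings are illustrative, not X86:: opcodes.
std::optional<std::string> pick256BitFpLoad(bool IsNonTemporal, unsigned Alignment,
                                            bool HasAVX2, bool HasVLX) {
  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
    return HasVLX ? "VMOVNTDQAZ256rm" : "VMOVNTDQAYrm"; // aligned NT load
  if (IsNonTemporal && Alignment >= 16)
    return std::nullopt; // bail out: force a split into two 128-bit NT loads
  if (Alignment >= 32)
    return HasVLX ? "VMOVAPSZ256rm" : "VMOVAPSYrm";     // aligned load
  return HasVLX ? "VMOVUPSZ256rm" : "VMOVUPSYrm";       // unaligned load
}
```
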
516 /// and a displacement offset, or a GlobalAddress, | 492 /// and a displacement offset, or a GlobalAddress, |
517 /// i.e. V. Return true if it is possible. | 493 /// i.e. V. Return true if it is possible. |
518 bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill, | 494 bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill, |
519 X86AddressMode &AM, | 495 X86AddressMode &AM, |
520 MachineMemOperand *MMO, bool Aligned) { | 496 MachineMemOperand *MMO, bool Aligned) { |
497 bool HasSSE1 = Subtarget->hasSSE1(); | |
521 bool HasSSE2 = Subtarget->hasSSE2(); | 498 bool HasSSE2 = Subtarget->hasSSE2(); |
522 bool HasSSE4A = Subtarget->hasSSE4A(); | 499 bool HasSSE4A = Subtarget->hasSSE4A(); |
523 bool HasAVX = Subtarget->hasAVX(); | 500 bool HasAVX = Subtarget->hasAVX(); |
524 bool HasAVX512 = Subtarget->hasAVX512(); | 501 bool HasAVX512 = Subtarget->hasAVX512(); |
525 bool HasVLX = Subtarget->hasVLX(); | 502 bool HasVLX = Subtarget->hasVLX(); |
565 else | 542 else |
566 Opc = HasAVX512 ? X86::VMOVSDZmr : | 543 Opc = HasAVX512 ? X86::VMOVSDZmr : |
567 HasAVX ? X86::VMOVSDmr : X86::MOVSDmr; | 544 HasAVX ? X86::VMOVSDmr : X86::MOVSDmr; |
568 } else | 545 } else |
569 Opc = X86::ST_Fp64m; | 546 Opc = X86::ST_Fp64m; |
547 break; | |
548 case MVT::x86mmx: | |
549 Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr; | |
570 break; | 550 break; |
571 case MVT::v4f32: | 551 case MVT::v4f32: |
572 if (Aligned) { | 552 if (Aligned) { |
573 if (IsNonTemporal) | 553 if (IsNonTemporal) |
574 Opc = HasVLX ? X86::VMOVNTPSZ128mr : | 554 Opc = HasVLX ? X86::VMOVNTPSZ128mr : |
934 // Iterate through the indices, folding what we can. Constants can be | 914 // Iterate through the indices, folding what we can. Constants can be |
935 // folded, and one dynamic index can be handled, if the scale is supported. | 915 // folded, and one dynamic index can be handled, if the scale is supported. |
936 for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); | 916 for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); |
937 i != e; ++i, ++GTI) { | 917 i != e; ++i, ++GTI) { |
938 const Value *Op = *i; | 918 const Value *Op = *i; |
939 if (StructType *STy = dyn_cast<StructType>(*GTI)) { | 919 if (StructType *STy = GTI.getStructTypeOrNull()) { |
940 const StructLayout *SL = DL.getStructLayout(STy); | 920 const StructLayout *SL = DL.getStructLayout(STy); |
941 Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue()); | 921 Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue()); |
942 continue; | 922 continue; |
943 } | 923 } |
944 | 924 |
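
The loop above folds constant GEP indices directly into the addressing-mode displacement: for a struct index it adds the element's byte offset as reported by the DataLayout's StructLayout (the only change here is querying the type via GTI.getStructTypeOrNull()). A small standalone sketch of that accumulation over a hypothetical layout description, not the LLVM API:

```cpp
#include <cstdint>
#include <vector>

// Hypothetical layout record: byte offset of each field after padding,
// roughly what StructLayout::getElementOffset would report.
struct Layout {
  std::vector<uint64_t> FieldOffsets;
};

// Fold a chain of constant struct-field indices into one displacement,
// the way the loop above accumulates them into AM.Disp.
uint64_t foldFieldIndices(const std::vector<Layout> &Nesting,
                          const std::vector<unsigned> &Indices) {
  uint64_t Disp = 0;
  for (size_t i = 0; i < Indices.size() && i < Nesting.size(); ++i)
    Disp += Nesting[i].FieldOffsets[Indices[i]];
  return Disp;
}

// Example: field 2 of the outer struct sits at offset 16 and field 1 of the
// inner struct at offset 4, so the folded displacement is 20:
//   foldFieldIndices({Layout{{0, 8, 16}}, Layout{{0, 4}}}, {2, 1}) == 20
```
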
1083 // RIP-relative addresses can't have additional register operands. | 1063 // RIP-relative addresses can't have additional register operands. |
1084 if (Subtarget->isPICStyleRIPRel() && | 1064 if (Subtarget->isPICStyleRIPRel() && |
1085 (AM.Base.Reg != 0 || AM.IndexReg != 0)) | 1065 (AM.Base.Reg != 0 || AM.IndexReg != 0)) |
1086 return false; | 1066 return false; |
1087 | 1067 |
1088 // Can't handle DLL Import. | |
1089 if (GV->hasDLLImportStorageClass()) | |
1090 return false; | |
1091 | |
1092 // Can't handle TLS. | 1068 // Can't handle TLS. |
1093 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) | 1069 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) |
1094 if (GVar->isThreadLocal()) | 1070 if (GVar->isThreadLocal()) |
1095 return false; | 1071 return false; |
1096 | 1072 |
1097 // Okay, we've committed to selecting this global. Set up the basic address. | 1073 // Okay, we've committed to selecting this global. Set up the basic address. |
1098 AM.GV = GV; | 1074 AM.GV = GV; |
1099 | 1075 |
1100 // No ABI requires an extra load for anything other than DLLImport, which | 1076 // Return a direct reference to the global. Fastisel can handle calls to |
1101 // we rejected above. Return a direct reference to the global. | 1077 // functions that require loads, such as dllimport and nonlazybind |
1078 // functions. | |
1102 if (Subtarget->isPICStyleRIPRel()) { | 1079 if (Subtarget->isPICStyleRIPRel()) { |
1103 // Use rip-relative addressing if we can. Above we verified that the | 1080 // Use rip-relative addressing if we can. Above we verified that the |
1104 // base and index registers are unused. | 1081 // base and index registers are unused. |
1105 assert(AM.Base.Reg == 0 && AM.IndexReg == 0); | 1082 assert(AM.Base.Reg == 0 && AM.IndexReg == 0); |
1106 AM.Base.Reg = X86::RIP; | 1083 AM.Base.Reg = X86::RIP; |
1193 CC != CallingConv::Fast && | 1170 CC != CallingConv::Fast && |
1194 CC != CallingConv::X86_FastCall && | 1171 CC != CallingConv::X86_FastCall && |
1195 CC != CallingConv::X86_StdCall && | 1172 CC != CallingConv::X86_StdCall && |
1196 CC != CallingConv::X86_ThisCall && | 1173 CC != CallingConv::X86_ThisCall && |
1197 CC != CallingConv::X86_64_SysV && | 1174 CC != CallingConv::X86_64_SysV && |
1198 CC != CallingConv::X86_64_Win64) | 1175 CC != CallingConv::Win64) |
1199 return false; | 1176 return false; |
1200 | 1177 |
1201 // Don't handle popping bytes if they don't fit the ret's immediate. | 1178 // Don't handle popping bytes if they don't fit the ret's immediate. |
1202 if (!isUInt<16>(X86MFInfo->getBytesToPopOnReturn())) | 1179 if (!isUInt<16>(X86MFInfo->getBytesToPopOnReturn())) |
1203 return false; | 1180 return false; |
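
The bail-out kept above ("!isUInt<16>(X86MFInfo->getBytesToPopOnReturn())") exists because RET's immediate form can only pop a 16-bit byte count. A standalone equivalent of that width check (isUIntN here is a local helper, not the llvm::isUInt template itself):

```cpp
#include <cstdint>

// Does V fit in N unsigned bits? Local equivalent of llvm::isUInt<N>(V).
template <unsigned N> bool isUIntN(uint64_t V) {
  return N >= 64 || V <= ((uint64_t(1) << N) - 1);
}

// Return lowering above only handles callee-popped byte counts that satisfy
// isUIntN<16>(BytesToPop), i.e. values RET imm16 can actually encode.
```
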
1523 return true; | 1500 return true; |
1524 } | 1501 } |
1525 | 1502 |
1526 X86::CondCode CC; | 1503 X86::CondCode CC; |
1527 bool SwapArgs; | 1504 bool SwapArgs; |
1528 std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate); | 1505 std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate); |
1529 assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); | 1506 assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); |
1530 unsigned Opc = X86::getSETFromCond(CC); | 1507 unsigned Opc = X86::getSETFromCond(CC); |
1531 | 1508 |
1532 if (SwapArgs) | 1509 if (SwapArgs) |
1533 std::swap(LHS, RHS); | 1510 std::swap(LHS, RHS); |
1550 if (ResultReg == 0) | 1527 if (ResultReg == 0) |
1551 return false; | 1528 return false; |
1552 | 1529 |
1553 // Handle zero-extension from i1 to i8, which is common. | 1530 // Handle zero-extension from i1 to i8, which is common. |
1554 MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType()); | 1531 MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType()); |
1555 if (SrcVT.SimpleTy == MVT::i1) { | 1532 if (SrcVT == MVT::i1) { |
1556 // Set the high bits to zero. | 1533 // Set the high bits to zero. |
1557 ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false); | 1534 ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false); |
1558 SrcVT = MVT::i8; | 1535 SrcVT = MVT::i8; |
1559 | 1536 |
1560 if (ResultReg == 0) | 1537 if (ResultReg == 0) |
1578 | 1555 |
1579 ResultReg = createResultReg(&X86::GR64RegClass); | 1556 ResultReg = createResultReg(&X86::GR64RegClass); |
1580 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG), | 1557 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG), |
1581 ResultReg) | 1558 ResultReg) |
1582 .addImm(0).addReg(Result32).addImm(X86::sub_32bit); | 1559 .addImm(0).addReg(Result32).addImm(X86::sub_32bit); |
1560 } else if (DstVT == MVT::i16) { | |
1561 // i8->i16 doesn't exist in the autogenerated isel table. Need to zero | |
1562 // extend to 32-bits and then extract down to 16-bits. | |
1563 unsigned Result32 = createResultReg(&X86::GR32RegClass); | |
1564 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVZX32rr8), | |
1565 Result32).addReg(ResultReg); | |
1566 | |
1567 ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true, | |
1568 X86::sub_16bit); | |
1583 } else if (DstVT != MVT::i8) { | 1569 } else if (DstVT != MVT::i8) { |
1584 ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND, | 1570 ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND, |
1571 ResultReg, /*Kill=*/true); | |
1572 if (ResultReg == 0) | |
1573 return false; | |
1574 } | |
1575 | |
1576 updateValueMap(I, ResultReg); | |
1577 return true; | |
1578 } | |
1579 | |
1580 bool X86FastISel::X86SelectSExt(const Instruction *I) { | |
1581 EVT DstVT = TLI.getValueType(DL, I->getType()); | |
1582 if (!TLI.isTypeLegal(DstVT)) | |
1583 return false; | |
1584 | |
1585 unsigned ResultReg = getRegForValue(I->getOperand(0)); | |
1586 if (ResultReg == 0) | |
1587 return false; | |
1588 | |
1589 // Handle sign-extension from i1 to i8. | |
1590 MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType()); | |
1591 if (SrcVT == MVT::i1) { | |
1592 // Set the high bits to zero. | |
1593 unsigned ZExtReg = fastEmitZExtFromI1(MVT::i8, ResultReg, | |
1594 /*TODO: Kill=*/false); | |
1595 if (ZExtReg == 0) | |
1596 return false; | |
1597 | |
1598 // Negate the result to make an 8-bit sign extended value. | |
1599 ResultReg = createResultReg(&X86::GR8RegClass); | |
1600 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::NEG8r), | |
1601 ResultReg).addReg(ZExtReg); | |
1602 | |
1603 SrcVT = MVT::i8; | |
1604 } | |
1605 | |
1606 if (DstVT == MVT::i16) { | |
1607 // i8->i16 doesn't exist in the autogenerated isel table. Need to sign | |
1608 // extend to 32-bits and then extract down to 16-bits. | |
1609 unsigned Result32 = createResultReg(&X86::GR32RegClass); | |
1610 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVSX32rr8), | |
1611 Result32).addReg(ResultReg); | |
1612 | |
1613 ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true, | |
1614 X86::sub_16bit); | |
1615 } else if (DstVT != MVT::i8) { | |
1616 ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::SIGN_EXTEND, | |
1585 ResultReg, /*Kill=*/true); | 1617 ResultReg, /*Kill=*/true); |
1586 if (ResultReg == 0) | 1618 if (ResultReg == 0) |
1587 return false; | 1619 return false; |
1588 } | 1620 } |
1589 | 1621 |
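
The inserted X86SelectSExt sign-extends an i1 by zero-extending the bit to i8 and then negating it (NEG8r), so 1 becomes 0xFF and 0 stays 0x00, and it handles i8->i16 by sign-extending to 32 bits (MOVSX32rr8) and extracting the low 16 bits, since no i8->i16 pattern exists in the autogenerated isel table. A standalone check of that arithmetic; plain C++ integer operations stand in for the instructions named in the comments:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // i1 -> i8: zero-extend the bit, then negate (the NEG8r trick above).
  for (int bit : {0, 1}) {
    uint8_t sext8 = static_cast<uint8_t>(-bit); // 0 -> 0x00, 1 -> 0xFF
    assert(sext8 == (bit ? 0xFF : 0x00));
  }

  // i8 -> i16: sign-extend to 32 bits, then keep only the low 16 bits
  // (MOVSX32rr8 followed by the sub_16bit extract above).
  int8_t v = -5;
  int32_t wide = v;                             // MOVSX32rr8
  uint16_t low16 = static_cast<uint16_t>(wide); // sub_16bit
  assert(static_cast<int16_t>(low16) == -5);
  return 0;
}
```
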
1650 break; | 1682 break; |
1651 } | 1683 } |
1652 | 1684 |
1653 bool SwapArgs; | 1685 bool SwapArgs; |
1654 unsigned BranchOpc; | 1686 unsigned BranchOpc; |
1655 std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate); | 1687 std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate); |
1656 assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); | 1688 assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); |
1657 | 1689 |
1658 BranchOpc = X86::GetCondBranchFromCond(CC); | 1690 BranchOpc = X86::GetCondBranchFromCond(CC); |
1659 if (SwapArgs) | 1691 if (SwapArgs) |
1660 std::swap(CmpLHS, CmpRHS); | 1692 std::swap(CmpLHS, CmpRHS); |
1732 if (OpReg == 0) return false; | 1764 if (OpReg == 0) return false; |
1733 | 1765 |
1734 // In case OpReg is a K register, COPY to a GPR | 1766 // In case OpReg is a K register, COPY to a GPR |
1735 if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) { | 1767 if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) { |
1736 unsigned KOpReg = OpReg; | 1768 unsigned KOpReg = OpReg; |
1737 OpReg = createResultReg(&X86::GR8RegClass); | 1769 OpReg = createResultReg(&X86::GR32RegClass); |
1738 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, | 1770 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
1739 TII.get(TargetOpcode::COPY), OpReg) | 1771 TII.get(TargetOpcode::COPY), OpReg) |
1740 .addReg(KOpReg); | 1772 .addReg(KOpReg); |
1773 OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, /*Kill=*/true, | |
1774 X86::sub_8bit); | |
1741 } | 1775 } |
1742 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri)) | 1776 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri)) |
1743 .addReg(OpReg) | 1777 .addReg(OpReg) |
1744 .addImm(1); | 1778 .addImm(1); |
1745 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1)) | 1779 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1)) |
1925 TII.get(X86::MOV32r0), Zero32); | 1959 TII.get(X86::MOV32r0), Zero32); |
1926 | 1960 |
1927 // Copy the zero into the appropriate sub/super/identical physical | 1961 // Copy the zero into the appropriate sub/super/identical physical |
1928 // register. Unfortunately the operations needed are not uniform enough | 1962 // register. Unfortunately the operations needed are not uniform enough |
1929 // to fit neatly into the table above. | 1963 // to fit neatly into the table above. |
1930 if (VT.SimpleTy == MVT::i16) { | 1964 if (VT == MVT::i16) { |
1931 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, | 1965 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
1932 TII.get(Copy), TypeEntry.HighInReg) | 1966 TII.get(Copy), TypeEntry.HighInReg) |
1933 .addReg(Zero32, 0, X86::sub_16bit); | 1967 .addReg(Zero32, 0, X86::sub_16bit); |
1934 } else if (VT.SimpleTy == MVT::i32) { | 1968 } else if (VT == MVT::i32) { |
1935 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, | 1969 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
1936 TII.get(Copy), TypeEntry.HighInReg) | 1970 TII.get(Copy), TypeEntry.HighInReg) |
1937 .addReg(Zero32); | 1971 .addReg(Zero32); |
1938 } else if (VT.SimpleTy == MVT::i64) { | 1972 } else if (VT == MVT::i64) { |
1939 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, | 1973 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
1940 TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg) | 1974 TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg) |
1941 .addImm(0).addReg(Zero32).addImm(X86::sub_32bit); | 1975 .addImm(0).addReg(Zero32).addImm(X86::sub_32bit); |
1942 } | 1976 } |
1943 } | 1977 } |
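
The code above materializes a 32-bit zero (MOV32r0) and moves it into the high half of the dividend pair before an unsigned divide, using a sub-register copy for i16/i32 and SUBREG_TO_REG for i64. The reason is x86 DIV semantics: the dividend is the double-width value high:low, so an ordinary single-width udiv needs the high register to be zero or the quotient can overflow and fault. A small standalone model of that behaviour:

```cpp
#include <cassert>
#include <cstdint>

// Model of 32-bit unsigned DIV: dividend is EDX:EAX, quotient must fit 32 bits.
// Returns false where real hardware would raise #DE.
bool div32(uint32_t edx, uint32_t eax, uint32_t divisor,
           uint32_t &quot, uint32_t &rem) {
  if (divisor == 0)
    return false;
  uint64_t dividend = (uint64_t(edx) << 32) | eax;
  uint64_t q = dividend / divisor;
  if (q > UINT32_MAX)
    return false; // quotient overflow -> #DE
  quot = static_cast<uint32_t>(q);
  rem = static_cast<uint32_t>(dividend % divisor);
  return true;
}

int main() {
  uint32_t q, r;
  assert(div32(0, 100, 7, q, r) && q == 14 && r == 2); // EDX zeroed: plain udiv
  assert(!div32(1, 0, 1, q, r));                       // stale EDX: would fault
  return 0;
}
```
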
1948 // For i8 remainder, we can't reference AH directly, as we'll end | 1982 // For i8 remainder, we can't reference AH directly, as we'll end |
1949 // up with bogus copies like %R9B = COPY %AH. Reference AX | 1983 // up with bogus copies like %R9B = COPY %AH. Reference AX |
1950 // instead to prevent AH references in a REX instruction. | 1984 // instead to prevent AH references in a REX instruction. |
1951 // | 1985 // |
1952 // The current assumption of the fast register allocator is that isel | 1986 // The current assumption of the fast register allocator is that isel |
1953 // won't generate explicit references to the GPR8_NOREX registers. If | 1987 // won't generate explicit references to the GR8_NOREX registers. If |
1954 // the allocator and/or the backend get enhanced to be more robust in | 1988 // the allocator and/or the backend get enhanced to be more robust in |
1955 // that regard, this can be, and should be, removed. | 1989 // that regard, this can be, and should be, removed. |
1956 unsigned ResultReg = 0; | 1990 unsigned ResultReg = 0; |
1957 if ((I->getOpcode() == Instruction::SRem || | 1991 if ((I->getOpcode() == Instruction::SRem || |
1958 I->getOpcode() == Instruction::URem) && | 1992 I->getOpcode() == Instruction::URem) && |
2021 Predicate = CmpInst::ICMP_NE; | 2055 Predicate = CmpInst::ICMP_NE; |
2022 break; | 2056 break; |
2023 } | 2057 } |
2024 | 2058 |
2025 bool NeedSwap; | 2059 bool NeedSwap; |
2026 std::tie(CC, NeedSwap) = getX86ConditionCode(Predicate); | 2060 std::tie(CC, NeedSwap) = X86::getX86ConditionCode(Predicate); |
2027 assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); | 2061 assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); |
2028 | 2062 |
2029 const Value *CmpLHS = CI->getOperand(0); | 2063 const Value *CmpLHS = CI->getOperand(0); |
2030 const Value *CmpRHS = CI->getOperand(1); | 2064 const Value *CmpRHS = CI->getOperand(1); |
2031 if (NeedSwap) | 2065 if (NeedSwap) |
2076 bool CondIsKill = hasTrivialKill(Cond); | 2110 bool CondIsKill = hasTrivialKill(Cond); |
2077 | 2111 |
2078 // In case OpReg is a K register, COPY to a GPR | 2112 // In case OpReg is a K register, COPY to a GPR |
2079 if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) { | 2113 if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) { |
2080 unsigned KCondReg = CondReg; | 2114 unsigned KCondReg = CondReg; |
2081 CondReg = createResultReg(&X86::GR8RegClass); | 2115 CondReg = createResultReg(&X86::GR32RegClass); |
2082 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, | 2116 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
2083 TII.get(TargetOpcode::COPY), CondReg) | 2117 TII.get(TargetOpcode::COPY), CondReg) |
2084 .addReg(KCondReg, getKillRegState(CondIsKill)); | 2118 .addReg(KCondReg, getKillRegState(CondIsKill)); |
2119 CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true, | |
2120 X86::sub_8bit); | |
2085 } | 2121 } |
2086 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri)) | 2122 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri)) |
2087 .addReg(CondReg, getKillRegState(CondIsKill)) | 2123 .addReg(CondReg, getKillRegState(CondIsKill)) |
2088 .addImm(1); | 2124 .addImm(1); |
2089 } | 2125 } |
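
Both the branch and select paths now copy a VK1 mask register to a GR32 and extract sub_8bit before testing it, and the test itself is TEST8ri with immediate 1 rather than a compare against zero, because only bit 0 of a materialized i1 is defined. The predicate that TEST8ri/JNE implements, restated standalone:

```cpp
#include <cstdint>

// Only bit 0 of an i1 held in a byte register is meaningful; the upper bits
// may hold garbage, so the condition is "low bit set", not "byte != 0".
bool conditionHolds(uint8_t i1InByte) {
  return (i1InByte & 1) != 0; // what TEST8ri reg, 1 / JNE checks
}
```
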
2098 bool LHSIsKill = hasTrivialKill(LHS); | 2134 bool LHSIsKill = hasTrivialKill(LHS); |
2099 | 2135 |
2100 if (!LHSReg || !RHSReg) | 2136 if (!LHSReg || !RHSReg) |
2101 return false; | 2137 return false; |
2102 | 2138 |
2103 unsigned Opc = X86::getCMovFromCond(CC, RC->getSize()); | 2139 const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo(); |
2140 unsigned Opc = X86::getCMovFromCond(CC, TRI.getRegSizeInBits(*RC)/8); | |
2104 unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill, | 2141 unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill, |
2105 LHSReg, LHSIsKill); | 2142 LHSReg, LHSIsKill); |
2106 updateValueMap(I, ResultReg); | 2143 updateValueMap(I, ResultReg); |
2107 return true; | 2144 return true; |
2108 } | 2145 } |
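
The only change in this hunk is how the operand width is obtained: RC->getSize() is replaced by TRI.getRegSizeInBits(*RC)/8 before asking getCMovFromCond for an opcode, and the opcode family is keyed on that width in bytes. A toy stand-in for that keying; the returned names are illustrative, not guaranteed to match the X86:: enumerators:

```cpp
#include <stdexcept>
#include <string>

// Pick a CMOVcc mnemonic by operand size in bytes (2, 4 or 8). There is no
// 8-bit CMOV, which is why i8 selects go through a different path.
std::string cmovForSize(const std::string &Cond, unsigned RegSizeInBits) {
  switch (RegSizeInBits / 8) {
  case 2: return "CMOV" + Cond + "16rr";
  case 4: return "CMOV" + Cond + "32rr";
  case 8: return "CMOV" + Cond + "64rr";
  default: throw std::invalid_argument("no CMOV at this operand width");
  }
}
```
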
2139 } | 2176 } |
2140 | 2177 |
2141 unsigned CC; | 2178 unsigned CC; |
2142 bool NeedSwap; | 2179 bool NeedSwap; |
2143 std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate); | 2180 std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate); |
2144 if (CC > 7) | 2181 if (CC > 7 && !Subtarget->hasAVX()) |
2145 return false; | 2182 return false; |
2146 | 2183 |
2147 if (NeedSwap) | 2184 if (NeedSwap) |
2148 std::swap(CmpLHS, CmpRHS); | 2185 std::swap(CmpLHS, CmpRHS); |
2149 | 2186 |
2150 // Choose the SSE instruction sequence based on data type (float or double). | 2187 // Choose the SSE instruction sequence based on data type (float or double). |
2151 static const uint16_t OpcTable[2][4] = { | 2188 static const uint16_t OpcTable[2][4] = { |
2152 { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr }, | 2189 { X86::CMPSSrr, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr }, |
2153 { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr } | 2190 { X86::CMPSDrr, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr } |
2154 }; | 2191 }; |
2155 | 2192 |
2156 const uint16_t *Opc = nullptr; | 2193 const uint16_t *Opc = nullptr; |
2157 switch (RetVT.SimpleTy) { | 2194 switch (RetVT.SimpleTy) { |
2158 default: return false; | 2195 default: return false; |
2178 if (!LHSReg || !RHSReg || !CmpLHS || !CmpRHS) | 2215 if (!LHSReg || !RHSReg || !CmpLHS || !CmpRHS) |
2179 return false; | 2216 return false; |
2180 | 2217 |
2181 const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT); | 2218 const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT); |
2182 unsigned ResultReg; | 2219 unsigned ResultReg; |
2183 | 2220 |
2184 if (Subtarget->hasAVX()) { | 2221 if (Subtarget->hasAVX512()) { |
2185 const TargetRegisterClass *FR32 = &X86::FR32RegClass; | 2222 // If we have AVX512 we can use a mask compare and masked movss/sd. |
2223 const TargetRegisterClass *VR128X = &X86::VR128XRegClass; | |
2224 const TargetRegisterClass *VK1 = &X86::VK1RegClass; | |
2225 | |
2226 unsigned CmpOpcode = | |
2227 (RetVT == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr; | |
2228 unsigned CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpLHSIsKill, | |
2229 CmpRHSReg, CmpRHSIsKill, CC); | |
2230 | |
2231 // Need an IMPLICIT_DEF for the input that is used to generate the upper | |
2232 // bits of the result register since its not based on any of the inputs. | |
2233 unsigned ImplicitDefReg = createResultReg(VR128X); | |
2234 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, | |
2235 TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg); | |
2236 | |
2237 // Place RHSReg is the passthru of the masked movss/sd operation and put | |
2238 // LHS in the input. The mask input comes from the compare. | |
2239 unsigned MovOpcode = | |
2240 (RetVT == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk; | |
2241 unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, RHSIsKill, | |
2242 CmpReg, true, ImplicitDefReg, true, | |
2243 LHSReg, LHSIsKill); | |
2244 | |
2245 ResultReg = createResultReg(RC); | |
2246 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, | |
2247 TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg); | |
2248 | |
2249 } else if (Subtarget->hasAVX()) { | |
2186 const TargetRegisterClass *VR128 = &X86::VR128RegClass; | 2250 const TargetRegisterClass *VR128 = &X86::VR128RegClass; |
2187 | 2251 |
2188 // If we have AVX, create 1 blendv instead of 3 logic instructions. | 2252 // If we have AVX, create 1 blendv instead of 3 logic instructions. |
2189 // Blendv was introduced with SSE 4.1, but the 2 register form implicitly | 2253 // Blendv was introduced with SSE 4.1, but the 2 register form implicitly |
2190 // uses XMM0 as the selection register. That may need just as many | 2254 // uses XMM0 as the selection register. That may need just as many |
2191 // instructions as the AND/ANDN/OR sequence due to register moves, so | 2255 // instructions as the AND/ANDN/OR sequence due to register moves, so |
2192 // don't bother. | 2256 // don't bother. |
2193 unsigned CmpOpcode = | 2257 unsigned CmpOpcode = |
2194 (RetVT.SimpleTy == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr; | 2258 (RetVT == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr; |
2195 unsigned BlendOpcode = | 2259 unsigned BlendOpcode = |
2196 (RetVT.SimpleTy == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr; | 2260 (RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr; |
2197 | 2261 |
2198 unsigned CmpReg = fastEmitInst_rri(CmpOpcode, FR32, CmpLHSReg, CmpLHSIsKill, | 2262 unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill, |
2199 CmpRHSReg, CmpRHSIsKill, CC); | 2263 CmpRHSReg, CmpRHSIsKill, CC); |
2200 unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill, | 2264 unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill, |
2201 LHSReg, LHSIsKill, CmpReg, true); | 2265 LHSReg, LHSIsKill, CmpReg, true); |
2202 ResultReg = createResultReg(RC); | 2266 ResultReg = createResultReg(RC); |
2203 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, | 2267 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
2204 TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg); | 2268 TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg); |
2205 } else { | 2269 } else { |
2270 const TargetRegisterClass *VR128 = &X86::VR128RegClass; | |
2206 unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill, | 2271 unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill, |
2207 CmpRHSReg, CmpRHSIsKill, CC); | 2272 CmpRHSReg, CmpRHSIsKill, CC); |
2208 unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false, | 2273 unsigned AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, /*IsKill=*/false, |
2209 LHSReg, LHSIsKill); | 2274 LHSReg, LHSIsKill); |
2210 unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true, | 2275 unsigned AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, /*IsKill=*/true, |
2211 RHSReg, RHSIsKill); | 2276 RHSReg, RHSIsKill); |
2212 ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true, | 2277 unsigned OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, /*IsKill=*/true, |
2213 AndReg, /*IsKill=*/true); | 2278 AndReg, /*IsKill=*/true); |
2279 ResultReg = createResultReg(RC); | |
2280 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, | |
2281 TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg); | |
2214 } | 2282 } |
2215 updateValueMap(I, ResultReg); | 2283 updateValueMap(I, ResultReg); |
2216 return true; | 2284 return true; |
2217 } | 2285 } |
2218 | 2286 |
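
The rewritten scalar-FP select above has three tiers: AVX-512 uses a k-mask compare plus a masked VMOVSS/VMOVSD (via the new fastEmitInst_rrrr), AVX uses a single VBLENDV, and plain SSE keeps the classic CMP/AND/ANDN/OR sequence, where CMPSS/CMPSD produces an all-ones or all-zeros mask and the result is (mask & lhs) | (~mask & rhs). A worked standalone check of that bitwise-select identity on a float:

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

// Bitwise select, as the SSE fallback computes it: the CMPSS result is an
// all-ones mask when the predicate holds, all-zeros otherwise.
float bitwiseSelect(bool pred, float lhs, float rhs) {
  uint32_t mask = pred ? 0xFFFFFFFFu : 0u; // CMPSS/CMPSD
  uint32_t l, r;
  std::memcpy(&l, &lhs, sizeof l);
  std::memcpy(&r, &rhs, sizeof r);
  uint32_t bits = (mask & l) | (~mask & r); // ANDPS, ANDNPS, ORPS
  float out;
  std::memcpy(&out, &bits, sizeof out);
  return out;
}

int main() {
  assert(bitwiseSelect(true, 1.5f, -2.0f) == 1.5f);
  assert(bitwiseSelect(false, 1.5f, -2.0f) == -2.0f);
  return 0;
}
```
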
2236 // same basic block (values defined in other basic blocks may not have | 2304 // same basic block (values defined in other basic blocks may not have |
2237 // initialized registers). | 2305 // initialized registers). |
2238 const auto *CI = dyn_cast<CmpInst>(Cond); | 2306 const auto *CI = dyn_cast<CmpInst>(Cond); |
2239 if (CI && (CI->getParent() == I->getParent())) { | 2307 if (CI && (CI->getParent() == I->getParent())) { |
2240 bool NeedSwap; | 2308 bool NeedSwap; |
2241 std::tie(CC, NeedSwap) = getX86ConditionCode(CI->getPredicate()); | 2309 std::tie(CC, NeedSwap) = X86::getX86ConditionCode(CI->getPredicate()); |
2242 if (CC > X86::LAST_VALID_COND) | 2310 if (CC > X86::LAST_VALID_COND) |
2243 return false; | 2311 return false; |
2244 | 2312 |
2245 const Value *CmpLHS = CI->getOperand(0); | 2313 const Value *CmpLHS = CI->getOperand(0); |
2246 const Value *CmpRHS = CI->getOperand(1); | 2314 const Value *CmpRHS = CI->getOperand(1); |
2258 bool CondIsKill = hasTrivialKill(Cond); | 2326 bool CondIsKill = hasTrivialKill(Cond); |
2259 | 2327 |
2260 // In case OpReg is a K register, COPY to a GPR | 2328 // In case OpReg is a K register, COPY to a GPR |
2261 if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) { | 2329 if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) { |
2262 unsigned KCondReg = CondReg; | 2330 unsigned KCondReg = CondReg; |
2263 CondReg = createResultReg(&X86::GR8RegClass); | 2331 CondReg = createResultReg(&X86::GR32RegClass); |
2264 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, | 2332 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
2265 TII.get(TargetOpcode::COPY), CondReg) | 2333 TII.get(TargetOpcode::COPY), CondReg) |
2266 .addReg(KCondReg, getKillRegState(CondIsKill)); | 2334 .addReg(KCondReg, getKillRegState(CondIsKill)); |
2335 CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true, | |
2336 X86::sub_8bit); | |
2267 } | 2337 } |
2268 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri)) | 2338 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri)) |
2269 .addReg(CondReg, getKillRegState(CondIsKill)) | 2339 .addReg(CondReg, getKillRegState(CondIsKill)) |
2270 .addImm(1); | 2340 .addImm(1); |
2271 } | 2341 } |
2384 | 2454 |
2385 unsigned OpReg = getRegForValue(I->getOperand(0)); | 2455 unsigned OpReg = getRegForValue(I->getOperand(0)); |
2386 if (OpReg == 0) | 2456 if (OpReg == 0) |
2387 return false; | 2457 return false; |
2388 | 2458 |
2459 unsigned ImplicitDefReg; | |
2460 if (Subtarget->hasAVX()) { | |
2461 ImplicitDefReg = createResultReg(RC); | |
2462 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, | |
2463 TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg); | |
2464 | |
2465 } | |
2466 | |
2389 unsigned ResultReg = createResultReg(RC); | 2467 unsigned ResultReg = createResultReg(RC); |
2390 MachineInstrBuilder MIB; | 2468 MachineInstrBuilder MIB; |
2391 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc), | 2469 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc), |
2392 ResultReg); | 2470 ResultReg); |
2471 | |
2393 if (Subtarget->hasAVX()) | 2472 if (Subtarget->hasAVX()) |
2394 MIB.addReg(OpReg); | 2473 MIB.addReg(ImplicitDefReg); |
2474 | |
2395 MIB.addReg(OpReg); | 2475 MIB.addReg(OpReg); |
2396 updateValueMap(I, ResultReg); | 2476 updateValueMap(I, ResultReg); |
2397 return true; | 2477 return true; |
2398 } | 2478 } |
2399 | 2479 |
2438 // Truncate from i8 to i1; no code needed. | 2518 // Truncate from i8 to i1; no code needed. |
2439 updateValueMap(I, InputReg); | 2519 updateValueMap(I, InputReg); |
2440 return true; | 2520 return true; |
2441 } | 2521 } |
2442 | 2522 |
2443 bool KillInputReg = false; | |
2444 if (!Subtarget->is64Bit()) { | |
2445 // If we're on x86-32; we can't extract an i8 from a general register. | |
2446 // First issue a copy to GR16_ABCD or GR32_ABCD. | |
2447 const TargetRegisterClass *CopyRC = | |
2448 (SrcVT == MVT::i16) ? &X86::GR16_ABCDRegClass : &X86::GR32_ABCDRegClass; | |
2449 unsigned CopyReg = createResultReg(CopyRC); | |
2450 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, | |
2451 TII.get(TargetOpcode::COPY), CopyReg).addReg(InputReg); | |
2452 InputReg = CopyReg; | |
2453 KillInputReg = true; | |
2454 } | |
2455 | |
2456 // Issue an extract_subreg. | 2523 // Issue an extract_subreg. |
2457 unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8, | 2524 unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8, |
2458 InputReg, KillInputReg, | 2525 InputReg, false, |
2459 X86::sub_8bit); | 2526 X86::sub_8bit); |
2460 if (!ResultReg) | 2527 if (!ResultReg) |
2461 return false; | 2528 return false; |
2462 | 2529 |
2463 updateValueMap(I, ResultReg); | 2530 updateValueMap(I, ResultReg); |
2975 return false; | 3042 return false; |
2976 | 3043 |
2977 if (!Subtarget->is64Bit()) | 3044 if (!Subtarget->is64Bit()) |
2978 return false; | 3045 return false; |
2979 | 3046 |
3047 if (Subtarget->useSoftFloat()) | |
3048 return false; | |
3049 | |
2980 // Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments. | 3050 // Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments. |
2981 unsigned GPRCnt = 0; | 3051 unsigned GPRCnt = 0; |
2982 unsigned FPRCnt = 0; | 3052 unsigned FPRCnt = 0; |
2983 unsigned Idx = 0; | |
2984 for (auto const &Arg : F->args()) { | 3053 for (auto const &Arg : F->args()) { |
2985 // The first argument is at index 1. | 3054 if (Arg.hasAttribute(Attribute::ByVal) || |
2986 ++Idx; | 3055 Arg.hasAttribute(Attribute::InReg) || |
2987 if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) || | 3056 Arg.hasAttribute(Attribute::StructRet) || |
2988 F->getAttributes().hasAttribute(Idx, Attribute::InReg) || | 3057 Arg.hasAttribute(Attribute::SwiftSelf) || |
2989 F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || | 3058 Arg.hasAttribute(Attribute::SwiftError) || |
2990 F->getAttributes().hasAttribute(Idx, Attribute::SwiftSelf) || | 3059 Arg.hasAttribute(Attribute::Nest)) |
2991 F->getAttributes().hasAttribute(Idx, Attribute::SwiftError) || | |
2992 F->getAttributes().hasAttribute(Idx, Attribute::Nest)) | |
2993 return false; | 3060 return false; |
2994 | 3061 |
2995 Type *ArgTy = Arg.getType(); | 3062 Type *ArgTy = Arg.getType(); |
2996 if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) | 3063 if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) |
2997 return false; | 3064 return false; |
3066 if (CC == CallingConv::Fast || CC == CallingConv::GHC || | 3133 if (CC == CallingConv::Fast || CC == CallingConv::GHC || |
3067 CC == CallingConv::HiPE) | 3134 CC == CallingConv::HiPE) |
3068 return 0; | 3135 return 0; |
3069 | 3136 |
3070 if (CS) | 3137 if (CS) |
3071 if (CS->arg_empty() || !CS->paramHasAttr(1, Attribute::StructRet) || | 3138 if (CS->arg_empty() || !CS->paramHasAttr(0, Attribute::StructRet) || |
3072 CS->paramHasAttr(1, Attribute::InReg) || Subtarget->isTargetMCU()) | 3139 CS->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU()) |
3073 return 0; | 3140 return 0; |
3074 | 3141 |
3075 return 4; | 3142 return 4; |
3076 } | 3143 } |
3077 | 3144 |
3088 MCSymbol *Symbol = CLI.Symbol; | 3155 MCSymbol *Symbol = CLI.Symbol; |
3089 | 3156 |
3090 bool Is64Bit = Subtarget->is64Bit(); | 3157 bool Is64Bit = Subtarget->is64Bit(); |
3091 bool IsWin64 = Subtarget->isCallingConvWin64(CC); | 3158 bool IsWin64 = Subtarget->isCallingConvWin64(CC); |
3092 | 3159 |
3160 const CallInst *CI = | |
3161 CLI.CS ? dyn_cast<CallInst>(CLI.CS->getInstruction()) : nullptr; | |
3162 const Function *CalledFn = CI ? CI->getCalledFunction() : nullptr; | |
3163 | |
3164 // Functions with no_caller_saved_registers that need special handling. | |
3165 if ((CI && CI->hasFnAttr("no_caller_saved_registers")) || | |
3166 (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers"))) | |
3167 return false; | |
3168 | |
3093 // Handle only C, fastcc, and webkit_js calling conventions for now. | 3169 // Handle only C, fastcc, and webkit_js calling conventions for now. |
3094 switch (CC) { | 3170 switch (CC) { |
3095 default: return false; | 3171 default: return false; |
3096 case CallingConv::C: | 3172 case CallingConv::C: |
3097 case CallingConv::Fast: | 3173 case CallingConv::Fast: |
3098 case CallingConv::WebKit_JS: | 3174 case CallingConv::WebKit_JS: |
3099 case CallingConv::Swift: | 3175 case CallingConv::Swift: |
3100 case CallingConv::X86_FastCall: | 3176 case CallingConv::X86_FastCall: |
3101 case CallingConv::X86_StdCall: | 3177 case CallingConv::X86_StdCall: |
3102 case CallingConv::X86_ThisCall: | 3178 case CallingConv::X86_ThisCall: |
3103 case CallingConv::X86_64_Win64: | 3179 case CallingConv::Win64: |
3104 case CallingConv::X86_64_SysV: | 3180 case CallingConv::X86_64_SysV: |
3105 break; | 3181 break; |
3106 } | 3182 } |
3107 | 3183 |
3108 // Allow SelectionDAG isel to handle tail calls. | 3184 // Allow SelectionDAG isel to handle tail calls. |
3191 unsigned NumBytes = CCInfo.getAlignedCallFrameSize(); | 3267 unsigned NumBytes = CCInfo.getAlignedCallFrameSize(); |
3192 | 3268 |
3193 // Issue CALLSEQ_START | 3269 // Issue CALLSEQ_START |
3194 unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); | 3270 unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); |
3195 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) | 3271 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) |
3196 .addImm(NumBytes).addImm(0); | 3272 .addImm(NumBytes).addImm(0).addImm(0); |
3197 | 3273 |
3198 // Walk the register/memloc assignments, inserting copies/loads. | 3274 // Walk the register/memloc assignments, inserting copies/loads. |
3199 const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); | 3275 const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); |
3200 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { | 3276 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { |
3201 CCValAssign const &VA = ArgLocs[i]; | 3277 CCValAssign const &VA = ArgLocs[i]; |
3212 case CCValAssign::Full: break; | 3288 case CCValAssign::Full: break; |
3213 case CCValAssign::SExt: { | 3289 case CCValAssign::SExt: { |
3214 assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() && | 3290 assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() && |
3215 "Unexpected extend"); | 3291 "Unexpected extend"); |
3216 | 3292 |
3217 if (ArgVT.SimpleTy == MVT::i1) | 3293 if (ArgVT == MVT::i1) |
3218 return false; | 3294 return false; |
3219 | 3295 |
3220 bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg, | 3296 bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg, |
3221 ArgVT, ArgReg); | 3297 ArgVT, ArgReg); |
3222 assert(Emitted && "Failed to emit a sext!"); (void)Emitted; | 3298 assert(Emitted && "Failed to emit a sext!"); (void)Emitted; |
3226 case CCValAssign::ZExt: { | 3302 case CCValAssign::ZExt: { |
3227 assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() && | 3303 assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() && |
3228 "Unexpected extend"); | 3304 "Unexpected extend"); |
3229 | 3305 |
3230 // Handle zero-extension from i1 to i8, which is common. | 3306 // Handle zero-extension from i1 to i8, which is common. |
3231 if (ArgVT.SimpleTy == MVT::i1) { | 3307 if (ArgVT == MVT::i1) { |
3232 // Set the high bits to zero. | 3308 // Set the high bits to zero. |
3233 ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false); | 3309 ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false); |
3234 ArgVT = MVT::i8; | 3310 ArgVT = MVT::i8; |
3235 | 3311 |
3236 if (ArgReg == 0) | 3312 if (ArgReg == 0) |
3372 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc)) | 3448 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc)) |
3373 .addReg(CalleeOp); | 3449 .addReg(CalleeOp); |
3374 } else { | 3450 } else { |
3375 // Direct call. | 3451 // Direct call. |
3376 assert(GV && "Not a direct call"); | 3452 assert(GV && "Not a direct call"); |
3377 unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32; | |
3378 | |
3379 // See if we need any target-specific flags on the GV operand. | 3453 // See if we need any target-specific flags on the GV operand. |
3380 unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV); | 3454 unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV); |
3381 // Ignore NonLazyBind attribute in FastISel | 3455 // Ignore NonLazyBind attribute in FastISel |
3382 if (OpFlags == X86II::MO_GOTPCREL) | 3456 if (OpFlags == X86II::MO_GOTPCREL) |
3383 OpFlags = 0; | 3457 OpFlags = 0; |
3384 | 3458 |
3459 // This will be a direct call, or an indirect call through memory for | |
3460 // NonLazyBind calls or dllimport calls. | |
3461 bool NeedLoad = OpFlags == X86II::MO_DLLIMPORT; | |
3462 unsigned CallOpc = NeedLoad | |
3463 ? (Is64Bit ? X86::CALL64m : X86::CALL32m) | |
3464 : (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32); | |
3465 | |
3385 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc)); | 3466 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc)); |
3467 if (NeedLoad) | |
3468 MIB.addReg(Is64Bit ? X86::RIP : 0).addImm(1).addReg(0); | |
3386 if (Symbol) | 3469 if (Symbol) |
3387 MIB.addSym(Symbol, OpFlags); | 3470 MIB.addSym(Symbol, OpFlags); |
3388 else | 3471 else |
3389 MIB.addGlobalAddress(GV, 0, OpFlags); | 3472 MIB.addGlobalAddress(GV, 0, OpFlags); |
3473 if (NeedLoad) | |
3474 MIB.addReg(0); | |
3390 } | 3475 } |
3391 | 3476 |
3392 // Add a register mask operand representing the call-preserved registers. | 3477 // Add a register mask operand representing the call-preserved registers. |
3393 // Proper defs for return values will be added by setPhysRegsDeadExcept(). | 3478 // Proper defs for return values will be added by setPhysRegsDeadExcept(). |
3394 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); | 3479 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); |
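
For dllimport callees the hunk above now emits CALL64m/CALL32m, an indirect call whose target is loaded from the import-address-table slot, instead of a direct CALL64pcrel32; the extra operands added around the global address (base register, scale 1, index 0, then segment 0 afterwards) form the usual five-operand x86 memory reference, RIP-relative in 64-bit mode. A small descriptive model of that operand quintuple; the field names and the __imp_ naming convention are illustrative, not the MachineOperand layout:

```cpp
#include <string>

// The memory reference fed to CALL64m above: base, scale, index, displacement
// (the dllimport global, i.e. its IAT slot), segment.
struct X86MemRef {
  std::string BaseReg;  // "RIP" in 64-bit mode, empty on 32-bit
  unsigned Scale;       // 1
  std::string IndexReg; // none
  std::string Disp;     // the imported symbol's IAT entry
  std::string SegReg;   // none
};

// Roughly: call qword ptr [rip + __imp_foo]
X86MemRef dllimportCallee(const std::string &Sym) {
  return {"RIP", 1, "", "__imp_" + Sym, ""};
}
```
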
3424 unsigned ResultReg = FuncInfo.CreateRegs(CLI.RetTy); | 3509 unsigned ResultReg = FuncInfo.CreateRegs(CLI.RetTy); |
3425 for (unsigned i = 0; i != RVLocs.size(); ++i) { | 3510 for (unsigned i = 0; i != RVLocs.size(); ++i) { |
3426 CCValAssign &VA = RVLocs[i]; | 3511 CCValAssign &VA = RVLocs[i]; |
3427 EVT CopyVT = VA.getValVT(); | 3512 EVT CopyVT = VA.getValVT(); |
3428 unsigned CopyReg = ResultReg + i; | 3513 unsigned CopyReg = ResultReg + i; |
3514 unsigned SrcReg = VA.getLocReg(); | |
3429 | 3515 |
3430 // If this is x86-64, and we disabled SSE, we can't return FP values | 3516 // If this is x86-64, and we disabled SSE, we can't return FP values |
3431 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) && | 3517 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) && |
3432 ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) { | 3518 ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) { |
3433 report_fatal_error("SSE register return with SSE disabled"); | 3519 report_fatal_error("SSE register return with SSE disabled"); |
3434 } | 3520 } |
3435 | 3521 |
3522 // If the return value is an i1 and AVX-512 is enabled, we need | |
3523 // to do a fixup to make the copy legal. | |
3524 if (CopyVT == MVT::i1 && SrcReg == X86::AL && Subtarget->hasAVX512()) { | |
3525 // Need to copy to a GR32 first. | |
3526 // TODO: MOVZX isn't great here. We don't care about the upper bits. | |
3527 SrcReg = createResultReg(&X86::GR32RegClass); | |
3528 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, | |
3529 TII.get(X86::MOVZX32rr8), SrcReg).addReg(X86::AL); | |
3530 } | |
3531 | |
3436 // If we prefer to use the value in xmm registers, copy it out as f80 and | 3532 // If we prefer to use the value in xmm registers, copy it out as f80 and |
3437 // use a truncate to move it from fp stack reg to xmm reg. | 3533 // use a truncate to move it from fp stack reg to xmm reg. |
3438 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) && | 3534 if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) && |
3439 isScalarFPTypeInSSEReg(VA.getValVT())) { | 3535 isScalarFPTypeInSSEReg(VA.getValVT())) { |
3440 CopyVT = MVT::f80; | 3536 CopyVT = MVT::f80; |
3441 CopyReg = createResultReg(&X86::RFP80RegClass); | 3537 CopyReg = createResultReg(&X86::RFP80RegClass); |
3442 } | 3538 } |
3443 | 3539 |
3444 // Copy out the result. | 3540 // Copy out the result. |
3445 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, | 3541 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, |
3446 TII.get(TargetOpcode::COPY), CopyReg).addReg(VA.getLocReg()); | 3542 TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg); |
3447 InRegs.push_back(VA.getLocReg()); | 3543 InRegs.push_back(VA.getLocReg()); |
3448 | 3544 |
3449 // Round the f80 to the right size, which also moves it to the appropriate | 3545 // Round the f80 to the right size, which also moves it to the appropriate |
3450 // xmm register. This is accomplished by storing the f80 value in memory | 3546 // xmm register. This is accomplished by storing the f80 value in memory |
3451 // and then loading it back. | 3547 // and then loading it back. |
3483 case Instruction::ICmp: | 3579 case Instruction::ICmp: |
3484 case Instruction::FCmp: | 3580 case Instruction::FCmp: |
3485 return X86SelectCmp(I); | 3581 return X86SelectCmp(I); |
3486 case Instruction::ZExt: | 3582 case Instruction::ZExt: |
3487 return X86SelectZExt(I); | 3583 return X86SelectZExt(I); |
3584 case Instruction::SExt: | |
3585 return X86SelectSExt(I); | |
3488 case Instruction::Br: | 3586 case Instruction::Br: |
3489 return X86SelectBranch(I); | 3587 return X86SelectBranch(I); |
3490 case Instruction::LShr: | 3588 case Instruction::LShr: |
3491 case Instruction::AShr: | 3589 case Instruction::AShr: |
3492 case Instruction::Shl: | 3590 case Instruction::Shl: |
3528 EVT DstVT = TLI.getValueType(DL, I->getType()); | 3626 EVT DstVT = TLI.getValueType(DL, I->getType()); |
3529 | 3627 |
3530 if (!SrcVT.isSimple() || !DstVT.isSimple()) | 3628 if (!SrcVT.isSimple() || !DstVT.isSimple()) |
3531 return false; | 3629 return false; |
3532 | 3630 |
3533 if (!SrcVT.is128BitVector() && | 3631 MVT SVT = SrcVT.getSimpleVT(); |
3534 !(Subtarget->hasAVX() && SrcVT.is256BitVector())) | 3632 MVT DVT = DstVT.getSimpleVT(); |
3633 | |
3634 if (!SVT.is128BitVector() && | |
3635 !(Subtarget->hasAVX() && SVT.is256BitVector()) && | |
3636 !(Subtarget->hasAVX512() && SVT.is512BitVector() && | |
3637 (Subtarget->hasBWI() || (SVT.getScalarSizeInBits() >= 32 && | |
3638 DVT.getScalarSizeInBits() >= 32)))) | |
3535 return false; | 3639 return false; |
3536 | 3640 |
3537 unsigned Reg = getRegForValue(I->getOperand(0)); | 3641 unsigned Reg = getRegForValue(I->getOperand(0)); |
3538 if (Reg == 0) | 3642 if (Reg == 0) |
3539 return false; | 3643 return false; |
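
The widened legality check above lets fast-isel treat a vector bitcast as a no-op for 128-bit vectors, for 256-bit vectors with AVX, and for 512-bit vectors with AVX-512 provided either BWI is present or both element types are at least 32 bits. The same predicate as a standalone function over plain bit widths:

```cpp
// Mirror of the bitcast legality test above, expressed over plain sizes.
bool bitcastIsFree(unsigned VecBits, unsigned SrcScalarBits,
                   unsigned DstScalarBits, bool HasAVX, bool HasAVX512,
                   bool HasBWI) {
  if (VecBits == 128)
    return true;
  if (VecBits == 256)
    return HasAVX;
  if (VecBits == 512)
    return HasAVX512 && (HasBWI || (SrcScalarBits >= 32 && DstScalarBits >= 32));
  return false;
}
```
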
3577 } | 3681 } |
3578 | 3682 |
3579 unsigned Opc = 0; | 3683 unsigned Opc = 0; |
3580 switch (VT.SimpleTy) { | 3684 switch (VT.SimpleTy) { |
3581 default: llvm_unreachable("Unexpected value type"); | 3685 default: llvm_unreachable("Unexpected value type"); |
3582 case MVT::i1: VT = MVT::i8; LLVM_FALLTHROUGH; | 3686 case MVT::i1: |
3687 // TODO: Support this properly. | |
3688 if (Subtarget->hasAVX512()) | |
3689 return 0; | |
3690 VT = MVT::i8; | |
3691 LLVM_FALLTHROUGH; | |
3583 case MVT::i8: Opc = X86::MOV8ri; break; | 3692 case MVT::i8: Opc = X86::MOV8ri; break; |
3584 case MVT::i16: Opc = X86::MOV16ri; break; | 3693 case MVT::i16: Opc = X86::MOV16ri; break; |
3585 case MVT::i32: Opc = X86::MOV32ri; break; | 3694 case MVT::i32: Opc = X86::MOV32ri; break; |
3586 case MVT::i64: { | 3695 case MVT::i64: { |
3587 if (isUInt<32>(Imm)) | 3696 if (isUInt<32>(Imm)) |
3847 Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI)); | 3956 Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI)); |
3848 MI->eraseFromParent(); | 3957 MI->eraseFromParent(); |
3849 return true; | 3958 return true; |
3850 } | 3959 } |
3851 | 3960 |
3961 unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode, | |
3962 const TargetRegisterClass *RC, | |
3963 unsigned Op0, bool Op0IsKill, | |
3964 unsigned Op1, bool Op1IsKill, | |
3965 unsigned Op2, bool Op2IsKill, | |
3966 unsigned Op3, bool Op3IsKill) { | |
3967 const MCInstrDesc &II = TII.get(MachineInstOpcode); | |
3968 | |
3969 unsigned ResultReg = createResultReg(RC); | |
3970 Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); | |
3971 Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); | |
3972 Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2); | |
3973 Op3 = constrainOperandRegClass(II, Op3, II.getNumDefs() + 3); | |
3974 | |
3975 if (II.getNumDefs() >= 1) | |
3976 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) | |
3977 .addReg(Op0, getKillRegState(Op0IsKill)) | |
3978 .addReg(Op1, getKillRegState(Op1IsKill)) | |
3979 .addReg(Op2, getKillRegState(Op2IsKill)) | |
3980 .addReg(Op3, getKillRegState(Op3IsKill)); | |
3981 else { | |
3982 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) | |
3983 .addReg(Op0, getKillRegState(Op0IsKill)) | |
3984 .addReg(Op1, getKillRegState(Op1IsKill)) | |
3985 .addReg(Op2, getKillRegState(Op2IsKill)) | |
3986 .addReg(Op3, getKillRegState(Op3IsKill)); | |
3987 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, | |
3988 TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); | |
3989 } | |
3990 return ResultReg; | |
3991 } | |
3992 | |
3852 | 3993 |
3853 namespace llvm { | 3994 namespace llvm { |
3854 FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo, | 3995 FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo, |
3855 const TargetLibraryInfo *libInfo) { | 3996 const TargetLibraryInfo *libInfo) { |
3856 return new X86FastISel(funcInfo, libInfo); | 3997 return new X86FastISel(funcInfo, libInfo); |