comparison lib/Target/X86/X86FastISel.cpp @ 121:803732b1fca8

LLVM 5.0
author kono
date Fri, 27 Oct 2017 17:07:41 +0900 (2017-10-27)
parents 1172e4bd9c6f
children
comparison of 120:1172e4bd9c6f (parent) with 121:803732b1fca8 (this revision)
108 108
109 bool X86SelectCmp(const Instruction *I); 109 bool X86SelectCmp(const Instruction *I);
110 110
111 bool X86SelectZExt(const Instruction *I); 111 bool X86SelectZExt(const Instruction *I);
112 112
113 bool X86SelectSExt(const Instruction *I);
114
113 bool X86SelectBranch(const Instruction *I); 115 bool X86SelectBranch(const Instruction *I);
114 116
115 bool X86SelectShift(const Instruction *I); 117 bool X86SelectShift(const Instruction *I);
116 118
117 bool X86SelectDivRem(const Instruction *I); 119 bool X86SelectDivRem(const Instruction *I);
168 bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I, 170 bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
169 const Value *Cond); 171 const Value *Cond);
170 172
171 const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB, 173 const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
172 X86AddressMode &AM); 174 X86AddressMode &AM);
175
176 unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode,
177 const TargetRegisterClass *RC, unsigned Op0,
178 bool Op0IsKill, unsigned Op1, bool Op1IsKill,
179 unsigned Op2, bool Op2IsKill, unsigned Op3,
180 bool Op3IsKill);
173 }; 181 };
174 182
175 } // end anonymous namespace. 183 } // end anonymous namespace.
176
177 static std::pair<X86::CondCode, bool>
178 getX86ConditionCode(CmpInst::Predicate Predicate) {
179 X86::CondCode CC = X86::COND_INVALID;
180 bool NeedSwap = false;
181 switch (Predicate) {
182 default: break;
183 // Floating-point Predicates
184 case CmpInst::FCMP_UEQ: CC = X86::COND_E; break;
185 case CmpInst::FCMP_OLT: NeedSwap = true; LLVM_FALLTHROUGH;
186 case CmpInst::FCMP_OGT: CC = X86::COND_A; break;
187 case CmpInst::FCMP_OLE: NeedSwap = true; LLVM_FALLTHROUGH;
188 case CmpInst::FCMP_OGE: CC = X86::COND_AE; break;
189 case CmpInst::FCMP_UGT: NeedSwap = true; LLVM_FALLTHROUGH;
190 case CmpInst::FCMP_ULT: CC = X86::COND_B; break;
191 case CmpInst::FCMP_UGE: NeedSwap = true; LLVM_FALLTHROUGH;
192 case CmpInst::FCMP_ULE: CC = X86::COND_BE; break;
193 case CmpInst::FCMP_ONE: CC = X86::COND_NE; break;
194 case CmpInst::FCMP_UNO: CC = X86::COND_P; break;
195 case CmpInst::FCMP_ORD: CC = X86::COND_NP; break;
196 case CmpInst::FCMP_OEQ: LLVM_FALLTHROUGH;
197 case CmpInst::FCMP_UNE: CC = X86::COND_INVALID; break;
198
199 // Integer Predicates
200 case CmpInst::ICMP_EQ: CC = X86::COND_E; break;
201 case CmpInst::ICMP_NE: CC = X86::COND_NE; break;
202 case CmpInst::ICMP_UGT: CC = X86::COND_A; break;
203 case CmpInst::ICMP_UGE: CC = X86::COND_AE; break;
204 case CmpInst::ICMP_ULT: CC = X86::COND_B; break;
205 case CmpInst::ICMP_ULE: CC = X86::COND_BE; break;
206 case CmpInst::ICMP_SGT: CC = X86::COND_G; break;
207 case CmpInst::ICMP_SGE: CC = X86::COND_GE; break;
208 case CmpInst::ICMP_SLT: CC = X86::COND_L; break;
209 case CmpInst::ICMP_SLE: CC = X86::COND_LE; break;
210 }
211
212 return std::make_pair(CC, NeedSwap);
213 }
214 184
215 static std::pair<unsigned, bool> 185 static std::pair<unsigned, bool>
216 getX86SSEConditionCode(CmpInst::Predicate Predicate) { 186 getX86SSEConditionCode(CmpInst::Predicate Predicate) {
217 unsigned CC; 187 unsigned CC;
218 bool NeedSwap = false; 188 bool NeedSwap = false;
238 case CmpInst::FCMP_ULE: NeedSwap = true; LLVM_FALLTHROUGH; 208 case CmpInst::FCMP_ULE: NeedSwap = true; LLVM_FALLTHROUGH;
239 case CmpInst::FCMP_UGE: CC = 5; break; 209 case CmpInst::FCMP_UGE: CC = 5; break;
240 case CmpInst::FCMP_ULT: NeedSwap = true; LLVM_FALLTHROUGH; 210 case CmpInst::FCMP_ULT: NeedSwap = true; LLVM_FALLTHROUGH;
241 case CmpInst::FCMP_UGT: CC = 6; break; 211 case CmpInst::FCMP_UGT: CC = 6; break;
242 case CmpInst::FCMP_ORD: CC = 7; break; 212 case CmpInst::FCMP_ORD: CC = 7; break;
243 case CmpInst::FCMP_UEQ:
244 case CmpInst::FCMP_ONE: CC = 8; break;
213 case CmpInst::FCMP_UEQ: CC = 8; break;
214 case CmpInst::FCMP_ONE: CC = 12; break;
245 } 215 }
246 216
247 return std::make_pair(CC, NeedSwap); 217 return std::make_pair(CC, NeedSwap);
248 } 218 }
249 219
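
Background note (editor's summary of the x86 encodings, not part of the changeset): the remapping above gives FCMP_UEQ immediate 8 and FCMP_ONE immediate 12. The legacy SSE CMPSS/CMPSD immediate is only 3 bits wide, so predicates 0-7 are the only ones encodable without AVX; the VEX/EVEX-encoded VCMPSS/VCMPSD widen the field to 5 bits, adding EQ_UQ (8) and NEQ_OQ (12) among others. That is also why a later hunk tightens the guard to "CC > 7 && !Subtarget->hasAVX()". A minimal sketch of the relevant immediates (enumerator names are illustrative, not LLVM identifiers):

// Floating-point compare immediates used by CMPSS/CMPSD and VCMPSS/VCMPSD.
enum X86FPCmpImm : unsigned {
  CMP_EQ_OQ   = 0,  // ==, ordered, quiet
  CMP_LT_OS   = 1,  // <,  ordered, signaling
  CMP_LE_OS   = 2,  // <=, ordered, signaling
  CMP_UNORD_Q = 3,  // unordered, quiet
  CMP_NEQ_UQ  = 4,  // !=, unordered, quiet
  CMP_NLT_US  = 5,  // !<, unordered, signaling
  CMP_NLE_US  = 6,  // !<=, unordered, signaling
  CMP_ORD_Q   = 7,  // ordered, quiet
  // Encodable only with the VEX/EVEX forms (5-bit immediate):
  CMP_EQ_UQ   = 8,  // ==, unordered, quiet  -> FCMP_UEQ
  CMP_NEQ_OQ  = 12  // !=, ordered, quiet    -> FCMP_ONE
};
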
440 break; 410 break;
441 case MVT::v8f32: 411 case MVT::v8f32:
442 assert(HasAVX); 412 assert(HasAVX);
443 if (IsNonTemporal && Alignment >= 32 && HasAVX2) 413 if (IsNonTemporal && Alignment >= 32 && HasAVX2)
444 Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm; 414 Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
415 else if (IsNonTemporal && Alignment >= 16)
416 return false; // Force split for X86::VMOVNTDQArm
445 else if (Alignment >= 32) 417 else if (Alignment >= 32)
446 Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm; 418 Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
447 else 419 else
448 Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm; 420 Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
449 RC = &X86::VR256RegClass; 421 RC = &X86::VR256RegClass;
450 break; 422 break;
451 case MVT::v4f64: 423 case MVT::v4f64:
452 assert(HasAVX); 424 assert(HasAVX);
453 if (IsNonTemporal && Alignment >= 32 && HasAVX2) 425 if (IsNonTemporal && Alignment >= 32 && HasAVX2)
454 Opc = X86::VMOVNTDQAYrm; 426 Opc = X86::VMOVNTDQAYrm;
427 else if (IsNonTemporal && Alignment >= 16)
428 return false; // Force split for X86::VMOVNTDQArm
455 else if (Alignment >= 32) 429 else if (Alignment >= 32)
456 Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm; 430 Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
457 else 431 else
458 Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm; 432 Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
459 RC = &X86::VR256RegClass; 433 RC = &X86::VR256RegClass;
463 case MVT::v16i16: 437 case MVT::v16i16:
464 case MVT::v32i8: 438 case MVT::v32i8:
465 assert(HasAVX); 439 assert(HasAVX);
466 if (IsNonTemporal && Alignment >= 32 && HasAVX2) 440 if (IsNonTemporal && Alignment >= 32 && HasAVX2)
467 Opc = X86::VMOVNTDQAYrm; 441 Opc = X86::VMOVNTDQAYrm;
442 else if (IsNonTemporal && Alignment >= 16)
443 return false; // Force split for X86::VMOVNTDQArm
468 else if (Alignment >= 32) 444 else if (Alignment >= 32)
469 Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm; 445 Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
470 else 446 else
471 Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm; 447 Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
472 RC = &X86::VR256RegClass; 448 RC = &X86::VR256RegClass;
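
Reading aid (editor's paraphrase, not code from the patch): the v8f32, v4f64 and v16i16/v32i8 cases above all gain the same "force split" early-out, so the 256-bit non-temporal load policy is easier to see written in one place. All names below are hypothetical:

enum class NT256LoadChoice { NonTemporalYmm, ForceSplitTo128, RegularLoad };

// Mirrors the new IsNonTemporal handling for 256-bit loads.
NT256LoadChoice classifyNT256Load(bool IsNonTemporal, unsigned Alignment,
                                  bool HasAVX2) {
  if (IsNonTemporal && Alignment >= 32 && HasAVX2)
    return NT256LoadChoice::NonTemporalYmm;   // VMOVNTDQA(Z256|Y)rm
  if (IsNonTemporal && Alignment >= 16)
    return NT256LoadChoice::ForceSplitTo128;  // bail out of fast-isel so the
                                              // load is split and 128-bit
                                              // VMOVNTDQArm can be used
  return NT256LoadChoice::RegularLoad;        // aligned/unaligned MOV* load
}
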
516 /// and a displacement offset, or a GlobalAddress, 492 /// and a displacement offset, or a GlobalAddress,
517 /// i.e. V. Return true if it is possible. 493 /// i.e. V. Return true if it is possible.
518 bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill, 494 bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
519 X86AddressMode &AM, 495 X86AddressMode &AM,
520 MachineMemOperand *MMO, bool Aligned) { 496 MachineMemOperand *MMO, bool Aligned) {
497 bool HasSSE1 = Subtarget->hasSSE1();
521 bool HasSSE2 = Subtarget->hasSSE2(); 498 bool HasSSE2 = Subtarget->hasSSE2();
522 bool HasSSE4A = Subtarget->hasSSE4A(); 499 bool HasSSE4A = Subtarget->hasSSE4A();
523 bool HasAVX = Subtarget->hasAVX(); 500 bool HasAVX = Subtarget->hasAVX();
524 bool HasAVX512 = Subtarget->hasAVX512(); 501 bool HasAVX512 = Subtarget->hasAVX512();
525 bool HasVLX = Subtarget->hasVLX(); 502 bool HasVLX = Subtarget->hasVLX();
565 else 542 else
566 Opc = HasAVX512 ? X86::VMOVSDZmr : 543 Opc = HasAVX512 ? X86::VMOVSDZmr :
567 HasAVX ? X86::VMOVSDmr : X86::MOVSDmr; 544 HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
568 } else 545 } else
569 Opc = X86::ST_Fp64m; 546 Opc = X86::ST_Fp64m;
547 break;
548 case MVT::x86mmx:
549 Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr;
570 break; 550 break;
571 case MVT::v4f32: 551 case MVT::v4f32:
572 if (Aligned) { 552 if (Aligned) {
573 if (IsNonTemporal) 553 if (IsNonTemporal)
574 Opc = HasVLX ? X86::VMOVNTPSZ128mr : 554 Opc = HasVLX ? X86::VMOVNTPSZ128mr :
934 // Iterate through the indices, folding what we can. Constants can be 914 // Iterate through the indices, folding what we can. Constants can be
935 // folded, and one dynamic index can be handled, if the scale is supported. 915 // folded, and one dynamic index can be handled, if the scale is supported.
936 for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); 916 for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
937 i != e; ++i, ++GTI) { 917 i != e; ++i, ++GTI) {
938 const Value *Op = *i; 918 const Value *Op = *i;
939 if (StructType *STy = dyn_cast<StructType>(*GTI)) {
919 if (StructType *STy = GTI.getStructTypeOrNull()) {
940 const StructLayout *SL = DL.getStructLayout(STy); 920 const StructLayout *SL = DL.getStructLayout(STy);
941 Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue()); 921 Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
942 continue; 922 continue;
943 } 923 }
944 924
1083 // RIP-relative addresses can't have additional register operands. 1063 // RIP-relative addresses can't have additional register operands.
1084 if (Subtarget->isPICStyleRIPRel() && 1064 if (Subtarget->isPICStyleRIPRel() &&
1085 (AM.Base.Reg != 0 || AM.IndexReg != 0)) 1065 (AM.Base.Reg != 0 || AM.IndexReg != 0))
1086 return false; 1066 return false;
1087 1067
1088 // Can't handle DLL Import.
1089 if (GV->hasDLLImportStorageClass())
1090 return false;
1091
1092 // Can't handle TLS. 1068 // Can't handle TLS.
1093 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) 1069 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
1094 if (GVar->isThreadLocal()) 1070 if (GVar->isThreadLocal())
1095 return false; 1071 return false;
1096 1072
1097 // Okay, we've committed to selecting this global. Set up the basic address. 1073 // Okay, we've committed to selecting this global. Set up the basic address.
1098 AM.GV = GV; 1074 AM.GV = GV;
1099 1075
1100 // No ABI requires an extra load for anything other than DLLImport, which
1101 // we rejected above. Return a direct reference to the global.
1076 // Return a direct reference to the global. Fastisel can handle calls to
1077 // functions that require loads, such as dllimport and nonlazybind
1078 // functions.
1102 if (Subtarget->isPICStyleRIPRel()) { 1079 if (Subtarget->isPICStyleRIPRel()) {
1103 // Use rip-relative addressing if we can. Above we verified that the 1080 // Use rip-relative addressing if we can. Above we verified that the
1104 // base and index registers are unused. 1081 // base and index registers are unused.
1105 assert(AM.Base.Reg == 0 && AM.IndexReg == 0); 1082 assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
1106 AM.Base.Reg = X86::RIP; 1083 AM.Base.Reg = X86::RIP;
1193 CC != CallingConv::Fast && 1170 CC != CallingConv::Fast &&
1194 CC != CallingConv::X86_FastCall && 1171 CC != CallingConv::X86_FastCall &&
1195 CC != CallingConv::X86_StdCall && 1172 CC != CallingConv::X86_StdCall &&
1196 CC != CallingConv::X86_ThisCall && 1173 CC != CallingConv::X86_ThisCall &&
1197 CC != CallingConv::X86_64_SysV && 1174 CC != CallingConv::X86_64_SysV &&
1198 CC != CallingConv::X86_64_Win64) 1175 CC != CallingConv::Win64)
1199 return false; 1176 return false;
1200 1177
1201 // Don't handle popping bytes if they don't fit the ret's immediate. 1178 // Don't handle popping bytes if they don't fit the ret's immediate.
1202 if (!isUInt<16>(X86MFInfo->getBytesToPopOnReturn())) 1179 if (!isUInt<16>(X86MFInfo->getBytesToPopOnReturn()))
1203 return false; 1180 return false;
1523 return true; 1500 return true;
1524 } 1501 }
1525 1502
1526 X86::CondCode CC; 1503 X86::CondCode CC;
1527 bool SwapArgs; 1504 bool SwapArgs;
1528 std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate); 1505 std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1529 assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); 1506 assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1530 unsigned Opc = X86::getSETFromCond(CC); 1507 unsigned Opc = X86::getSETFromCond(CC);
1531 1508
1532 if (SwapArgs) 1509 if (SwapArgs)
1533 std::swap(LHS, RHS); 1510 std::swap(LHS, RHS);
1550 if (ResultReg == 0) 1527 if (ResultReg == 0)
1551 return false; 1528 return false;
1552 1529
1553 // Handle zero-extension from i1 to i8, which is common. 1530 // Handle zero-extension from i1 to i8, which is common.
1554 MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType()); 1531 MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1555 if (SrcVT.SimpleTy == MVT::i1) { 1532 if (SrcVT == MVT::i1) {
1556 // Set the high bits to zero. 1533 // Set the high bits to zero.
1557 ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false); 1534 ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
1558 SrcVT = MVT::i8; 1535 SrcVT = MVT::i8;
1559 1536
1560 if (ResultReg == 0) 1537 if (ResultReg == 0)
1578 1555
1579 ResultReg = createResultReg(&X86::GR64RegClass); 1556 ResultReg = createResultReg(&X86::GR64RegClass);
1580 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG), 1557 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG),
1581 ResultReg) 1558 ResultReg)
1582 .addImm(0).addReg(Result32).addImm(X86::sub_32bit); 1559 .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
1560 } else if (DstVT == MVT::i16) {
1561 // i8->i16 doesn't exist in the autogenerated isel table. Need to zero
1562 // extend to 32-bits and then extract down to 16-bits.
1563 unsigned Result32 = createResultReg(&X86::GR32RegClass);
1564 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVZX32rr8),
1565 Result32).addReg(ResultReg);
1566
1567 ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true,
1568 X86::sub_16bit);
1583 } else if (DstVT != MVT::i8) { 1569 } else if (DstVT != MVT::i8) {
1584 ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
1570 ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
1571 ResultReg, /*Kill=*/true);
1572 if (ResultReg == 0)
1573 return false;
1574 }
1575
1576 updateValueMap(I, ResultReg);
1577 return true;
1578 }
1579
1580 bool X86FastISel::X86SelectSExt(const Instruction *I) {
1581 EVT DstVT = TLI.getValueType(DL, I->getType());
1582 if (!TLI.isTypeLegal(DstVT))
1583 return false;
1584
1585 unsigned ResultReg = getRegForValue(I->getOperand(0));
1586 if (ResultReg == 0)
1587 return false;
1588
1589 // Handle sign-extension from i1 to i8.
1590 MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
1591 if (SrcVT == MVT::i1) {
1592 // Set the high bits to zero.
1593 unsigned ZExtReg = fastEmitZExtFromI1(MVT::i8, ResultReg,
1594 /*TODO: Kill=*/false);
1595 if (ZExtReg == 0)
1596 return false;
1597
1598 // Negate the result to make an 8-bit sign extended value.
1599 ResultReg = createResultReg(&X86::GR8RegClass);
1600 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::NEG8r),
1601 ResultReg).addReg(ZExtReg);
1602
1603 SrcVT = MVT::i8;
1604 }
1605
1606 if (DstVT == MVT::i16) {
1607 // i8->i16 doesn't exist in the autogenerated isel table. Need to sign
1608 // extend to 32-bits and then extract down to 16-bits.
1609 unsigned Result32 = createResultReg(&X86::GR32RegClass);
1610 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVSX32rr8),
1611 Result32).addReg(ResultReg);
1612
1613 ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true,
1614 X86::sub_16bit);
1615 } else if (DstVT != MVT::i8) {
1616 ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::SIGN_EXTEND,
1585 ResultReg, /*Kill=*/true); 1617 ResultReg, /*Kill=*/true);
1586 if (ResultReg == 0) 1618 if (ResultReg == 0)
1587 return false; 1619 return false;
1588 } 1620 }
1589 1621
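
Worked example (editor's illustration, not code from the patch): the new X86SelectSExt handles an i1 source by zero-extending it to i8 and then emitting NEG8r, and the trick is worth spelling out: negating the zero-extended bit yields 0x00 or 0xFF, i.e. exactly the 8-bit sign extension of the 1-bit value.

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t Bits[] = {0, 1};
  for (uint8_t Bit : Bits) {
    uint8_t SExt = static_cast<uint8_t>(-Bit); // what NEG8r computes
    assert(SExt == (Bit ? 0xFF : 0x00));       // 8-bit sign extension of i1
  }
  return 0;
}
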
1650 break; 1682 break;
1651 } 1683 }
1652 1684
1653 bool SwapArgs; 1685 bool SwapArgs;
1654 unsigned BranchOpc; 1686 unsigned BranchOpc;
1655 std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate); 1687 std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
1656 assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); 1688 assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1657 1689
1658 BranchOpc = X86::GetCondBranchFromCond(CC); 1690 BranchOpc = X86::GetCondBranchFromCond(CC);
1659 if (SwapArgs) 1691 if (SwapArgs)
1660 std::swap(CmpLHS, CmpRHS); 1692 std::swap(CmpLHS, CmpRHS);
1732 if (OpReg == 0) return false; 1764 if (OpReg == 0) return false;
1733 1765
1734 // In case OpReg is a K register, COPY to a GPR 1766 // In case OpReg is a K register, COPY to a GPR
1735 if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) { 1767 if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
1736 unsigned KOpReg = OpReg; 1768 unsigned KOpReg = OpReg;
1737 OpReg = createResultReg(&X86::GR8RegClass); 1769 OpReg = createResultReg(&X86::GR32RegClass);
1738 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1770 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1739 TII.get(TargetOpcode::COPY), OpReg) 1771 TII.get(TargetOpcode::COPY), OpReg)
1740 .addReg(KOpReg); 1772 .addReg(KOpReg);
1773 OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, /*Kill=*/true,
1774 X86::sub_8bit);
1741 } 1775 }
1742 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri)) 1776 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
1743 .addReg(OpReg) 1777 .addReg(OpReg)
1744 .addImm(1); 1778 .addImm(1);
1745 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1)) 1779 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1))
1925 TII.get(X86::MOV32r0), Zero32); 1959 TII.get(X86::MOV32r0), Zero32);
1926 1960
1927 // Copy the zero into the appropriate sub/super/identical physical 1961 // Copy the zero into the appropriate sub/super/identical physical
1928 // register. Unfortunately the operations needed are not uniform enough 1962 // register. Unfortunately the operations needed are not uniform enough
1929 // to fit neatly into the table above. 1963 // to fit neatly into the table above.
1930 if (VT.SimpleTy == MVT::i16) { 1964 if (VT == MVT::i16) {
1931 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1965 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1932 TII.get(Copy), TypeEntry.HighInReg) 1966 TII.get(Copy), TypeEntry.HighInReg)
1933 .addReg(Zero32, 0, X86::sub_16bit); 1967 .addReg(Zero32, 0, X86::sub_16bit);
1934 } else if (VT.SimpleTy == MVT::i32) { 1968 } else if (VT == MVT::i32) {
1935 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1969 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1936 TII.get(Copy), TypeEntry.HighInReg) 1970 TII.get(Copy), TypeEntry.HighInReg)
1937 .addReg(Zero32); 1971 .addReg(Zero32);
1938 } else if (VT.SimpleTy == MVT::i64) { 1972 } else if (VT == MVT::i64) {
1939 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1973 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1940 TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg) 1974 TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
1941 .addImm(0).addReg(Zero32).addImm(X86::sub_32bit); 1975 .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
1942 } 1976 }
1943 } 1977 }
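
Background note (editor's reading of the hunk above; the helper below is hypothetical): the MOV32r0 result is copied into TypeEntry.HighInReg because x86 DIV divides a double-width dividend (DX:AX, EDX:EAX or RDX:RAX), so the unsigned-divide path must clear the high half first; the signed path instead sign-extends with CWD/CDQ/CQO.

#include <cstdint>

// What clearing the high register buys: a plain 32-bit unsigned divide.
uint32_t udiv32(uint32_t Lo, uint32_t Divisor) {
  uint64_t Dividend = (uint64_t{0} << 32) | Lo; // EDX = 0, EAX = Lo
  return static_cast<uint32_t>(Dividend / Divisor);
}
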
1948 // For i8 remainder, we can't reference AH directly, as we'll end 1982 // For i8 remainder, we can't reference AH directly, as we'll end
1949 // up with bogus copies like %R9B = COPY %AH. Reference AX 1983 // up with bogus copies like %R9B = COPY %AH. Reference AX
1950 // instead to prevent AH references in a REX instruction. 1984 // instead to prevent AH references in a REX instruction.
1951 // 1985 //
1952 // The current assumption of the fast register allocator is that isel 1986 // The current assumption of the fast register allocator is that isel
1953 // won't generate explicit references to the GPR8_NOREX registers. If 1987 // won't generate explicit references to the GR8_NOREX registers. If
1954 // the allocator and/or the backend get enhanced to be more robust in 1988 // the allocator and/or the backend get enhanced to be more robust in
1955 // that regard, this can be, and should be, removed. 1989 // that regard, this can be, and should be, removed.
1956 unsigned ResultReg = 0; 1990 unsigned ResultReg = 0;
1957 if ((I->getOpcode() == Instruction::SRem || 1991 if ((I->getOpcode() == Instruction::SRem ||
1958 I->getOpcode() == Instruction::URem) && 1992 I->getOpcode() == Instruction::URem) &&
2021 Predicate = CmpInst::ICMP_NE; 2055 Predicate = CmpInst::ICMP_NE;
2022 break; 2056 break;
2023 } 2057 }
2024 2058
2025 bool NeedSwap; 2059 bool NeedSwap;
2026 std::tie(CC, NeedSwap) = getX86ConditionCode(Predicate); 2060 std::tie(CC, NeedSwap) = X86::getX86ConditionCode(Predicate);
2027 assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); 2061 assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
2028 2062
2029 const Value *CmpLHS = CI->getOperand(0); 2063 const Value *CmpLHS = CI->getOperand(0);
2030 const Value *CmpRHS = CI->getOperand(1); 2064 const Value *CmpRHS = CI->getOperand(1);
2031 if (NeedSwap) 2065 if (NeedSwap)
2076 bool CondIsKill = hasTrivialKill(Cond); 2110 bool CondIsKill = hasTrivialKill(Cond);
2077 2111
2078 // In case OpReg is a K register, COPY to a GPR 2112 // In case OpReg is a K register, COPY to a GPR
2079 if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) { 2113 if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2080 unsigned KCondReg = CondReg; 2114 unsigned KCondReg = CondReg;
2081 CondReg = createResultReg(&X86::GR8RegClass); 2115 CondReg = createResultReg(&X86::GR32RegClass);
2082 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2116 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2083 TII.get(TargetOpcode::COPY), CondReg) 2117 TII.get(TargetOpcode::COPY), CondReg)
2084 .addReg(KCondReg, getKillRegState(CondIsKill)); 2118 .addReg(KCondReg, getKillRegState(CondIsKill));
2119 CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
2120 X86::sub_8bit);
2085 } 2121 }
2086 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri)) 2122 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2087 .addReg(CondReg, getKillRegState(CondIsKill)) 2123 .addReg(CondReg, getKillRegState(CondIsKill))
2088 .addImm(1); 2124 .addImm(1);
2089 } 2125 }
2098 bool LHSIsKill = hasTrivialKill(LHS); 2134 bool LHSIsKill = hasTrivialKill(LHS);
2099 2135
2100 if (!LHSReg || !RHSReg) 2136 if (!LHSReg || !RHSReg)
2101 return false; 2137 return false;
2102 2138
2103 unsigned Opc = X86::getCMovFromCond(CC, RC->getSize());
2139 const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo();
2140 unsigned Opc = X86::getCMovFromCond(CC, TRI.getRegSizeInBits(*RC)/8);
2104 unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill, 2141 unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
2105 LHSReg, LHSIsKill); 2142 LHSReg, LHSIsKill);
2106 updateValueMap(I, ResultReg); 2143 updateValueMap(I, ResultReg);
2107 return true; 2144 return true;
2108 } 2145 }
2139 } 2176 }
2140 2177
2141 unsigned CC; 2178 unsigned CC;
2142 bool NeedSwap; 2179 bool NeedSwap;
2143 std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate); 2180 std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
2144 if (CC > 7) 2181 if (CC > 7 && !Subtarget->hasAVX())
2145 return false; 2182 return false;
2146 2183
2147 if (NeedSwap) 2184 if (NeedSwap)
2148 std::swap(CmpLHS, CmpRHS); 2185 std::swap(CmpLHS, CmpRHS);
2149 2186
2150 // Choose the SSE instruction sequence based on data type (float or double). 2187 // Choose the SSE instruction sequence based on data type (float or double).
2151 static const uint16_t OpcTable[2][4] = { 2188 static const uint16_t OpcTable[2][4] = {
2152 { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr }, 2189 { X86::CMPSSrr, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
2153 { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr } 2190 { X86::CMPSDrr, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }
2154 }; 2191 };
2155 2192
2156 const uint16_t *Opc = nullptr; 2193 const uint16_t *Opc = nullptr;
2157 switch (RetVT.SimpleTy) { 2194 switch (RetVT.SimpleTy) {
2158 default: return false; 2195 default: return false;
2178 if (!LHSReg || !RHSReg || !CmpLHS || !CmpRHS) 2215 if (!LHSReg || !RHSReg || !CmpLHS || !CmpRHS)
2179 return false; 2216 return false;
2180 2217
2181 const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT); 2218 const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
2182 unsigned ResultReg; 2219 unsigned ResultReg;
2183 2220
2184 if (Subtarget->hasAVX()) {
2185 const TargetRegisterClass *FR32 = &X86::FR32RegClass;
2221 if (Subtarget->hasAVX512()) {
2222 // If we have AVX512 we can use a mask compare and masked movss/sd.
2223 const TargetRegisterClass *VR128X = &X86::VR128XRegClass;
2224 const TargetRegisterClass *VK1 = &X86::VK1RegClass;
2225
2226 unsigned CmpOpcode =
2227 (RetVT == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr;
2228 unsigned CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpLHSIsKill,
2229 CmpRHSReg, CmpRHSIsKill, CC);
2230
2231 // Need an IMPLICIT_DEF for the input that is used to generate the upper
2232 // bits of the result register since its not based on any of the inputs.
2233 unsigned ImplicitDefReg = createResultReg(VR128X);
2234 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2235 TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2236
2237 // Place RHSReg is the passthru of the masked movss/sd operation and put
2238 // LHS in the input. The mask input comes from the compare.
2239 unsigned MovOpcode =
2240 (RetVT == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
2241 unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, RHSIsKill,
2242 CmpReg, true, ImplicitDefReg, true,
2243 LHSReg, LHSIsKill);
2244
2245 ResultReg = createResultReg(RC);
2246 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2247 TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);
2248
2249 } else if (Subtarget->hasAVX()) {
2186 const TargetRegisterClass *VR128 = &X86::VR128RegClass; 2250 const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2187 2251
2188 // If we have AVX, create 1 blendv instead of 3 logic instructions. 2252 // If we have AVX, create 1 blendv instead of 3 logic instructions.
2189 // Blendv was introduced with SSE 4.1, but the 2 register form implicitly 2253 // Blendv was introduced with SSE 4.1, but the 2 register form implicitly
2190 // uses XMM0 as the selection register. That may need just as many 2254 // uses XMM0 as the selection register. That may need just as many
2191 // instructions as the AND/ANDN/OR sequence due to register moves, so 2255 // instructions as the AND/ANDN/OR sequence due to register moves, so
2192 // don't bother. 2256 // don't bother.
2193 unsigned CmpOpcode = 2257 unsigned CmpOpcode =
2194 (RetVT.SimpleTy == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr; 2258 (RetVT == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
2195 unsigned BlendOpcode = 2259 unsigned BlendOpcode =
2196 (RetVT.SimpleTy == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr; 2260 (RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
2197 2261
2198 unsigned CmpReg = fastEmitInst_rri(CmpOpcode, FR32, CmpLHSReg, CmpLHSIsKill,
2262 unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
2199 CmpRHSReg, CmpRHSIsKill, CC); 2263 CmpRHSReg, CmpRHSIsKill, CC);
2200 unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill, 2264 unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill,
2201 LHSReg, LHSIsKill, CmpReg, true); 2265 LHSReg, LHSIsKill, CmpReg, true);
2202 ResultReg = createResultReg(RC); 2266 ResultReg = createResultReg(RC);
2203 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2267 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2204 TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg); 2268 TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
2205 } else { 2269 } else {
2270 const TargetRegisterClass *VR128 = &X86::VR128RegClass;
2206 unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill, 2271 unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
2207 CmpRHSReg, CmpRHSIsKill, CC); 2272 CmpRHSReg, CmpRHSIsKill, CC);
2208 unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
2209                                   LHSReg, LHSIsKill);
2210 unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
2211                                    RHSReg, RHSIsKill);
2212 ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
2213                             AndReg, /*IsKill=*/true);
2273 unsigned AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, /*IsKill=*/false,
2274                                   LHSReg, LHSIsKill);
2275 unsigned AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, /*IsKill=*/true,
2276                                    RHSReg, RHSIsKill);
2277 unsigned OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, /*IsKill=*/true,
2278                                  AndReg, /*IsKill=*/true);
2279 ResultReg = createResultReg(RC);
2280 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2281 TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
2214 } 2282 }
2215 updateValueMap(I, ResultReg); 2283 updateValueMap(I, ResultReg);
2216 return true; 2284 return true;
2217 } 2285 }
2218 2286
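
Reading aid (editor's paraphrase, not code from the patch): the select lowering above now has three shapes -- an AVX-512 compare into a mask register feeding a merge-masked VMOVSS/VMOVSD (with RHS as the pass-through operand), an AVX VBLENDV, and the legacy CMPSS/CMPSD + ANDPS/ANDNPS/ORPS sequence over full VR128 registers. The last one is the classic branchless select; in scalar form it computes:

#include <cstdint>

uint32_t selectBits(bool Cond, uint32_t LHS, uint32_t RHS) {
  uint32_t Mask = Cond ? 0xFFFFFFFFu : 0u; // low element of the compare result
  return (Mask & LHS) | (~Mask & RHS);     // ANDPS, ANDNPS, ORPS
}
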
2236 // same basic block (values defined in other basic blocks may not have 2304 // same basic block (values defined in other basic blocks may not have
2237 // initialized registers). 2305 // initialized registers).
2238 const auto *CI = dyn_cast<CmpInst>(Cond); 2306 const auto *CI = dyn_cast<CmpInst>(Cond);
2239 if (CI && (CI->getParent() == I->getParent())) { 2307 if (CI && (CI->getParent() == I->getParent())) {
2240 bool NeedSwap; 2308 bool NeedSwap;
2241 std::tie(CC, NeedSwap) = getX86ConditionCode(CI->getPredicate()); 2309 std::tie(CC, NeedSwap) = X86::getX86ConditionCode(CI->getPredicate());
2242 if (CC > X86::LAST_VALID_COND) 2310 if (CC > X86::LAST_VALID_COND)
2243 return false; 2311 return false;
2244 2312
2245 const Value *CmpLHS = CI->getOperand(0); 2313 const Value *CmpLHS = CI->getOperand(0);
2246 const Value *CmpRHS = CI->getOperand(1); 2314 const Value *CmpRHS = CI->getOperand(1);
2258 bool CondIsKill = hasTrivialKill(Cond); 2326 bool CondIsKill = hasTrivialKill(Cond);
2259 2327
2260 // In case OpReg is a K register, COPY to a GPR 2328 // In case OpReg is a K register, COPY to a GPR
2261 if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) { 2329 if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
2262 unsigned KCondReg = CondReg; 2330 unsigned KCondReg = CondReg;
2263 CondReg = createResultReg(&X86::GR8RegClass); 2331 CondReg = createResultReg(&X86::GR32RegClass);
2264 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2332 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2265 TII.get(TargetOpcode::COPY), CondReg) 2333 TII.get(TargetOpcode::COPY), CondReg)
2266 .addReg(KCondReg, getKillRegState(CondIsKill)); 2334 .addReg(KCondReg, getKillRegState(CondIsKill));
2335 CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
2336 X86::sub_8bit);
2267 } 2337 }
2268 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri)) 2338 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
2269 .addReg(CondReg, getKillRegState(CondIsKill)) 2339 .addReg(CondReg, getKillRegState(CondIsKill))
2270 .addImm(1); 2340 .addImm(1);
2271 } 2341 }
2384 2454
2385 unsigned OpReg = getRegForValue(I->getOperand(0)); 2455 unsigned OpReg = getRegForValue(I->getOperand(0));
2386 if (OpReg == 0) 2456 if (OpReg == 0)
2387 return false; 2457 return false;
2388 2458
2459 unsigned ImplicitDefReg;
2460 if (Subtarget->hasAVX()) {
2461 ImplicitDefReg = createResultReg(RC);
2462 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2463 TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
2464
2465 }
2466
2389 unsigned ResultReg = createResultReg(RC); 2467 unsigned ResultReg = createResultReg(RC);
2390 MachineInstrBuilder MIB; 2468 MachineInstrBuilder MIB;
2391 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc), 2469 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
2392 ResultReg); 2470 ResultReg);
2471
2393 if (Subtarget->hasAVX()) 2472 if (Subtarget->hasAVX())
2394 MIB.addReg(OpReg); 2473 MIB.addReg(ImplicitDefReg);
2474
2395 MIB.addReg(OpReg); 2475 MIB.addReg(OpReg);
2396 updateValueMap(I, ResultReg); 2476 updateValueMap(I, ResultReg);
2397 return true; 2477 return true;
2398 } 2478 }
2399 2479
2438 // Truncate from i8 to i1; no code needed. 2518 // Truncate from i8 to i1; no code needed.
2439 updateValueMap(I, InputReg); 2519 updateValueMap(I, InputReg);
2440 return true; 2520 return true;
2441 } 2521 }
2442 2522
2443 bool KillInputReg = false;
2444 if (!Subtarget->is64Bit()) {
2445 // If we're on x86-32; we can't extract an i8 from a general register.
2446 // First issue a copy to GR16_ABCD or GR32_ABCD.
2447 const TargetRegisterClass *CopyRC =
2448 (SrcVT == MVT::i16) ? &X86::GR16_ABCDRegClass : &X86::GR32_ABCDRegClass;
2449 unsigned CopyReg = createResultReg(CopyRC);
2450 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2451 TII.get(TargetOpcode::COPY), CopyReg).addReg(InputReg);
2452 InputReg = CopyReg;
2453 KillInputReg = true;
2454 }
2455
2456 // Issue an extract_subreg. 2523 // Issue an extract_subreg.
2457 unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8, 2524 unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8,
2458 InputReg, KillInputReg, 2525 InputReg, false,
2459 X86::sub_8bit); 2526 X86::sub_8bit);
2460 if (!ResultReg) 2527 if (!ResultReg)
2461 return false; 2528 return false;
2462 2529
2463 updateValueMap(I, ResultReg); 2530 updateValueMap(I, ResultReg);
2975 return false; 3042 return false;
2976 3043
2977 if (!Subtarget->is64Bit()) 3044 if (!Subtarget->is64Bit())
2978 return false; 3045 return false;
2979 3046
3047 if (Subtarget->useSoftFloat())
3048 return false;
3049
2980 // Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments. 3050 // Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments.
2981 unsigned GPRCnt = 0; 3051 unsigned GPRCnt = 0;
2982 unsigned FPRCnt = 0; 3052 unsigned FPRCnt = 0;
2983 unsigned Idx = 0;
2984 for (auto const &Arg : F->args()) { 3053 for (auto const &Arg : F->args()) {
2985 // The first argument is at index 1.
2986 ++Idx;
2987 if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
2988     F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
2989     F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
2990     F->getAttributes().hasAttribute(Idx, Attribute::SwiftSelf) ||
2991     F->getAttributes().hasAttribute(Idx, Attribute::SwiftError) ||
2992     F->getAttributes().hasAttribute(Idx, Attribute::Nest))
3054 if (Arg.hasAttribute(Attribute::ByVal) ||
3055     Arg.hasAttribute(Attribute::InReg) ||
3056     Arg.hasAttribute(Attribute::StructRet) ||
3057     Arg.hasAttribute(Attribute::SwiftSelf) ||
3058     Arg.hasAttribute(Attribute::SwiftError) ||
3059     Arg.hasAttribute(Attribute::Nest))
2993 return false; 3060 return false;
2994 3061
2995 Type *ArgTy = Arg.getType(); 3062 Type *ArgTy = Arg.getType();
2996 if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) 3063 if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
2997 return false; 3064 return false;
3066 if (CC == CallingConv::Fast || CC == CallingConv::GHC || 3133 if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3067 CC == CallingConv::HiPE) 3134 CC == CallingConv::HiPE)
3068 return 0; 3135 return 0;
3069 3136
3070 if (CS) 3137 if (CS)
3071 if (CS->arg_empty() || !CS->paramHasAttr(1, Attribute::StructRet) || 3138 if (CS->arg_empty() || !CS->paramHasAttr(0, Attribute::StructRet) ||
3072 CS->paramHasAttr(1, Attribute::InReg) || Subtarget->isTargetMCU()) 3139 CS->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU())
3073 return 0; 3140 return 0;
3074 3141
3075 return 4; 3142 return 4;
3076 } 3143 }
3077 3144
3088 MCSymbol *Symbol = CLI.Symbol; 3155 MCSymbol *Symbol = CLI.Symbol;
3089 3156
3090 bool Is64Bit = Subtarget->is64Bit(); 3157 bool Is64Bit = Subtarget->is64Bit();
3091 bool IsWin64 = Subtarget->isCallingConvWin64(CC); 3158 bool IsWin64 = Subtarget->isCallingConvWin64(CC);
3092 3159
3160 const CallInst *CI =
3161 CLI.CS ? dyn_cast<CallInst>(CLI.CS->getInstruction()) : nullptr;
3162 const Function *CalledFn = CI ? CI->getCalledFunction() : nullptr;
3163
3164 // Functions with no_caller_saved_registers that need special handling.
3165 if ((CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3166 (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))
3167 return false;
3168
3093 // Handle only C, fastcc, and webkit_js calling conventions for now. 3169 // Handle only C, fastcc, and webkit_js calling conventions for now.
3094 switch (CC) { 3170 switch (CC) {
3095 default: return false; 3171 default: return false;
3096 case CallingConv::C: 3172 case CallingConv::C:
3097 case CallingConv::Fast: 3173 case CallingConv::Fast:
3098 case CallingConv::WebKit_JS: 3174 case CallingConv::WebKit_JS:
3099 case CallingConv::Swift: 3175 case CallingConv::Swift:
3100 case CallingConv::X86_FastCall: 3176 case CallingConv::X86_FastCall:
3101 case CallingConv::X86_StdCall: 3177 case CallingConv::X86_StdCall:
3102 case CallingConv::X86_ThisCall: 3178 case CallingConv::X86_ThisCall:
3103 case CallingConv::X86_64_Win64: 3179 case CallingConv::Win64:
3104 case CallingConv::X86_64_SysV: 3180 case CallingConv::X86_64_SysV:
3105 break; 3181 break;
3106 } 3182 }
3107 3183
3108 // Allow SelectionDAG isel to handle tail calls. 3184 // Allow SelectionDAG isel to handle tail calls.
3191 unsigned NumBytes = CCInfo.getAlignedCallFrameSize(); 3267 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3192 3268
3193 // Issue CALLSEQ_START 3269 // Issue CALLSEQ_START
3194 unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); 3270 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3195 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) 3271 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3196 .addImm(NumBytes).addImm(0); 3272 .addImm(NumBytes).addImm(0).addImm(0);
3197 3273
3198 // Walk the register/memloc assignments, inserting copies/loads. 3274 // Walk the register/memloc assignments, inserting copies/loads.
3199 const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); 3275 const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3200 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 3276 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3201 CCValAssign const &VA = ArgLocs[i]; 3277 CCValAssign const &VA = ArgLocs[i];
3212 case CCValAssign::Full: break; 3288 case CCValAssign::Full: break;
3213 case CCValAssign::SExt: { 3289 case CCValAssign::SExt: {
3214 assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() && 3290 assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3215 "Unexpected extend"); 3291 "Unexpected extend");
3216 3292
3217 if (ArgVT.SimpleTy == MVT::i1) 3293 if (ArgVT == MVT::i1)
3218 return false; 3294 return false;
3219 3295
3220 bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg, 3296 bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
3221 ArgVT, ArgReg); 3297 ArgVT, ArgReg);
3222 assert(Emitted && "Failed to emit a sext!"); (void)Emitted; 3298 assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
3226 case CCValAssign::ZExt: { 3302 case CCValAssign::ZExt: {
3227 assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() && 3303 assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
3228 "Unexpected extend"); 3304 "Unexpected extend");
3229 3305
3230 // Handle zero-extension from i1 to i8, which is common. 3306 // Handle zero-extension from i1 to i8, which is common.
3231 if (ArgVT.SimpleTy == MVT::i1) { 3307 if (ArgVT == MVT::i1) {
3232 // Set the high bits to zero. 3308 // Set the high bits to zero.
3233 ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false); 3309 ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false);
3234 ArgVT = MVT::i8; 3310 ArgVT = MVT::i8;
3235 3311
3236 if (ArgReg == 0) 3312 if (ArgReg == 0)
3372 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc)) 3448 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc))
3373 .addReg(CalleeOp); 3449 .addReg(CalleeOp);
3374 } else { 3450 } else {
3375 // Direct call. 3451 // Direct call.
3376 assert(GV && "Not a direct call"); 3452 assert(GV && "Not a direct call");
3377 unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
3378
3379 // See if we need any target-specific flags on the GV operand. 3453 // See if we need any target-specific flags on the GV operand.
3380 unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV); 3454 unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV);
3381 // Ignore NonLazyBind attribute in FastISel 3455 // Ignore NonLazyBind attribute in FastISel
3382 if (OpFlags == X86II::MO_GOTPCREL) 3456 if (OpFlags == X86II::MO_GOTPCREL)
3383 OpFlags = 0; 3457 OpFlags = 0;
3384 3458
3459 // This will be a direct call, or an indirect call through memory for
3460 // NonLazyBind calls or dllimport calls.
3461 bool NeedLoad = OpFlags == X86II::MO_DLLIMPORT;
3462 unsigned CallOpc = NeedLoad
3463 ? (Is64Bit ? X86::CALL64m : X86::CALL32m)
3464 : (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);
3465
3385 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc)); 3466 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
3467 if (NeedLoad)
3468 MIB.addReg(Is64Bit ? X86::RIP : 0).addImm(1).addReg(0);
3386 if (Symbol) 3469 if (Symbol)
3387 MIB.addSym(Symbol, OpFlags); 3470 MIB.addSym(Symbol, OpFlags);
3388 else 3471 else
3389 MIB.addGlobalAddress(GV, 0, OpFlags); 3472 MIB.addGlobalAddress(GV, 0, OpFlags);
3473 if (NeedLoad)
3474 MIB.addReg(0);
3390 } 3475 }
3391 3476
3392 // Add a register mask operand representing the call-preserved registers. 3477 // Add a register mask operand representing the call-preserved registers.
3393 // Proper defs for return values will be added by setPhysRegsDeadExcept(). 3478 // Proper defs for return values will be added by setPhysRegsDeadExcept().
3394 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); 3479 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3424 unsigned ResultReg = FuncInfo.CreateRegs(CLI.RetTy); 3509 unsigned ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3425 for (unsigned i = 0; i != RVLocs.size(); ++i) { 3510 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3426 CCValAssign &VA = RVLocs[i]; 3511 CCValAssign &VA = RVLocs[i];
3427 EVT CopyVT = VA.getValVT(); 3512 EVT CopyVT = VA.getValVT();
3428 unsigned CopyReg = ResultReg + i; 3513 unsigned CopyReg = ResultReg + i;
3514 unsigned SrcReg = VA.getLocReg();
3429 3515
3430 // If this is x86-64, and we disabled SSE, we can't return FP values 3516 // If this is x86-64, and we disabled SSE, we can't return FP values
3431 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) && 3517 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
3432 ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) { 3518 ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
3433 report_fatal_error("SSE register return with SSE disabled"); 3519 report_fatal_error("SSE register return with SSE disabled");
3434 } 3520 }
3435 3521
3522 // If the return value is an i1 and AVX-512 is enabled, we need
3523 // to do a fixup to make the copy legal.
3524 if (CopyVT == MVT::i1 && SrcReg == X86::AL && Subtarget->hasAVX512()) {
3525 // Need to copy to a GR32 first.
3526 // TODO: MOVZX isn't great here. We don't care about the upper bits.
3527 SrcReg = createResultReg(&X86::GR32RegClass);
3528 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3529 TII.get(X86::MOVZX32rr8), SrcReg).addReg(X86::AL);
3530 }
3531
3436 // If we prefer to use the value in xmm registers, copy it out as f80 and 3532 // If we prefer to use the value in xmm registers, copy it out as f80 and
3437 // use a truncate to move it from fp stack reg to xmm reg. 3533 // use a truncate to move it from fp stack reg to xmm reg.
3438 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) && 3534 if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) &&
3439 isScalarFPTypeInSSEReg(VA.getValVT())) { 3535 isScalarFPTypeInSSEReg(VA.getValVT())) {
3440 CopyVT = MVT::f80; 3536 CopyVT = MVT::f80;
3441 CopyReg = createResultReg(&X86::RFP80RegClass); 3537 CopyReg = createResultReg(&X86::RFP80RegClass);
3442 } 3538 }
3443 3539
3444 // Copy out the result. 3540 // Copy out the result.
3445 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3541 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3446 TII.get(TargetOpcode::COPY), CopyReg).addReg(VA.getLocReg()); 3542 TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
3447 InRegs.push_back(VA.getLocReg()); 3543 InRegs.push_back(VA.getLocReg());
3448 3544
3449 // Round the f80 to the right size, which also moves it to the appropriate 3545 // Round the f80 to the right size, which also moves it to the appropriate
3450 // xmm register. This is accomplished by storing the f80 value in memory 3546 // xmm register. This is accomplished by storing the f80 value in memory
3451 // and then loading it back. 3547 // and then loading it back.
3483 case Instruction::ICmp: 3579 case Instruction::ICmp:
3484 case Instruction::FCmp: 3580 case Instruction::FCmp:
3485 return X86SelectCmp(I); 3581 return X86SelectCmp(I);
3486 case Instruction::ZExt: 3582 case Instruction::ZExt:
3487 return X86SelectZExt(I); 3583 return X86SelectZExt(I);
3584 case Instruction::SExt:
3585 return X86SelectSExt(I);
3488 case Instruction::Br: 3586 case Instruction::Br:
3489 return X86SelectBranch(I); 3587 return X86SelectBranch(I);
3490 case Instruction::LShr: 3588 case Instruction::LShr:
3491 case Instruction::AShr: 3589 case Instruction::AShr:
3492 case Instruction::Shl: 3590 case Instruction::Shl:
3528 EVT DstVT = TLI.getValueType(DL, I->getType()); 3626 EVT DstVT = TLI.getValueType(DL, I->getType());
3529 3627
3530 if (!SrcVT.isSimple() || !DstVT.isSimple()) 3628 if (!SrcVT.isSimple() || !DstVT.isSimple())
3531 return false; 3629 return false;
3532 3630
3533 if (!SrcVT.is128BitVector() &&
3534     !(Subtarget->hasAVX() && SrcVT.is256BitVector()))
3631 MVT SVT = SrcVT.getSimpleVT();
3632 MVT DVT = DstVT.getSimpleVT();
3633
3634 if (!SVT.is128BitVector() &&
3635 !(Subtarget->hasAVX() && SVT.is256BitVector()) &&
3636 !(Subtarget->hasAVX512() && SVT.is512BitVector() &&
3637 (Subtarget->hasBWI() || (SVT.getScalarSizeInBits() >= 32 &&
3638 DVT.getScalarSizeInBits() >= 32))))
3535 return false; 3639 return false;
3536 3640
3537 unsigned Reg = getRegForValue(I->getOperand(0)); 3641 unsigned Reg = getRegForValue(I->getOperand(0));
3538 if (Reg == 0) 3642 if (Reg == 0)
3539 return false; 3643 return false;
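
Reading aid (editor's restatement of the triple-negated condition above; the function name and parameters are hypothetical): 128-bit vectors are always accepted, 256-bit vectors need AVX, and 512-bit vectors now need AVX-512 plus either BWI or 32-bit-or-wider scalars on both the source and destination types.

bool isVectorTypePairSupported(unsigned SrcVecBits, unsigned SrcScalarBits,
                               unsigned DstScalarBits, bool HasAVX,
                               bool HasAVX512, bool HasBWI) {
  if (SrcVecBits == 128)
    return true;
  if (SrcVecBits == 256)
    return HasAVX;
  if (SrcVecBits == 512)
    return HasAVX512 &&
           (HasBWI || (SrcScalarBits >= 32 && DstScalarBits >= 32));
  return false;
}
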
3577 } 3681 }
3578 3682
3579 unsigned Opc = 0; 3683 unsigned Opc = 0;
3580 switch (VT.SimpleTy) { 3684 switch (VT.SimpleTy) {
3581 default: llvm_unreachable("Unexpected value type"); 3685 default: llvm_unreachable("Unexpected value type");
3582 case MVT::i1: VT = MVT::i8; LLVM_FALLTHROUGH;
3686 case MVT::i1:
3687 // TODO: Support this properly.
3688 if (Subtarget->hasAVX512())
3689 return 0;
3690 VT = MVT::i8;
3691 LLVM_FALLTHROUGH;
3583 case MVT::i8: Opc = X86::MOV8ri; break; 3692 case MVT::i8: Opc = X86::MOV8ri; break;
3584 case MVT::i16: Opc = X86::MOV16ri; break; 3693 case MVT::i16: Opc = X86::MOV16ri; break;
3585 case MVT::i32: Opc = X86::MOV32ri; break; 3694 case MVT::i32: Opc = X86::MOV32ri; break;
3586 case MVT::i64: { 3695 case MVT::i64: {
3587 if (isUInt<32>(Imm)) 3696 if (isUInt<32>(Imm))
3847 Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI)); 3956 Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
3848 MI->eraseFromParent(); 3957 MI->eraseFromParent();
3849 return true; 3958 return true;
3850 } 3959 }
3851 3960
3961 unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
3962 const TargetRegisterClass *RC,
3963 unsigned Op0, bool Op0IsKill,
3964 unsigned Op1, bool Op1IsKill,
3965 unsigned Op2, bool Op2IsKill,
3966 unsigned Op3, bool Op3IsKill) {
3967 const MCInstrDesc &II = TII.get(MachineInstOpcode);
3968
3969 unsigned ResultReg = createResultReg(RC);
3970 Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
3971 Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
3972 Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
3973 Op3 = constrainOperandRegClass(II, Op3, II.getNumDefs() + 3);
3974
3975 if (II.getNumDefs() >= 1)
3976 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
3977 .addReg(Op0, getKillRegState(Op0IsKill))
3978 .addReg(Op1, getKillRegState(Op1IsKill))
3979 .addReg(Op2, getKillRegState(Op2IsKill))
3980 .addReg(Op3, getKillRegState(Op3IsKill));
3981 else {
3982 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
3983 .addReg(Op0, getKillRegState(Op0IsKill))
3984 .addReg(Op1, getKillRegState(Op1IsKill))
3985 .addReg(Op2, getKillRegState(Op2IsKill))
3986 .addReg(Op3, getKillRegState(Op3IsKill));
3987 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3988 TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
3989 }
3990 return ResultReg;
3991 }
3992
3852 3993
3853 namespace llvm { 3994 namespace llvm {
3854 FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo, 3995 FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
3855 const TargetLibraryInfo *libInfo) { 3996 const TargetLibraryInfo *libInfo) {
3856 return new X86FastISel(funcInfo, libInfo); 3997 return new X86FastISel(funcInfo, libInfo);