# HG changeset patch # User mir3636 # Date 1512390052 -32400 # Node ID c4cc77a799c9715e4c434714eaeca6ea161f5699 # Parent 56c5119fbcd2771902273bd645aff33ebbb836fe fix diff -r 56c5119fbcd2 -r c4cc77a799c9 lib/Target/Hexagon/HexagonTargetObjectFile.cpp --- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp Sun Dec 03 20:09:16 2017 +0900 +++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp Mon Dec 04 21:20:52 2017 +0900 @@ -323,6 +323,9 @@ case Type::MetadataTyID: case Type::X86_MMXTyID: case Type::TokenTyID: +#ifndef noCbC + case Type::__CodeTyID: +#endif return 0; } diff -r 56c5119fbcd2 -r c4cc77a799c9 lib/Target/XCore/XCoreISelLowering.cpp --- a/lib/Target/XCore/XCoreISelLowering.cpp Sun Dec 03 20:09:16 2017 +0900 +++ b/lib/Target/XCore/XCoreISelLowering.cpp Mon Dec 04 21:20:52 2017 +0900 @@ -34,6 +34,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" #include @@ -110,8 +111,6 @@ setOperationAction(ISD::CTPOP, MVT::i32, Expand); setOperationAction(ISD::ROTL , MVT::i32, Expand); setOperationAction(ISD::ROTR , MVT::i32, Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); setOperationAction(ISD::TRAP, MVT::Other, Legal); @@ -156,7 +155,6 @@ // Atomic operations // We request a fence for ATOMIC_* instructions, to reduce them to Monotonic. // As we are always Sequential Consistent, an ATOMIC_FENCE becomes a no OP. - setInsertFencesForAtomic(true); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); @@ -261,7 +259,7 @@ return DAG.getNode(XCoreISD::PCRelativeWrapper, dl, MVT::i32, GA); const auto *GVar = dyn_cast(GV); - if ((GV->hasSection() && StringRef(GV->getSection()).startswith(".cp.")) || + if ((GV->hasSection() && GV->getSection().startswith(".cp.")) || (GVar && GVar->isConstant() && GV->hasLocalLinkage())) return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA); @@ -309,8 +307,7 @@ Type::getInt8Ty(*DAG.getContext()), GA, Idx); SDValue CP = DAG.getConstantPool(GAI, MVT::i32); return DAG.getLoad(getPointerTy(DAG.getDataLayout()), DL, - DAG.getEntryNode(), CP, MachinePointerInfo(), false, - false, false, 0); + DAG.getEntryNode(), CP, MachinePointerInfo()); } } @@ -371,14 +368,12 @@ ScaledIndex); } -SDValue XCoreTargetLowering:: -lowerLoadWordFromAlignedBasePlusOffset(SDLoc DL, SDValue Chain, SDValue Base, - int64_t Offset, SelectionDAG &DAG) const -{ +SDValue XCoreTargetLowering::lowerLoadWordFromAlignedBasePlusOffset( + const SDLoc &DL, SDValue Chain, SDValue Base, int64_t Offset, + SelectionDAG &DAG) const { auto PtrVT = getPointerTy(DAG.getDataLayout()); if ((Offset & 0x3) == 0) { - return DAG.getLoad(PtrVT, DL, Chain, Base, MachinePointerInfo(), false, - false, false, 0); + return DAG.getLoad(PtrVT, DL, Chain, Base, MachinePointerInfo()); } // Lower to pair of consecutive word aligned loads plus some bit shifting. int32_t HighOffset = alignTo(Offset, 4); @@ -399,10 +394,8 @@ SDValue LowShift = DAG.getConstant((Offset - LowOffset) * 8, DL, MVT::i32); SDValue HighShift = DAG.getConstant((HighOffset - Offset) * 8, DL, MVT::i32); - SDValue Low = DAG.getLoad(PtrVT, DL, Chain, LowAddr, MachinePointerInfo(), - false, false, false, 0); - SDValue High = DAG.getLoad(PtrVT, DL, Chain, HighAddr, MachinePointerInfo(), - false, false, false, 0); + SDValue Low = DAG.getLoad(PtrVT, DL, Chain, LowAddr, MachinePointerInfo()); + SDValue High = DAG.getLoad(PtrVT, DL, Chain, HighAddr, MachinePointerInfo()); SDValue LowShifted = DAG.getNode(ISD::SRL, DL, MVT::i32, Low, LowShift); SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, HighShift); SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, LowShifted, HighShifted); @@ -414,9 +407,9 @@ static bool isWordAligned(SDValue Value, SelectionDAG &DAG) { - APInt KnownZero, KnownOne; - DAG.computeKnownBits(Value, KnownZero, KnownOne); - return KnownZero.countTrailingOnes() >= 2; + KnownBits Known; + DAG.computeKnownBits(Value, Known); + return Known.countMinTrailingZeros() >= 2; } SDValue XCoreTargetLowering:: @@ -462,17 +455,16 @@ } if (LD->getAlignment() == 2) { - SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, DL, MVT::i32, Chain, - BasePtr, LD->getPointerInfo(), MVT::i16, - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), 2); + SDValue Low = + DAG.getExtLoad(ISD::ZEXTLOAD, DL, MVT::i32, Chain, BasePtr, + LD->getPointerInfo(), MVT::i16, + /* Alignment = */ 2, LD->getMemOperand()->getFlags()); SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, DAG.getConstant(2, DL, MVT::i32)); - SDValue High = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain, - HighAddr, - LD->getPointerInfo().getWithOffset(2), - MVT::i16, LD->isVolatile(), - LD->isNonTemporal(), LD->isInvariant(), 2); + SDValue High = + DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain, HighAddr, + LD->getPointerInfo().getWithOffset(2), MVT::i16, + /* Alignment = */ 2, LD->getMemOperand()->getFlags()); SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, DAG.getConstant(16, DL, MVT::i32)); SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, Low, HighShifted); @@ -492,11 +484,11 @@ Args.push_back(Entry); TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(DL).setChain(Chain).setCallee( + CLI.setDebugLoc(DL).setChain(Chain).setLibCallee( CallingConv::C, IntPtrTy, DAG.getExternalSymbol("__misaligned_load", getPointerTy(DAG.getDataLayout())), - std::move(Args), 0); + std::move(Args)); std::pair CallResult = LowerCallTo(CLI); SDValue Ops[] = { CallResult.first, CallResult.second }; @@ -529,16 +521,14 @@ SDValue Low = Value; SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value, DAG.getConstant(16, dl, MVT::i32)); - SDValue StoreLow = DAG.getTruncStore(Chain, dl, Low, BasePtr, - ST->getPointerInfo(), MVT::i16, - ST->isVolatile(), ST->isNonTemporal(), - 2); + SDValue StoreLow = DAG.getTruncStore( + Chain, dl, Low, BasePtr, ST->getPointerInfo(), MVT::i16, + /* Alignment = */ 2, ST->getMemOperand()->getFlags()); SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr, DAG.getConstant(2, dl, MVT::i32)); - SDValue StoreHigh = DAG.getTruncStore(Chain, dl, High, HighAddr, - ST->getPointerInfo().getWithOffset(2), - MVT::i16, ST->isVolatile(), - ST->isNonTemporal(), 2); + SDValue StoreHigh = DAG.getTruncStore( + Chain, dl, High, HighAddr, ST->getPointerInfo().getWithOffset(2), + MVT::i16, /* Alignment = */ 2, ST->getMemOperand()->getFlags()); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh); } @@ -559,7 +549,7 @@ CallingConv::C, Type::getVoidTy(*DAG.getContext()), DAG.getExternalSymbol("__misaligned_store", getPointerTy(DAG.getDataLayout())), - std::move(Args), 0); + std::move(Args)); std::pair CallResult = LowerCallTo(CLI); return CallResult.second; @@ -725,11 +715,9 @@ (N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && "Unknown operand to lower!"); - if (N->getOpcode() == ISD::ADD) { - SDValue Result = TryExpandADDWithMul(N, DAG); - if (Result.getNode()) + if (N->getOpcode() == ISD::ADD) + if (SDValue Result = TryExpandADDWithMul(N, DAG)) return Result; - } SDLoc dl(N); @@ -774,19 +762,17 @@ EVT PtrVT = VAListPtr.getValueType(); const Value *SV = cast(Node->getOperand(2))->getValue(); SDLoc dl(Node); - SDValue VAList = DAG.getLoad(PtrVT, dl, InChain, - VAListPtr, MachinePointerInfo(SV), - false, false, false, 0); + SDValue VAList = + DAG.getLoad(PtrVT, dl, InChain, VAListPtr, MachinePointerInfo(SV)); // Increment the pointer, VAList, to the next vararg SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAList, DAG.getIntPtrConstant(VT.getSizeInBits() / 8, dl)); // Store the incremented VAList to the legalized pointer InChain = DAG.getStore(VAList.getValue(1), dl, nextPtr, VAListPtr, - MachinePointerInfo(SV), false, false, 0); + MachinePointerInfo(SV)); // Load the actual argument out of the pointer VAList - return DAG.getLoad(VT, dl, InChain, VAList, MachinePointerInfo(), - false, false, false, 0); + return DAG.getLoad(VT, dl, InChain, VAList, MachinePointerInfo()); } SDValue XCoreTargetLowering:: @@ -799,7 +785,7 @@ XCoreFunctionInfo *XFI = MF.getInfo(); SDValue Addr = DAG.getFrameIndex(XFI->getVarArgsFrameIndex(), MVT::i32); return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), - MachinePointerInfo(), false, false, 0); + MachinePointerInfo()); } SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, @@ -832,9 +818,9 @@ XCoreFunctionInfo *XFI = MF.getInfo(); int FI = XFI->createLRSpillSlot(MF); SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); - return DAG.getLoad( - getPointerTy(DAG.getDataLayout()), SDLoc(Op), DAG.getEntryNode(), FIN, - MachinePointerInfo::getFixedStack(MF, FI), false, false, false, 0); + return DAG.getLoad(getPointerTy(DAG.getDataLayout()), SDLoc(Op), + DAG.getEntryNode(), FIN, + MachinePointerInfo::getFixedStack(MF, FI)); } SDValue XCoreTargetLowering:: @@ -915,33 +901,31 @@ SDValue Addr = Trmp; SDLoc dl(Op); - OutChains[0] = DAG.getStore(Chain, dl, - DAG.getConstant(0x0a3cd805, dl, MVT::i32), Addr, - MachinePointerInfo(TrmpAddr), false, false, 0); + OutChains[0] = + DAG.getStore(Chain, dl, DAG.getConstant(0x0a3cd805, dl, MVT::i32), Addr, + MachinePointerInfo(TrmpAddr)); Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(4, dl, MVT::i32)); - OutChains[1] = DAG.getStore(Chain, dl, - DAG.getConstant(0xd80456c0, dl, MVT::i32), Addr, - MachinePointerInfo(TrmpAddr, 4), false, false, 0); + OutChains[1] = + DAG.getStore(Chain, dl, DAG.getConstant(0xd80456c0, dl, MVT::i32), Addr, + MachinePointerInfo(TrmpAddr, 4)); Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(8, dl, MVT::i32)); - OutChains[2] = DAG.getStore(Chain, dl, - DAG.getConstant(0x27fb0a3c, dl, MVT::i32), Addr, - MachinePointerInfo(TrmpAddr, 8), false, false, 0); + OutChains[2] = + DAG.getStore(Chain, dl, DAG.getConstant(0x27fb0a3c, dl, MVT::i32), Addr, + MachinePointerInfo(TrmpAddr, 8)); Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(12, dl, MVT::i32)); - OutChains[3] = DAG.getStore(Chain, dl, Nest, Addr, - MachinePointerInfo(TrmpAddr, 12), false, false, - 0); + OutChains[3] = + DAG.getStore(Chain, dl, Nest, Addr, MachinePointerInfo(TrmpAddr, 12)); Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(16, dl, MVT::i32)); - OutChains[4] = DAG.getStore(Chain, dl, FPtr, Addr, - MachinePointerInfo(TrmpAddr, 16), false, false, - 0); + OutChains[4] = + DAG.getStore(Chain, dl, FPtr, Addr, MachinePointerInfo(TrmpAddr, 16)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } @@ -973,29 +957,30 @@ LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const { AtomicSDNode *N = cast(Op); assert(N->getOpcode() == ISD::ATOMIC_LOAD && "Bad Atomic OP"); - assert(N->getOrdering() <= Monotonic && - "setInsertFencesForAtomic(true) and yet greater than Monotonic"); + assert((N->getOrdering() == AtomicOrdering::Unordered || + N->getOrdering() == AtomicOrdering::Monotonic) && + "setInsertFencesForAtomic(true) expects unordered / monotonic"); if (N->getMemoryVT() == MVT::i32) { if (N->getAlignment() < 4) report_fatal_error("atomic load must be aligned"); return DAG.getLoad(getPointerTy(DAG.getDataLayout()), SDLoc(Op), N->getChain(), N->getBasePtr(), N->getPointerInfo(), - N->isVolatile(), N->isNonTemporal(), N->isInvariant(), - N->getAlignment(), N->getAAInfo(), N->getRanges()); + N->getAlignment(), N->getMemOperand()->getFlags(), + N->getAAInfo(), N->getRanges()); } if (N->getMemoryVT() == MVT::i16) { if (N->getAlignment() < 2) report_fatal_error("atomic load must be aligned"); return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), MVT::i32, N->getChain(), N->getBasePtr(), N->getPointerInfo(), MVT::i16, - N->isVolatile(), N->isNonTemporal(), - N->isInvariant(), N->getAlignment(), N->getAAInfo()); + N->getAlignment(), N->getMemOperand()->getFlags(), + N->getAAInfo()); } if (N->getMemoryVT() == MVT::i8) return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), MVT::i32, N->getChain(), N->getBasePtr(), N->getPointerInfo(), MVT::i8, - N->isVolatile(), N->isNonTemporal(), - N->isInvariant(), N->getAlignment(), N->getAAInfo()); + N->getAlignment(), N->getMemOperand()->getFlags(), + N->getAAInfo()); return SDValue(); } @@ -1003,29 +988,29 @@ LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const { AtomicSDNode *N = cast(Op); assert(N->getOpcode() == ISD::ATOMIC_STORE && "Bad Atomic OP"); - assert(N->getOrdering() <= Monotonic && - "setInsertFencesForAtomic(true) and yet greater than Monotonic"); + assert((N->getOrdering() == AtomicOrdering::Unordered || + N->getOrdering() == AtomicOrdering::Monotonic) && + "setInsertFencesForAtomic(true) expects unordered / monotonic"); if (N->getMemoryVT() == MVT::i32) { if (N->getAlignment() < 4) report_fatal_error("atomic store must be aligned"); - return DAG.getStore(N->getChain(), SDLoc(Op), N->getVal(), - N->getBasePtr(), N->getPointerInfo(), - N->isVolatile(), N->isNonTemporal(), - N->getAlignment(), N->getAAInfo()); + return DAG.getStore(N->getChain(), SDLoc(Op), N->getVal(), N->getBasePtr(), + N->getPointerInfo(), N->getAlignment(), + N->getMemOperand()->getFlags(), N->getAAInfo()); } if (N->getMemoryVT() == MVT::i16) { if (N->getAlignment() < 2) report_fatal_error("atomic store must be aligned"); return DAG.getTruncStore(N->getChain(), SDLoc(Op), N->getVal(), N->getBasePtr(), N->getPointerInfo(), MVT::i16, - N->isVolatile(), N->isNonTemporal(), - N->getAlignment(), N->getAAInfo()); + N->getAlignment(), N->getMemOperand()->getFlags(), + N->getAAInfo()); } if (N->getMemoryVT() == MVT::i8) return DAG.getTruncStore(N->getChain(), SDLoc(Op), N->getVal(), N->getBasePtr(), N->getPointerInfo(), MVT::i8, - N->isVolatile(), N->isNonTemporal(), - N->getAlignment(), N->getAAInfo()); + N->getAlignment(), N->getMemOperand()->getFlags(), + N->getAAInfo()); return SDValue(); } @@ -1061,7 +1046,7 @@ switch (CallConv) { default: - llvm_unreachable("Unsupported calling convention"); + report_fatal_error("Unsupported calling convention"); case CallingConv::Fast: case CallingConv::C: return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall, @@ -1071,11 +1056,10 @@ /// LowerCallResult - Lower the result values of a call into the /// appropriate copies out of appropriate physical registers / memory locations. -static SDValue -LowerCallResult(SDValue Chain, SDValue InFlag, - const SmallVectorImpl &RVLocs, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) { +static SDValue LowerCallResult(SDValue Chain, SDValue InFlag, + const SmallVectorImpl &RVLocs, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) { SmallVector, 4> ResultMemLocs; // Copy results out of physical registers. for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { @@ -1118,15 +1102,12 @@ /// regs to (physical regs)/(stack frame), CALLSEQ_START and /// CALLSEQ_END are emitted. /// TODO: isTailCall, sret. -SDValue -XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const { +SDValue XCoreTargetLowering::LowerCCCCallTo( + SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SmallVectorImpl &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl &InVals) const { // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; @@ -1150,8 +1131,7 @@ unsigned NumBytes = RetCCInfo.getNextStackOffset(); auto PtrVT = getPointerTy(DAG.getDataLayout()); - Chain = DAG.getCALLSEQ_START(Chain, - DAG.getConstant(NumBytes, dl, PtrVT, true), dl); + Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); SmallVector, 4> RegsToPass; SmallVector MemOpChains; @@ -1256,19 +1236,14 @@ } /// XCore formal arguments implementation -SDValue -XCoreTargetLowering::LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl &Ins, - SDLoc dl, - SelectionDAG &DAG, - SmallVectorImpl &InVals) - const { +SDValue XCoreTargetLowering::LowerFormalArguments( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl &InVals) const { switch (CallConv) { default: - llvm_unreachable("Unsupported calling convention"); + report_fatal_error("Unsupported calling convention"); case CallingConv::C: case CallingConv::Fast: return LowerCCCArguments(Chain, CallConv, isVarArg, @@ -1280,17 +1255,12 @@ /// virtual registers and generate load operations for /// arguments places on the stack. /// TODO: sret -SDValue -XCoreTargetLowering::LowerCCCArguments(SDValue Chain, - CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl - &Ins, - SDLoc dl, - SelectionDAG &DAG, - SmallVectorImpl &InVals) const { +SDValue XCoreTargetLowering::LowerCCCArguments( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); XCoreFunctionInfo *XFI = MF.getInfo(); @@ -1333,7 +1303,7 @@ { #ifndef NDEBUG errs() << "LowerFormalArguments Unhandled argument type: " - << RegVT.getSimpleVT().SimpleTy << "\n"; + << RegVT.getEVTString() << "\n"; #endif llvm_unreachable(nullptr); } @@ -1354,16 +1324,15 @@ << "\n"; } // Create the frame index object for this incoming parameter... - int FI = MFI->CreateFixedObject(ObjSize, - LRSaveSize + VA.getLocMemOffset(), - true); + int FI = MFI.CreateFixedObject(ObjSize, + LRSaveSize + VA.getLocMemOffset(), + true); // Create the SelectionDAG nodes corresponding to a load //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); ArgIn = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, - MachinePointerInfo::getFixedStack(MF, FI), false, - false, false, 0); + MachinePointerInfo::getFixedStack(MF, FI)); } const ArgDataPair ADP = { ArgIn, Ins[i].Flags }; ArgData.push_back(ADP); @@ -1383,7 +1352,7 @@ // address for (int i = array_lengthof(ArgRegs) - 1; i >= (int)FirstVAReg; --i) { // Create a stack slot - int FI = MFI->CreateFixedObject(4, offset, true); + int FI = MFI.CreateFixedObject(4, offset, true); if (i == (int)FirstVAReg) { XFI->setVarArgsFrameIndex(FI); } @@ -1395,15 +1364,15 @@ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); CFRegNode.push_back(Val.getValue(Val->getNumValues() - 1)); // Move argument from virt reg -> stack - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(), false, false, 0); + SDValue Store = + DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); MemOps.push_back(Store); } } else { // This will point to the next argument passed via stack. XFI->setVarArgsFrameIndex( - MFI->CreateFixedObject(4, LRSaveSize + CCInfo.getNextStackOffset(), - true)); + MFI.CreateFixedObject(4, LRSaveSize + CCInfo.getNextStackOffset(), + true)); } } @@ -1422,7 +1391,7 @@ unsigned Size = ArgDI->Flags.getByValSize(); unsigned Align = std::max(StackSlotSize, ArgDI->Flags.getByValAlign()); // Create a new object on the stack and copy the pointee into it. - int FI = MFI->CreateStackObject(Size, Align, false); + int FI = MFI.CreateStackObject(Size, Align, false); SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); InVals.push_back(FIN); MemOps.push_back(DAG.getMemcpy(Chain, dl, FIN, ArgDI->SDV, @@ -1463,15 +1432,15 @@ } SDValue -XCoreTargetLowering::LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, +XCoreTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, - SDLoc dl, SelectionDAG &DAG) const { + const SDLoc &dl, SelectionDAG &DAG) const { XCoreFunctionInfo *XFI = DAG.getMachineFunction().getInfo(); - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); // CCValAssign - represent the assignment of // the return value to a location @@ -1507,15 +1476,14 @@ int Offset = VA.getLocMemOffset(); unsigned ObjSize = VA.getLocVT().getSizeInBits() / 8; // Create the frame index object for the memory location. - int FI = MFI->CreateFixedObject(ObjSize, Offset, false); + int FI = MFI.CreateFixedObject(ObjSize, Offset, false); // Create a SelectionDAG node corresponding to a store // to this memory location. SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); MemOpChains.push_back(DAG.getStore( Chain, dl, OutVals[i], FIN, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false, - false, 0)); + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI))); } // Transform all store nodes into one single node because @@ -1551,11 +1519,11 @@ //===----------------------------------------------------------------------===// MachineBasicBlock * -XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, +XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); - DebugLoc dl = MI->getDebugLoc(); - assert((MI->getOpcode() == XCore::SELECT_CC) && + DebugLoc dl = MI.getDebugLoc(); + assert((MI.getOpcode() == XCore::SELECT_CC) && "Unexpected instr type to insert"); // To "insert" a SELECT_CC instruction, we actually have to insert the diamond @@ -1588,7 +1556,8 @@ BB->addSuccessor(sinkMBB); BuildMI(BB, dl, TII.get(XCore::BRFT_lru6)) - .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); + .addReg(MI.getOperand(1).getReg()) + .addMBB(sinkMBB); // copy0MBB: // %FalseValue = ... @@ -1602,12 +1571,13 @@ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... BB = sinkMBB; - BuildMI(*BB, BB->begin(), dl, - TII.get(XCore::PHI), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB) - .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); + BuildMI(*BB, BB->begin(), dl, TII.get(XCore::PHI), MI.getOperand(0).getReg()) + .addReg(MI.getOperand(3).getReg()) + .addMBB(copy0MBB) + .addReg(MI.getOperand(2).getReg()) + .addMBB(thisMBB); - MI->eraseFromParent(); // The pseudo instruction is gone now. + MI.eraseFromParent(); // The pseudo instruction is gone now. return BB; } @@ -1631,13 +1601,12 @@ if (OutVal.hasOneUse()) { unsigned BitWidth = OutVal.getValueSizeInBits(); APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8); - APInt KnownZero, KnownOne; + KnownBits Known; TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(OutVal, DemandedMask) || - TLI.SimplifyDemandedBits(OutVal, DemandedMask, KnownZero, KnownOne, - TLO)) + if (TLI.ShrinkDemandedConstant(OutVal, DemandedMask, TLO) || + TLI.SimplifyDemandedBits(OutVal, DemandedMask, Known, TLO)) DCI.CommitTargetLoweringOpt(TLO); } break; @@ -1648,13 +1617,12 @@ if (Time.hasOneUse()) { unsigned BitWidth = Time.getValueSizeInBits(); APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16); - APInt KnownZero, KnownOne; + KnownBits Known; TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(Time, DemandedMask) || - TLI.SimplifyDemandedBits(Time, DemandedMask, KnownZero, KnownOne, - TLO)) + if (TLI.ShrinkDemandedConstant(Time, DemandedMask, TLO) || + TLI.SimplifyDemandedBits(Time, DemandedMask, Known, TLO)) DCI.CommitTargetLoweringOpt(TLO); } break; @@ -1685,11 +1653,11 @@ // fold (ladd x, 0, y) -> 0, add x, y iff carry is unused and y has only the // low bit set if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) { - APInt KnownZero, KnownOne; + KnownBits Known; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); - DAG.computeKnownBits(N2, KnownZero, KnownOne); - if ((KnownZero & Mask) == Mask) { + DAG.computeKnownBits(N2, Known); + if ((Known.Zero & Mask) == Mask) { SDValue Carry = DAG.getConstant(0, dl, VT); SDValue Result = DAG.getNode(ISD::ADD, dl, VT, N0, N2); SDValue Ops[] = { Result, Carry }; @@ -1708,11 +1676,11 @@ // fold (lsub 0, 0, x) -> x, -x iff x has only the low bit set if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) { - APInt KnownZero, KnownOne; + KnownBits Known; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); - DAG.computeKnownBits(N2, KnownZero, KnownOne); - if ((KnownZero & Mask) == Mask) { + DAG.computeKnownBits(N2, Known); + if ((Known.Zero & Mask) == Mask) { SDValue Borrow = N2; SDValue Result = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), N2); @@ -1724,11 +1692,11 @@ // fold (lsub x, 0, y) -> 0, sub x, y iff borrow is unused and y has only the // low bit set if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) { - APInt KnownZero, KnownOne; + KnownBits Known; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); - DAG.computeKnownBits(N2, KnownZero, KnownOne); - if ((KnownZero & Mask) == Mask) { + DAG.computeKnownBits(N2, Known); + if ((Known.Zero & Mask) == Mask) { SDValue Borrow = DAG.getConstant(0, dl, VT); SDValue Result = DAG.getNode(ISD::SUB, dl, VT, N0, N2); SDValue Ops[] = { Result, Borrow }; @@ -1852,19 +1820,19 @@ } void XCoreTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, + const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { - KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); + Known.resetAll(); switch (Op.getOpcode()) { default: break; case XCoreISD::LADD: case XCoreISD::LSUB: if (Op.getResNo() == 1) { // Top bits of carry / borrow are clear. - KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(), - KnownZero.getBitWidth() - 1); + Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(), + Known.getBitWidth() - 1); } break; case ISD::INTRINSIC_W_CHAIN: @@ -1873,24 +1841,24 @@ switch (IntNo) { case Intrinsic::xcore_getts: // High bits are known to be zero. - KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(), - KnownZero.getBitWidth() - 16); + Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(), + Known.getBitWidth() - 16); break; case Intrinsic::xcore_int: case Intrinsic::xcore_inct: // High bits are known to be zero. - KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(), - KnownZero.getBitWidth() - 8); + Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(), + Known.getBitWidth() - 8); break; case Intrinsic::xcore_testct: // Result is either 0 or 1. - KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(), - KnownZero.getBitWidth() - 1); + Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(), + Known.getBitWidth() - 1); break; case Intrinsic::xcore_testwct: // Result is in the range 0 - 4. - KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(), - KnownZero.getBitWidth() - 3); + Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(), + Known.getBitWidth() - 3); break; } } @@ -1921,7 +1889,8 @@ /// by AM is legal for this target, for a load/store of the specified type. bool XCoreTargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, - unsigned AS) const { + unsigned AS, + Instruction *I) const { #ifndef noCbC if (Ty->getTypeID() == Type::VoidTyID || Ty->getTypeID() == Type::__CodeTyID) #else diff -r 56c5119fbcd2 -r c4cc77a799c9 lib/Transforms/IPO/PassManagerBuilder.cpp --- a/lib/Transforms/IPO/PassManagerBuilder.cpp Sun Dec 03 20:09:16 2017 +0900 +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp Mon Dec 04 21:20:52 2017 +0900 @@ -16,41 +16,46 @@ #include "llvm-c/Transforms/PassManagerBuilder.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/CFLAliasAnalysis.h" +#include "llvm/Analysis/CFLAndersAliasAnalysis.h" +#include "llvm/Analysis/CFLSteensAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/FunctionInfo.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/Verifier.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/ForceFunctionAttrs.h" +#include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/IPO/InferFunctionAttrs.h" +#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Vectorize.h" -#include "llvm/Transforms/Instrumentation.h" using namespace llvm; static cl::opt -RunLoopVectorization("vectorize-loops", cl::Hidden, - cl::desc("Run the Loop vectorization passes")); + RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, + cl::ZeroOrMore, cl::desc("Run Partial inlinining pass")); + +static cl::opt + RunLoopVectorization("vectorize-loops", cl::Hidden, + cl::desc("Run the Loop vectorization passes")); static cl::opt RunSLPVectorization("vectorize-slp", cl::Hidden, cl::desc("Run the SLP vectorization passes")); static cl::opt -RunBBVectorization("vectorize-slp-aggressive", cl::Hidden, - cl::desc("Run the BB vectorization passes")); - -static cl::opt UseGVNAfterVectorization("use-gvn-after-vectorization", cl::init(false), cl::Hidden, cl::desc("Run GVN instead of Early CSE after vectorization passes")); @@ -59,21 +64,12 @@ "extra-vectorizer-passes", cl::init(false), cl::Hidden, cl::desc("Run cleanup optimization passes after vectorization.")); -static cl::opt UseNewSROA("use-new-sroa", - cl::init(true), cl::Hidden, - cl::desc("Enable the new, experimental SROA pass")); - static cl::opt RunLoopRerolling("reroll-loops", cl::Hidden, cl::desc("Run the loop rerolling pass")); -static cl::opt -RunFloat2Int("float-to-int", cl::Hidden, cl::init(true), - cl::desc("Run the float2int (float demotion) pass")); - -static cl::opt RunLoadCombine("combine-loads", cl::init(false), - cl::Hidden, - cl::desc("Run the load combining pass")); +static cl::opt RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, + cl::desc("Run the NewGVN pass")); static cl::opt RunSLPAfterLoopVectorization("run-slp-after-loop-vectorization", @@ -81,61 +77,96 @@ cl::desc("Run the SLP vectorizer (and BB vectorizer) after the Loop " "vectorizer instead of before")); -static cl::opt UseCFLAA("use-cfl-aa", - cl::init(false), cl::Hidden, - cl::desc("Enable the new, experimental CFL alias analysis")); - -static cl::opt -EnableMLSM("mlsm", cl::init(true), cl::Hidden, - cl::desc("Enable motion of merged load and store")); +// Experimental option to use CFL-AA +enum class CFLAAType { None, Steensgaard, Andersen, Both }; +static cl::opt + UseCFLAA("use-cfl-aa", cl::init(CFLAAType::None), cl::Hidden, + cl::desc("Enable the new, experimental CFL alias analysis"), + cl::values(clEnumValN(CFLAAType::None, "none", "Disable CFL-AA"), + clEnumValN(CFLAAType::Steensgaard, "steens", + "Enable unification-based CFL-AA"), + clEnumValN(CFLAAType::Andersen, "anders", + "Enable inclusion-based CFL-AA"), + clEnumValN(CFLAAType::Both, "both", + "Enable both variants of CFL-AA"))); static cl::opt EnableLoopInterchange( "enable-loopinterchange", cl::init(false), cl::Hidden, cl::desc("Enable the new, experimental LoopInterchange Pass")); -static cl::opt EnableLoopDistribute( - "enable-loop-distribute", cl::init(false), cl::Hidden, - cl::desc("Enable the new, experimental LoopDistribution Pass")); - -static cl::opt EnableNonLTOGlobalsModRef( - "enable-non-lto-gmr", cl::init(true), cl::Hidden, - cl::desc( - "Enable the GlobalsModRef AliasAnalysis outside of the LTO pipeline.")); +static cl::opt + EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden, + cl::desc("Enable preparation for ThinLTO.")); -static cl::opt EnableLoopLoadElim( - "enable-loop-load-elim", cl::init(false), cl::Hidden, - cl::desc("Enable the new, experimental LoopLoadElimination Pass")); +static cl::opt RunPGOInstrGen( + "profile-generate", cl::init(false), cl::Hidden, + cl::desc("Enable PGO instrumentation.")); -static cl::opt RunPGOInstrGen( - "profile-generate", cl::init(""), cl::Hidden, - cl::desc("Enable generation phase of PGO instrumentation and specify the " - "path of profile data file")); +static cl::opt + PGOOutputFile("profile-generate-file", cl::init(""), cl::Hidden, + cl::desc("Specify the path of profile data file.")); static cl::opt RunPGOInstrUse( "profile-use", cl::init(""), cl::Hidden, cl::value_desc("filename"), cl::desc("Enable use phase of PGO instrumentation and specify the path " "of profile data file")); +static cl::opt UseLoopVersioningLICM( + "enable-loop-versioning-licm", cl::init(false), cl::Hidden, + cl::desc("Enable the experimental Loop Versioning LICM pass")); + +static cl::opt + DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, + cl::desc("Disable pre-instrumentation inliner")); + +static cl::opt PreInlineThreshold( + "preinline-threshold", cl::Hidden, cl::init(75), cl::ZeroOrMore, + cl::desc("Control the amount of inlining in pre-instrumentation inliner " + "(default = 75)")); + +static cl::opt EnableEarlyCSEMemSSA( + "enable-earlycse-memssa", cl::init(true), cl::Hidden, + cl::desc("Enable the EarlyCSE w/ MemorySSA pass (default = on)")); + +static cl::opt EnableGVNHoist( + "enable-gvn-hoist", cl::init(false), cl::Hidden, + cl::desc("Enable the GVN hoisting pass (default = off)")); + +static cl::opt + DisableLibCallsShrinkWrap("disable-libcalls-shrinkwrap", cl::init(false), + cl::Hidden, + cl::desc("Disable shrink-wrap library calls")); + +static cl::opt + EnableSimpleLoopUnswitch("enable-simple-loop-unswitch", cl::init(false), + cl::Hidden, + cl::desc("Enable the simple loop unswitch pass.")); + +static cl::opt EnableGVNSink( + "enable-gvn-sink", cl::init(false), cl::Hidden, + cl::desc("Enable the GVN sinking pass (default = off)")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; LibraryInfo = nullptr; Inliner = nullptr; - FunctionIndex = nullptr; - DisableUnitAtATime = false; DisableUnrollLoops = false; - BBVectorize = RunBBVectorization; SLPVectorize = RunSLPVectorization; LoopVectorize = RunLoopVectorization; RerollLoops = RunLoopRerolling; - LoadCombine = RunLoadCombine; + NewGVN = RunNewGVN; DisableGVNLoadPRE = false; VerifyInput = false; VerifyOutput = false; MergeFunctions = false; PrepareForLTO = false; - PGOInstrGen = RunPGOInstrGen; + EnablePGOInstrGen = RunPGOInstrGen; + PGOInstrGen = PGOOutputFile; PGOInstrUse = RunPGOInstrUse; + PrepareForThinLTO = EnablePrepareForThinLTO; + PerformThinLTO = false; + DivergentTarget = false; } PassManagerBuilder::~PassManagerBuilder() { @@ -147,21 +178,31 @@ static ManagedStatic, 8> > GlobalExtensions; +/// Check if GlobalExtensions is constructed and not empty. +/// Since GlobalExtensions is a managed static, calling 'empty()' will trigger +/// the construction of the object. +static bool GlobalExtensionsNotEmpty() { + return GlobalExtensions.isConstructed() && !GlobalExtensions->empty(); +} + void PassManagerBuilder::addGlobalExtension( PassManagerBuilder::ExtensionPointTy Ty, PassManagerBuilder::ExtensionFn Fn) { - GlobalExtensions->push_back(std::make_pair(Ty, Fn)); + GlobalExtensions->push_back(std::make_pair(Ty, std::move(Fn))); } void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) { - Extensions.push_back(std::make_pair(Ty, Fn)); + Extensions.push_back(std::make_pair(Ty, std::move(Fn))); } void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy, legacy::PassManagerBase &PM) const { - for (unsigned i = 0, e = GlobalExtensions->size(); i != e; ++i) - if ((*GlobalExtensions)[i].first == ETy) - (*GlobalExtensions)[i].second(*this, PM); + if (GlobalExtensionsNotEmpty()) { + for (auto &Ext : *GlobalExtensions) { + if (Ext.first == ETy) + Ext.second(*this, PM); + } + } for (unsigned i = 0, e = Extensions.size(); i != e; ++i) if (Extensions[i].first == ETy) Extensions[i].second(*this, PM); @@ -169,15 +210,34 @@ void PassManagerBuilder::addInitialAliasAnalysisPasses( legacy::PassManagerBase &PM) const { + switch (UseCFLAA) { + case CFLAAType::Steensgaard: + PM.add(createCFLSteensAAWrapperPass()); + break; + case CFLAAType::Andersen: + PM.add(createCFLAndersAAWrapperPass()); + break; + case CFLAAType::Both: + PM.add(createCFLSteensAAWrapperPass()); + PM.add(createCFLAndersAAWrapperPass()); + break; + default: + break; + } + // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that // BasicAliasAnalysis wins if they disagree. This is intended to help // support "obvious" type-punning idioms. - if (UseCFLAA) - PM.add(createCFLAAWrapperPass()); PM.add(createTypeBasedAAWrapperPass()); PM.add(createScopedNoAliasAAWrapperPass()); } +void PassManagerBuilder::addInstructionCombiningPass( + legacy::PassManagerBase &PM) const { + bool ExpensiveCombines = OptLevel > 2; + PM.add(createInstructionCombiningPass(ExpensiveCombines)); +} + void PassManagerBuilder::populateFunctionPassManager( legacy::FunctionPassManager &FPM) { addExtensionsToPM(EP_EarlyAsPossible, FPM); @@ -188,10 +248,7 @@ #ifndef noCbC if (OptLevel == 0) { - if (UseNewSROA) - FPM.add(createSROAPass(true)); - else - FPM.add(createScalarReplAggregatesPass()); + FPM.add(createSROAPass(true)); } #else if (OptLevel == 0) return; @@ -201,15 +258,9 @@ FPM.add(createCFGSimplificationPass()); #ifndef noCbC - if (UseNewSROA) - FPM.add(createSROAPass(false)); - else - FPM.add(createScalarReplAggregatesPass()); + FPM.add(createSROAPass(false)); #else - if (UseNewSROA) - FPM.add(createSROAPass()); - else - FPM.add(createScalarReplAggregatesPass()); + FPM.add(createSROAPass()); #endif FPM.add(createEarlyCSEPass()); FPM.add(createLowerExpectIntrinsicPass()); @@ -217,19 +268,152 @@ // Do PGO instrumentation generation or use pass as the option specified. void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) { - if (!PGOInstrGen.empty()) { - MPM.add(createPGOInstrumentationGenPass()); + if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty()) + return; + // Perform the preinline and cleanup passes for O1 and above. + // And avoid doing them if optimizing for size. + if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner && + PGOSampleUse.empty()) { + // Create preinline pass. We construct an InlineParams object and specify + // the threshold here to avoid the command line options of the regular + // inliner to influence pre-inlining. The only fields of InlineParams we + // care about are DefaultThreshold and HintThreshold. + InlineParams IP; + IP.DefaultThreshold = PreInlineThreshold; + // FIXME: The hint threshold has the same value used by the regular inliner. + // This should probably be lowered after performance testing. + IP.HintThreshold = 325; + + MPM.add(createFunctionInliningPass(IP)); + MPM.add(createSROAPass()); + MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createInstructionCombiningPass()); // Combine silly seq's + addExtensionsToPM(EP_Peephole, MPM); + } + if (EnablePGOInstrGen) { + MPM.add(createPGOInstrumentationGenLegacyPass()); // Add the profile lowering pass. InstrProfOptions Options; - Options.InstrProfileOutput = PGOInstrGen; - MPM.add(createInstrProfilingPass(Options)); + if (!PGOInstrGen.empty()) + Options.InstrProfileOutput = PGOInstrGen; + Options.DoCounterPromotion = true; + MPM.add(createLoopRotatePass()); + MPM.add(createInstrProfilingLegacyPass(Options)); } if (!PGOInstrUse.empty()) - MPM.add(createPGOInstrumentationUsePass(PGOInstrUse)); + MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse)); + // Indirect call promotion that promotes intra-module targets only. + // For ThinLTO this is done earlier due to interactions with globalopt + // for imported functions. We don't run this at -O0. + if (OptLevel > 0) + MPM.add( + createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty())); +} +void PassManagerBuilder::addFunctionSimplificationPasses( + legacy::PassManagerBase &MPM) { + // Start of function pass. + // Break up aggregate allocas, using SSAUpdater. +#ifndef noCbC + MPM.add(createSROAPass(false)); +#else + MPM.add(createSROAPass()); +#endif + MPM.add(createEarlyCSEPass(EnableEarlyCSEMemSSA)); // Catch trivial redundancies + if (EnableGVNHoist) + MPM.add(createGVNHoistPass()); + if (EnableGVNSink) { + MPM.add(createGVNSinkPass()); + MPM.add(createCFGSimplificationPass()); + } + + // Speculative execution if the target has divergent branches; otherwise nop. + MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); + MPM.add(createJumpThreadingPass()); // Thread jumps. + MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + // Combine silly seq's + addInstructionCombiningPass(MPM); + if (SizeLevel == 0 && !DisableLibCallsShrinkWrap) + MPM.add(createLibCallsShrinkWrapPass()); + addExtensionsToPM(EP_Peephole, MPM); + + // Optimize memory intrinsic calls based on the profiled size information. + if (SizeLevel == 0) + MPM.add(createPGOMemOPSizeOptLegacyPass()); + +#ifndef noCbC + MPM.add(createTailCallEliminationPass(false)); // Eliminate tail calls +#else + MPM.add(createTailCallEliminationPass()); // Eliminate tail calls +#endif + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createReassociatePass()); // Reassociate expressions + // Rotate Loop - disable header duplication at -Oz + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + MPM.add(createLICMPass()); // Hoist loop invariants + if (EnableSimpleLoopUnswitch) + MPM.add(createSimpleLoopUnswitchLegacyPass()); + else + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); + MPM.add(createCFGSimplificationPass()); + addInstructionCombiningPass(MPM); + MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars + MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. + addExtensionsToPM(EP_LateLoopOptimizations, MPM); + MPM.add(createLoopDeletionPass()); // Delete dead loops + + if (EnableLoopInterchange) { + MPM.add(createLoopInterchangePass()); // Interchange loops + MPM.add(createCFGSimplificationPass()); + } + if (!DisableUnrollLoops) + MPM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops + addExtensionsToPM(EP_LoopOptimizerEnd, MPM); + + if (OptLevel > 1) { + MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds + MPM.add(NewGVN ? createNewGVNPass() + : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies + } + MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset + MPM.add(createSCCPPass()); // Constant prop with SCCP + + // Delete dead bit computations (instcombine runs after to fold away the dead + // computations, and then ADCE will run later to exploit any new DCE + // opportunities that creates). + MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations + + // Run instcombine after redundancy elimination to exploit opportunities + // opened up by them. + addInstructionCombiningPass(MPM); + addExtensionsToPM(EP_Peephole, MPM); + MPM.add(createJumpThreadingPass()); // Thread jumps + MPM.add(createCorrelatedValuePropagationPass()); + MPM.add(createDeadStoreEliminationPass()); // Delete dead stores + MPM.add(createLICMPass()); + + addExtensionsToPM(EP_ScalarOptimizerLate, MPM); + + if (RerollLoops) + MPM.add(createLoopRerollPass()); + if (!RunSLPAfterLoopVectorization && SLPVectorize) + MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + + MPM.add(createAggressiveDCEPass()); // Delete dead instructions + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + // Clean up after everything. + addInstructionCombiningPass(MPM); + addExtensionsToPM(EP_Peephole, MPM); } void PassManagerBuilder::populateModulePassManager( legacy::PassManagerBase &MPM) { + if (!PGOSampleUse.empty()) { + MPM.add(createPruneEHPass()); + MPM.add(createSampleProfileLoaderPass(PGOSampleUse)); + } + // Allow forcing function attributes as a debugging and tuning aid. MPM.add(createForceFunctionAttrsLegacyPass()); @@ -246,10 +430,10 @@ // creates a CGSCC pass manager, but we don't want to add extensions into // that pass manager. To prevent this we insert a no-op module pass to reset // the pass manager to get the same behavior as EP_OptimizerLast in non-O0 - // builds. The function merging pass is + // builds. The function merging pass is if (MergeFunctions) MPM.add(createMergeFunctionsPass()); - else if (!GlobalExtensions->empty() || !Extensions.empty()) + else if (GlobalExtensionsNotEmpty() || !Extensions.empty()) MPM.add(createBarrierNoopPass()); #ifndef noCbC @@ -257,6 +441,13 @@ #endif addExtensionsToPM(EP_EnabledOnOptLevel0, MPM); + + // Rename anon globals to be able to export them in the summary. + // This has to be done after we add the extensions to the pass manager + // as there could be passes (e.g. Adddress sanitizer) which introduce + // new unnamed globals. + if (PrepareForThinLTO) + MPM.add(createNameAnonGlobalPass()); return; } @@ -266,150 +457,80 @@ addInitialAliasAnalysisPasses(MPM); - if (!DisableUnitAtATime) { - // Infer attributes about declarations if possible. - MPM.add(createInferFunctionAttrsLegacyPass()); - - addExtensionsToPM(EP_ModuleOptimizerEarly, MPM); + // For ThinLTO there are two passes of indirect call promotion. The + // first is during the compile phase when PerformThinLTO=false and + // intra-module indirect call targets are promoted. The second is during + // the ThinLTO backend when PerformThinLTO=true, when we promote imported + // inter-module indirect calls. For that we perform indirect call promotion + // earlier in the pass pipeline, here before globalopt. Otherwise imported + // available_externally functions look unreferenced and are removed. + if (PerformThinLTO) + MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true, + !PGOSampleUse.empty())); - MPM.add(createIPSCCPPass()); // IP SCCP - MPM.add(createGlobalOptimizerPass()); // Optimize out global vars - // Promote any localized global vars - MPM.add(createPromoteMemoryToRegisterPass()); + // For SamplePGO in ThinLTO compile phase, we do not want to unroll loops + // as it will change the CFG too much to make the 2nd profile annotation + // in backend more difficult. + bool PrepareForThinLTOUsingPGOSampleProfile = + PrepareForThinLTO && !PGOSampleUse.empty(); + if (PrepareForThinLTOUsingPGOSampleProfile) + DisableUnrollLoops = true; - MPM.add(createDeadArgEliminationPass()); // Dead argument elimination + // Infer attributes about declarations if possible. + MPM.add(createInferFunctionAttrsLegacyPass()); + + addExtensionsToPM(EP_ModuleOptimizerEarly, MPM); - MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE - addExtensionsToPM(EP_Peephole, MPM); - MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE - } + MPM.add(createIPSCCPPass()); // IP SCCP + MPM.add(createCalledValuePropagationPass()); + MPM.add(createGlobalOptimizerPass()); // Optimize out global vars + // Promote any localized global vars. + MPM.add(createPromoteMemoryToRegisterPass()); + + MPM.add(createDeadArgEliminationPass()); // Dead argument elimination + + addInstructionCombiningPass(MPM); // Clean up after IPCP & DAE + addExtensionsToPM(EP_Peephole, MPM); + MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE - addPGOInstrPasses(MPM); + // For SamplePGO in ThinLTO compile phase, we do not want to do indirect + // call promotion as it will change the CFG too much to make the 2nd + // profile annotation in backend more difficult. + // PGO instrumentation is added during the compile phase for ThinLTO, do + // not run it a second time + if (!PerformThinLTO && !PrepareForThinLTOUsingPGOSampleProfile) + addPGOInstrPasses(MPM); - if (EnableNonLTOGlobalsModRef) - // We add a module alias analysis pass here. In part due to bugs in the - // analysis infrastructure this "works" in that the analysis stays alive - // for the entire SCC pass run below. - MPM.add(createGlobalsAAWrapperPass()); + // We add a module alias analysis pass here. In part due to bugs in the + // analysis infrastructure this "works" in that the analysis stays alive + // for the entire SCC pass run below. + MPM.add(createGlobalsAAWrapperPass()); // Start of CallGraph SCC passes. - if (!DisableUnitAtATime) - MPM.add(createPruneEHPass()); // Remove dead EH info + MPM.add(createPruneEHPass()); // Remove dead EH info + bool RunInliner = false; if (Inliner) { MPM.add(Inliner); Inliner = nullptr; - } - if (!DisableUnitAtATime) - MPM.add(createPostOrderFunctionAttrsPass()); - if (OptLevel > 2) - MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args - - // Start of function pass. - // Break up aggregate allocas, using SSAUpdater. -#ifndef noCbC - if (UseNewSROA) - MPM.add(createSROAPass(false)); - else - MPM.add(createScalarReplAggregatesPass(-1, false)); -#else - if (UseNewSROA) - MPM.add(createSROAPass()); - else - MPM.add(createScalarReplAggregatesPass(-1, false)); -#endif - MPM.add(createEarlyCSEPass()); // Catch trivial redundancies - MPM.add(createJumpThreadingPass()); // Thread jumps. - MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals - MPM.add(createCFGSimplificationPass()); // Merge & remove BBs - MPM.add(createInstructionCombiningPass()); // Combine silly seq's - addExtensionsToPM(EP_Peephole, MPM); - -#ifndef noCbC - MPM.add(createTailCallEliminationPass(false)); // Eliminate tail calls -#else - MPM.add(createTailCallEliminationPass()); // Eliminate tail calls -#endif - MPM.add(createCFGSimplificationPass()); // Merge & remove BBs - MPM.add(createReassociatePass()); // Reassociate expressions - // Rotate Loop - disable header duplication at -Oz - MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); - MPM.add(createLICMPass()); // Hoist loop invariants - MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3)); - MPM.add(createCFGSimplificationPass()); - MPM.add(createInstructionCombiningPass()); - MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars - MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. - MPM.add(createLoopDeletionPass()); // Delete dead loops - if (EnableLoopInterchange) { - MPM.add(createLoopInterchangePass()); // Interchange loops - MPM.add(createCFGSimplificationPass()); - } - if (!DisableUnrollLoops) - MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops - addExtensionsToPM(EP_LoopOptimizerEnd, MPM); - - if (OptLevel > 1) { - if (EnableMLSM) - MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds - MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies - } - MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset - MPM.add(createSCCPPass()); // Constant prop with SCCP - - // Delete dead bit computations (instcombine runs after to fold away the dead - // computations, and then ADCE will run later to exploit any new DCE - // opportunities that creates). - MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations - - // Run instcombine after redundancy elimination to exploit opportunities - // opened up by them. - MPM.add(createInstructionCombiningPass()); - addExtensionsToPM(EP_Peephole, MPM); - MPM.add(createJumpThreadingPass()); // Thread jumps - MPM.add(createCorrelatedValuePropagationPass()); - MPM.add(createDeadStoreEliminationPass()); // Delete dead stores - MPM.add(createLICMPass()); - - addExtensionsToPM(EP_ScalarOptimizerLate, MPM); - - if (RerollLoops) - MPM.add(createLoopRerollPass()); - if (!RunSLPAfterLoopVectorization) { - if (SLPVectorize) - MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. - - if (BBVectorize) { - MPM.add(createBBVectorizePass()); - MPM.add(createInstructionCombiningPass()); - addExtensionsToPM(EP_Peephole, MPM); - if (OptLevel > 1 && UseGVNAfterVectorization) - MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies - else - MPM.add(createEarlyCSEPass()); // Catch trivial redundancies - - // BBVectorize may have significantly shortened a loop body; unroll again. - if (!DisableUnrollLoops) - MPM.add(createLoopUnrollPass()); - } + RunInliner = true; } - if (LoadCombine) - MPM.add(createLoadCombinePass()); + MPM.add(createPostOrderFunctionAttrsLegacyPass()); + if (OptLevel > 2) + MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args - MPM.add(createAggressiveDCEPass()); // Delete dead instructions - MPM.add(createCFGSimplificationPass()); // Merge & remove BBs - MPM.add(createInstructionCombiningPass()); // Clean up after everything. - addExtensionsToPM(EP_Peephole, MPM); + addExtensionsToPM(EP_CGSCCOptimizerLate, MPM); + addFunctionSimplificationPasses(MPM); // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC // pass manager that we are specifically trying to avoid. To prevent this // we must insert a no-op module pass to reset the pass manager. MPM.add(createBarrierNoopPass()); - if (!DisableUnitAtATime) - MPM.add(createReversePostOrderFunctionAttrsPass()); + if (RunPartialInlining) + MPM.add(createPartialInliningPass()); - if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO) { + if (OptLevel > 1 && !PrepareForLTO && !PrepareForThinLTO)