changeset 125:56c5119fbcd2
fix
| author | mir3636 |
|---|---|
| date | Sun, 03 Dec 2017 20:09:16 +0900 |
| parents | 4fa72497ed5d |
| children | c4cc77a799c9 |
| files | lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp lib/Transforms/IPO/MergeFunctions.cpp lib/Transforms/Scalar/SROA.cpp |
| diffstat | 3 files changed, 3418 insertions(+), 3031 deletions(-) |
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Thu Nov 30 20:04:56 2017 +0900 +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Sun Dec 03 20:09:16 2017 +0900 @@ -1,4 +1,4 @@ -//===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===// +//===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===// // // The LLVM Compiler Infrastructure // @@ -12,47 +12,88 @@ //===----------------------------------------------------------------------===// #include "SelectionDAGBuilder.h" -#include "SDNodeDbgValue.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" -#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" -#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/ValueTypes.h" #include "llvm/CodeGen/WinEHFuncInfo.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" -#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/Statepoint.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/AtomicOrdering.h" +#include 
"llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -61,11 +102,23 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetSelectionDAGInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <iterator> +#include <limits> +#include <numeric> +#include <tuple> #include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "isel" @@ -81,10 +134,6 @@ cl::location(LimitFloatPrecision), cl::init(0)); -static cl::opt<bool> -EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden, - cl::desc("Enable fast-math-flags for DAG nodes")); - // Limit the width of DAG chains. This is important in general to prevent // DAG-based analysis from blowing up. For example, alias analysis and // load clustering may not complete in reasonable time. It is difficult to @@ -95,29 +144,51 @@ // MaxParallelChains default is arbitrarily high to avoid affecting // optimization, but could be lowered to improve compile time. Any ld-ld-st-st // sequence over this should have been converted to llvm.memcpy by the -// frontend. It easy to induce this behavior with .ll code such as: +// frontend. It is easy to induce this behavior with .ll code such as: // %buffer = alloca [4096 x i8] // %data = load [4096 x i8]* %argPtr // store [4096 x i8] %data, [4096 x i8]* %buffer static const unsigned MaxParallelChains = 64; -static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, +// True if the Value passed requires ABI mangling as it is a parameter to a +// function or a return value from a function which is not an intrinsic. +static bool isABIRegCopy(const Value *V) { + const bool IsRetInst = V && isa<ReturnInst>(V); + const bool IsCallInst = V && isa<CallInst>(V); + const bool IsInLineAsm = + IsCallInst && static_cast<const CallInst *>(V)->isInlineAsm(); + const bool IsIndirectFunctionCall = + IsCallInst && !IsInLineAsm && + !static_cast<const CallInst *>(V)->getCalledFunction(); + // It is possible that the call instruction is an inline asm statement or an + // indirect function call in which case the return value of + // getCalledFunction() would be nullptr. + const bool IsInstrinsicCall = + IsCallInst && !IsInLineAsm && !IsIndirectFunctionCall && + static_cast<const CallInst *>(V)->getCalledFunction()->getIntrinsicID() != + Intrinsic::not_intrinsic; + + return IsRetInst || (IsCallInst && (!IsInLineAsm && !IsInstrinsicCall)); +} + +static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, - MVT PartVT, EVT ValueVT, const Value *V); + MVT PartVT, EVT ValueVT, const Value *V, + bool IsABIRegCopy); /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. 
If the parts combine to a type -/// larger then ValueVT then AssertOp can be used to specify whether the extra +/// larger than ValueVT then AssertOp can be used to specify whether the extra /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT /// (ISD::AssertSext). -static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, - const SDValue *Parts, - unsigned NumParts, MVT PartVT, EVT ValueVT, - const Value *V, - ISD::NodeType AssertOp = ISD::DELETED_NODE) { +static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, + const SDValue *Parts, unsigned NumParts, + MVT PartVT, EVT ValueVT, const Value *V, + Optional<ISD::NodeType> AssertOp = None, + bool IsABIRegCopy = false) { if (ValueVT.isVector()) return getCopyFromPartsVector(DAG, DL, Parts, NumParts, - PartVT, ValueVT, V); + PartVT, ValueVT, V, IsABIRegCopy); assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -169,7 +240,7 @@ Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi, - DAG.getConstant(Lo.getValueType().getSizeInBits(), DL, + DAG.getConstant(Lo.getValueSizeInBits(), DL, TLI.getPointerTy(DAG.getDataLayout()))); Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo); Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi); @@ -194,6 +265,8 @@ } // There is now one part, held in Val. Correct it to match ValueVT. + // PartEVT is the type of the register class that holds the value. + // ValueVT is the type of the inline asm operation. EVT PartEVT = Val.getValueType(); if (PartEVT == ValueVT) @@ -207,13 +280,18 @@ Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val); } + // Handle types that have the same size. + if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits()) + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + + // Handle types with different sizes. if (PartEVT.isInteger() && ValueVT.isInteger()) { if (ValueVT.bitsLT(PartEVT)) { // For a truncate, see if we have any information to // indicate whether the truncated bits will always be // zero or sign-extension. - if (AssertOp != ISD::DELETED_NODE) - Val = DAG.getNode(AssertOp, DL, PartEVT, Val, + if (AssertOp.hasValue()) + Val = DAG.getNode(*AssertOp, DL, PartEVT, Val, DAG.getValueType(ValueVT)); return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); } @@ -230,9 +308,6 @@ return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); } - if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits()) - return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); - llvm_unreachable("Unknown mismatch!"); } @@ -252,12 +327,13 @@ /// getCopyFromPartsVector - Create a value that contains the specified legal /// parts combined into the value they represent. If the parts combine to a -/// type larger then ValueVT then AssertOp can be used to specify whether the +/// type larger than ValueVT then AssertOp can be used to specify whether the /// extra bits are known to be zero (ISD::AssertZext) or sign extended from /// ValueVT (ISD::AssertSext). 
-static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, +static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, - MVT PartVT, EVT ValueVT, const Value *V) { + MVT PartVT, EVT ValueVT, const Value *V, + bool IsABIRegCopy) { assert(ValueVT.isVector() && "Not a vector value"); assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -268,9 +344,18 @@ EVT IntermediateVT; MVT RegisterVT; unsigned NumIntermediates; - unsigned NumRegs = - TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, - NumIntermediates, RegisterVT); + unsigned NumRegs; + + if (IsABIRegCopy) { + NumRegs = TLI.getVectorTypeBreakdownForCallingConv( + *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates, + RegisterVT); + } else { + NumRegs = + TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, + NumIntermediates, RegisterVT); + } + assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); @@ -299,9 +384,14 @@ // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the // intermediate operands. + EVT BuiltVectorTy = + EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(), + (IntermediateVT.isVector() + ? IntermediateVT.getVectorNumElements() * NumParts + : NumIntermediates)); Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, - DL, ValueVT, Ops); + DL, BuiltVectorTy, Ops); } // There is now one part, held in Val. Correct it to match ValueVT. @@ -340,36 +430,55 @@ TLI.isTypeLegal(ValueVT)) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + if (ValueVT.getVectorNumElements() != 1) { + // Certain ABIs require that vectors are passed as integers. For vectors + // are the same size, this is an obvious bitcast. + if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) { + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + } else if (ValueVT.getSizeInBits() < PartEVT.getSizeInBits()) { + // Bitcast Val back the original type and extract the corresponding + // vector we want. + unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits(); + EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(), + ValueVT.getVectorElementType(), Elts); + Val = DAG.getBitcast(WiderVecType, Val); + return DAG.getNode( + ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + + diagnosePossiblyInvalidConstraint( + *DAG.getContext(), V, "non-trivial scalar-to-vector conversion"); + return DAG.getUNDEF(ValueVT); + } + // Handle cases such as i8 -> <1 x i1> - if (ValueVT.getVectorNumElements() != 1) { - diagnosePossiblyInvalidConstraint(*DAG.getContext(), V, - "non-trivial scalar-to-vector conversion"); - return DAG.getUNDEF(ValueVT); - } - - if (ValueVT.getVectorNumElements() == 1 && - ValueVT.getVectorElementType() != PartEVT) - Val = DAG.getAnyExtOrTrunc(Val, DL, ValueVT.getScalarType()); - - return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); -} - -static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc dl, + EVT ValueSVT = ValueVT.getVectorElementType(); + if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) + Val = ValueVT.isFloatingPoint() ? 
DAG.getFPExtendOrRound(Val, DL, ValueSVT) + : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT); + + return DAG.getBuildVector(ValueVT, DL, Val); +} + +static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl, SDValue Val, SDValue *Parts, unsigned NumParts, - MVT PartVT, const Value *V); + MVT PartVT, const Value *V, bool IsABIRegCopy); /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. -static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, - SDValue Val, SDValue *Parts, unsigned NumParts, - MVT PartVT, const Value *V, - ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { +static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, + SDValue *Parts, unsigned NumParts, MVT PartVT, + const Value *V, + ISD::NodeType ExtendKind = ISD::ANY_EXTEND, + bool IsABIRegCopy = false) { EVT ValueVT = Val.getValueType(); // Handle the vector case separately. if (ValueVT.isVector()) - return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V); + return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V, + IsABIRegCopy); unsigned PartBits = PartVT.getSizeInBits(); unsigned OrigNumParts = NumParts; @@ -428,9 +537,11 @@ "Failed to tile the value with PartVT!"); if (NumParts == 1) { - if (PartEVT != ValueVT) + if (PartEVT != ValueVT) { diagnosePossiblyInvalidConstraint(*DAG.getContext(), V, "scalar-to-vector conversion failed"); + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); + } Parts[0] = Val; return; @@ -490,9 +601,10 @@ /// getCopyToPartsVector - Create a series of nodes that contain the specified /// value split into legal parts. -static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, +static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, - MVT PartVT, const Value *V) { + MVT PartVT, const Value *V, + bool IsABIRegCopy) { EVT ValueVT = Val.getValueType(); assert(ValueVT.isVector() && "Not a vector"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -520,7 +632,7 @@ e = PartVT.getVectorNumElements(); i != e; ++i) Ops.push_back(DAG.getUNDEF(ElementVT)); - Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, Ops); + Val = DAG.getBuildVector(PartVT, DL, Ops); // FIXME: Use CONCAT for 2x -> 4x. @@ -533,17 +645,22 @@ // Promoted vector extract Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); - } else{ - // Vector -> scalar conversion. 
- assert(ValueVT.getVectorNumElements() == 1 && - "Only trivial vector-to-scalar conversions should get here!"); - Val = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); - - Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); - } - + } else { + if (ValueVT.getVectorNumElements() == 1) { + Val = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } else { + assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() && + "lossy conversion of vector to scalar type"); + EVT IntermediateType = + EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); + Val = DAG.getBitcast(IntermediateType, Val); + Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); + } + } + + assert(Val.getValueType() == PartVT && "Unexpected vector part value type"); Parts[0] = Val; return; } @@ -552,15 +669,31 @@ EVT IntermediateVT; MVT RegisterVT; unsigned NumIntermediates; - unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, - IntermediateVT, - NumIntermediates, RegisterVT); + unsigned NumRegs; + if (IsABIRegCopy) { + NumRegs = TLI.getVectorTypeBreakdownForCallingConv( + *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates, + RegisterVT); + } else { + NumRegs = + TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, + NumIntermediates, RegisterVT); + } unsigned NumElements = ValueVT.getVectorNumElements(); assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); + // Convert the vector to the appropiate type if necessary. + unsigned DestVectorNoElts = + NumIntermediates * + (IntermediateVT.isVector() ? IntermediateVT.getVectorNumElements() : 1); + EVT BuiltVectorTy = EVT::getVectorVT( + *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts); + if (Val.getValueType() != BuiltVectorTy) + Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val); + // Split the vector into intermediate operands. SmallVector<SDValue, 8> Ops(NumIntermediates); for (unsigned i = 0; i != NumIntermediates; ++i) { @@ -593,35 +726,37 @@ } } -RegsForValue::RegsForValue() {} - RegsForValue::RegsForValue(const SmallVector<unsigned, 4> ®s, MVT regvt, - EVT valuevt) - : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} + EVT valuevt, bool IsABIMangledValue) + : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs), + RegCount(1, regs.size()), IsABIMangled(IsABIMangledValue) {} RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, - const DataLayout &DL, unsigned Reg, Type *Ty) { + const DataLayout &DL, unsigned Reg, Type *Ty, + bool IsABIMangledValue) { ComputeValueVTs(TLI, DL, Ty, ValueVTs); + IsABIMangled = IsABIMangledValue; + for (EVT ValueVT : ValueVTs) { - unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT); - MVT RegisterVT = TLI.getRegisterType(Context, ValueVT); + unsigned NumRegs = IsABIMangledValue + ? TLI.getNumRegistersForCallingConv(Context, ValueVT) + : TLI.getNumRegisters(Context, ValueVT); + MVT RegisterVT = IsABIMangledValue + ? 
TLI.getRegisterTypeForCallingConv(Context, ValueVT) + : TLI.getRegisterType(Context, ValueVT); for (unsigned i = 0; i != NumRegs; ++i) Regs.push_back(Reg + i); RegVTs.push_back(RegisterVT); + RegCount.push_back(NumRegs); Reg += NumRegs; } } -/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from -/// this value and returns the result as a ValueVT value. This uses -/// Chain/Flag as the input and updates them for the output Chain/Flag. -/// If the Flag pointer is NULL, no flag is used. SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, - SDLoc dl, - SDValue &Chain, SDValue *Flag, - const Value *V) const { + const SDLoc &dl, SDValue &Chain, + SDValue *Flag, const Value *V) const { // A Value with type {} or [0 x %t] needs no registers. if (ValueVTs.empty()) return SDValue(); @@ -634,8 +769,10 @@ for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { // Copy the legal parts from the registers. EVT ValueVT = ValueVTs[Value]; - unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT); - MVT RegisterVT = RegVTs[Value]; + unsigned NumRegs = RegCount[Value]; + MVT RegisterVT = IsABIMangled + ? TLI.getRegisterTypeForCallingConv(RegVTs[Value]) + : RegVTs[Value]; Parts.resize(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { @@ -663,7 +800,7 @@ unsigned RegSize = RegisterVT.getSizeInBits(); unsigned NumSignBits = LOI->NumSignBits; - unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes(); + unsigned NumZeroBits = LOI->Known.countMinLeadingZeros(); if (NumZeroBits == RegSize) { // The current value is a zero. @@ -677,25 +814,33 @@ // now, just use the tightest assertzext/assertsext possible. bool isSExt = true; EVT FromVT(MVT::Other); - if (NumSignBits == RegSize) - isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 - else if (NumZeroBits >= RegSize-1) - isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 - else if (NumSignBits > RegSize-8) - isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 - else if (NumZeroBits >= RegSize-8) - isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 - else if (NumSignBits > RegSize-16) - isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 - else if (NumZeroBits >= RegSize-16) - isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 - else if (NumSignBits > RegSize-32) - isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 - else if (NumZeroBits >= RegSize-32) - isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 - else + if (NumSignBits == RegSize) { + isSExt = true; // ASSERT SEXT 1 + FromVT = MVT::i1; + } else if (NumZeroBits >= RegSize - 1) { + isSExt = false; // ASSERT ZEXT 1 + FromVT = MVT::i1; + } else if (NumSignBits > RegSize - 8) { + isSExt = true; // ASSERT SEXT 8 + FromVT = MVT::i8; + } else if (NumZeroBits >= RegSize - 8) { + isSExt = false; // ASSERT ZEXT 8 + FromVT = MVT::i8; + } else if (NumSignBits > RegSize - 16) { + isSExt = true; // ASSERT SEXT 16 + FromVT = MVT::i16; + } else if (NumZeroBits >= RegSize - 16) { + isSExt = false; // ASSERT ZEXT 16 + FromVT = MVT::i16; + } else if (NumSignBits > RegSize - 32) { + isSExt = true; // ASSERT SEXT 32 + FromVT = MVT::i32; + } else if (NumZeroBits >= RegSize - 32) { + isSExt = false; // ASSERT ZEXT 32 + FromVT = MVT::i32; + } else { continue; - + } // Add an assertion node. assert(FromVT != MVT::Other); Parts[i] = DAG.getNode(isSExt ? 
ISD::AssertSext : ISD::AssertZext, dl, @@ -711,12 +856,9 @@ return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values); } -/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the -/// specified value into the registers specified by this object. This uses -/// Chain/Flag as the input and updates them for the output Chain/Flag. -/// If the Flag pointer is NULL, no flag is used. -void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, - SDValue &Chain, SDValue *Flag, const Value *V, +void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, + const SDLoc &dl, SDValue &Chain, SDValue *Flag, + const Value *V, ISD::NodeType PreferredExtendType) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); ISD::NodeType ExtendKind = PreferredExtendType; @@ -725,9 +867,11 @@ unsigned NumRegs = Regs.size(); SmallVector<SDValue, 8> Parts(NumRegs); for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT ValueVT = ValueVTs[Value]; - unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT); - MVT RegisterVT = RegVTs[Value]; + unsigned NumParts = RegCount[Value]; + + MVT RegisterVT = IsABIMangled + ? TLI.getRegisterTypeForCallingConv(RegVTs[Value]) + : RegVTs[Value]; if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT)) ExtendKind = ISD::ZERO_EXTEND; @@ -767,11 +911,8 @@ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); } -/// AddInlineAsmOperands - Add this value to the specified inlineasm node -/// operand list. This adds the code marker and includes the number of -/// values added into it. void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, - unsigned MatchingIdx, SDLoc dl, + unsigned MatchingIdx, const SDLoc &dl, SelectionDAG &DAG, std::vector<SDValue> &Ops) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -794,7 +935,24 @@ SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32); Ops.push_back(Res); - unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); + if (Code == InlineAsm::Kind_Clobber) { + // Clobbers should always have a 1:1 mapping with registers, and may + // reference registers that have illegal (e.g. vector) types. Hence, we + // shouldn't try to apply any sort of splitting logic to them. + assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() && + "No 1:1 mapping from clobbers to regs?"); + unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); + (void)SP; + for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) { + Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I])); + assert( + (Regs[I] != SP || + DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()) && + "If we clobbered the stack pointer, MFI should know about it."); + } + return; + } + for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); MVT RegisterVT = RegVTs[Value]; @@ -802,19 +960,13 @@ assert(Reg < Regs.size() && "Mismatch in # registers expected"); unsigned TheReg = Regs[Reg++]; Ops.push_back(DAG.getRegister(TheReg, RegisterVT)); - - if (TheReg == SP && Code == InlineAsm::Kind_Clobber) { - // If we clobbered the stack pointer, MFI should know about it. 
- assert(DAG.getMachineFunction().getFrameInfo()-> - hasOpaqueSPAdjustment()); - } - } - } -} - -void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, + } + } +} + +void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa, const TargetLibraryInfo *li) { - AA = &aa; + AA = aa; GFI = gfi; LibInfo = li; DL = &DAG.getDataLayout(); @@ -822,12 +974,6 @@ LPadToCallSiteMap.clear(); } -/// clear - Clear out the current SelectionDAG and the associated -/// state and prepare this SelectionDAGBuilder object to be used -/// for a new block. This doesn't clear out information about -/// additional blocks that are needed to complete switch lowering -/// or PHI node updating; that information is cleared out as it is -/// consumed. void SelectionDAGBuilder::clear() { NodeMap.clear(); UnusedArgNodeMap.clear(); @@ -839,21 +985,10 @@ StatepointLowering.clear(); } -/// clearDanglingDebugInfo - Clear the dangling debug information -/// map. This function is separated from the clear so that debug -/// information that is dangling in a basic block can be properly -/// resolved in a different basic block. This allows the -/// SelectionDAG to resolve dangling debug information attached -/// to PHI nodes. void SelectionDAGBuilder::clearDanglingDebugInfo() { DanglingDebugInfoMap.clear(); } -/// getRoot - Return the current virtual root of the Selection DAG, -/// flushing any PendingLoad items. This must be done before emitting -/// a store or any other node that may need to be ordered after any -/// prior load instructions. -/// SDValue SelectionDAGBuilder::getRoot() { if (PendingLoads.empty()) return DAG.getRoot(); @@ -873,10 +1008,6 @@ return Root; } -/// getControlRoot - Similar to getRoot, but instead of flushing all the -/// PendingLoad items, flush all the PendingExports items. It is necessary -/// to do this before emitting a terminator instruction. -/// SDValue SelectionDAGBuilder::getControlRoot() { SDValue Root = DAG.getRoot(); @@ -905,10 +1036,13 @@ void SelectionDAGBuilder::visit(const Instruction &I) { // Set up outgoing PHI node register values before emitting the terminator. - if (isa<TerminatorInst>(&I)) + if (isa<TerminatorInst>(&I)) { HandlePHINodesInSuccessorBlocks(I.getParent()); - - ++SDNodeOrder; + } + + // Increase the SDNodeOrder if dealing with a non-debug instruction. 
+ if (!isa<DbgInfoIntrinsic>(I)) + ++SDNodeOrder; CurInst = &I; @@ -950,13 +1084,10 @@ DIExpression *Expr = DI->getExpression(); assert(Variable->isValidLocationForIntrinsic(dl) && "Expected inlined-at fields to agree"); - uint64_t Offset = DI->getOffset(); SDDbgValue *SDV; if (Val.getNode()) { - if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, false, - Val)) { - SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(), - false, Offset, dl, DbgSDNodeOrder); + if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) { + SDV = getDbgValue(Val, Variable, Expr, dl, DbgSDNodeOrder); DAG.AddDbgValue(SDV, Val.getNode(), false); } } else @@ -973,10 +1104,12 @@ if (It != FuncInfo.ValueMap.end()) { unsigned InReg = It->second; + RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), - DAG.getDataLayout(), InReg, Ty); + DAG.getDataLayout(), InReg, Ty, isABIRegCopy(V)); SDValue Chain = DAG.getEntryNode(); - Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); + Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, + V); resolveDanglingDebugInfo(V, Result); } @@ -993,10 +1126,8 @@ // If there's a virtual register allocated and initialized for this // value, use it. - SDValue copyFromReg = getCopyFromRegs(V, V->getType()); - if (copyFromReg.getNode()) { + if (SDValue copyFromReg = getCopyFromRegs(V, V->getType())) return copyFromReg; - } // Otherwise create a new SDValue and remember it. SDValue Val = getValueImpl(V); @@ -1096,8 +1227,7 @@ if (isa<ArrayType>(CDS->getType())) return DAG.getMergeValues(Ops, getCurSDLoc()); - return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), - VT, Ops); + return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); } if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { @@ -1149,7 +1279,7 @@ } // Create a BUILD_VECTOR node. - return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops); + return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); } // If this is a static alloca, generate it as the frameindex instead of @@ -1159,14 +1289,15 @@ FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) return DAG.getFrameIndex(SI->second, - TLI.getPointerTy(DAG.getDataLayout())); + TLI.getFrameIndexTy(DAG.getDataLayout())); } // If this is an instruction which fast-isel has deferred, select it now. if (const Instruction *Inst = dyn_cast<Instruction>(V)) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); + RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg, - Inst->getType()); + Inst->getType(), isABIRegCopy(V)); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } @@ -1315,6 +1446,18 @@ SmallVector<ISD::OutputArg, 8> Outs; SmallVector<SDValue, 8> OutVals; + // Calls to @llvm.experimental.deoptimize don't generate a return value, so + // lower + // + // %val = call <ty> @llvm.experimental.deoptimize() + // ret <ty> %val + // + // differently. + if (I.getParent()->getTerminatingDeoptimizeCall()) { + LowerDeoptimizingReturn(); + return; + } + if (!FuncInfo.CanLowerReturn) { unsigned DemoteReg = FuncInfo.DemoteRegister; const Function *F = I.getParent()->getParent(); @@ -1346,12 +1489,11 @@ RetPtr.getValueType(), RetPtr, DAG.getIntPtrConstant(Offsets[i], getCurSDLoc()), - &Flags); - Chains[i] = - DAG.getStore(Chain, getCurSDLoc(), - SDValue(RetOp.getNode(), RetOp.getResNo() + i), - // FIXME: better loc info would be nice. 
- Add, MachinePointerInfo(), false, false, 0); + Flags); + Chains[i] = DAG.getStore(Chain, getCurSDLoc(), + SDValue(RetOp.getNode(), RetOp.getResNo() + i), + // FIXME: better loc info would be nice. + Add, MachinePointerInfo()); } Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), @@ -1366,29 +1508,29 @@ const Function *F = I.getParent()->getParent(); ISD::NodeType ExtendKind = ISD::ANY_EXTEND; - if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex, Attribute::SExt)) ExtendKind = ISD::SIGN_EXTEND; - else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt)) ExtendKind = ISD::ZERO_EXTEND; LLVMContext &Context = F->getContext(); - bool RetInReg = F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, - Attribute::InReg); + bool RetInReg = F->getAttributes().hasAttribute( + AttributeList::ReturnIndex, Attribute::InReg); for (unsigned j = 0; j != NumValues; ++j) { EVT VT = ValueVTs[j]; if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) - VT = TLI.getTypeForExtArgOrReturn(Context, VT, ExtendKind); - - unsigned NumParts = TLI.getNumRegisters(Context, VT); - MVT PartVT = TLI.getRegisterType(Context, VT); + VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind); + + unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, VT); + MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, VT); SmallVector<SDValue, 4> Parts(NumParts); getCopyToParts(DAG, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), - &Parts[0], NumParts, PartVT, &I, ExtendKind); + &Parts[0], NumParts, PartVT, &I, ExtendKind, true); // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); @@ -1410,6 +1552,26 @@ } } + // Push in swifterror virtual register as the last element of Outs. This makes + // sure swifterror virtual register will be returned in the swifterror + // physical register. + const Function *F = I.getParent()->getParent(); + if (TLI.supportSwiftError() && + F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) { + assert(FuncInfo.SwiftErrorArg && "Need a swift error argument"); + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + Flags.setSwiftError(); + Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/, + EVT(TLI.getPointerTy(DL)) /*argvt*/, + true /*isfixed*/, 1 /*origidx*/, + 0 /*partOffs*/)); + // Create SDNode for the swifterror virtual register. + OutVals.push_back( + DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVRegUseAt( + &I, FuncInfo.MBB, FuncInfo.SwiftErrorArg).first, + EVT(TLI.getPointerTy(DL)))); + } + bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); CallingConv::ID CallConv = DAG.getMachineFunction().getFunction()->getCallingConv(); @@ -1518,7 +1680,6 @@ /// EmitBranchForMergedCondition - Helper method for FindMergedConditions. /// This function emits a branch and is used at the leaves of an OR or an /// AND operator tree. 
-/// void SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, @@ -1526,7 +1687,8 @@ MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, BranchProbability TProb, - BranchProbability FProb) { + BranchProbability FProb, + bool InvertCond) { const BasicBlock *BB = CurBB->getBasicBlock(); // If the leaf of the tree is a comparison, merge the condition into @@ -1540,24 +1702,29 @@ isExportableFromCurrentBlock(BOp->getOperand(1), BB))) { ISD::CondCode Condition; if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) { - Condition = getICmpCondCode(IC->getPredicate()); + ICmpInst::Predicate Pred = + InvertCond ? IC->getInversePredicate() : IC->getPredicate(); + Condition = getICmpCondCode(Pred); } else { const FCmpInst *FC = cast<FCmpInst>(Cond); - Condition = getFCmpCondCode(FC->getPredicate()); + FCmpInst::Predicate Pred = + InvertCond ? FC->getInversePredicate() : FC->getPredicate(); + Condition = getFCmpCondCode(Pred); if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); } CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr, - TBB, FBB, CurBB, TProb, FProb); + TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb); SwitchCases.push_back(CB); return; } } // Create a CaseBlock record representing this branch. - CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()), - nullptr, TBB, FBB, CurBB, TProb, FProb); + ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ; + CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()), + nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb); SwitchCases.push_back(CB); } @@ -1569,16 +1736,44 @@ MachineBasicBlock *SwitchBB, Instruction::BinaryOps Opc, BranchProbability TProb, - BranchProbability FProb) { - // If this node is not part of the or/and tree, emit it as a branch. + BranchProbability FProb, + bool InvertCond) { + // Skip over not part of the tree and remember to invert op and operands at + // next level. + if (BinaryOperator::isNot(Cond) && Cond->hasOneUse()) { + const Value *CondOp = BinaryOperator::getNotArgument(Cond); + if (InBlock(CondOp, CurBB->getBasicBlock())) { + FindMergedConditions(CondOp, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb, + !InvertCond); + return; + } + } + const Instruction *BOp = dyn_cast<Instruction>(Cond); + // Compute the effective opcode for Cond, taking into account whether it needs + // to be inverted, e.g. + // and (not (or A, B)), C + // gets lowered as + // and (and (not A, not B), C) + unsigned BOpc = 0; + if (BOp) { + BOpc = BOp->getOpcode(); + if (InvertCond) { + if (BOpc == Instruction::And) + BOpc = Instruction::Or; + else if (BOpc == Instruction::Or) + BOpc = Instruction::And; + } + } + + // If this node is not part of the or/and tree, emit it as a branch. if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || - (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() || + BOpc != Opc || !BOp->hasOneUse() || BOp->getParent() != CurBB->getBasicBlock() || !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, - TProb, FProb); + TProb, FProb, InvertCond); return; } @@ -1613,14 +1808,14 @@ auto NewFalseProb = TProb / 2 + FProb; // Emit the LHS condition. FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc, - NewTrueProb, NewFalseProb); + NewTrueProb, NewFalseProb, InvertCond); // Normalize A/2 and B to get A/(1+B) and 2B/(1+B). 
SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb}; BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); // Emit the RHS condition into TmpBB. FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, - Probs[0], Probs[1]); + Probs[0], Probs[1], InvertCond); } else { assert(Opc == Instruction::And && "Unknown merge op!"); // Codegen X & Y as: @@ -1646,14 +1841,14 @@ auto NewFalseProb = FProb / 2; // Emit the LHS condition. FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc, - NewTrueProb, NewFalseProb); + NewTrueProb, NewFalseProb, InvertCond); // Normalize A and B/2 to get 2A/(1+A) and B/(1+A). SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2}; BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); // Emit the RHS condition into TmpBB. FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, - Probs[0], Probs[1]); + Probs[0], Probs[1], InvertCond); } } @@ -1728,7 +1923,6 @@ // je foo // cmp D, E // jle foo - // if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { Instruction::BinaryOps Opcode = BOp->getOpcode(); if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() && @@ -1737,7 +1931,8 @@ FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode, getEdgeProbability(BrMBB, Succ0MBB), - getEdgeProbability(BrMBB, Succ1MBB)); + getEdgeProbability(BrMBB, Succ1MBB), + /*InvertCond=*/false); // If the compares in later blocks need to use values not currently // exported from this block, export them now. This block should always // be the first entry. @@ -1767,7 +1962,7 @@ // Create a CaseBlock record representing this branch. CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()), - nullptr, Succ0MBB, Succ1MBB, BrMBB); + nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc()); // Use visitSwitchCase to actually insert the fast branch sequence for this // cond branch. @@ -1780,7 +1975,7 @@ MachineBasicBlock *SwitchBB) { SDValue Cond; SDValue CondLHS = getValue(CB.CmpLHS); - SDLoc dl = getCurSDLoc(); + SDLoc dl = CB.DL; // Build the setcc now. if (!CB.CmpMHS) { @@ -1907,6 +2102,28 @@ DAG.setRoot(BrCond); } +/// Create a LOAD_STACK_GUARD node, and let it carry the target specific global +/// variable if there exists one. +static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL, + SDValue &Chain) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); + MachineFunction &MF = DAG.getMachineFunction(); + Value *Global = TLI.getSDagStackGuard(*MF.getFunction()->getParent()); + MachineSDNode *Node = + DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain); + if (Global) { + MachinePointerInfo MPInfo(Global); + MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1); + auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | + MachineMemOperand::MODereferenceable; + *MemRefs = MF.getMachineMemOperand(MPInfo, Flags, PtrTy.getSizeInBits() / 8, + DAG.getEVTAlignment(PtrTy)); + Node->setMemRefs(MemRefs, MemRefs + 1); + } + return SDValue(Node, 0); +} + /// Codegen a new tail for a stack protector check ParentMBB which has had its /// tail spliced into a stack protector check success bb. 
/// @@ -1920,35 +2137,62 @@ const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); - MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo(); - int FI = MFI->getStackProtectorIndex(); - - const Value *IRGuard = SPD.getGuard(); - SDValue GuardPtr = getValue(IRGuard); - SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); - - unsigned Align = DL->getPrefTypeAlignment(IRGuard->getType()); + MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo(); + int FI = MFI.getStackProtectorIndex(); SDValue Guard; SDLoc dl = getCurSDLoc(); - - // If GuardReg is set and useLoadStackGuardNode returns true, retrieve the - // guard value from the virtual register holding the value. Otherwise, emit a - // volatile load to retrieve the stack guard value. - unsigned GuardReg = SPD.getGuardReg(); - - if (GuardReg && TLI.useLoadStackGuardNode()) - Guard = DAG.getCopyFromReg(DAG.getEntryNode(), dl, GuardReg, - PtrTy); - else - Guard = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(), - GuardPtr, MachinePointerInfo(IRGuard, 0), - true, false, false, Align); - + SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); + const Module &M = *ParentBB->getParent()->getFunction()->getParent(); + unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext())); + + // Generate code to load the content of the guard slot. SDValue StackSlot = DAG.getLoad( PtrTy, dl, DAG.getEntryNode(), StackSlotPtr, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), true, - false, false, Align); + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align, + MachineMemOperand::MOVolatile); + + // Retrieve guard check function, nullptr if instrumentation is inlined. + if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) { + // The target provides a guard check function to validate the guard value. + // Generate a call to that function with the content of the guard slot as + // argument. + auto *Fn = cast<Function>(GuardCheck); + FunctionType *FnTy = Fn->getFunctionType(); + assert(FnTy->getNumParams() == 1 && "Invalid function signature"); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = StackSlot; + Entry.Ty = FnTy->getParamType(0); + if (Fn->hasAttribute(1, Attribute::AttrKind::InReg)) + Entry.IsInReg = true; + Args.push_back(Entry); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(getCurSDLoc()) + .setChain(DAG.getEntryNode()) + .setCallee(Fn->getCallingConv(), FnTy->getReturnType(), + getValue(GuardCheck), std::move(Args)); + + std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); + DAG.setRoot(Result.second); + return; + } + + // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD. + // Otherwise, emit a volatile load to retrieve the stack guard value. + SDValue Chain = DAG.getEntryNode(); + if (TLI.useLoadStackGuardNode()) { + Guard = getLoadStackGuard(DAG, dl, Chain); + } else { + const Value *IRGuard = TLI.getSDagStackGuard(M); + SDValue GuardPtr = getValue(IRGuard); + + Guard = + DAG.getLoad(PtrTy, dl, Chain, GuardPtr, MachinePointerInfo(IRGuard, 0), + Align, MachineMemOperand::MOVolatile); + } // Perform the comparison via a subtract/getsetcc. EVT VT = Guard.getValueType(); @@ -2116,6 +2360,12 @@ MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; const BasicBlock *EHPadBB = I.getSuccessor(1); + // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't + // have to do anything here to lower funclet bundles. 
+ assert(!I.hasOperandBundlesOtherThan( + {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) && + "Cannot lower invokes with arbitrary operand bundles yet!"); + const Value *Callee(I.getCalledValue()); const Function *Fn = dyn_cast<Function>(Callee); if (isa<InlineAsm>(Callee)) @@ -2135,8 +2385,15 @@ LowerStatepoint(ImmutableStatepoint(&I), EHPadBB); break; } - } else + } else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) { + // Currently we do not lower any intrinsic calls with deopt operand bundles. + // Eventually we will support lowering the @llvm.experimental.deoptimize + // intrinsic, and right now there are no plans to support other intrinsics + // with deopt state. + LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB); + } else { LowerCallTo(&I, getValue(Callee), false, EHPadBB); + } // If the value of the invoke is used outside of its defining block, make it // available as a virtual register. @@ -2176,8 +2433,7 @@ "Call to landingpad not in landing pad!"); MachineBasicBlock *MBB = FuncInfo.MBB; - MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - AddLandingPadInfo(LP, MMI, MBB); + addLandingPadInfo(LP, *MBB); // If there aren't registers to copy the values into (e.g., during SjLj // exceptions), then don't bother to create these DAG nodes. @@ -2310,6 +2566,129 @@ visitBinary(I, ISD::FSUB); } +/// Checks if the given instruction performs a vector reduction, in which case +/// we have the freedom to alter the elements in the result as long as the +/// reduction of them stays unchanged. +static bool isVectorReductionOp(const User *I) { + const Instruction *Inst = dyn_cast<Instruction>(I); + if (!Inst || !Inst->getType()->isVectorTy()) + return false; + + auto OpCode = Inst->getOpcode(); + switch (OpCode) { + case Instruction::Add: + case Instruction::Mul: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + break; + case Instruction::FAdd: + case Instruction::FMul: + if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst)) + if (FPOp->getFastMathFlags().unsafeAlgebra()) + break; + LLVM_FALLTHROUGH; + default: + return false; + } + + unsigned ElemNum = Inst->getType()->getVectorNumElements(); + unsigned ElemNumToReduce = ElemNum; + + // Do DFS search on the def-use chain from the given instruction. We only + // allow four kinds of operations during the search until we reach the + // instruction that extracts the first element from the vector: + // + // 1. The reduction operation of the same opcode as the given instruction. + // + // 2. PHI node. + // + // 3. ShuffleVector instruction together with a reduction operation that + // does a partial reduction. + // + // 4. ExtractElement that extracts the first element from the vector, and we + // stop searching the def-use chain here. + // + // 3 & 4 above perform a reduction on all elements of the vector. We push defs + // from 1-3 to the stack to continue the DFS. The given instruction is not + // a reduction operation if we meet any other instructions other than those + // listed above. 
+ + SmallVector<const User *, 16> UsersToVisit{Inst}; + SmallPtrSet<const User *, 16> Visited; + bool ReduxExtracted = false; + + while (!UsersToVisit.empty()) { + auto User = UsersToVisit.back(); + UsersToVisit.pop_back(); + if (!Visited.insert(User).second) + continue; + + for (const auto &U : User->users()) { + auto Inst = dyn_cast<Instruction>(U); + if (!Inst) + return false; + + if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) { + if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst)) + if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().unsafeAlgebra()) + return false; + UsersToVisit.push_back(U); + } else if (const ShuffleVectorInst *ShufInst = + dyn_cast<ShuffleVectorInst>(U)) { + // Detect the following pattern: A ShuffleVector instruction together + // with a reduction that do partial reduction on the first and second + // ElemNumToReduce / 2 elements, and store the result in + // ElemNumToReduce / 2 elements in another vector. + + unsigned ResultElements = ShufInst->getType()->getVectorNumElements(); + if (ResultElements < ElemNum) + return false; + + if (ElemNumToReduce == 1) + return false; + if (!isa<UndefValue>(U->getOperand(1))) + return false; + for (unsigned i = 0; i < ElemNumToReduce / 2; ++i) + if (ShufInst->getMaskValue(i) != int(i + ElemNumToReduce / 2)) + return false; + for (unsigned i = ElemNumToReduce / 2; i < ElemNum; ++i) + if (ShufInst->getMaskValue(i) != -1) + return false; + + // There is only one user of this ShuffleVector instruction, which + // must be a reduction operation. + if (!U->hasOneUse()) + return false; + + auto U2 = dyn_cast<Instruction>(*U->user_begin()); + if (!U2 || U2->getOpcode() != OpCode) + return false; + + // Check operands of the reduction operation. + if ((U2->getOperand(0) == U->getOperand(0) && U2->getOperand(1) == U) || + (U2->getOperand(1) == U->getOperand(0) && U2->getOperand(0) == U)) { + UsersToVisit.push_back(U2); + ElemNumToReduce /= 2; + } else + return false; + } else if (isa<ExtractElementInst>(U)) { + // At this moment we should have reduced all elements in the vector. 
+ if (ElemNumToReduce != 1) + return false; + + const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1)); + if (!Val || Val->getZExtValue() != 0) + return false; + + ReduxExtracted = true; + } else + return false; + } + } + return ReduxExtracted; +} + void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); @@ -2317,6 +2696,7 @@ bool nuw = false; bool nsw = false; bool exact = false; + bool vec_redux = false; FastMathFlags FMF; if (const OverflowingBinaryOperator *OFBinOp = @@ -2330,19 +2710,25 @@ if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(&I)) FMF = FPOp->getFastMathFlags(); + if (isVectorReductionOp(&I)) { + vec_redux = true; + DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n"); + } + SDNodeFlags Flags; Flags.setExact(exact); Flags.setNoSignedWrap(nsw); Flags.setNoUnsignedWrap(nuw); - if (EnableFMFInDAG) { - Flags.setAllowReciprocal(FMF.allowReciprocal()); - Flags.setNoInfs(FMF.noInfs()); - Flags.setNoNaNs(FMF.noNaNs()); - Flags.setNoSignedZeros(FMF.noSignedZeros()); - Flags.setUnsafeAlgebra(FMF.unsafeAlgebra()); - } + Flags.setVectorReduction(vec_redux); + Flags.setAllowReciprocal(FMF.allowReciprocal()); + Flags.setAllowContract(FMF.allowContract()); + Flags.setNoInfs(FMF.noInfs()); + Flags.setNoNaNs(FMF.noNaNs()); + Flags.setNoSignedZeros(FMF.noSignedZeros()); + Flags.setUnsafeAlgebra(FMF.unsafeAlgebra()); + SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), - Op1, Op2, &Flags); + Op1, Op2, Flags); setValue(&I, BinNodeValue); } @@ -2356,7 +2742,7 @@ // Coerce the shift amount to the right type if we can. if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { unsigned ShiftSize = ShiftTy.getSizeInBits(); - unsigned Op2Size = Op2.getValueType().getSizeInBits(); + unsigned Op2Size = Op2.getValueSizeInBits(); SDLoc DL = getCurSDLoc(); // If the operand is smaller than the shift count type, promote it. @@ -2367,7 +2753,7 @@ // count type has enough bits to represent any shift value, truncate // it now. This is a common case and it exposes the truncate to // optimization early. - else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits())) + else if (ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits())) Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2); // Otherwise we'll need to temporarily settle for some other convenient // type. Type legalization will make adjustments once the shiftee is split. @@ -2395,7 +2781,7 @@ Flags.setNoSignedWrap(nsw); Flags.setNoUnsignedWrap(nuw); SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, - &Flags); + Flags); setValue(&I, Res); } @@ -2407,7 +2793,7 @@ Flags.setExact(isa<PossiblyExactOperator>(&I) && cast<PossiblyExactOperator>(&I)->isExact()); setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1, - Op2, &Flags)); + Op2, Flags)); } void SelectionDAGBuilder::visitICmp(const User &I) { @@ -2445,6 +2831,14 @@ setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); } +// Check if the condition of the select has one use or two users that are both +// selects with the same condition. 
+static bool hasOnlySelectUsers(const Value *Cond) { + return llvm::all_of(Cond->users(), [](const Value *V) { + return isa<SelectInst>(V); + }); +} + void SelectionDAGBuilder::visitSelect(const User &I) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(), @@ -2530,7 +2924,7 @@ // If the underlying comparison instruction is used by any other // instruction, the consumed instructions won't be destroyed, so it is // not profitable to convert to a min/max. - cast<SelectInst>(&I)->getCondition()->hasOneUse()) { + hasOnlySelectUsers(cast<SelectInst>(I).getCondition())) { OpCode = Opc; LHSVal = getValue(LHS); RHSVal = getValue(RHS); @@ -2659,7 +3053,7 @@ DestVT, N)); // convert types. // Check if the original LLVM IR Operand was a ConstantInt, because getValue() // might fold any kind of constant expression to an integer constant and that - // is not what we are looking for. Only regcognize a bitcast of a genuine + // is not what we are looking for. Only recognize a bitcast of a genuine // constant integer as an opaque constant. else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0))) setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false, @@ -2704,20 +3098,10 @@ InVec, InIdx)); } -// Utility for visitShuffleVector - Return true if every element in Mask, -// beginning from position Pos and ending in Pos+Size, falls within the -// specified sequential range [L, L+Pos). or is undef. -static bool isSequentialInRange(const SmallVectorImpl<int> &Mask, - unsigned Pos, unsigned Size, int Low) { - for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low) - if (Mask[i] >= 0 && Mask[i] != Low) - return false; - return true; -} - void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue Src1 = getValue(I.getOperand(0)); SDValue Src2 = getValue(I.getOperand(1)); + SDLoc DL = getCurSDLoc(); SmallVector<int, 8> Mask; ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask); @@ -2729,39 +3113,61 @@ unsigned SrcNumElts = SrcVT.getVectorNumElements(); if (SrcNumElts == MaskNumElts) { - setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, - &Mask[0])); + setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask)); return; } // Normalize the shuffle vector since mask and vector length don't match. - if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) { - // Mask is longer than the source vectors and is a multiple of the source - // vectors. We can use concatenate vector to make the mask and vectors - // lengths match. - if (SrcNumElts*2 == MaskNumElts) { - // First check for Src1 in low and Src2 in high - if (isSequentialInRange(Mask, 0, SrcNumElts, 0) && - isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) { - // The shuffle is concatenating two vectors together. - setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(), - VT, Src1, Src2)); + if (SrcNumElts < MaskNumElts) { + // Mask is longer than the source vectors. We can use concatenate vector to + // make the mask and vectors lengths match. + + if (MaskNumElts % SrcNumElts == 0) { + // Mask length is a multiple of the source vector length. + // Check if the shuffle is some kind of concatenation of the input + // vectors. 
+ unsigned NumConcat = MaskNumElts / SrcNumElts; + bool IsConcat = true; + SmallVector<int, 8> ConcatSrcs(NumConcat, -1); + for (unsigned i = 0; i != MaskNumElts; ++i) { + int Idx = Mask[i]; + if (Idx < 0) + continue; + // Ensure the indices in each SrcVT sized piece are sequential and that + // the same source is used for the whole piece. + if ((Idx % SrcNumElts != (i % SrcNumElts)) || + (ConcatSrcs[i / SrcNumElts] >= 0 && + ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) { + IsConcat = false; + break; + } + // Remember which source this index came from. + ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts; + } + + // The shuffle is concatenating multiple vectors together. Just emit + // a CONCAT_VECTORS operation. + if (IsConcat) { + SmallVector<SDValue, 8> ConcatOps; + for (auto Src : ConcatSrcs) { + if (Src < 0) + ConcatOps.push_back(DAG.getUNDEF(SrcVT)); + else if (Src == 0) + ConcatOps.push_back(Src1); + else + ConcatOps.push_back(Src2); + } + setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps)); return; } - // Then check for Src2 in low and Src1 in high - if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) && - isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) { - // The shuffle is concatenating two vectors together. - setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(), - VT, Src2, Src1)); - return; - } - } + } + + unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts); + unsigned NumConcat = PaddedMaskNumElts / SrcNumElts; + EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), + PaddedMaskNumElts); // Pad both vectors with undefs to make them the same length as the mask. - unsigned NumConcat = MaskNumElts / SrcNumElts; - bool Src1U = Src1.getOpcode() == ISD::UNDEF; - bool Src2U = Src2.getOpcode() == ISD::UNDEF; SDValue UndefVal = DAG.getUNDEF(SrcVT); SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal); @@ -2769,35 +3175,41 @@ MOps1[0] = Src1; MOps2[0] = Src2; - Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, - getCurSDLoc(), VT, MOps1); - Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, - getCurSDLoc(), VT, MOps2); + Src1 = Src1.isUndef() + ? DAG.getUNDEF(PaddedVT) + : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1); + Src2 = Src2.isUndef() + ? DAG.getUNDEF(PaddedVT) + : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2); // Readjust mask for new input vector length. - SmallVector<int, 8> MappedOps; + SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1); for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; if (Idx >= (int)SrcNumElts) - Idx -= SrcNumElts - MaskNumElts; - MappedOps.push_back(Idx); - } - - setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, - &MappedOps[0])); + Idx -= SrcNumElts - PaddedMaskNumElts; + MappedOps[i] = Idx; + } + + SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps); + + // If the concatenated vector was padded, extract a subvector with the + // correct number of elements. + if (MaskNumElts != PaddedMaskNumElts) + Result = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, DL, VT, Result, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + + setValue(&I, Result); return; } if (SrcNumElts > MaskNumElts) { // Analyze the access pattern of the vector to see if we can extract - // two subvectors and do the shuffle. The analysis is done by calculating - // the range of elements the mask access on both vectors. 
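// A minimal standalone restatement of the concat-mask check added above, over
// plain STL types so the index math is easy to follow. Names and types here are
// illustrative, not part of the patch: a mask entry of -1 means "undef", entries
// 0..SrcNumElts-1 pick from source 0, larger entries pick from source 1. It
// assumes Mask.size() is a nonzero multiple of SrcNumElts, as the caller above
// guarantees before taking this path.
#include <vector>

// Returns the per-piece source ids, or an empty vector if the mask is not a
// plain concatenation of whole source vectors.
static std::vector<int> concatSources(const std::vector<int> &Mask,
                                      unsigned SrcNumElts) {
  unsigned MaskNumElts = Mask.size();
  std::vector<int> ConcatSrcs(MaskNumElts / SrcNumElts, -1);
  for (unsigned i = 0; i != MaskNumElts; ++i) {
    int Idx = Mask[i];
    if (Idx < 0)
      continue; // undef lanes are compatible with anything
    // Lane i of piece i/SrcNumElts must be lane i%SrcNumElts of some source...
    if (unsigned(Idx) % SrcNumElts != i % SrcNumElts)
      return {};
    // ...and every defined lane of a piece must come from the same source.
    int &Src = ConcatSrcs[i / SrcNumElts];
    if (Src >= 0 && Src != Idx / (int)SrcNumElts)
      return {};
    Src = Idx / (int)SrcNumElts;
  }
  return ConcatSrcs; // e.g. {1, 0} for "swap the two source vectors"
}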
- int MinRange[2] = { static_cast<int>(SrcNumElts), - static_cast<int>(SrcNumElts)}; - int MaxRange[2] = {-1, -1}; - - for (unsigned i = 0; i != MaskNumElts; ++i) { - int Idx = Mask[i]; + // two subvectors and do the shuffle. + int StartIdx[2] = { -1, -1 }; // StartIdx to extract from + bool CanExtract = true; + for (int Idx : Mask) { unsigned Input = 0; if (Idx < 0) continue; @@ -2806,66 +3218,47 @@ Input = 1; Idx -= SrcNumElts; } - if (Idx > MaxRange[Input]) - MaxRange[Input] = Idx; - if (Idx < MinRange[Input]) - MinRange[Input] = Idx; - } - - // Check if the access is smaller than the vector size and can we find - // a reasonable extract index. - int RangeUse[2] = { -1, -1 }; // 0 = Unused, 1 = Extract, -1 = Can not - // Extract. - int StartIdx[2]; // StartIdx to extract from - for (unsigned Input = 0; Input < 2; ++Input) { - if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) { - RangeUse[Input] = 0; // Unused - StartIdx[Input] = 0; - continue; - } - - // Find a good start index that is a multiple of the mask length. Then - // see if the rest of the elements are in range. - StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; - if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && - StartIdx[Input] + MaskNumElts <= SrcNumElts) - RangeUse[Input] = 1; // Extract from a multiple of the mask length. - } - - if (RangeUse[0] == 0 && RangeUse[1] == 0) { + + // If all the indices come from the same MaskNumElts sized portion of + // the sources we can use extract. Also make sure the extract wouldn't + // extract past the end of the source. + int NewStartIdx = alignDown(Idx, MaskNumElts); + if (NewStartIdx + MaskNumElts > SrcNumElts || + (StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx)) + CanExtract = false; + // Make sure we always update StartIdx as we use it to track if all + // elements are undef. + StartIdx[Input] = NewStartIdx; + } + + if (StartIdx[0] < 0 && StartIdx[1] < 0) { setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used. return; } - if (RangeUse[0] >= 0 && RangeUse[1] >= 0) { + if (CanExtract) { // Extract appropriate subvector and generate a vector shuffle for (unsigned Input = 0; Input < 2; ++Input) { SDValue &Src = Input == 0 ? Src1 : Src2; - if (RangeUse[Input] == 0) + if (StartIdx[Input] < 0) Src = DAG.getUNDEF(VT); else { - SDLoc dl = getCurSDLoc(); Src = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, VT, Src, - DAG.getConstant(StartIdx[Input], dl, + ISD::EXTRACT_SUBVECTOR, DL, VT, Src, + DAG.getConstant(StartIdx[Input], DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); } } // Calculate new mask. - SmallVector<int, 8> MappedOps; - for (unsigned i = 0; i != MaskNumElts; ++i) { - int Idx = Mask[i]; - if (Idx >= 0) { - if (Idx < (int)SrcNumElts) - Idx -= StartIdx[0]; - else - Idx -= SrcNumElts + StartIdx[1] - MaskNumElts; - } - MappedOps.push_back(Idx); + SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end()); + for (int &Idx : MappedOps) { + if (Idx >= (int)SrcNumElts) + Idx -= SrcNumElts + StartIdx[1] - MaskNumElts; + else if (Idx >= 0) + Idx -= StartIdx[0]; } - setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, - &MappedOps[0])); + setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps)); return; } } @@ -2875,10 +3268,8 @@ // to insert and build vector. 
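// A standalone restatement of the "can we extract one aligned subvector per
// input?" test used above, with plain integers in place of SDValues. The helper
// name and out-parameter are illustrative only; alignDown(x, n) is simply
// x - (x % n) for the non-negative values seen here, matching llvm::alignDown.
#include <vector>

static bool canExtractSubvectors(const std::vector<int> &Mask,
                                 unsigned SrcNumElts, int StartIdx[2]) {
  unsigned MaskNumElts = Mask.size();
  StartIdx[0] = StartIdx[1] = -1;
  bool CanExtract = true;
  for (int Idx : Mask) {
    if (Idx < 0)
      continue;                        // undef lane, no constraint
    unsigned Input = 0;
    if (unsigned(Idx) >= SrcNumElts) { // lane comes from the second source
      Input = 1;
      Idx -= SrcNumElts;
    }
    // All lanes taken from one input must sit in a single MaskNumElts-sized
    // window that is aligned to MaskNumElts and does not run past the source.
    int NewStartIdx = Idx - (Idx % (int)MaskNumElts); // alignDown
    if (NewStartIdx + (int)MaskNumElts > (int)SrcNumElts ||
        (StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx))
      CanExtract = false;
    StartIdx[Input] = NewStartIdx;     // also tracks "this input is used at all"
  }
  return CanExtract;
}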
EVT EltVT = VT.getVectorElementType(); EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); - SDLoc dl = getCurSDLoc(); SmallVector<SDValue,8> Ops; - for (unsigned i = 0; i != MaskNumElts; ++i) { - int Idx = Mask[i]; + for (int Idx : Mask) { SDValue Res; if (Idx < 0) { @@ -2887,17 +3278,23 @@ SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2; if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; - Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - EltVT, Src, DAG.getConstant(Idx, dl, IdxVT)); + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + EltVT, Src, DAG.getConstant(Idx, DL, IdxVT)); } Ops.push_back(Res); } - setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops)); -} - -void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { + setValue(&I, DAG.getBuildVector(VT, DL, Ops)); +} + +void SelectionDAGBuilder::visitInsertValue(const User &I) { + ArrayRef<unsigned> Indices; + if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(&I)) + Indices = IV->getIndices(); + else + Indices = cast<ConstantExpr>(&I)->getIndices(); + const Value *Op0 = I.getOperand(0); const Value *Op1 = I.getOperand(1); Type *AggTy = I.getType(); @@ -2905,7 +3302,7 @@ bool IntoUndef = isa<UndefValue>(Op0); bool FromUndef = isa<UndefValue>(Op1); - unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 4> AggValueVTs; @@ -2945,13 +3342,19 @@ DAG.getVTList(AggValueVTs), Values)); } -void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { +void SelectionDAGBuilder::visitExtractValue(const User &I) { + ArrayRef<unsigned> Indices; + if (const ExtractValueInst *EV = dyn_cast<ExtractValueInst>(&I)) + Indices = EV->getIndices(); + else + Indices = cast<ConstantExpr>(&I)->getIndices(); + const Value *Op0 = I.getOperand(0); Type *AggTy = Op0->getType(); Type *ValTy = I.getType(); bool OutOfUndef = isa<UndefValue>(Op0); - unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 4> ValValueVTs; @@ -2993,27 +3396,28 @@ cast<VectorType>(I.getType())->getVectorNumElements() : 0; if (VectorWidth && !N.getValueType().isVector()) { - MVT VT = MVT::getVectorVT(N.getValueType().getSimpleVT(), VectorWidth); - SmallVector<SDValue, 16> Ops(VectorWidth, N); - N = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); - } + LLVMContext &Context = *DAG.getContext(); + EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth); + N = DAG.getSplatBuildVector(VT, dl, N); + } + for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I); GTI != E; ++GTI) { const Value *Idx = GTI.getOperand(); - if (StructType *StTy = dyn_cast<StructType>(*GTI)) { + if (StructType *StTy = GTI.getStructTypeOrNull()) { unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue(); if (Field) { // N = N + Offset uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field); - // In an inbouds GEP with an offset that is nonnegative even when + // In an inbounds GEP with an offset that is nonnegative even when // interpreted as signed, assume there is no unsigned overflow. 
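// The vector-GEP path above splats the scalar pointer across all lanes before
// any index arithmetic, so that every later offset add is lane-wise. A
// plain-integer sketch of that address shape, illustrative only (byte
// addresses, no address spaces or overflow flags):
#include <cstddef>
#include <cstdint>
#include <vector>

static std::vector<uint64_t> vectorGepAddrs(uint64_t Base,
                                            const std::vector<int64_t> &Indices,
                                            uint64_t ElementSize) {
  std::vector<uint64_t> Lanes(Indices.size(), Base);     // splat of the base
  for (std::size_t i = 0, e = Lanes.size(); i != e; ++i)
    Lanes[i] += uint64_t(Indices[i]) * ElementSize;       // lane-wise scaled index
  return Lanes;
}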
SDNodeFlags Flags; if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds()) Flags.setNoUnsignedWrap(true); N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, - DAG.getConstant(Offset, dl, N.getValueType()), &Flags); + DAG.getConstant(Offset, dl, N.getValueType()), Flags); } } else { MVT PtrTy = @@ -3032,8 +3436,9 @@ if (CI->isZero()) continue; APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize); + LLVMContext &Context = *DAG.getContext(); SDValue OffsVal = VectorWidth ? - DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) : + DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, PtrTy, VectorWidth)) : DAG.getConstant(Offs, dl, PtrTy); // In an inbouds GEP with an offset that is nonnegative even when @@ -3042,7 +3447,7 @@ if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds()) Flags.setNoUnsignedWrap(true); - N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, &Flags); + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags); continue; } @@ -3050,10 +3455,10 @@ SDValue IdxN = getValue(Idx); if (!IdxN.getValueType().isVector() && VectorWidth) { - MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth); - SmallVector<SDValue, 16> Ops(VectorWidth, IdxN); - IdxN = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth); + IdxN = DAG.getSplatBuildVector(VT, dl, IdxN); } + // If the index is smaller or larger than intptr_t, truncate or extend // it. IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType()); @@ -3120,7 +3525,7 @@ Flags.setNoUnsignedWrap(true); AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize, - DAG.getIntPtrConstant(StackAlign - 1, dl), &Flags); + DAG.getIntPtrConstant(StackAlign - 1, dl), Flags); // Mask out the low bits for alignment purposes. AllocSize = DAG.getNode(ISD::AND, dl, @@ -3134,38 +3539,43 @@ setValue(&I, DSA); DAG.setRoot(DSA.getValue(1)); - assert(FuncInfo.MF->getFrameInfo()->hasVarSizedObjects()); + assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects()); } void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (I.isAtomic()) return visitAtomicLoad(I); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const Value *SV = I.getOperand(0); + if (TLI.supportSwiftError()) { + // Swifterror values can come from either a function parameter with + // swifterror attribute or an alloca with swifterror attribute. + if (const Argument *Arg = dyn_cast<Argument>(SV)) { + if (Arg->hasSwiftErrorAttr()) + return visitLoadFromSwiftError(I); + } + + if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) { + if (Alloca->isSwiftError()) + return visitLoadFromSwiftError(I); + } + } + SDValue Ptr = getValue(SV); Type *Ty = I.getType(); bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; - - // The IR notion of invariant_load only guarantees that all *non-faulting* - // invariant loads result in the same value. The MI notion of invariant load - // guarantees that the load can be legally moved to any location within its - // containing function. The MI notion of invariant_load is stronger than the - // IR notion of invariant_load -- an MI invariant_load is an IR invariant_load - // with a guarantee that the location being loaded from is dereferenceable - // throughout the function's lifetime. 
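// The dynamic-alloca size rounding above ("add StackAlign - 1, then mask out
// the low bits") is the usual power-of-two align-up trick. A scalar sketch,
// assuming the alignment is a power of two, as stack alignments are:
#include <cassert>
#include <cstdint>

static uint64_t alignUp(uint64_t Size, uint64_t Align) {
  assert(Align && (Align & (Align - 1)) == 0 && "Align must be a power of 2");
  return (Size + Align - 1) & ~(Align - 1);
}
// e.g. alignUp(13, 16) == 16, alignUp(32, 16) == 32.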
- - bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr && - isDereferenceablePointer(SV, DAG.getDataLayout()); + bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr; + bool isDereferenceable = isDereferenceablePointer(SV, DAG.getDataLayout()); unsigned Alignment = I.getAlignment(); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &Offsets); @@ -3178,7 +3588,7 @@ if (isVolatile || NumValues > MaxParallelChains) // Serialize volatile loads with other side effects. Root = getRoot(); - else if (AA->pointsToConstantMemory(MemoryLocation( + else if (AA && AA->pointsToConstantMemory(MemoryLocation( SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); @@ -3219,11 +3629,21 @@ SDValue A = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, DAG.getConstant(Offsets[i], dl, PtrVT), - &Flags); - SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, - A, MachinePointerInfo(SV, Offsets[i]), isVolatile, - isNonTemporal, isInvariant, Alignment, AAInfo, - Ranges); + Flags); + auto MMOFlags = MachineMemOperand::MONone; + if (isVolatile) + MMOFlags |= MachineMemOperand::MOVolatile; + if (isNonTemporal) + MMOFlags |= MachineMemOperand::MONonTemporal; + if (isInvariant) + MMOFlags |= MachineMemOperand::MOInvariant; + if (isDereferenceable) + MMOFlags |= MachineMemOperand::MODereferenceable; + MMOFlags |= TLI.getMMOFlags(I); + + SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A, + MachinePointerInfo(SV, Offsets[i]), Alignment, + MMOFlags, AAInfo, Ranges); Values[i] = L; Chains[ChainI] = L.getValue(1); @@ -3242,6 +3662,64 @@ DAG.getVTList(ValueVTs), Values)); } +void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { + assert(DAG.getTargetLoweringInfo().supportSwiftError() && + "call visitStoreToSwiftError when backend supports swifterror"); + + SmallVector<EVT, 4> ValueVTs; + SmallVector<uint64_t, 4> Offsets; + const Value *SrcV = I.getOperand(0); + ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), + SrcV->getType(), ValueVTs, &Offsets); + assert(ValueVTs.size() == 1 && Offsets[0] == 0 && + "expect a single EVT for swifterror"); + + SDValue Src = getValue(SrcV); + // Create a virtual register, then update the virtual register. + unsigned VReg; bool CreatedVReg; + std::tie(VReg, CreatedVReg) = FuncInfo.getOrCreateSwiftErrorVRegDefAt(&I); + // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue + // Chain can be getRoot or getControlRoot. 
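// The load lowering above now folds the per-load IR properties into one
// MachineMemOperand flag word, and notably keeps "invariant" and
// "dereferenceable" as separate bits instead of conflating them as the old
// code did. A small standalone model of that accumulation, with an
// illustrative flag enum (the real flags live on MachineMemOperand):
#include <cstdint>

enum MemFlags : uint32_t {
  MemNone            = 0,
  MemVolatile        = 1u << 0,
  MemNonTemporal     = 1u << 1,
  MemInvariant       = 1u << 2,
  MemDereferenceable = 1u << 3,
};

static uint32_t buildMemFlags(bool IsVolatile, bool IsNonTemporal,
                              bool IsInvariant, bool IsDereferenceable) {
  uint32_t Flags = MemNone;
  if (IsVolatile)        Flags |= MemVolatile;
  if (IsNonTemporal)     Flags |= MemNonTemporal;
  if (IsInvariant)       Flags |= MemInvariant;
  if (IsDereferenceable) Flags |= MemDereferenceable;
  return Flags;
}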
+ SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg, + SDValue(Src.getNode(), Src.getResNo())); + DAG.setRoot(CopyNode); + if (CreatedVReg) + FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg); +} + +void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { + assert(DAG.getTargetLoweringInfo().supportSwiftError() && + "call visitLoadFromSwiftError when backend supports swifterror"); + + assert(!I.isVolatile() && + I.getMetadata(LLVMContext::MD_nontemporal) == nullptr && + I.getMetadata(LLVMContext::MD_invariant_load) == nullptr && + "Support volatile, non temporal, invariant for load_from_swift_error"); + + const Value *SV = I.getOperand(0); + Type *Ty = I.getType(); + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + assert((!AA || !AA->pointsToConstantMemory(MemoryLocation( + SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) && + "load_from_swift_error should not be constant memory"); + + SmallVector<EVT, 4> ValueVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty, + ValueVTs, &Offsets); + assert(ValueVTs.size() == 1 && Offsets[0] == 0 && + "expect a single EVT for swifterror"); + + // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT + SDValue L = DAG.getCopyFromReg( + getRoot(), getCurSDLoc(), + FuncInfo.getOrCreateSwiftErrorVRegUseAt(&I, FuncInfo.MBB, SV).first, + ValueVTs[0]); + + setValue(&I, L); +} + void SelectionDAGBuilder::visitStore(const StoreInst &I) { if (I.isAtomic()) return visitAtomicStore(I); @@ -3249,6 +3727,21 @@ const Value *SrcV = I.getOperand(0); const Value *PtrV = I.getOperand(1); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.supportSwiftError()) { + // Swifterror values can come from either a function parameter with + // swifterror attribute or an alloca with swifterror attribute. + if (const Argument *Arg = dyn_cast<Argument>(PtrV)) { + if (Arg->hasSwiftErrorAttr()) + return visitStoreToSwiftError(I); + } + + if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) { + if (Alloca->isSwiftError()) + return visitStoreToSwiftError(I); + } + } + SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), @@ -3265,15 +3758,19 @@ SDValue Root = getRoot(); SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues)); + SDLoc dl = getCurSDLoc(); EVT PtrVT = Ptr.getValueType(); - bool isVolatile = I.isVolatile(); - bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; unsigned Alignment = I.getAlignment(); - SDLoc dl = getCurSDLoc(); - AAMDNodes AAInfo; I.getAAMetadata(AAInfo); + auto MMOFlags = MachineMemOperand::MONone; + if (I.isVolatile()) + MMOFlags |= MachineMemOperand::MOVolatile; + if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr) + MMOFlags |= MachineMemOperand::MONonTemporal; + MMOFlags |= TLI.getMMOFlags(I); + // An aggregate load cannot wrap around the address space, so offsets to its // parts don't wrap either. 
SDNodeFlags Flags; @@ -3289,11 +3786,10 @@ ChainI = 0; } SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, - DAG.getConstant(Offsets[i], dl, PtrVT), &Flags); - SDValue St = DAG.getStore(Root, dl, - SDValue(Src.getNode(), Src.getResNo() + i), - Add, MachinePointerInfo(PtrV, Offsets[i]), - isVolatile, isNonTemporal, Alignment, AAInfo); + DAG.getConstant(Offsets[i], dl, PtrVT), Flags); + SDValue St = DAG.getStore( + Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add, + MachinePointerInfo(PtrV, Offsets[i]), Alignment, MMOFlags, AAInfo); Chains[ChainI] = St; } @@ -3302,16 +3798,39 @@ DAG.setRoot(StoreNode); } -void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { +void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, + bool IsCompressing) { SDLoc sdl = getCurSDLoc(); - // llvm.masked.store.*(Src0, Ptr, alignment, Mask) - Value *PtrOperand = I.getArgOperand(1); + auto getMaskedStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, + unsigned& Alignment) { + // llvm.masked.store.*(Src0, Ptr, alignment, Mask) + Src0 = I.getArgOperand(0); + Ptr = I.getArgOperand(1); + Alignment = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue(); + Mask = I.getArgOperand(3); + }; + auto getCompressingStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, + unsigned& Alignment) { + // llvm.masked.compressstore.*(Src0, Ptr, Mask) + Src0 = I.getArgOperand(0); + Ptr = I.getArgOperand(1); + Mask = I.getArgOperand(2); + Alignment = 0; + }; + + Value *PtrOperand, *MaskOperand, *Src0Operand; + unsigned Alignment; + if (IsCompressing) + getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment); + else + getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment); + SDValue Ptr = getValue(PtrOperand); - SDValue Src0 = getValue(I.getArgOperand(0)); - SDValue Mask = getValue(I.getArgOperand(3)); + SDValue Src0 = getValue(Src0Operand); + SDValue Mask = getValue(MaskOperand); + EVT VT = Src0.getValueType(); - unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue(); if (!Alignment) Alignment = DAG.getEVTAlignment(VT); @@ -3324,7 +3843,8 @@ MachineMemOperand::MOStore, VT.getStoreSize(), Alignment, AAInfo); SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT, - MMO, false); + MMO, false /* Truncating */, + IsCompressing); DAG.setRoot(StoreNode); setValue(&I, StoreNode); } @@ -3344,10 +3864,8 @@ // are looking for. If first operand of the GEP is a splat vector - we // extract the spalt value and use it as a uniform base. // In all other cases the function returns 'false'. 
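// The per-part load/store loops above keep at most MaxParallelChains
// outstanding chains and periodically join them rather than building one very
// wide TokenFactor. A standalone sketch of that batching shape; EmitPart and
// Join are placeholders standing in for the per-part getStore/getLoad and for
// the TokenFactor that becomes the new root:
#include <algorithm>
#include <functional>
#include <vector>

static void emitInBatches(unsigned NumParts, unsigned MaxParallelChains,
                          const std::function<int(unsigned)> &EmitPart,
                          const std::function<void(std::vector<int> &)> &Join) {
  std::vector<int> Chains(std::min(MaxParallelChains, NumParts));
  unsigned ChainI = 0;
  for (unsigned i = 0; i != NumParts; ++i, ++ChainI) {
    if (ChainI == MaxParallelChains) {
      Join(Chains);                 // fold the pending chains into a new root
      ChainI = 0;
    }
    Chains[ChainI] = EmitPart(i);   // one chained access per aggregate part
  }
  Chains.resize(ChainI);
  Join(Chains);                     // join whatever is left over
}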
-// -static bool getUniformBase(const Value *& Ptr, SDValue& Base, SDValue& Index, +static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, SelectionDAGBuilder* SDB) { - SelectionDAG& DAG = SDB->DAG; LLVMContext &Context = *DAG.getContext(); @@ -3382,8 +3900,7 @@ if (!Index.getValueType().isVector()) { unsigned GEPWidth = GEP->getType()->getVectorNumElements(); EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth); - SmallVector<SDValue, 16> Ops(GEPWidth, Index); - Index = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Index), VT, Ops); + Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index); } return true; } @@ -3425,18 +3942,38 @@ setValue(&I, Scatter); } -void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { +void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { SDLoc sdl = getCurSDLoc(); - // @llvm.masked.load.*(Ptr, alignment, Mask, Src0) - Value *PtrOperand = I.getArgOperand(0); + auto getMaskedLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, + unsigned& Alignment) { + // @llvm.masked.load.*(Ptr, alignment, Mask, Src0) + Ptr = I.getArgOperand(0); + Alignment = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); + Mask = I.getArgOperand(2); + Src0 = I.getArgOperand(3); + }; + auto getExpandingLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, + unsigned& Alignment) { + // @llvm.masked.expandload.*(Ptr, Mask, Src0) + Ptr = I.getArgOperand(0); + Alignment = 0; + Mask = I.getArgOperand(1); + Src0 = I.getArgOperand(2); + }; + + Value *PtrOperand, *MaskOperand, *Src0Operand; + unsigned Alignment; + if (IsExpanding) + getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment); + else + getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment); + SDValue Ptr = getValue(PtrOperand); - SDValue Src0 = getValue(I.getArgOperand(3)); - SDValue Mask = getValue(I.getArgOperand(2)); - - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); - unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue(); + SDValue Src0 = getValue(Src0Operand); + SDValue Mask = getValue(MaskOperand); + + EVT VT = Src0.getValueType(); if (!Alignment) Alignment = DAG.getEVTAlignment(VT); @@ -3444,13 +3981,10 @@ I.getAAMetadata(AAInfo); const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); - SDValue InChain = DAG.getRoot(); - if (AA->pointsToConstantMemory(MemoryLocation( - PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), - AAInfo))) { - // Do not serialize (non-volatile) loads of constant memory with anything. - InChain = DAG.getEntryNode(); - } + // Do not serialize masked loads of constant memory with anything. + bool AddToChain = !AA || !AA->pointsToConstantMemory(MemoryLocation( + PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo)); + SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); MachineMemOperand *MMO = DAG.getMachineFunction(). 
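// visitMaskedLoad above now also serves llvm.masked.expandload, which differs
// from a plain masked load in how memory is addressed: enabled lanes consume
// consecutive memory elements instead of the element at their own lane
// position, and disabled lanes keep the pass-through value. A scalar emulation
// of that semantic difference (illustrative only, not the DAG lowering):
#include <vector>

template <typename T>
static std::vector<T> expandLoad(const T *Mem, const std::vector<bool> &Mask,
                                 const std::vector<T> &PassThru) {
  std::vector<T> Result(PassThru);
  unsigned MemIdx = 0;                  // next packed element to read
  for (unsigned Lane = 0; Lane != Mask.size(); ++Lane) {
    if (Mask[Lane])
      Result[Lane] = Mem[MemIdx++];     // enabled lanes read consecutively
    // disabled lanes keep the pass-through value
  }
  return Result;
}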
@@ -3459,9 +3993,11 @@ Alignment, AAInfo, Ranges); SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO, - ISD::NON_EXTLOAD); - SDValue OutChain = Load.getValue(1); - DAG.setRoot(OutChain); + ISD::NON_EXTLOAD, IsExpanding); + if (AddToChain) { + SDValue OutChain = Load.getValue(1); + DAG.setRoot(OutChain); + } setValue(&I, Load); } @@ -3490,7 +4026,7 @@ bool UniformBase = getUniformBase(BasePtr, Base, Index, this); bool ConstantMemory = false; if (UniformBase && - AA->pointsToConstantMemory(MemoryLocation( + AA && AA->pointsToConstantMemory(MemoryLocation( BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. @@ -3522,7 +4058,7 @@ SDLoc dl = getCurSDLoc(); AtomicOrdering SuccessOrder = I.getSuccessOrdering(); AtomicOrdering FailureOrder = I.getFailureOrdering(); - SynchronizationScope Scope = I.getSynchScope(); + SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); @@ -3532,7 +4068,7 @@ ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain, getValue(I.getPointerOperand()), getValue(I.getCompareOperand()), getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()), - /*Alignment=*/ 0, SuccessOrder, FailureOrder, Scope); + /*Alignment=*/ 0, SuccessOrder, FailureOrder, SSID); SDValue OutChain = L.getValue(2); @@ -3558,7 +4094,7 @@ case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break; } AtomicOrdering Order = I.getOrdering(); - SynchronizationScope Scope = I.getSynchScope(); + SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); @@ -3569,7 +4105,7 @@ getValue(I.getPointerOperand()), getValue(I.getValOperand()), I.getPointerOperand(), - /* Alignment=*/ 0, Order, Scope); + /* Alignment=*/ 0, Order, SSID); SDValue OutChain = L.getValue(1); @@ -3582,17 +4118,17 @@ const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Ops[3]; Ops[0] = getRoot(); - Ops[1] = DAG.getConstant(I.getOrdering(), dl, - TLI.getPointerTy(DAG.getDataLayout())); - Ops[2] = DAG.getConstant(I.getSynchScope(), dl, - TLI.getPointerTy(DAG.getDataLayout())); + Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl, + TLI.getFenceOperandTy(DAG.getDataLayout())); + Ops[2] = DAG.getConstant(I.getSyncScopeID(), dl, + TLI.getFenceOperandTy(DAG.getDataLayout())); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); } void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering Order = I.getOrdering(); - SynchronizationScope Scope = I.getSynchScope(); + SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); @@ -3609,13 +4145,13 @@ MachineMemOperand::MOLoad, VT.getStoreSize(), I.getAlignment() ? I.getAlignment() : - DAG.getEVTAlignment(VT)); + DAG.getEVTAlignment(VT), + AAMDNodes(), nullptr, SSID, Order); InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG); SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain, - getValue(I.getPointerOperand()), MMO, - Order, Scope); + getValue(I.getPointerOperand()), MMO); SDValue OutChain = L.getValue(1); @@ -3627,7 +4163,7 @@ SDLoc dl = getCurSDLoc(); AtomicOrdering Order = I.getOrdering(); - SynchronizationScope Scope = I.getSynchScope(); + SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); @@ -3644,7 +4180,7 @@ getValue(I.getPointerOperand()), getValue(I.getValueOperand()), I.getPointerOperand(), I.getAlignment(), - Order, Scope); + Order, SSID); DAG.setRoot(OutChain); } @@ -3653,8 +4189,12 @@ /// node. 
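// The cmpxchg lowering above carries separate success and failure orderings
// (plus a SyncScope::ID in place of the old SynchronizationScope enum). The
// same two-ordering shape exists in plain C++ atomics, which is a convenient
// way to see what the two parameters mean; the function below is only an
// illustration:
#include <atomic>

static bool tryPublish(std::atomic<int> &Slot, int Expected, int Desired) {
  // On success the exchange is a full acquire/release operation; on failure
  // only the load happened, so a weaker acquire ordering is enough for the
  // re-read value (the failure ordering may not be stronger than success).
  return Slot.compare_exchange_strong(Expected, Desired,
                                      std::memory_order_acq_rel,
                                      std::memory_order_acquire);
}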
void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic) { - bool HasChain = !I.doesNotAccessMemory(); - bool OnlyLoad = HasChain && I.onlyReadsMemory(); + // Ignore the callsite's attributes. A specific call site may be marked with + // readnone, but the lowering code will expect the chain based on the + // definition. + const Function *F = I.getCalledFunction(); + bool HasChain = !F->doesNotAccessMemory(); + bool OnlyLoad = HasChain && F->onlyReadsMemory(); // Build the operand list. SmallVector<SDValue, 8> Ops; @@ -3721,7 +4261,8 @@ if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) { EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy); Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result); - } + } else + Result = lowerRangeToAssertZExt(DAG, I, Result); setValue(&I, Result); } @@ -3733,8 +4274,7 @@ /// Op = (Op & 0x007fffff) | 0x3f800000; /// /// where Op is the hexadecimal representation of floating point value. -static SDValue -GetSignificand(SelectionDAG &DAG, SDValue Op, SDLoc dl) { +static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) { SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, DAG.getConstant(0x007fffff, dl, MVT::i32)); SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, @@ -3747,9 +4287,8 @@ /// (float)(int)(((Op & 0x7f800000) >> 23) - 127); /// /// where Op is the hexadecimal representation of floating point value. -static SDValue -GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, - SDLoc dl) { +static SDValue GetExponent(SelectionDAG &DAG, SDValue Op, + const TargetLowering &TLI, const SDLoc &dl) { SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, DAG.getConstant(0x7f800000, dl, MVT::i32)); SDValue t1 = DAG.getNode( @@ -3761,13 +4300,13 @@ } /// getF32Constant - Get 32-bit floating point constant. -static SDValue -getF32Constant(SelectionDAG &DAG, unsigned Flt, SDLoc dl) { - return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), dl, +static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt, + const SDLoc &dl) { + return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl, MVT::f32); } -static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, +static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl, SelectionDAG &DAG) { // TODO: What fast-math-flags should be set on the floating-point nodes? @@ -3859,7 +4398,7 @@ /// expandExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. -static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, +static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { @@ -3882,9 +4421,8 @@ /// expandLog - Lower a log intrinsic. Handles the special sequences for /// limited-precision mode. -static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, +static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { - // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && @@ -3981,9 +4519,8 @@ /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for /// limited-precision mode. 
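// GetSignificand and GetExponent above are pure bit manipulation on the
// IEEE-754 single-precision encoding. The same operations on a plain uint32_t,
// to make the masks and shifts concrete (helper names are illustrative):
#include <cstdint>

// ((Op & 0x7f800000) >> 23) - 127: the unbiased exponent.
static int32_t exponentOf(uint32_t Bits) {
  return (int32_t)((Bits & 0x7f800000u) >> 23) - 127;
}

// (Op & 0x007fffff) | 0x3f800000: keep the mantissa, force the exponent field
// to zero, yielding a float in [1.0, 2.0).
static uint32_t significandOf(uint32_t Bits) {
  return (Bits & 0x007fffffu) | 0x3f800000u;
}
// e.g. for 8.0f (0x41000000): exponentOf == 3, significandOf == 0x3f800000 (1.0f).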
-static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, +static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { - // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && @@ -4079,9 +4616,8 @@ /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for /// limited-precision mode. -static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, +static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { - // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && @@ -4170,7 +4706,7 @@ /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for /// limited-precision mode. -static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG, +static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) @@ -4182,7 +4718,7 @@ /// visitPow - Lower a pow intrinsic. Handles the special sequences for /// limited-precision mode with x == 10.0f. -static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS, +static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const TargetLowering &TLI) { bool IsExp10 = false; if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 && @@ -4209,9 +4745,8 @@ return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS); } - /// ExpandPowI - Expand a llvm.powi intrinsic. -static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, +static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, SelectionDAG &DAG) { // If RHS is a constant, we can expand this out to a multiplication tree, // otherwise we end up lowering to a call to __powidf2 (for example). When @@ -4280,12 +4815,12 @@ } } -/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function -/// argument, create the corresponding DBG_VALUE machine instruction for it now. -/// At the end of instruction selection, they will be inserted to the entry BB. +/// If the DbgValueInst is a dbg_value of a function argument, create the +/// corresponding DBG_VALUE machine instruction for it now. At the end of +/// instruction selection, they will be inserted to the entry BB. bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( const Value *V, DILocalVariable *Variable, DIExpression *Expr, - DILocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) { + DILocation *DL, bool IsDbgDeclare, const SDValue &N) { const Argument *Arg = dyn_cast<Argument>(V); if (!Arg) return false; @@ -4299,9 +4834,11 @@ if (!Variable->getScope()->getSubprogram()->describes(MF.getFunction())) return false; + bool IsIndirect = false; Optional<MachineOperand> Op; // Some arguments' frame index is recorded during argument lowering. - if (int FI = FuncInfo.getArgumentFrameIndex(Arg)) + int FI = FuncInfo.getArgumentFrameIndex(Arg); + if (FI != std::numeric_limits<int>::max()) Op = MachineOperand::CreateFI(FI); if (!Op && N.getNode()) { @@ -4312,15 +4849,43 @@ if (PR) Reg = PR; } - if (Reg) + if (Reg) { Op = MachineOperand::CreateReg(Reg, false); + IsIndirect = IsDbgDeclare; + } } if (!Op) { // Check if ValueMap has reg number. 
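// ExpandPowI above turns llvm.powi with a constant exponent into a
// multiplication tree instead of a __powidf2 call. The underlying idea is
// ordinary square-and-multiply; a scalar sketch, not the DAG construction:
#include <cstdint>

static double powiConst(double X, int32_t N) {
  uint32_t E = N < 0 ? uint32_t(-(int64_t)N) : uint32_t(N);
  double Result = 1.0, Base = X;
  while (E) {
    if (E & 1)
      Result *= Base;   // multiply in the current power-of-two factor of X
    Base *= Base;       // square
    E >>= 1;
  }
  return N < 0 ? 1.0 / Result : Result;
}
// powiConst(x, 5) needs only a few multiplies rather than a libcall.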
DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); - if (VMI != FuncInfo.ValueMap.end()) + if (VMI != FuncInfo.ValueMap.end()) { + const auto &TLI = DAG.getTargetLoweringInfo(); + RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second, + V->getType(), isABIRegCopy(V)); + unsigned NumRegs = + std::accumulate(RFV.RegCount.begin(), RFV.RegCount.end(), 0); + if (NumRegs > 1) { + unsigned I = 0; + unsigned Offset = 0; + auto RegisterVT = RFV.RegVTs.begin(); + for (auto RegCount : RFV.RegCount) { + unsigned RegisterSize = (RegisterVT++)->getSizeInBits(); + for (unsigned E = I + RegCount; I != E; ++I) { + // The vregs are guaranteed to be allocated in sequence. + Op = MachineOperand::CreateReg(VMI->second + I, false); + auto *FragmentExpr = DIExpression::createFragmentExpression( + Expr, Offset, RegisterSize); + FuncInfo.ArgDbgValues.push_back( + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare, + Op->getReg(), Variable, FragmentExpr)); + Offset += RegisterSize; + } + } + return true; + } Op = MachineOperand::CreateReg(VMI->second, false); + IsIndirect = IsDbgDeclare; + } } if (!Op && N.getNode()) @@ -4338,18 +4903,35 @@ if (Op->isReg()) FuncInfo.ArgDbgValues.push_back( BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, - Op->getReg(), Offset, Variable, Expr)); + Op->getReg(), Variable, Expr)); else FuncInfo.ArgDbgValues.push_back( BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE)) - .addOperand(*Op) - .addImm(Offset) + .add(*Op) + .addImm(0) .addMetadata(Variable) .addMetadata(Expr)); return true; } +/// Return the appropriate SDDbgValue based on N. +SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N, + DILocalVariable *Variable, + DIExpression *Expr, + const DebugLoc &dl, + unsigned DbgSDNodeOrder) { + if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) { + // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe + // stack slot locations as such instead of as indirectly addressed + // locations. + return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), dl, + DbgSDNodeOrder); + } + return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, dl, + DbgSDNodeOrder); +} + // VisualStudio defines setjmp as _setjmp #if defined(_MSC_VER) && defined(setjmp) && \ !defined(setjmp_undefined_for_msvc) @@ -4358,9 +4940,9 @@ # define setjmp_undefined_for_msvc #endif -/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If -/// we want to emit this as a call to a named external function, return the name -/// otherwise lower it and return null. +/// Lower the call to the specified intrinsic function. If we want to emit this +/// as a call to a named external function, return the name. Otherwise, lower it +/// and return null. 
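// When an argument lives in several virtual registers, the code above emits
// one DBG_VALUE per register, each carrying a DIExpression fragment
// (bit offset, bit size) into the whole value. A standalone sketch of how
// those fragments line up given the per-register sizes; the pair layout is
// illustrative, the real expression is built by createFragmentExpression:
#include <utility>
#include <vector>

static std::vector<std::pair<unsigned, unsigned>> // {BitOffset, BitSize}
fragmentLayout(const std::vector<unsigned> &RegisterSizesInBits) {
  std::vector<std::pair<unsigned, unsigned>> Fragments;
  unsigned Offset = 0;
  for (unsigned Size : RegisterSizesInBits) {
    Fragments.push_back({Offset, Size}); // this register describes these bits
    Offset += Size;                      // the vregs are laid out in sequence
  }
  return Fragments;
}
// e.g. an i128 split over two 64-bit registers -> {0,64} and {64,64}.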
const char * SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -4381,6 +4963,10 @@ TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return nullptr; + case Intrinsic::addressofreturnaddress: + setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl, + TLI.getPointerTy(DAG.getDataLayout()))); + return nullptr; case Intrinsic::frameaddress: setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(DAG.getDataLayout()), @@ -4457,21 +5043,162 @@ updateDAGForMaybeTailCall(MM); return nullptr; } + case Intrinsic::memcpy_element_unordered_atomic: { + const ElementUnorderedAtomicMemCpyInst &MI = + cast<ElementUnorderedAtomicMemCpyInst>(I); + SDValue Dst = getValue(MI.getRawDest()); + SDValue Src = getValue(MI.getRawSource()); + SDValue Length = getValue(MI.getLength()); + + // Emit a library call. + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + + Entry.Node = Src; + Args.push_back(Entry); + + Entry.Ty = MI.getLength()->getType(); + Entry.Node = Length; + Args.push_back(Entry); + + uint64_t ElementSizeConstant = MI.getElementSizeInBytes(); + RTLIB::Libcall LibraryCall = + RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant); + if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported element size"); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( + TLI.getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall), + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args)); + + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); + DAG.setRoot(CallResult.second); + return nullptr; + } + case Intrinsic::memmove_element_unordered_atomic: { + auto &MI = cast<ElementUnorderedAtomicMemMoveInst>(I); + SDValue Dst = getValue(MI.getRawDest()); + SDValue Src = getValue(MI.getRawSource()); + SDValue Length = getValue(MI.getLength()); + + // Emit a library call. + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + + Entry.Node = Src; + Args.push_back(Entry); + + Entry.Ty = MI.getLength()->getType(); + Entry.Node = Length; + Args.push_back(Entry); + + uint64_t ElementSizeConstant = MI.getElementSizeInBytes(); + RTLIB::Libcall LibraryCall = + RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant); + if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported element size"); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( + TLI.getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall), + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args)); + + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); + DAG.setRoot(CallResult.second); + return nullptr; + } + case Intrinsic::memset_element_unordered_atomic: { + auto &MI = cast<ElementUnorderedAtomicMemSetInst>(I); + SDValue Dst = getValue(MI.getRawDest()); + SDValue Val = getValue(MI.getValue()); + SDValue Length = getValue(MI.getLength()); + + // Emit a library call. 
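// The element-wise atomic memcpy/memmove lowering above picks its libcall
// purely from the element size and otherwise reports a fatal error. A
// standalone sketch of that selection; returning nullptr stands in for
// RTLIB::UNKNOWN_LIBCALL, and the symbol names follow the
// __llvm_memcpy_element_unordered_atomic_N runtime entry points this lowering
// targets:
#include <cstdint>

static const char *memcpyElementAtomicLibcall(uint64_t ElementSizeInBytes) {
  switch (ElementSizeInBytes) {
  case 1:  return "__llvm_memcpy_element_unordered_atomic_1";
  case 2:  return "__llvm_memcpy_element_unordered_atomic_2";
  case 4:  return "__llvm_memcpy_element_unordered_atomic_4";
  case 8:  return "__llvm_memcpy_element_unordered_atomic_8";
  case 16: return "__llvm_memcpy_element_unordered_atomic_16";
  default: return nullptr; // unsupported element size -> report_fatal_error
  }
}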
+ TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + + Entry.Ty = Type::getInt8Ty(*DAG.getContext()); + Entry.Node = Val; + Args.push_back(Entry); + + Entry.Ty = MI.getLength()->getType(); + Entry.Node = Length; + Args.push_back(Entry); + + uint64_t ElementSizeConstant = MI.getElementSizeInBytes(); + RTLIB::Libcall LibraryCall = + RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant); + if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported element size"); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( + TLI.getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall), + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args)); + + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); + DAG.setRoot(CallResult.second); + return nullptr; + } + case Intrinsic::dbg_addr: case Intrinsic::dbg_declare: { - const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); + const DbgInfoIntrinsic &DI = cast<DbgInfoIntrinsic>(I); DILocalVariable *Variable = DI.getVariable(); DIExpression *Expression = DI.getExpression(); - const Value *Address = DI.getAddress(); assert(Variable && "Missing variable"); - if (!Address) { + + // Check if address has undef value. + const Value *Address = DI.getVariableLocation(); + if (!Address || isa<UndefValue>(Address) || + (Address->use_empty() && !isa<Argument>(Address))) { DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return nullptr; } - // Check if address has undef value. - if (isa<UndefValue>(Address) || - (Address->use_empty() && !isa<Argument>(Address))) { - DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + bool isParameter = Variable->isParameter() || isa<Argument>(Address); + + // Check if this variable can be described by a frame index, typically + // either as a static alloca or a byval parameter. + int FI = std::numeric_limits<int>::max(); + if (const auto *AI = + dyn_cast<AllocaInst>(Address->stripInBoundsConstantOffsets())) { + if (AI->isStaticAlloca()) { + auto I = FuncInfo.StaticAllocaMap.find(AI); + if (I != FuncInfo.StaticAllocaMap.end()) + FI = I->second; + } + } else if (const auto *Arg = dyn_cast<Argument>( + Address->stripInBoundsConstantOffsets())) { + FI = FuncInfo.getArgumentFrameIndex(Arg); + } + + // llvm.dbg.addr is control dependent and always generates indirect + // DBG_VALUE instructions. llvm.dbg.declare is handled as a frame index in + // the MachineFunction variable table. + if (FI != std::numeric_limits<int>::max()) { + if (Intrinsic == Intrinsic::dbg_addr) + DAG.AddDbgValue(DAG.getFrameIndexDbgValue(Variable, Expression, FI, dl, + SDNodeOrder), + getRoot().getNode(), isParameter); return nullptr; } @@ -4484,42 +5211,26 @@ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) Address = BCI->getOperand(0); // Parameters are handled specially. - bool isParameter = Variable->isParameter() || isa<Argument>(Address); auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode()); if (isParameter && FINode) { // Byval parameter. We have a frame index at this point. 
SDV = DAG.getFrameIndexDbgValue(Variable, Expression, - FINode->getIndex(), 0, dl, SDNodeOrder); + FINode->getIndex(), dl, SDNodeOrder); } else if (isa<Argument>(Address)) { // Address is an argument, so try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, - N); + EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N); return nullptr; } else { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), - true, 0, dl, SDNodeOrder); + true, dl, SDNodeOrder); } DAG.AddDbgValue(SDV, N.getNode(), isParameter); } else { // If Address is an argument then try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, + if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N)) { - // If variable is pinned by a alloca in dominating bb then - // use StaticAllocaMap. - if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) { - if (AI->getParent() != DI.getParent()) { - DenseMap<const AllocaInst*, int>::iterator SI = - FuncInfo.StaticAllocaMap.find(AI); - if (SI != FuncInfo.StaticAllocaMap.end()) { - SDV = DAG.getFrameIndexDbgValue(Variable, Expression, SI->second, - 0, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, nullptr, false); - return nullptr; - } - } - } DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); } } @@ -4531,63 +5242,47 @@ DILocalVariable *Variable = DI.getVariable(); DIExpression *Expression = DI.getExpression(); - uint64_t Offset = DI.getOffset(); const Value *V = DI.getValue(); if (!V) return nullptr; SDDbgValue *SDV; if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) { - SDV = DAG.getConstantDbgValue(Variable, Expression, V, Offset, dl, - SDNodeOrder); + SDV = DAG.getConstantDbgValue(Variable, Expression, V, dl, SDNodeOrder); DAG.AddDbgValue(SDV, nullptr, false); - } else { - // Do not use getValue() in here; we don't want to generate code at - // this point if it hasn't been done yet. - SDValue N = NodeMap[V]; - if (!N.getNode() && isa<Argument>(V)) - // Check unused arguments map. - N = UnusedArgNodeMap[V]; - if (N.getNode()) { - if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, - false, N)) { - SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), - false, Offset, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, N.getNode(), false); - } - } else if (!V->use_empty() ) { - // Do not call getValue(V) yet, as we don't want to generate code. - // Remember it for later. - DanglingDebugInfo DDI(&DI, dl, SDNodeOrder); - DanglingDebugInfoMap[V] = DDI; - } else { - // We may expand this to cover more cases. One case where we have no - // data available is an unreferenced parameter. - DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); - } - } - - // Build a debug info table entry. - if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V)) - V = BCI->getOperand(0); - const AllocaInst *AI = dyn_cast<AllocaInst>(V); - // Don't handle byval struct arguments or VLAs, for example. - if (!AI) { - DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); - DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); return nullptr; } - DenseMap<const AllocaInst*, int>::iterator SI = - FuncInfo.StaticAllocaMap.find(AI); - if (SI == FuncInfo.StaticAllocaMap.end()) - return nullptr; // VLAs. 
+ + // Do not use getValue() in here; we don't want to generate code at + // this point if it hasn't been done yet. + SDValue N = NodeMap[V]; + if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map. + N = UnusedArgNodeMap[V]; + if (N.getNode()) { + if (EmitFuncArgumentDbgValue(V, Variable, Expression, dl, false, N)) + return nullptr; + SDV = getDbgValue(N, Variable, Expression, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, N.getNode(), false); + return nullptr; + } + + if (!V->use_empty() ) { + // Do not call getValue(V) yet, as we don't want to generate code. + // Remember it for later. + DanglingDebugInfo DDI(&DI, dl, SDNodeOrder); + DanglingDebugInfoMap[V] = DDI; + return nullptr; + } + + DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); + DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); return nullptr; } case Intrinsic::eh_typeid_for: { // Find the type id for the given typeinfo. GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0)); - unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); + unsigned TypeID = DAG.getMachineFunction().getTypeIDFor(GV); Res = DAG.getConstant(TypeID, sdl, MVT::i32); setValue(&I, Res); return nullptr; @@ -4595,7 +5290,7 @@ case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64: - DAG.getMachineFunction().getMMI().setCallsEHReturn(true); + DAG.getMachineFunction().setCallsEHReturn(true); DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl, MVT::Other, getControlRoot(), @@ -4603,23 +5298,13 @@ getValue(I.getArgOperand(1)))); return nullptr; case Intrinsic::eh_unwind_init: - DAG.getMachineFunction().getMMI().setCallsUnwindInit(true); + DAG.getMachineFunction().setCallsUnwindInit(true); return nullptr; - case Intrinsic::eh_dwarf_cfa: { - SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl, - TLI.getPointerTy(DAG.getDataLayout())); - SDValue Offset = DAG.getNode(ISD::ADD, sdl, - CfaArg.getValueType(), - DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl, - CfaArg.getValueType()), - CfaArg); - SDValue FA = DAG.getNode( - ISD::FRAMEADDR, sdl, TLI.getPointerTy(DAG.getDataLayout()), - DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()))); - setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(), - FA, Offset)); + case Intrinsic::eh_dwarf_cfa: + setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl, + TLI.getPointerTy(DAG.getDataLayout()), + getValue(I.getArgOperand(0)))); return nullptr; - } case Intrinsic::eh_sjlj_callsite: { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0)); @@ -4631,11 +5316,11 @@ } case Intrinsic::eh_sjlj_functioncontext: { // Get and store the index of the function context. 
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); AllocaInst *FnCtx = cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts()); int FI = FuncInfo.StaticAllocaMap[FnCtx]; - MFI->setFunctionContextIndex(FI); + MFI.setFunctionContextIndex(FI); return nullptr; } case Intrinsic::eh_sjlj_setjmp: { @@ -4648,17 +5333,14 @@ DAG.setRoot(Op.getValue(1)); return nullptr; } - case Intrinsic::eh_sjlj_longjmp: { + case Intrinsic::eh_sjlj_longjmp: DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other, getRoot(), getValue(I.getArgOperand(0)))); return nullptr; - } - case Intrinsic::eh_sjlj_setup_dispatch: { + case Intrinsic::eh_sjlj_setup_dispatch: DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other, getRoot())); return nullptr; - } - case Intrinsic::masked_gather: visitMaskedGather(I); return nullptr; @@ -4671,6 +5353,12 @@ case Intrinsic::masked_store: visitMaskedStore(I); return nullptr; + case Intrinsic::masked_expandload: + visitMaskedLoad(I, true /* IsExpanding */); + return nullptr; + case Intrinsic::masked_compressstore: + visitMaskedStore(I, true /* IsCompressing */); + return nullptr; case Intrinsic::x86_mmx_pslli_w: case Intrinsic::x86_mmx_pslli_d: case Intrinsic::x86_mmx_pslli_q: @@ -4721,7 +5409,7 @@ SDValue ShOps[2]; ShOps[0] = ShAmt; ShOps[1] = DAG.getConstant(0, sdl, MVT::i32); - ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps); + ShAmt = DAG.getBuildVector(ShAmtVT, sdl, ShOps); EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, @@ -4730,39 +5418,6 @@ setValue(&I, Res); return nullptr; } - case Intrinsic::convertff: - case Intrinsic::convertfsi: - case Intrinsic::convertfui: - case Intrinsic::convertsif: - case Intrinsic::convertuif: - case Intrinsic::convertss: - case Intrinsic::convertsu: - case Intrinsic::convertus: - case Intrinsic::convertuu: { - ISD::CvtCode Code = ISD::CVT_INVALID; - switch (Intrinsic) { - default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. - case Intrinsic::convertff: Code = ISD::CVT_FF; break; - case Intrinsic::convertfsi: Code = ISD::CVT_FS; break; - case Intrinsic::convertfui: Code = ISD::CVT_FU; break; - case Intrinsic::convertsif: Code = ISD::CVT_SF; break; - case Intrinsic::convertuif: Code = ISD::CVT_UF; break; - case Intrinsic::convertss: Code = ISD::CVT_SS; break; - case Intrinsic::convertsu: Code = ISD::CVT_SU; break; - case Intrinsic::convertus: Code = ISD::CVT_US; break; - case Intrinsic::convertuu: Code = ISD::CVT_UU; break; - } - EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); - const Value *Op1 = I.getArgOperand(0); - Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1), - DAG.getValueType(DestVT), - DAG.getValueType(getValue(Op1).getValueType()), - getValue(I.getArgOperand(1)), - getValue(I.getArgOperand(2)), - Code); - setValue(&I, Res); - return nullptr; - } case Intrinsic::powi: setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG)); @@ -4795,7 +5450,8 @@ case Intrinsic::trunc: case Intrinsic::rint: case Intrinsic::nearbyint: - case Intrinsic::round: { + case Intrinsic::round: + case Intrinsic::canonicalize: { unsigned Opcode; switch (Intrinsic) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
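// The minnum/maxnum lowering above chooses between two node kinds that differ
// only in NaN behavior: FMINNUM follows IEEE-754 minNum (a NaN operand is
// ignored, like libm fmin), while FMINNAN propagates the NaN. Because the
// NaN-propagating form is only used when the call promises no NaNs, the
// difference is unobservable there; a scalar illustration of the two rules:
#include <cmath>
#include <limits>

static double minNum(double A, double B) {
  if (std::isnan(A)) return B;       // ignore the NaN operand, like fmin()
  if (std::isnan(B)) return A;
  return A < B ? A : B;
}

static double minNaN(double A, double B) {
  if (std::isnan(A) || std::isnan(B))
    return std::numeric_limits<double>::quiet_NaN(); // NaN wins
  return A < B ? A : B;
}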
@@ -4809,6 +5465,7 @@ case Intrinsic::rint: Opcode = ISD::FRINT; break; case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; case Intrinsic::round: Opcode = ISD::FROUND; break; + case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break; } setValue(&I, DAG.getNode(Opcode, sdl, @@ -4816,18 +5473,28 @@ getValue(I.getArgOperand(0)))); return nullptr; } - case Intrinsic::minnum: - setValue(&I, DAG.getNode(ISD::FMINNUM, sdl, - getValue(I.getArgOperand(0)).getValueType(), + case Intrinsic::minnum: { + auto VT = getValue(I.getArgOperand(0)).getValueType(); + unsigned Opc = + I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT) + ? ISD::FMINNAN + : ISD::FMINNUM; + setValue(&I, DAG.getNode(Opc, sdl, VT, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); return nullptr; - case Intrinsic::maxnum: - setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl, - getValue(I.getArgOperand(0)).getValueType(), + } + case Intrinsic::maxnum: { + auto VT = getValue(I.getArgOperand(0)).getValueType(); + unsigned Opc = + I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT) + ? ISD::FMAXNAN + : ISD::FMAXNUM; + setValue(&I, DAG.getNode(Opc, sdl, VT, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); return nullptr; + } case Intrinsic::copysign: setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, getValue(I.getArgOperand(0)).getValueType(), @@ -4841,6 +5508,26 @@ getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); return nullptr; + case Intrinsic::experimental_constrained_fadd: + case Intrinsic::experimental_constrained_fsub: + case Intrinsic::experimental_constrained_fmul: + case Intrinsic::experimental_constrained_fdiv: + case Intrinsic::experimental_constrained_frem: + case Intrinsic::experimental_constrained_fma: + case Intrinsic::experimental_constrained_sqrt: + case Intrinsic::experimental_constrained_pow: + case Intrinsic::experimental_constrained_powi: + case Intrinsic::experimental_constrained_sin: + case Intrinsic::experimental_constrained_cos: + case Intrinsic::experimental_constrained_exp: + case Intrinsic::experimental_constrained_exp2: + case Intrinsic::experimental_constrained_log: + case Intrinsic::experimental_constrained_log10: + case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_rint: + case Intrinsic::experimental_constrained_nearbyint: + visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I)); + return nullptr; case Intrinsic::fmuladd: { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && @@ -4931,11 +5618,10 @@ DAG.setRoot(Res.getValue(1)); return nullptr; } - case Intrinsic::stackrestore: { + case Intrinsic::stackrestore: Res = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res)); return nullptr; - } case Intrinsic::get_dynamic_area_offset: { SDValue Op = getRoot(); EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); @@ -4951,59 +5637,47 @@ setValue(&I, Res); return nullptr; } + case Intrinsic::stackguard: { + EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); + MachineFunction &MF = DAG.getMachineFunction(); + const Module &M = *MF.getFunction()->getParent(); + SDValue Chain = getRoot(); + if (TLI.useLoadStackGuardNode()) { + Res = getLoadStackGuard(DAG, sdl, Chain); + } else { + const Value *Global = TLI.getSDagStackGuard(M); + unsigned Align = DL->getPrefTypeAlignment(Global->getType()); + Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global), + MachinePointerInfo(Global, 0), Align, + 
MachineMemOperand::MOVolatile); + } + DAG.setRoot(Chain); + setValue(&I, Res); + return nullptr; + } case Intrinsic::stackprotector: { // Emit code into the DAG to store the stack guard onto the stack. MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); SDValue Src, Chain = getRoot(); - const Value *Ptr = cast<LoadInst>(I.getArgOperand(0))->getPointerOperand(); - const GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr); - - // See if Ptr is a bitcast. If it is, look through it and see if we can get - // global variable __stack_chk_guard. - if (!GV) - if (const Operator *BC = dyn_cast<Operator>(Ptr)) - if (BC->getOpcode() == Instruction::BitCast) - GV = dyn_cast<GlobalVariable>(BC->getOperand(0)); - - if (GV && TLI.useLoadStackGuardNode()) { - // Emit a LOAD_STACK_GUARD node. - MachineSDNode *Node = DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, - sdl, PtrTy, Chain); - MachinePointerInfo MPInfo(GV); - MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1); - unsigned Flags = MachineMemOperand::MOLoad | - MachineMemOperand::MOInvariant; - *MemRefs = MF.getMachineMemOperand(MPInfo, Flags, - PtrTy.getSizeInBits() / 8, - DAG.getEVTAlignment(PtrTy)); - Node->setMemRefs(MemRefs, MemRefs + 1); - - // Copy the guard value to a virtual register so that it can be - // retrieved in the epilogue. - Src = SDValue(Node, 0); - const TargetRegisterClass *RC = - TLI.getRegClassFor(Src.getSimpleValueType()); - unsigned Reg = MF.getRegInfo().createVirtualRegister(RC); - - SPDescriptor.setGuardReg(Reg); - Chain = DAG.getCopyToReg(Chain, sdl, Reg, Src); - } else { + + if (TLI.useLoadStackGuardNode()) + Src = getLoadStackGuard(DAG, sdl, Chain); + else Src = getValue(I.getArgOperand(0)); // The guard's value. - } AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1)); int FI = FuncInfo.StaticAllocaMap[Slot]; - MFI->setStackProtectorIndex(FI); + MFI.setStackProtectorIndex(FI); SDValue FIN = DAG.getFrameIndex(FI, PtrTy); // Store the stack protector onto the stack. Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack( DAG.getMachineFunction(), FI), - true, false, 0); + /* Alignment = */ 0, MachineMemOperand::MOVolatile); setValue(&I, Res); DAG.setRoot(Res); return nullptr; @@ -5027,6 +5701,7 @@ } case Intrinsic::annotation: case Intrinsic::ptr_annotation: + case Intrinsic::invariant_group_barrier: // Drop the intrinsic, but forward the value setValue(&I, getValue(I.getOperand(0))); return nullptr; @@ -5035,6 +5710,18 @@ // Discard annotate attributes and assumptions return nullptr; + case Intrinsic::codeview_annotation: { + // Emit a label associated with this metadata. 
+ MachineFunction &MF = DAG.getMachineFunction(); + MCSymbol *Label = + MF.getMMI().getContext().createTempSymbol("annotation", true); + Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(0))->getMetadata(); + MF.addCodeViewAnnotation(Label, cast<MDNode>(MD)); + Res = DAG.getLabelNode(ISD::ANNOTATION_LABEL, sdl, getRoot(), Label); + DAG.setRoot(Res); + return nullptr; + } + case Intrinsic::init_trampoline: { const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts()); @@ -5051,12 +5738,11 @@ DAG.setRoot(Res); return nullptr; } - case Intrinsic::adjust_trampoline: { + case Intrinsic::adjust_trampoline: setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return nullptr; - } case Intrinsic::gcroot: { MachineFunction &MF = DAG.getMachineFunction(); const Function *F = MF.getFunction(); @@ -5078,17 +5764,16 @@ setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32)); return nullptr; - case Intrinsic::expect: { + case Intrinsic::expect: // Just replace __builtin_expect(exp, c) with EXP. setValue(&I, getValue(I.getArgOperand(0))); return nullptr; - } case Intrinsic::debugtrap: case Intrinsic::trap: { StringRef TrapFuncName = I.getAttributes() - .getAttribute(AttributeSet::FunctionIndex, "trap-func-name") + .getAttribute(AttributeList::FunctionIndex, "trap-func-name") .getValueAsString(); if (TrapFuncName.empty()) { ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? @@ -5099,11 +5784,11 @@ TargetLowering::ArgListTy Args; TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(sdl).setChain(getRoot()).setCallee( + CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( CallingConv::C, I.getType(), DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy(DAG.getDataLayout())), - std::move(Args), 0); + std::move(Args)); std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); DAG.setRoot(Result.second); @@ -5180,7 +5865,7 @@ SDValue Ops[2]; Ops[0] = getRoot(); Ops[1] = - DAG.getFrameIndex(FI, TLI.getPointerTy(DAG.getDataLayout()), true); + DAG.getFrameIndex(FI, TLI.getFrameIndexTy(DAG.getDataLayout()), true); unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops); @@ -5195,44 +5880,27 @@ case Intrinsic::invariant_end: // Discard region information. return nullptr; - case Intrinsic::stackprotectorcheck: { - // Do not actually emit anything for this basic block. Instead we initialize - // the stack protector descriptor and export the guard variable so we can - // access it in FinishBasicBlock. - const BasicBlock *BB = I.getParent(); - SPDescriptor.initialize(BB, FuncInfo.MBBMap[BB], I); - ExportFromCurrentBlock(SPDescriptor.getGuard()); - - // Flush our exports since we are going to process a terminator. 
- (void)getControlRoot(); - return nullptr; - } case Intrinsic::clear_cache: return TLI.getClearCacheBuiltinName(); case Intrinsic::donothing: // ignore return nullptr; - case Intrinsic::experimental_stackmap: { + case Intrinsic::experimental_stackmap: visitStackmap(I); return nullptr; - } case Intrinsic::experimental_patchpoint_void: - case Intrinsic::experimental_patchpoint_i64: { + case Intrinsic::experimental_patchpoint_i64: visitPatchpoint(&I); return nullptr; - } - case Intrinsic::experimental_gc_statepoint: { - visitStatepoint(I); + case Intrinsic::experimental_gc_statepoint: + LowerStatepoint(ImmutableStatepoint(&I)); return nullptr; - } - case Intrinsic::experimental_gc_result: { - visitGCResult(I); + case Intrinsic::experimental_gc_result: + visitGCResult(cast<GCResultInst>(I)); return nullptr; - } - case Intrinsic::experimental_gc_relocate: { + case Intrinsic::experimental_gc_relocate: visitGCRelocate(cast<GCRelocateInst>(I)); return nullptr; - } case Intrinsic::instrprof_increment: llvm_unreachable("instrprof failed to lower an increment"); case Intrinsic::instrprof_value_profile: @@ -5253,7 +5921,7 @@ int FI = FuncInfo.StaticAllocaMap[Slot]; MCSymbol *FrameAllocSym = MF.getMMI().getContext().getOrCreateFrameAllocSymbol( - GlobalValue::getRealLinkageName(MF.getName()), Idx); + GlobalValue::dropLLVMManglingEscape(MF.getName()), Idx); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, TII->get(TargetOpcode::LOCAL_ESCAPE)) .addSym(FrameAllocSym) @@ -5271,10 +5939,11 @@ // Get the symbol that defines the frame offset. auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts()); auto *Idx = cast<ConstantInt>(I.getArgOperand(2)); - unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX)); + unsigned IdxVal = + unsigned(Idx->getLimitedValue(std::numeric_limits<int>::max())); MCSymbol *FrameAllocSym = MF.getMMI().getContext().getOrCreateFrameAllocSymbol( - GlobalValue::getRealLinkageName(Fn->getName()), IdxVal); + GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal); // Create a MCSymbol for the label to avoid any target lowering // that would make this PC relative. @@ -5305,13 +5974,153 @@ setValue(&I, N); return nullptr; } - } + case Intrinsic::xray_customevent: { + // Here we want to make sure that the intrinsic behaves as if it has a + // specific calling convention, and only for x86_64. + // FIXME: Support other platforms later. + const auto &Triple = DAG.getTarget().getTargetTriple(); + if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux()) + return nullptr; + + SDLoc DL = getCurSDLoc(); + SmallVector<SDValue, 8> Ops; + + // We want to say that we always want the arguments in registers. + SDValue LogEntryVal = getValue(I.getArgOperand(0)); + SDValue StrSizeVal = getValue(I.getArgOperand(1)); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue Chain = getRoot(); + Ops.push_back(LogEntryVal); + Ops.push_back(StrSizeVal); + Ops.push_back(Chain); + + // We need to enforce the calling convention for the callsite, so that + // argument ordering is enforced correctly, and that register allocation can + // see that some registers may be assumed clobbered and have to preserve + // them across calls to the intrinsic. 
+ MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHABLE_EVENT_CALL, + DL, NodeTys, Ops); + SDValue patchableNode = SDValue(MN, 0); + DAG.setRoot(patchableNode); + setValue(&I, patchableNode); + return nullptr; + } + case Intrinsic::experimental_deoptimize: + LowerDeoptimizeCall(&I); + return nullptr; + + case Intrinsic::experimental_vector_reduce_fadd: + case Intrinsic::experimental_vector_reduce_fmul: + case Intrinsic::experimental_vector_reduce_add: + case Intrinsic::experimental_vector_reduce_mul: + case Intrinsic::experimental_vector_reduce_and: + case Intrinsic::experimental_vector_reduce_or: + case Intrinsic::experimental_vector_reduce_xor: + case Intrinsic::experimental_vector_reduce_smax: + case Intrinsic::experimental_vector_reduce_smin: + case Intrinsic::experimental_vector_reduce_umax: + case Intrinsic::experimental_vector_reduce_umin: + case Intrinsic::experimental_vector_reduce_fmax: + case Intrinsic::experimental_vector_reduce_fmin: + visitVectorReduce(I, Intrinsic); + return nullptr; + } +} + +void SelectionDAGBuilder::visitConstrainedFPIntrinsic( + const ConstrainedFPIntrinsic &FPI) { + SDLoc sdl = getCurSDLoc(); + unsigned Opcode; + switch (FPI.getIntrinsicID()) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::experimental_constrained_fadd: + Opcode = ISD::STRICT_FADD; + break; + case Intrinsic::experimental_constrained_fsub: + Opcode = ISD::STRICT_FSUB; + break; + case Intrinsic::experimental_constrained_fmul: + Opcode = ISD::STRICT_FMUL; + break; + case Intrinsic::experimental_constrained_fdiv: + Opcode = ISD::STRICT_FDIV; + break; + case Intrinsic::experimental_constrained_frem: + Opcode = ISD::STRICT_FREM; + break; + case Intrinsic::experimental_constrained_fma: + Opcode = ISD::STRICT_FMA; + break; + case Intrinsic::experimental_constrained_sqrt: + Opcode = ISD::STRICT_FSQRT; + break; + case Intrinsic::experimental_constrained_pow: + Opcode = ISD::STRICT_FPOW; + break; + case Intrinsic::experimental_constrained_powi: + Opcode = ISD::STRICT_FPOWI; + break; + case Intrinsic::experimental_constrained_sin: + Opcode = ISD::STRICT_FSIN; + break; + case Intrinsic::experimental_constrained_cos: + Opcode = ISD::STRICT_FCOS; + break; + case Intrinsic::experimental_constrained_exp: + Opcode = ISD::STRICT_FEXP; + break; + case Intrinsic::experimental_constrained_exp2: + Opcode = ISD::STRICT_FEXP2; + break; + case Intrinsic::experimental_constrained_log: + Opcode = ISD::STRICT_FLOG; + break; + case Intrinsic::experimental_constrained_log10: + Opcode = ISD::STRICT_FLOG10; + break; + case Intrinsic::experimental_constrained_log2: + Opcode = ISD::STRICT_FLOG2; + break; + case Intrinsic::experimental_constrained_rint: + Opcode = ISD::STRICT_FRINT; + break; + case Intrinsic::experimental_constrained_nearbyint: + Opcode = ISD::STRICT_FNEARBYINT; + break; + } + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Chain = getRoot(); + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs); + ValueVTs.push_back(MVT::Other); // Out chain + + SDVTList VTs = DAG.getVTList(ValueVTs); + SDValue Result; + if (FPI.isUnaryOp()) + Result = DAG.getNode(Opcode, sdl, VTs, + { Chain, getValue(FPI.getArgOperand(0)) }); + else if (FPI.isTernaryOp()) + Result = DAG.getNode(Opcode, sdl, VTs, + { Chain, getValue(FPI.getArgOperand(0)), + getValue(FPI.getArgOperand(1)), + getValue(FPI.getArgOperand(2)) }); + else + Result = DAG.getNode(Opcode, sdl, VTs, + { Chain, getValue(FPI.getArgOperand(0)), + 
getValue(FPI.getArgOperand(1)) }); + + assert(Result.getNode()->getNumValues() == 2); + SDValue OutChain = Result.getValue(1); + DAG.setRoot(OutChain); + SDValue FPResult = Result.getValue(0); + setValue(&FPI, FPResult); } std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, const BasicBlock *EHPadBB) { - MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineModuleInfo &MMI = MF.getMMI(); MCSymbol *BeginLabel = nullptr; if (EHPadBB) { @@ -5323,7 +6132,7 @@ // so as to maintain the ordering of pads in the LSDA. unsigned CallSiteIndex = MMI.getCurrentCallSite(); if (CallSiteIndex) { - MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); + MF.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex); // Now that the call site is handled, stop tracking it. @@ -5364,13 +6173,13 @@ DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); // Inform MachineModuleInfo of range. - if (MMI.hasEHFunclets()) { + if (MF.hasEHFunclets()) { assert(CLI.CS); WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo(); - EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS->getInstruction()), + EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS.getInstruction()), BeginLabel, EndLabel); } else { - MMI.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel); + MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel); } } @@ -5380,15 +6189,27 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool isTailCall, const BasicBlock *EHPadBB) { + auto &DL = DAG.getDataLayout(); FunctionType *FTy = CS.getFunctionType(); Type *RetTy = CS.getType(); TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; Args.reserve(CS.arg_size()); + const Value *SwiftErrorVal = nullptr; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // We can't tail call inside a function with a swifterror argument. Lowering + // does not support this yet. It would have to move into the swifterror + // register before the call. + auto *Caller = CS.getInstruction()->getParent()->getParent(); + if (TLI.supportSwiftError() && + Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) + isTailCall = false; + for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { + TargetLowering::ArgListEntry Entry; const Value *V = *i; // Skip empty types @@ -5398,86 +6219,80 @@ SDValue ArgNode = getValue(V); Entry.Node = ArgNode; Entry.Ty = V->getType(); - // Skip the first return-type Attribute to get to params. - Entry.setAttributes(&CS, i - CS.arg_begin() + 1); + Entry.setAttributes(&CS, i - CS.arg_begin()); + + // Use swifterror virtual register as input to the call. + if (Entry.IsSwiftError && TLI.supportSwiftError()) { + SwiftErrorVal = V; + // We find the virtual register for the actual swifterror argument. + // Instead of using the Value, we use the virtual register instead. + Entry.Node = DAG.getRegister(FuncInfo + .getOrCreateSwiftErrorVRegUseAt( + CS.getInstruction(), FuncInfo.MBB, V) + .first, + EVT(TLI.getPointerTy(DL))); + } + Args.push_back(Entry); // If we have an explicit sret argument that is an Instruction, (i.e., it // might point to function-local memory), we can't meaningfully tail-call. 
- if (Entry.isSRet && isa<Instruction>(V)) + if (Entry.IsSRet && isa<Instruction>(V)) isTailCall = false; } // Check if target-independent constraints permit a tail call here. // Target-dependent constraints are checked within TLI->LowerCallTo. - if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget())) { -#ifndef noCbC - // errs() << CS.getCaller()->getName() + " : Tail call elimination was failed on goto "; - // errs() << "warning: not unreachable instruction in a tail call.\n "; -#endif + if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget())) isTailCall = false; - } + + // Disable tail calls if there is an swifterror argument. Targets have not + // been updated to support tail calls. + if (TLI.supportSwiftError() && SwiftErrorVal) + isTailCall = false; TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) - .setCallee(RetTy, FTy, Callee, std::move(Args), CS) - .setTailCall(isTailCall); -#ifndef noCbC - // variable arguments check. - if (CLI.RetTy->is__CodeTy() && CS.getCaller()->getReturnType()->is__CodeTy() && CLI.IsVarArg) { - CLI.CallConv = CallingConv::C; - errs().changeColor(raw_ostream::MAGENTA, true); - if (CS.getCalledFunction()) {// if this codesegment call is a direct access; ex) goto codesegment(); - errs() << "warning: "; - errs().resetColor(); - errs() << CS.getCaller()->getName() + " : Tail call elimination was failed on goto" - + CS.getCalledFunction()->getName() + ". Write a exactly prototype declaration.\n"; - } - else if (CS.getCalledValue()->getType()->isPointerTy()) {// if it is a pointer access; ex) goto codesegmentPointer; - errs() << "warning: "; - errs().resetColor(); - errs() << CS.getCaller()->getName() + " : Tail call elimination was failed on pointer accessed goto. Write a exactly prototype declaration.\n"; - } - } - // if code segment's tail call flag was changed false , we report it on error. - if (CLI.RetTy->is__CodeTy() && CS.getCaller()->getReturnType()->is__CodeTy() && !isTailCall && !CLI.IsVarArg) { - if (CS.getCalledFunction()) // if this codesegment call is a direct access; ex) goto codesegment(); - DAG.getContext()->emitError(CS.getInstruction(), CS.getCaller()->getName() + " : Tail call elimination was failed on goto " - + CS.getCalledFunction()->getName() + " !"); - else if (CS.getCalledValue()->getType()->isPointerTy()) // if it is a pointer access; ex) goto codesegmentPointer; - DAG.getContext()->emitError(CS.getInstruction(), CS.getCaller()->getName() + - " : Tail call elimination was failed on codesegment which is accessed by pointer!"); // we can't get name from Type... - } -#endif + CLI.setDebugLoc(getCurSDLoc()) + .setChain(getRoot()) + .setCallee(RetTy, FTy, Callee, std::move(Args), CS) + .setTailCall(isTailCall) + .setConvergent(CS.isConvergent()); std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB); - if (Result.first.getNode()) - setValue(CS.getInstruction(), Result.first); -} - -/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the -/// value is equal or not-equal to zero. -static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) { - for (const User *U : V->users()) { - if (const ICmpInst *IC = dyn_cast<ICmpInst>(U)) - if (IC->isEquality()) - if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1))) - if (C->isNullValue()) - continue; - // Unknown instruction. 
- return false; - } - return true; + if (Result.first.getNode()) { + const Instruction *Inst = CS.getInstruction(); + Result.first = lowerRangeToAssertZExt(DAG, *Inst, Result.first); + setValue(Inst, Result.first); + } + + // The last element of CLI.InVals has the SDValue for swifterror return. + // Here we copy it to a virtual register and update SwiftErrorMap for + // book-keeping. + if (SwiftErrorVal && TLI.supportSwiftError()) { + // Get the last element of InVals. + SDValue Src = CLI.InVals.back(); + unsigned VReg; bool CreatedVReg; + std::tie(VReg, CreatedVReg) = + FuncInfo.getOrCreateSwiftErrorVRegDefAt(CS.getInstruction()); + SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src); + // We update the virtual register for the actual swifterror argument. + if (CreatedVReg) + FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg); + DAG.setRoot(CopyNode); + } } static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, - Type *LoadTy, SelectionDAGBuilder &Builder) { - // Check to see if this load can be trivially constant folded, e.g. if the // input is from a string literal. if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) { // Cast pointer to the type we really want to load. + Type *LoadTy = + Type::getIntNTy(PtrVal->getContext(), LoadVT.getScalarSizeInBits()); + if (LoadVT.isVector()) + LoadTy = VectorType::get(LoadTy, LoadVT.getVectorNumElements()); + LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput), PointerType::getUnqual(LoadTy)); @@ -5492,7 +6307,7 @@ bool ConstantMemory = false; // Do not serialize (non-volatile) loads of constant memory with anything. - if (Builder.AA->pointsToConstantMemory(PtrVal)) { + if (Builder.AA && Builder.AA->pointsToConstantMemory(PtrVal)) { Root = Builder.DAG.getEntryNode(); ConstantMemory = true; } else { @@ -5503,17 +6318,15 @@ SDValue Ptr = Builder.getValue(PtrVal); SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, Ptr, MachinePointerInfo(PtrVal), - false /*volatile*/, - false /*nontemporal*/, - false /*isinvariant*/, 1 /* align=1 */); + /* Alignment = */ 1); if (!ConstantMemory) Builder.PendingLoads.push_back(LoadVal.getValue(1)); return LoadVal; } -/// processIntegerCallValue - Record the value for an instruction that -/// produces an integer result, converting the type where necessary. +/// Record the value for an instruction that produces an integer result, +/// converting the type where necessary. void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, SDValue Value, bool IsSigned) { @@ -5526,20 +6339,13 @@ setValue(&I, Value); } -/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form. -/// If so, return true and lower it, otherwise return false and it will be -/// lowered like a normal call. +/// See if we can lower a memcmp call into an optimized form. If so, return +/// true and lower it. Otherwise return false, and it will be lowered like a +/// normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { - // Verify that the prototype makes sense. 
int memcmp(void*,void*,size_t) - if (I.getNumArgOperands() != 3) - return false; - const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1); - if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() || - !I.getArgOperand(2)->getType()->isIntegerTy() || - !I.getType()->isIntegerTy()) - return false; - const Value *Size = I.getArgOperand(2); const ConstantInt *CSize = dyn_cast<ConstantInt>(Size); if (CSize && CSize->getZExtValue() == 0) { @@ -5549,12 +6355,10 @@ return true; } - const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); - std::pair<SDValue, SDValue> Res = - TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(), - getValue(LHS), getValue(RHS), getValue(Size), - MachinePointerInfo(LHS), - MachinePointerInfo(RHS)); + const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemcmp( + DAG, getCurSDLoc(), DAG.getRoot(), getValue(LHS), getValue(RHS), + getValue(Size), MachinePointerInfo(LHS), MachinePointerInfo(RHS)); if (Res.first.getNode()) { processIntegerCallValue(I, Res.first, true); PendingLoads.push_back(Res.second); @@ -5563,90 +6367,81 @@ // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 - if (CSize && IsOnlyUsedInZeroEqualityComparison(&I)) { - bool ActuallyDoIt = true; - MVT LoadVT; - Type *LoadTy; - switch (CSize->getZExtValue()) { - default: - LoadVT = MVT::Other; - LoadTy = nullptr; - ActuallyDoIt = false; - break; - case 2: - LoadVT = MVT::i16; - LoadTy = Type::getInt16Ty(CSize->getContext()); - break; - case 4: - LoadVT = MVT::i32; - LoadTy = Type::getInt32Ty(CSize->getContext()); - break; - case 8: - LoadVT = MVT::i64; - LoadTy = Type::getInt64Ty(CSize->getContext()); - break; - /* - case 16: - LoadVT = MVT::v4i32; - LoadTy = Type::getInt32Ty(CSize->getContext()); - LoadTy = VectorType::get(LoadTy, 4); - break; - */ - } - - // This turns into unaligned loads. We only do this if the target natively - // supports the MVT we'll be loading or if it is small enough (<= 4) that - // we'll only produce a small number of byte loads. - - // Require that we can find a legal MVT, and only do this if the target - // supports unaligned loads of that type. Expanding into byte loads would - // bloat the code. + if (!CSize || !isOnlyUsedInZeroEqualityComparison(&I)) + return false; + + // If the target has a fast compare for the given size, it will return a + // preferred load type for that size. Require that the load VT is legal and + // that the target supports unaligned loads of that type. Otherwise, return + // INVALID. + auto hasFastLoadsAndCompare = [&](unsigned NumBits) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (ActuallyDoIt && CSize->getZExtValue() > 4) { - unsigned DstAS = LHS->getType()->getPointerAddressSpace(); - unsigned SrcAS = RHS->getType()->getPointerAddressSpace(); + MVT LVT = TLI.hasFastEqualityCompare(NumBits); + if (LVT != MVT::INVALID_SIMPLE_VALUE_TYPE) { // TODO: Handle 5 byte compare as 4-byte + 1 byte. // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. // TODO: Check alignment of src and dest ptrs. 
- if (!TLI.isTypeLegal(LoadVT) || - !TLI.allowsMisalignedMemoryAccesses(LoadVT, SrcAS) || - !TLI.allowsMisalignedMemoryAccesses(LoadVT, DstAS)) - ActuallyDoIt = false; - } - - if (ActuallyDoIt) { - SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this); - SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this); - - SDValue Res = DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal, - ISD::SETNE); - processIntegerCallValue(I, Res, false); - return true; - } - } - - - return false; -} - -/// visitMemChrCall -- See if we can lower a memchr call into an optimized -/// form. If so, return true and lower it, otherwise return false and it -/// will be lowered like a normal call. + unsigned DstAS = LHS->getType()->getPointerAddressSpace(); + unsigned SrcAS = RHS->getType()->getPointerAddressSpace(); + if (!TLI.isTypeLegal(LVT) || + !TLI.allowsMisalignedMemoryAccesses(LVT, SrcAS) || + !TLI.allowsMisalignedMemoryAccesses(LVT, DstAS)) + LVT = MVT::INVALID_SIMPLE_VALUE_TYPE; + } + + return LVT; + }; + + // This turns into unaligned loads. We only do this if the target natively + // supports the MVT we'll be loading or if it is small enough (<= 4) that + // we'll only produce a small number of byte loads. + MVT LoadVT; + unsigned NumBitsToCompare = CSize->getZExtValue() * 8; + switch (NumBitsToCompare) { + default: + return false; + case 16: + LoadVT = MVT::i16; + break; + case 32: + LoadVT = MVT::i32; + break; + case 64: + case 128: + case 256: + LoadVT = hasFastLoadsAndCompare(NumBitsToCompare); + break; + } + + if (LoadVT == MVT::INVALID_SIMPLE_VALUE_TYPE) + return false; + + SDValue LoadL = getMemCmpLoad(LHS, LoadVT, *this); + SDValue LoadR = getMemCmpLoad(RHS, LoadVT, *this); + + // Bitcast to a wide integer type if the loads are vectors. + if (LoadVT.isVector()) { + EVT CmpVT = EVT::getIntegerVT(LHS->getContext(), LoadVT.getSizeInBits()); + LoadL = DAG.getBitcast(CmpVT, LoadL); + LoadR = DAG.getBitcast(CmpVT, LoadR); + } + + SDValue Cmp = DAG.getSetCC(getCurSDLoc(), MVT::i1, LoadL, LoadR, ISD::SETNE); + processIntegerCallValue(I, Cmp, false); + return true; +} + +/// See if we can lower a memchr call into an optimized form. If so, return +/// true and lower it. Otherwise return false, and it will be lowered like a +/// normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) { - // Verify that the prototype makes sense. void *memchr(void *, int, size_t) - if (I.getNumArgOperands() != 3) - return false; - const Value *Src = I.getArgOperand(0); const Value *Char = I.getArgOperand(1); const Value *Length = I.getArgOperand(2); - if (!Src->getType()->isPointerTy() || - !Char->getType()->isIntegerTy() || - !Length->getType()->isIntegerTy() || - !I.getType()->isPointerTy()) - return false; - - const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + + const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(), getValue(Src), getValue(Char), getValue(Length), @@ -5660,21 +6455,55 @@ return false; } -/// visitStrCpyCall -- See if we can lower a strcpy or stpcpy call into an -/// optimized form. If so, return true and lower it, otherwise return false -/// and it will be lowered like a normal call. +/// See if we can lower a mempcpy call into an optimized form. If so, return +/// true and lower it. Otherwise return false, and it will be lowered like a +/// normal call. 
+/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. +bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) { + SDValue Dst = getValue(I.getArgOperand(0)); + SDValue Src = getValue(I.getArgOperand(1)); + SDValue Size = getValue(I.getArgOperand(2)); + + unsigned DstAlign = DAG.InferPtrAlignment(Dst); + unsigned SrcAlign = DAG.InferPtrAlignment(Src); + unsigned Align = std::min(DstAlign, SrcAlign); + if (Align == 0) // Alignment of one or both could not be inferred. + Align = 1; // 0 and 1 both specify no alignment, but 0 is reserved. + + bool isVol = false; + SDLoc sdl = getCurSDLoc(); + + // In the mempcpy context we need to pass in a false value for isTailCall + // because the return pointer needs to be adjusted by the size of + // the copied memory. + SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Align, isVol, + false, /*isTailCall=*/false, + MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1))); + assert(MC.getNode() != nullptr && + "** memcpy should not be lowered as TailCall in mempcpy context **"); + DAG.setRoot(MC); + + // Check if Size needs to be truncated or extended. + Size = DAG.getSExtOrTrunc(Size, sdl, Dst.getValueType()); + + // Adjust return pointer to point just past the last dst byte. + SDValue DstPlusSize = DAG.getNode(ISD::ADD, sdl, Dst.getValueType(), + Dst, Size); + setValue(&I, DstPlusSize); + return true; +} + +/// See if we can lower a strcpy call into an optimized form. If so, return +/// true and lower it, otherwise return false and it will be lowered like a +/// normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) { - // Verify that the prototype makes sense. char *strcpy(char *, char *) - if (I.getNumArgOperands() != 2) - return false; - const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); - if (!Arg0->getType()->isPointerTy() || - !Arg1->getType()->isPointerTy() || - !I.getType()->isPointerTy()) - return false; - - const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + + const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(), getValue(Arg0), getValue(Arg1), @@ -5689,21 +6518,15 @@ return false; } -/// visitStrCmpCall - See if we can lower a call to strcmp in an optimized form. -/// If so, return true and lower it, otherwise return false and it will be -/// lowered like a normal call. +/// See if we can lower a strcmp call into an optimized form. If so, return +/// true and lower it, otherwise return false and it will be lowered like a +/// normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) { - // Verify that the prototype makes sense. 
int strcmp(void*,void*) - if (I.getNumArgOperands() != 2) - return false; - const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); - if (!Arg0->getType()->isPointerTy() || - !Arg1->getType()->isPointerTy() || - !I.getType()->isIntegerTy()) - return false; - - const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + + const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(), getValue(Arg0), getValue(Arg1), @@ -5718,19 +6541,15 @@ return false; } -/// visitStrLenCall -- See if we can lower a strlen call into an optimized -/// form. If so, return true and lower it, otherwise return false and it -/// will be lowered like a normal call. +/// See if we can lower a strlen call into an optimized form. If so, return +/// true and lower it, otherwise return false and it will be lowered like a +/// normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) { - // Verify that the prototype makes sense. size_t strlen(char *) - if (I.getNumArgOperands() != 1) - return false; - const Value *Arg0 = I.getArgOperand(0); - if (!Arg0->getType()->isPointerTy() || !I.getType()->isIntegerTy()) - return false; - - const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + + const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(), getValue(Arg0), MachinePointerInfo(Arg0)); @@ -5743,21 +6562,15 @@ return false; } -/// visitStrNLenCall -- See if we can lower a strnlen call into an optimized -/// form. If so, return true and lower it, otherwise return false and it -/// will be lowered like a normal call. +/// See if we can lower a strnlen call into an optimized form. If so, return +/// true and lower it, otherwise return false and it will be lowered like a +/// normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) { - // Verify that the prototype makes sense. size_t strnlen(char *, size_t) - if (I.getNumArgOperands() != 2) - return false; - const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); - if (!Arg0->getType()->isPointerTy() || - !Arg1->getType()->isIntegerTy() || - !I.getType()->isIntegerTy()) - return false; - - const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + + const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(), getValue(Arg0), getValue(Arg1), @@ -5771,16 +6584,15 @@ return false; } -/// visitUnaryFloatCall - If a call instruction is a unary floating-point -/// operation (as expected), translate it to an SDNode with the specified opcode -/// and return true. +/// See if we can lower a unary floating-point operation into an SDNode with +/// the specified Opcode. If so, return true and lower it, otherwise return +/// false and it will be lowered like a normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, unsigned Opcode) { - // Sanity check that it really is a unary floating-point call. 
- if (I.getNumArgOperands() != 1 || - !I.getArgOperand(0)->getType()->isFloatingPointTy() || - I.getType() != I.getArgOperand(0)->getType() || - !I.onlyReadsMemory()) + // We already checked this call's prototype; verify it doesn't modify errno. + if (!I.onlyReadsMemory()) return false; SDValue Tmp = getValue(I.getArgOperand(0)); @@ -5788,17 +6600,15 @@ return true; } -/// visitBinaryFloatCall - If a call instruction is a binary floating-point -/// operation (as expected), translate it to an SDNode with the specified opcode -/// and return true. +/// See if we can lower a binary floating-point operation into an SDNode with +/// the specified Opcode. If so, return true and lower it. Otherwise return +/// false, and it will be lowered like a normal call. +/// The caller already checked that \p I calls the appropriate LibFunc with a +/// correct prototype. bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I, unsigned Opcode) { - // Sanity check that it really is a binary floating-point call. - if (I.getNumArgOperands() != 2 || - !I.getArgOperand(0)->getType()->isFloatingPointTy() || - I.getType() != I.getArgOperand(0)->getType() || - I.getType() != I.getArgOperand(1)->getType() || - !I.onlyReadsMemory()) + // We already checked this call's prototype; verify it doesn't modify errno. + if (!I.onlyReadsMemory()) return false; SDValue Tmp0 = getValue(I.getArgOperand(0)); @@ -5816,7 +6626,7 @@ } MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - ComputeUsesVAFloatArgument(I, &MMI); + computeUsesVAFloatArgument(I, MMI); const char *RenameFn = nullptr; if (Function *F = I.getCalledFunction()) { @@ -5836,21 +6646,20 @@ } // Check for well-known libc/libm calls. If the function is internal, it - // can't be a library call. - LibFunc::Func Func; - if (!F->hasLocalLinkage() && F->hasName() && - LibInfo->getLibFunc(F->getName(), Func) && + // can't be a library call. Don't do the check if marked as nobuiltin for + // some reason or the call site requires strict floating point semantics. + LibFunc Func; + if (!I.isNoBuiltin() && !I.isStrictFP() && !F->hasLocalLinkage() && + F->hasName() && LibInfo->getLibFunc(*F, Func) && LibInfo->hasOptimizedCodeGen(Func)) { switch (Func) { default: break; - case LibFunc::copysign: - case LibFunc::copysignf: - case LibFunc::copysignl: - if (I.getNumArgOperands() == 2 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.getType() == I.getArgOperand(1)->getType() && - I.onlyReadsMemory()) { + case LibFunc_copysign: + case LibFunc_copysignf: + case LibFunc_copysignl: + // We already checked this call's prototype; verify it doesn't modify + // errno. 
+ if (I.onlyReadsMemory()) { SDValue LHS = getValue(I.getArgOperand(0)); SDValue RHS = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(), @@ -5858,118 +6667,122 @@ return; } break; - case LibFunc::fabs: - case LibFunc::fabsf: - case LibFunc::fabsl: + case LibFunc_fabs: + case LibFunc_fabsf: + case LibFunc_fabsl: if (visitUnaryFloatCall(I, ISD::FABS)) return; break; - case LibFunc::fmin: - case LibFunc::fminf: - case LibFunc::fminl: + case LibFunc_fmin: + case LibFunc_fminf: + case LibFunc_fminl: if (visitBinaryFloatCall(I, ISD::FMINNUM)) return; break; - case LibFunc::fmax: - case LibFunc::fmaxf: - case LibFunc::fmaxl: + case LibFunc_fmax: + case LibFunc_fmaxf: + case LibFunc_fmaxl: if (visitBinaryFloatCall(I, ISD::FMAXNUM)) return; break; - case LibFunc::sin: - case LibFunc::sinf: - case LibFunc::sinl: + case LibFunc_sin: + case LibFunc_sinf: + case LibFunc_sinl: if (visitUnaryFloatCall(I, ISD::FSIN)) return; break; - case LibFunc::cos: - case LibFunc::cosf: - case LibFunc::cosl: + case LibFunc_cos: + case LibFunc_cosf: + case LibFunc_cosl: if (visitUnaryFloatCall(I, ISD::FCOS)) return; break; - case LibFunc::sqrt: - case LibFunc::sqrtf: - case LibFunc::sqrtl: - case LibFunc::sqrt_finite: - case LibFunc::sqrtf_finite: - case LibFunc::sqrtl_finite: + case LibFunc_sqrt: + case LibFunc_sqrtf: + case LibFunc_sqrtl: + case LibFunc_sqrt_finite: + case LibFunc_sqrtf_finite: + case LibFunc_sqrtl_finite: if (visitUnaryFloatCall(I, ISD::FSQRT)) return; break; - case LibFunc::floor: - case LibFunc::floorf: - case LibFunc::floorl: + case LibFunc_floor: + case LibFunc_floorf: + case LibFunc_floorl: if (visitUnaryFloatCall(I, ISD::FFLOOR)) return; break; - case LibFunc::nearbyint: - case LibFunc::nearbyintf: - case LibFunc::nearbyintl: + case LibFunc_nearbyint: + case LibFunc_nearbyintf: + case LibFunc_nearbyintl: if (visitUnaryFloatCall(I, ISD::FNEARBYINT)) return; break; - case LibFunc::ceil: - case LibFunc::ceilf: - case LibFunc::ceill: + case LibFunc_ceil: + case LibFunc_ceilf: + case LibFunc_ceill: if (visitUnaryFloatCall(I, ISD::FCEIL)) return; break; - case LibFunc::rint: - case LibFunc::rintf: - case LibFunc::rintl: + case LibFunc_rint: + case LibFunc_rintf: + case LibFunc_rintl: if (visitUnaryFloatCall(I, ISD::FRINT)) return; break; - case LibFunc::round: - case LibFunc::roundf: - case LibFunc::roundl: + case LibFunc_round: + case LibFunc_roundf: + case LibFunc_roundl: if (visitUnaryFloatCall(I, ISD::FROUND)) return; break; - case LibFunc::trunc: - case LibFunc::truncf: - case LibFunc::truncl: + case LibFunc_trunc: + case LibFunc_truncf: + case LibFunc_truncl: if (visitUnaryFloatCall(I, ISD::FTRUNC)) return; break; - case LibFunc::log2: - case LibFunc::log2f: - case LibFunc::log2l: + case LibFunc_log2: + case LibFunc_log2f: + case LibFunc_log2l: if (visitUnaryFloatCall(I, ISD::FLOG2)) return; break; - case LibFunc::exp2: - case LibFunc::exp2f: - case LibFunc::exp2l: + case LibFunc_exp2: + case LibFunc_exp2f: + case LibFunc_exp2l: if (visitUnaryFloatCall(I, ISD::FEXP2)) return; break; - case LibFunc::memcmp: + case LibFunc_memcmp: if (visitMemCmpCall(I)) return; break; - case LibFunc::memchr: + case LibFunc_mempcpy: + if (visitMemPCpyCall(I)) + return; + break; + case LibFunc_memchr: if (visitMemChrCall(I)) return; break; - case LibFunc::strcpy: + case LibFunc_strcpy: if (visitStrCpyCall(I, false)) return; break; - case LibFunc::stpcpy: + case LibFunc_stpcpy: if (visitStrCpyCall(I, true)) return; break; - case LibFunc::strcmp: + case LibFunc_strcmp: if 
(visitStrCmpCall(I)) return; break; - case LibFunc::strlen: + case LibFunc_strlen: if (visitStrLenCall(I)) return; break; - case LibFunc::strnlen: + case LibFunc_strnlen: if (visitStrNLenCall(I)) return; break; @@ -5985,9 +6798,19 @@ RenameFn, DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())); - // Check if we can potentially perform a tail call. More detailed checking is - // be done within LowerCallTo, after more information about the call is known. - LowerCallTo(&I, Callee, I.isTailCall()); + // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't + // have to do anything here to lower funclet bundles. + assert(!I.hasOperandBundlesOtherThan( + {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) && + "Cannot lower calls with arbitrary operand bundles!"); + + if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) + LowerCallSiteWithDeoptBundle(&I, Callee, nullptr); + else + // Check if we can potentially perform a tail call. More detailed checking + // is be done within LowerCallTo, after more information about the call is + // known. + LowerCallTo(&I, Callee, I.isTailCall()); } namespace { @@ -6006,7 +6829,20 @@ RegsForValue AssignedRegs; explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info) - : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr,0) { + : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr, 0) { + } + + /// Whether or not this operand accesses memory + bool hasMemory(const TargetLowering &TLI) const { + // Indirect operand accesses access memory. + if (isIndirect) + return true; + + for (const auto &Code : Codes) + if (TLI.getConstraintType(Code) == TargetLowering::C_Memory) + return true; + + return false; } /// getCallOperandValEVT - Return the EVT of the Value* that this operand @@ -6025,7 +6861,7 @@ // If this is an indirect operand, the operand is a pointer to the // accessed type. if (isIndirect) { - llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy); + PointerType *PtrTy = dyn_cast<PointerType>(OpTy); if (!PtrTy) report_fatal_error("Indirect operand for inline asm not a pointer!"); OpTy = PtrTy->getElementType(); @@ -6057,10 +6893,79 @@ } }; -typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; +using SDISelAsmOperandInfoVector = SmallVector<SDISelAsmOperandInfo, 16>; } // end anonymous namespace +/// Make sure that the output operand \p OpInfo and its corresponding input +/// operand \p MatchingOpInfo have compatible constraint types (otherwise error +/// out). 
+static void patchMatchingInput(const SDISelAsmOperandInfo &OpInfo, + SDISelAsmOperandInfo &MatchingOpInfo, + SelectionDAG &DAG) { + if (OpInfo.ConstraintVT == MatchingOpInfo.ConstraintVT) + return; + + const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); + const auto &TLI = DAG.getTargetLoweringInfo(); + + std::pair<unsigned, const TargetRegisterClass *> MatchRC = + TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, + OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass *> InputRC = + TLI.getRegForInlineAsmConstraint(TRI, MatchingOpInfo.ConstraintCode, + MatchingOpInfo.ConstraintVT); + if ((OpInfo.ConstraintVT.isInteger() != + MatchingOpInfo.ConstraintVT.isInteger()) || + (MatchRC.second != InputRC.second)) { + // FIXME: error out in a more elegant fashion + report_fatal_error("Unsupported asm: input constraint" + " with a matching output constraint of" + " incompatible type!"); + } + MatchingOpInfo.ConstraintVT = OpInfo.ConstraintVT; +} + +/// Get a direct memory input to behave well as an indirect operand. +/// This may introduce stores, hence the need for a \p Chain. +/// \return The (possibly updated) chain. +static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location, + SDISelAsmOperandInfo &OpInfo, + SelectionDAG &DAG) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // If we don't have an indirect input, put it in the constpool if we can, + // otherwise spill it to a stack slot. + // TODO: This isn't quite right. We need to handle these according to + // the addressing mode that the constraint wants. Also, this may take + // an additional register for the computation and we don't want that + // either. + + // If the operand is a float, integer, or vector constant, spill to a + // constant pool entry to get its address. + const Value *OpVal = OpInfo.CallOperandVal; + if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || + isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) { + OpInfo.CallOperand = DAG.getConstantPool( + cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout())); + return Chain; + } + + // Otherwise, create a stack slot and emit a store to it before the asm. + Type *Ty = OpVal->getType(); + auto &DL = DAG.getDataLayout(); + uint64_t TySize = DL.getTypeAllocSize(Ty); + unsigned Align = DL.getPrefTypeAlignment(Ty); + MachineFunction &MF = DAG.getMachineFunction(); + int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false); + SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL)); + Chain = DAG.getStore(Chain, Location, OpInfo.CallOperand, StackSlot, + MachinePointerInfo::getFixedStack(MF, SSFI)); + OpInfo.CallOperand = StackSlot; + + return Chain; +} + /// GetRegistersForValue - Assign registers (virtual or physical) for the /// specified operand. We prefer to assign virtual registers, to allow the /// register allocator to handle the assignment process. However, if the asm @@ -6068,21 +6973,19 @@ /// allocation. This produces generally horrible, but correct, code. /// /// OpInfo describes the operand. 
-/// -static void GetRegistersForValue(SelectionDAG &DAG, - const TargetLowering &TLI, - SDLoc DL, +static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, + const SDLoc &DL, SDISelAsmOperandInfo &OpInfo) { LLVMContext &Context = *DAG.getContext(); MachineFunction &MF = DAG.getMachineFunction(); SmallVector<unsigned, 4> Regs; + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); // If this is a constraint for a single physreg, or a constraint for a // register class, find it. std::pair<unsigned, const TargetRegisterClass *> PhysReg = - TLI.getRegForInlineAsmConstraint(MF.getSubtarget().getRegisterInfo(), - OpInfo.ConstraintCode, + TLI.getRegForInlineAsmConstraint(&TRI, OpInfo.ConstraintCode, OpInfo.ConstraintVT); unsigned NumRegs = 1; @@ -6090,12 +6993,12 @@ // If this is a FP input in an integer register (or visa versa) insert a bit // cast of the input value. More generally, handle any case where the input // value disagrees with the register class we plan to stick this in. - if (OpInfo.Type == InlineAsm::isInput && - PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) { + if (OpInfo.Type == InlineAsm::isInput && PhysReg.second && + !TRI.isTypeLegalForClass(*PhysReg.second, OpInfo.ConstraintVT)) { // Try to convert to the first EVT that the reg class contains. If the // types are identical size, use a bitcast to convert (e.g. two differing // vector types). - MVT RegVT = *PhysReg.second->vt_begin(); + MVT RegVT = *TRI.legalclasstypes_begin(*PhysReg.second); if (RegVT.getSizeInBits() == OpInfo.CallOperand.getValueSizeInBits()) { OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); @@ -6123,12 +7026,12 @@ if (unsigned AssignedReg = PhysReg.first) { const TargetRegisterClass *RC = PhysReg.second; if (OpInfo.ConstraintVT == MVT::Other) - ValueVT = *RC->vt_begin(); + ValueVT = *TRI.legalclasstypes_begin(*RC); // Get the actual register value type. This is important, because the user // may have asked for (e.g.) the AX register in i32 type. We need to // remember that AX is actually i16 to get the right extension. - RegVT = *RC->vt_begin(); + RegVT = *TRI.legalclasstypes_begin(*RC); // This is a explicit reference to a physical register. Regs.push_back(AssignedReg); @@ -6154,7 +7057,7 @@ // Otherwise, if this was a reference to an LLVM register class, create vregs // for this reference. if (const TargetRegisterClass *RC = PhysReg.second) { - RegVT = *RC->vt_begin(); + RegVT = *TRI.legalclasstypes_begin(*RC); if (OpInfo.ConstraintVT == MVT::Other) ValueVT = RegVT; @@ -6170,8 +7073,78 @@ // Otherwise, we couldn't allocate enough registers for this. } +static unsigned +findMatchingInlineAsmOperand(unsigned OperandNo, + const std::vector<SDValue> &AsmNodeOperands) { + // Scan until we find the definition we already emitted of this operand. + unsigned CurOp = InlineAsm::Op_FirstOperand; + for (; OperandNo; --OperandNo) { + // Advance to the next operand. 
+ unsigned OpFlag = + cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); + assert((InlineAsm::isRegDefKind(OpFlag) || + InlineAsm::isRegDefEarlyClobberKind(OpFlag) || + InlineAsm::isMemKind(OpFlag)) && + "Skipped past definitions?"); + CurOp += InlineAsm::getNumOperandRegisters(OpFlag) + 1; + } + return CurOp; +} + +/// Fill \p Regs with \p NumRegs new virtual registers of type \p RegVT +/// \return true if it has succeeded, false otherwise +static bool createVirtualRegs(SmallVector<unsigned, 4> &Regs, unsigned NumRegs, + MVT RegVT, SelectionDAG &DAG) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); + for (unsigned i = 0, e = NumRegs; i != e; ++i) { + if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) + Regs.push_back(RegInfo.createVirtualRegister(RC)); + else + return false; + } + return true; +} + +namespace { + +class ExtraFlags { + unsigned Flags = 0; + +public: + explicit ExtraFlags(ImmutableCallSite CS) { + const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); + if (IA->hasSideEffects()) + Flags |= InlineAsm::Extra_HasSideEffects; + if (IA->isAlignStack()) + Flags |= InlineAsm::Extra_IsAlignStack; + if (CS.isConvergent()) + Flags |= InlineAsm::Extra_IsConvergent; + Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect; + } + + void update(const TargetLowering::AsmOperandInfo &OpInfo) { + // Ideally, we would only check against memory constraints. However, the + // meaning of an Other constraint can be target-specific and we can't easily + // reason about it. Therefore, be conservative and set MayLoad/MayStore + // for Other constraints as well. + if (OpInfo.ConstraintType == TargetLowering::C_Memory || + OpInfo.ConstraintType == TargetLowering::C_Other) { + if (OpInfo.Type == InlineAsm::isInput) + Flags |= InlineAsm::Extra_MayLoad; + else if (OpInfo.Type == InlineAsm::isOutput) + Flags |= InlineAsm::Extra_MayStore; + else if (OpInfo.Type == InlineAsm::isClobber) + Flags |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore); + } + } + + unsigned get() const { return Flags; } +}; + +} // end anonymous namespace + /// visitInlineAsm - Handle a call to an InlineAsm object. -/// void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); @@ -6184,6 +7157,9 @@ bool hasMemory = false; + // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore + ExtraFlags ExtraInfo(CS); + unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. unsigned ResNo = 0; // ResNo - The result number of the next output. for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { @@ -6193,14 +7169,25 @@ MVT OpVT = MVT::Other; // Compute the value type for each operand. - switch (OpInfo.Type) { - case InlineAsm::isOutput: - // Indirect outputs just consume an argument. - if (OpInfo.isIndirect) { - OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); - break; + if (OpInfo.Type == InlineAsm::isInput || + (OpInfo.Type == InlineAsm::isOutput && OpInfo.isIndirect)) { + OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); + + // Process the call argument. BasicBlocks are labels, currently appearing + // only in asm's. 
+ if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { + OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); + } else { + OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } + OpVT = + OpInfo + .getCallOperandValEVT(*DAG.getContext(), TLI, DAG.getDataLayout()) + .getSimpleVT(); + } + + if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) { // The return value of the call is this value. As such, there is no // corresponding argument. assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); @@ -6212,43 +7199,21 @@ OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType()); } ++ResNo; - break; - case InlineAsm::isInput: - OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); - break; - case InlineAsm::isClobber: - // Nothing to do. - break; - } - - // If this is an input or an indirect output, process the call argument. - // BasicBlocks are labels, currently appearing only in asm's. - if (OpInfo.CallOperandVal) { - if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { - OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); - } else { - OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); - } - - OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, - DAG.getDataLayout()).getSimpleVT(); } OpInfo.ConstraintVT = OpVT; - // Indirect operand accesses access memory. - if (OpInfo.isIndirect) - hasMemory = true; - else { - for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) { - TargetLowering::ConstraintType - CType = TLI.getConstraintType(OpInfo.Codes[j]); - if (CType == TargetLowering::C_Memory) { - hasMemory = true; - break; - } - } - } + if (!hasMemory) + hasMemory = OpInfo.hasMemory(TLI); + + // Determine if this InlineAsm MayLoad or MayStore based on the constraints. + // FIXME: Could we compute this on OpInfo rather than TargetConstraints[i]? + auto TargetConstraint = TargetConstraints[i]; + + // Compute the constraint code and ConstraintType to use. + TLI.ComputeConstraintToUse(TargetConstraint, SDValue()); + + ExtraInfo.update(TargetConstraint); } SDValue Chain, Flag; @@ -6271,24 +7236,7 @@ // error. if (OpInfo.hasMatchingInput()) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; - - if (OpInfo.ConstraintVT != Input.ConstraintVT) { - const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); - std::pair<unsigned, const TargetRegisterClass *> MatchRC = - TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, - OpInfo.ConstraintVT); - std::pair<unsigned, const TargetRegisterClass *> InputRC = - TLI.getRegForInlineAsmConstraint(TRI, Input.ConstraintCode, - Input.ConstraintVT); - if ((OpInfo.ConstraintVT.isInteger() != - Input.ConstraintVT.isInteger()) || - (MatchRC.second != InputRC.second)) { - report_fatal_error("Unsupported asm: input constraint" - " with a matching output constraint of" - " incompatible type!"); - } - Input.ConstraintVT = OpInfo.ConstraintVT; - } + patchMatchingInput(OpInfo, Input, DAG); } // Compute the constraint code and ConstraintType to use. @@ -6306,38 +7254,8 @@ (OpInfo.Type == InlineAsm::isInput)) && "Can only indirectify direct input operands!"); - // Memory operands really want the address of the value. If we don't have - // an indirect input, put it in the constpool if we can, otherwise spill - // it to a stack slot. - // TODO: This isn't quite right. We need to handle these according to - // the addressing mode that the constraint wants. 
Also, this may take - // an additional register for the computation and we don't want that - // either. - - // If the operand is a float, integer, or vector constant, spill to a - // constant pool entry to get its address. - const Value *OpVal = OpInfo.CallOperandVal; - if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || - isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) { - OpInfo.CallOperand = DAG.getConstantPool( - cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout())); - } else { - // Otherwise, create a stack slot and emit a store to it before the - // asm. - Type *Ty = OpVal->getType(); - auto &DL = DAG.getDataLayout(); - uint64_t TySize = DL.getTypeAllocSize(Ty); - unsigned Align = DL.getPrefTypeAlignment(Ty); - MachineFunction &MF = DAG.getMachineFunction(); - int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); - SDValue StackSlot = - DAG.getFrameIndex(SSFI, TLI.getPointerTy(DAG.getDataLayout())); - Chain = DAG.getStore( - Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), - false, false, 0); - OpInfo.CallOperand = StackSlot; - } + // Memory operands really want the address of the value. + Chain = getAddressForMemoryInput(Chain, getCurSDLoc(), OpInfo, DAG); // There is no longer a Value* corresponding to this operand. OpInfo.CallOperandVal = nullptr; @@ -6352,7 +7270,7 @@ GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo); } - // Second pass - Loop over all of the operands, assigning virtual or physregs + // Third pass - Loop over all of the operands, assigning virtual or physregs // to register class operands. for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; @@ -6377,51 +7295,21 @@ // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore // bits as operand 3. - unsigned ExtraInfo = 0; - if (IA->hasSideEffects()) - ExtraInfo |= InlineAsm::Extra_HasSideEffects; - if (IA->isAlignStack()) - ExtraInfo |= InlineAsm::Extra_IsAlignStack; - // Set the asm dialect. - ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect; - - // Determine if this InlineAsm MayLoad or MayStore based on the constraints. - for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { - TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; - - // Compute the constraint code and ConstraintType to use. - TLI.ComputeConstraintToUse(OpInfo, SDValue()); - - // Ideally, we would only check against memory constraints. However, the - // meaning of an other constraint can be target-specific and we can't easily - // reason about it. Therefore, be conservative and set MayLoad/MayStore - // for other constriants as well. - if (OpInfo.ConstraintType == TargetLowering::C_Memory || - OpInfo.ConstraintType == TargetLowering::C_Other) { - if (OpInfo.Type == InlineAsm::isInput) - ExtraInfo |= InlineAsm::Extra_MayLoad; - else if (OpInfo.Type == InlineAsm::isOutput) - ExtraInfo |= InlineAsm::Extra_MayStore; - else if (OpInfo.Type == InlineAsm::isClobber) - ExtraInfo |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore); - } - } - AsmNodeOperands.push_back(DAG.getTargetConstant( - ExtraInfo, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); + ExtraInfo.get(), getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. 
RegsForValue RetValRegs; // IndirectStoresToEmit - The set of stores to emit after the inline asm node. - std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit; + std::vector<std::pair<RegsForValue, Value *>> IndirectStoresToEmit; for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; switch (OpInfo.Type) { - case InlineAsm::isOutput: { + case InlineAsm::isOutput: if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass && OpInfo.ConstraintType != TargetLowering::C_Register) { // Memory output, or 'other' output (e.g. 'X' constraint). @@ -6446,10 +7334,9 @@ // Copy the output from the appropriate register. Find a register that // we can use. if (OpInfo.AssignedRegs.Regs.empty()) { - LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), - "couldn't allocate output register for constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); + emitInlineAsmError( + CS, "couldn't allocate output register for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); return; } @@ -6473,28 +7360,15 @@ : InlineAsm::Kind_RegDef, false, 0, getCurSDLoc(), DAG, AsmNodeOperands); break; - } + case InlineAsm::isInput: { SDValue InOperandVal = OpInfo.CallOperand; - if (OpInfo.isMatchingInputConstraint()) { // Matching constraint? + if (OpInfo.isMatchingInputConstraint()) { // If this is required to match an output register we have already set, // just use its register. - unsigned OperandNo = OpInfo.getMatchedOperand(); - - // Scan until we find the definition we already emitted of this operand. - // When we find it, create a RegsForValue operand. - unsigned CurOp = InlineAsm::Op_FirstOperand; - for (; OperandNo; --OperandNo) { - // Advance to the next operand. - unsigned OpFlag = - cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); - assert((InlineAsm::isRegDefKind(OpFlag) || - InlineAsm::isRegDefEarlyClobberKind(OpFlag) || - InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?"); - CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1; - } - + auto CurOp = findMatchingInlineAsmOperand(OpInfo.getMatchedOperand(), + AsmNodeOperands); unsigned OpFlag = cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); if (InlineAsm::isRegDefKind(OpFlag) || @@ -6502,34 +7376,29 @@ // Add (OpFlag&0xffff)>>3 registers to MatchedRegs. 
if (OpInfo.isIndirect) { // This happens on gcc/testsuite/gcc.dg/pr8788-1.c - LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:" - " don't know how to handle tied " - "indirect register inputs"); + emitInlineAsmError(CS, "inline asm not supported yet:" + " don't know how to handle tied " + "indirect register inputs"); return; } - RegsForValue MatchedRegs; - MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType(); - MatchedRegs.RegVTs.push_back(RegVT); - MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); - for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); - i != e; ++i) { - if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) - MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC)); - else { - LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), - "inline asm error: This value" - " type register class is not natively supported!"); - return; - } + SmallVector<unsigned, 4> Regs; + + if (!createVirtualRegs(Regs, + InlineAsm::getNumOperandRegisters(OpFlag), + RegVT, DAG)) { + emitInlineAsmError(CS, "inline asm error: This value type register " + "class is not natively supported!"); + return; } + + RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType()); + SDLoc dl = getCurSDLoc(); // Use the produced MatchedRegs object to - MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, - Chain, &Flag, CS.getInstruction()); + MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, + CS.getInstruction()); MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, true, OpInfo.getMatchedOperand(), dl, DAG, AsmNodeOperands); @@ -6560,10 +7429,8 @@ TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, Ops, DAG); if (Ops.empty()) { - LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), - "invalid operand for inline asm constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); + emitInlineAsmError(CS, "invalid operand for inline asm constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); return; } @@ -6603,20 +7470,17 @@ // TODO: Support this. if (OpInfo.isIndirect) { - LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), - "Don't know how to handle indirect register inputs yet " - "for constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); + emitInlineAsmError( + CS, "Don't know how to handle indirect register inputs yet " + "for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); return; } // Copy the input into the appropriate registers. if (OpInfo.AssignedRegs.Regs.empty()) { - LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), - "couldn't allocate input reg for constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); + emitInlineAsmError(CS, "couldn't allocate input reg for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); return; } @@ -6629,7 +7493,7 @@ dl, DAG, AsmNodeOperands); break; } - case InlineAsm::isClobber: { + case InlineAsm::isClobber: // Add the clobbered value to the operand list, so that the register // allocator is aware that the physreg got clobbered. if (!OpInfo.AssignedRegs.Regs.empty()) @@ -6638,7 +7502,6 @@ AsmNodeOperands); break; } - } } // Finish up input operands. Set the input chain and add the flag last. 
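findMatchingInlineAsmOperand (added near the top of this hunk) and the tied-register code above both rely on the INLINEASM operand layout: each operand group starts with a flag word that encodes its kind and how many register operands follow, so locating matched operand N means skipping N whole groups. A standalone sketch of that skip loop, with a simplified flag encoding that is not LLVM's real one:

#include <cassert>
#include <cstdio>
#include <vector>

// Simplified group encoding: the low 16 bits of a flag word hold the number
// of value operands that follow it (not the real InlineAsm flag layout).
static unsigned numOperandsInGroup(unsigned Flag) { return Flag & 0xffff; }

// Return the index of the flag word that starts group `OperandNo`,
// mirroring the skip loop in findMatchingInlineAsmOperand.
static size_t findOperandGroup(const std::vector<unsigned> &Ops,
                               unsigned OperandNo, size_t FirstOperand) {
  size_t Cur = FirstOperand;
  for (; OperandNo; --OperandNo) {
    assert(Cur < Ops.size() && "ran past the operand list");
    Cur += numOperandsInGroup(Ops[Cur]) + 1; // skip flag word + its operands
  }
  return Cur;
}

int main() {
  // Three groups with 2, 1 and 3 operands respectively.
  std::vector<unsigned> Ops = {2, 10, 11, 1, 20, 3, 30, 31, 32};
  std::printf("group 2 starts at index %zu\n", findOperandGroup(Ops, 2, 0));
}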
@@ -6685,7 +7548,7 @@ return; } - std::vector<std::pair<SDValue, const Value *> > StoresToEmit; + std::vector<std::pair<SDValue, const Value *>> StoresToEmit; // Process indirect outputs, first output all of the flagged copies out of // physregs. @@ -6700,11 +7563,9 @@ // Emit the non-flagged stores from the physregs. SmallVector<SDValue, 8> OutChains; for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) { - SDValue Val = DAG.getStore(Chain, getCurSDLoc(), - StoresToEmit[i].first, + SDValue Val = DAG.getStore(Chain, getCurSDLoc(), StoresToEmit[i].first, getValue(StoresToEmit[i].second), - MachinePointerInfo(StoresToEmit[i].second), - false, false, 0); + MachinePointerInfo(StoresToEmit[i].second)); OutChains.push_back(Val); } @@ -6714,6 +7575,17 @@ DAG.setRoot(Chain); } +void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS, + const Twine &Message) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), Message); + + // Make sure we leave the DAG in a valid state + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + auto VT = TLI.getValueType(DAG.getDataLayout(), CS.getType()); + setValue(CS.getInstruction(), DAG.getUNDEF(VT)); +} + void SelectionDAGBuilder::visitVAStart(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(), MVT::Other, getRoot(), @@ -6748,22 +7620,59 @@ DAG.getSrcValue(I.getArgOperand(1)))); } -/// \brief Lower an argument list according to the target calling convention. -/// -/// \return A tuple of <return-value, token-chain> +SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG, + const Instruction &I, + SDValue Op) { + const MDNode *Range = I.getMetadata(LLVMContext::MD_range); + if (!Range) + return Op; + + ConstantRange CR = getConstantRangeFromMetadata(*Range); + if (CR.isFullSet() || CR.isEmptySet() || CR.isWrappedSet()) + return Op; + + APInt Lo = CR.getUnsignedMin(); + if (!Lo.isMinValue()) + return Op; + + APInt Hi = CR.getUnsignedMax(); + unsigned Bits = Hi.getActiveBits(); + + EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits); + + SDLoc SL = getCurSDLoc(); + + SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), Op, + DAG.getValueType(SmallVT)); + unsigned NumVals = Op.getNode()->getNumValues(); + if (NumVals == 1) + return ZExt; + + SmallVector<SDValue, 4> Ops; + + Ops.push_back(ZExt); + for (unsigned I = 1; I != NumVals; ++I) + Ops.push_back(Op.getValue(I)); + + return DAG.getMergeValues(Ops, SL); +} + +/// \brief Populate a CallLowerinInfo (into \p CLI) based on the properties of +/// the call being lowered. /// /// This is a helper for lowering intrinsics that follow a target calling /// convention or require stack pointer adjustment. Only a subset of the /// intrinsic's operands need to participate in the calling convention. -std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerCallOperands( - ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, - Type *ReturnTy, const BasicBlock *EHPadBB, bool IsPatchPoint) { +void SelectionDAGBuilder::populateCallLoweringInfo( + TargetLowering::CallLoweringInfo &CLI, ImmutableCallSite CS, + unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy, + bool IsPatchPoint) { TargetLowering::ArgListTy Args; Args.reserve(NumArgs); // Populate the argument list. // Attributes for args start at offset 1, after the return attribute. 
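lowerRangeToAssertZExt, added above, uses !range metadata to shrink the apparent width of a call result: when the range starts at zero, the value can be wrapped in an AssertZext from an integer type just wide enough for the range's unsigned maximum. A standalone sketch of that width computation, using plain integers instead of APInt and SelectionDAG types:

#include <cstdint>
#include <cstdio>

// Number of bits needed to represent Hi, i.e. what APInt::getActiveBits()
// returns for the unsigned maximum of the range.
static unsigned activeBits(uint64_t Hi) {
  unsigned Bits = 0;
  while (Hi) { ++Bits; Hi >>= 1; }
  return Bits;
}

// Given a known result range [Lo, Hi], return the width the value could be
// asserted zero-extended from, or 0 when the narrowing does not apply
// (mirroring the early-outs above: only ranges starting at zero are handled).
static unsigned assertZExtWidth(uint64_t Lo, uint64_t Hi) {
  if (Lo != 0)
    return 0;
  return activeBits(Hi);
}

int main() {
  // A result known to lie in [0, 255] behaves like a zero-extended i8.
  std::printf("width = %u\n", assertZExtWidth(0, 255));
}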
- for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1; + for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs; ArgI != ArgE; ++ArgI) { const Value *V = CS->getOperand(ArgI); @@ -6772,16 +7681,15 @@ TargetLowering::ArgListEntry Entry; Entry.Node = getValue(V); Entry.Ty = V->getType(); - Entry.setAttributes(&CS, AttrI); + Entry.setAttributes(&CS, ArgIdx); Args.push_back(Entry); } - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) - .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args), NumArgs) - .setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint); - - return lowerInvokable(CLI, EHPadBB); + CLI.setDebugLoc(getCurSDLoc()) + .setChain(getRoot()) + .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args)) + .setDiscardResult(CS->use_empty()) + .setIsPatchPoint(IsPatchPoint); } /// \brief Add a stack map intrinsic call's live variable operands to a stackmap @@ -6802,7 +7710,7 @@ /// only available in a register, then the runtime would need to trap when /// execution reaches the StackMap in order to read the alloca's location. static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, - SDLoc DL, SmallVectorImpl<SDValue> &Ops, + const SDLoc &DL, SmallVectorImpl<SDValue> &Ops, SelectionDAGBuilder &Builder) { for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) { SDValue OpVal = Builder.getValue(CS.getArgument(i)); @@ -6814,7 +7722,7 @@ } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) { const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); Ops.push_back(Builder.DAG.getTargetFrameIndex( - FI->getIndex(), TLI.getPointerTy(Builder.DAG.getDataLayout()))); + FI->getIndex(), TLI.getFrameIndexTy(Builder.DAG.getDataLayout()))); } else Ops.push_back(OpVal); } @@ -6840,11 +7748,11 @@ // have to worry about calling conventions and target specific lowering code. // Instead we perform the call lowering right here. // - // chain, flag = CALLSEQ_START(chain, 0) + // chain, flag = CALLSEQ_START(chain, 0, 0) // chain, flag = STACKMAP(id, nbytes, ..., chain, flag) // chain, flag = CALLSEQ_END(chain, 0, 0, flag) // - Chain = DAG.getCALLSEQ_START(getRoot(), NullPtr, DL); + Chain = DAG.getCALLSEQ_START(getRoot(), 0, 0, DL); InFlag = Chain.getValue(1); // Add the <id> and <numBytes> constants. @@ -6880,7 +7788,7 @@ DAG.setRoot(Chain); // Inform the Frame Information that we have a stackmap in this function. - FuncInfo.MF->getFrameInfo()->setHasStackMap(); + FuncInfo.MF->getFrameInfo().setHasStackMap(); } /// \brief Lower llvm.experimental.patchpoint directly to its target opcode. @@ -6922,8 +7830,11 @@ unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; Type *ReturnTy = IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); - std::pair<SDValue, SDValue> Result = lowerCallOperands( - CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, EHPadBB, true); + + TargetLowering::CallLoweringInfo CLI(DAG); + populateCallLoweringInfo(CLI, CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, + true); + std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB); SDNode *CallEnd = Result.second.getNode(); if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) @@ -7028,12 +7939,80 @@ DAG.DeleteNode(Call); // Inform the Frame Information that we have a patchpoint in this function. 
- FuncInfo.MF->getFrameInfo()->setHasPatchPoint(); -} - -/// Returns an AttributeSet representing the attributes applied to the return + FuncInfo.MF->getFrameInfo().setHasPatchPoint(); +} + +void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, + unsigned Intrinsic) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2; + if (I.getNumArgOperands() > 1) + Op2 = getValue(I.getArgOperand(1)); + SDLoc dl = getCurSDLoc(); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + SDValue Res; + FastMathFlags FMF; + if (isa<FPMathOperator>(I)) + FMF = I.getFastMathFlags(); + SDNodeFlags SDFlags; + SDFlags.setNoNaNs(FMF.noNaNs()); + + switch (Intrinsic) { + case Intrinsic::experimental_vector_reduce_fadd: + if (FMF.unsafeAlgebra()) + Res = DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2); + else + Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2); + break; + case Intrinsic::experimental_vector_reduce_fmul: + if (FMF.unsafeAlgebra()) + Res = DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2); + else + Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2); + break; + case Intrinsic::experimental_vector_reduce_add: + Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_mul: + Res = DAG.getNode(ISD::VECREDUCE_MUL, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_and: + Res = DAG.getNode(ISD::VECREDUCE_AND, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_or: + Res = DAG.getNode(ISD::VECREDUCE_OR, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_xor: + Res = DAG.getNode(ISD::VECREDUCE_XOR, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_smax: + Res = DAG.getNode(ISD::VECREDUCE_SMAX, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_smin: + Res = DAG.getNode(ISD::VECREDUCE_SMIN, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_umax: + Res = DAG.getNode(ISD::VECREDUCE_UMAX, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_umin: + Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_fmax: + Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags); + break; + case Intrinsic::experimental_vector_reduce_fmin: + Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags); + break; + default: + llvm_unreachable("Unhandled vector reduce intrinsic"); + } + setValue(&I, Res); +} + +/// Returns an AttributeList representing the attributes applied to the return /// value of the given call. -static AttributeSet getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) { +static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) { SmallVector<Attribute::AttrKind, 2> Attrs; if (CLI.RetSExt) Attrs.push_back(Attribute::SExt); @@ -7042,8 +8021,8 @@ if (CLI.IsInReg) Attrs.push_back(Attribute::InReg); - return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex, - Attrs); + return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex, + Attrs); } /// TargetLowering::LowerCallTo - This is the default LowerCallTo @@ -7060,6 +8039,22 @@ auto &DL = CLI.DAG.getDataLayout(); ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets); + if (CLI.IsPostTypeLegalization) { + // If we are lowering a libcall after legalization, split the return type. 
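visitVectorReduce, added above, keeps two flavours of floating-point reduction apart: the strict forms take an explicit start operand and imply a fixed left-to-right evaluation order, while the plain forms are only emitted when fast-math (unsafeAlgebra) permits reassociation. A standalone sketch of why that distinction matters, using ordinary floats rather than VECREDUCE nodes:

#include <cstdio>
#include <vector>

// Ordered reduction: accumulate strictly left-to-right from a start value,
// as the strict reduction form requires.
static float strictFAdd(float Start, const std::vector<float> &V) {
  float Acc = Start;
  for (float X : V)
    Acc += X;
  return Acc;
}

// Relaxed reduction: pairwise/tree order, only legal when reassociation is
// allowed (the fast-math form).
static float fastFAdd(const std::vector<float> &V, size_t Lo, size_t Hi) {
  if (Hi - Lo == 1)
    return V[Lo];
  size_t Mid = Lo + (Hi - Lo) / 2;
  return fastFAdd(V, Lo, Mid) + fastFAdd(V, Mid, Hi);
}

int main() {
  std::vector<float> V = {1e8f, 1.0f, -1e8f, 1.0f};
  // The two orders can give different results, which is why a separate
  // strict form exists at all.
  std::printf("strict = %f, fast = %f\n", strictFAdd(0.0f, V),
              fastFAdd(V, 0, V.size()));
}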
+ SmallVector<EVT, 4> OldRetTys = std::move(RetTys); + SmallVector<uint64_t, 4> OldOffsets = std::move(Offsets); + for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) { + EVT RetVT = OldRetTys[i]; + uint64_t Offset = OldOffsets[i]; + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT); + unsigned RegisterVTSize = RegisterVT.getSizeInBits(); + RetTys.append(NumRegs, RegisterVT); + for (unsigned j = 0; j != NumRegs; ++j) + Offsets.push_back(Offset + j * RegisterVTSize); + } + } + SmallVector<ISD::OutputArg, 4> Outs; GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL); @@ -7076,22 +8071,25 @@ uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy); unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy); MachineFunction &MF = CLI.DAG.getMachineFunction(); - DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); + DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false); Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy); - DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy(DL)); + DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL)); ArgListEntry Entry; Entry.Node = DemoteStackSlot; Entry.Ty = StackSlotPtrType; - Entry.isSExt = false; - Entry.isZExt = false; - Entry.isInReg = false; - Entry.isSRet = true; - Entry.isNest = false; - Entry.isByVal = false; - Entry.isReturned = false; + Entry.IsSExt = false; + Entry.IsZExt = false; + Entry.IsInReg = false; + Entry.IsSRet = true; + Entry.IsNest = false; + Entry.IsByVal = false; + Entry.IsReturned = false; + Entry.IsSwiftSelf = false; + Entry.IsSwiftError = false; Entry.Alignment = Align; CLI.getArgs().insert(CLI.getArgs().begin(), Entry); + CLI.NumFixedArgs += 1; CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); // sret demotion isn't compatible with tail-calls, since the sret argument @@ -7100,8 +8098,10 @@ } else { for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + MVT RegisterVT = + getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT); + unsigned NumRegs = + getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags; MyFlags.VT = RegisterVT; @@ -7118,15 +8118,29 @@ } } + // We push in swifterror return as the last element of CLI.Ins. + ArgListTy &Args = CLI.getArgs(); + if (supportSwiftError()) { + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + if (Args[i].IsSwiftError) { + ISD::InputArg MyFlags; + MyFlags.VT = getPointerTy(DL); + MyFlags.ArgVT = EVT(getPointerTy(DL)); + MyFlags.Flags.setSwiftError(); + CLI.Ins.push_back(MyFlags); + } + } + } + // Handle all of the outgoing arguments. 
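The IsPostTypeLegalization block above re-expands each return type into the registers it will occupy, appending one (type, offset) pair per register, because a libcall lowered after type legalization can no longer count on the legalizer to perform that split. A simplified standalone illustration of this kind of splitting, assuming 32-bit registers; the byte offsets used here are purely illustrative, not a transcription of the code above:

#include <cstdint>
#include <cstdio>
#include <vector>

struct Part { unsigned SizeInBits; uint64_t Offset; };

// Split a value of `SizeInBits` starting at `BaseOffset` into register-sized
// pieces, one entry per register.
static std::vector<Part> splitIntoRegisters(unsigned SizeInBits,
                                            uint64_t BaseOffset,
                                            unsigned RegBits) {
  std::vector<Part> Parts;
  unsigned NumRegs = (SizeInBits + RegBits - 1) / RegBits;
  for (unsigned J = 0; J != NumRegs; ++J)
    Parts.push_back({RegBits, BaseOffset + J * (RegBits / 8)});
  return Parts;
}

int main() {
  // An i128 return on a target with 32-bit registers becomes four i32 parts.
  for (const Part &P : splitIntoRegisters(128, 0, 32))
    std::printf("i%u at byte offset %llu\n", P.SizeInBits,
                (unsigned long long)P.Offset);
}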
CLI.Outs.clear(); CLI.OutVals.clear(); - ArgListTy &Args = CLI.getArgs(); for (unsigned i = 0, e = Args.size(); i != e; ++i) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs); + // FIXME: Split arguments if CLI.IsPostTypeLegalization Type *FinalType = Args[i].Ty; - if (Args[i].isByVal) + if (Args[i].IsByVal) FinalType = cast<PointerType>(Args[i].Ty)->getElementType(); bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters( FinalType, CLI.CallConv, CLI.IsVarArg); @@ -7137,19 +8151,38 @@ SDValue Op = SDValue(Args[i].Node.getNode(), Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); - - if (Args[i].isZExt) + + // Certain targets (such as MIPS), may have a different ABI alignment + // for a type depending on the context. Give the target a chance to + // specify the alignment it wants. + unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL); + + if (Args[i].IsZExt) Flags.setZExt(); - if (Args[i].isSExt) + if (Args[i].IsSExt) Flags.setSExt(); - if (Args[i].isInReg) + if (Args[i].IsInReg) { + // If we are using vectorcall calling convention, a structure that is + // passed InReg - is surely an HVA + if (CLI.CallConv == CallingConv::X86_VectorCall && + isa<StructType>(FinalType)) { + // The first value of a structure is marked + if (0 == Value) + Flags.setHvaStart(); + Flags.setHva(); + } + // Set InReg Flag Flags.setInReg(); - if (Args[i].isSRet) + } + if (Args[i].IsSRet) Flags.setSRet(); - if (Args[i].isByVal) + if (Args[i].IsSwiftSelf) + Flags.setSwiftSelf(); + if (Args[i].IsSwiftError) + Flags.setSwiftError(); + if (Args[i].IsByVal) Flags.setByVal(); - if (Args[i].isInAlloca) { + if (Args[i].IsInAlloca) { Flags.setInAlloca(); // Set the byval flag for CCAssignFn callbacks that don't know about // inalloca. This way we can know how many bytes we should've allocated @@ -7158,7 +8191,7 @@ // in the various CC lowering callbacks. 
Flags.setByVal(); } - if (Args[i].isByVal || Args[i].isInAlloca) { + if (Args[i].IsByVal || Args[i].IsInAlloca) { PointerType *Ty = cast<PointerType>(Args[i].Ty); Type *ElementTy = Ty->getElementType(); Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); @@ -7171,24 +8204,25 @@ FrameAlign = getByValTypeAlignment(ElementTy, DL); Flags.setByValAlign(FrameAlign); } - if (Args[i].isNest) + if (Args[i].IsNest) Flags.setNest(); if (NeedsRegBlock) Flags.setInConsecutiveRegs(); Flags.setOrigAlign(OriginalAlignment); - MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT); + MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT); + unsigned NumParts = + getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT); SmallVector<SDValue, 4> Parts(NumParts); ISD::NodeType ExtendKind = ISD::ANY_EXTEND; - if (Args[i].isSExt) + if (Args[i].IsSExt) ExtendKind = ISD::SIGN_EXTEND; - else if (Args[i].isZExt) + else if (Args[i].IsZExt) ExtendKind = ISD::ZERO_EXTEND; // Conservatively only handle 'returned' on non-vectors for now - if (Args[i].isReturned && !Op.getValueType().isVector()) { + if (Args[i].IsReturned && !Op.getValueType().isVector()) { assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues && "unexpected use of 'returned'"); // Before passing 'returned' to the target lowering code, ensure that @@ -7202,13 +8236,13 @@ // parameter extension method is not compatible with the return // extension method if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) || - (ExtendKind != ISD::ANY_EXTEND && - CLI.RetSExt == Args[i].isSExt && CLI.RetZExt == Args[i].isZExt)) - Flags.setReturned(); + (ExtendKind != ISD::ANY_EXTEND && CLI.RetSExt == Args[i].IsSExt && + CLI.RetZExt == Args[i].IsZExt)) + Flags.setReturned(); } getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, - CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind); + CLI.CS.getInstruction(), ExtendKind, true); for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 @@ -7235,6 +8269,9 @@ SmallVector<SDValue, 4> InVals; CLI.Chain = LowerCall(CLI, InVals); + // Update CLI.InVals to use outside of this function. + CLI.InVals = InVals; + // Verify that the target's LowerCall behaved as expected. 
assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other && "LowerCall didn't return a valid chain!"); @@ -7252,12 +8289,13 @@ return std::make_pair(SDValue(), SDValue()); } - DEBUG(for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) { - assert(InVals[i].getNode() && - "LowerCall emitted a null value!"); - assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() && - "LowerCall emitted a value with the wrong type!"); - }); +#ifndef NDEBUG + for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) { + assert(InVals[i].getNode() && "LowerCall emitted a null value!"); + assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() && + "LowerCall emitted a value with the wrong type!"); + } +#endif SmallVector<SDValue, 4> ReturnValues; if (!CanLowerReturn) { @@ -7282,12 +8320,12 @@ for (unsigned i = 0; i < NumValues; ++i) { SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, CLI.DAG.getConstant(Offsets[i], CLI.DL, - PtrVT), &Flags); + PtrVT), Flags); SDValue L = CLI.DAG.getLoad( RetTys[i], CLI.DL, CLI.Chain, Add, MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(), DemoteStackIdx, Offsets[i]), - false, false, false, 1); + /* Alignment = */ 1); ReturnValues[i] = L; Chains[i] = L.getValue(1); } @@ -7296,7 +8334,7 @@ } else { // Collect the legal value parts into potentially illegal values // that correspond to the original function's return values. - ISD::NodeType AssertOp = ISD::DELETED_NODE; + Optional<ISD::NodeType> AssertOp; if (CLI.RetSExt) AssertOp = ISD::AssertSext; else if (CLI.RetZExt) @@ -7304,12 +8342,14 @@ unsigned CurReg = 0; for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + MVT RegisterVT = + getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT); + unsigned NumRegs = + getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT); ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], NumRegs, RegisterVT, VT, nullptr, - AssertOp)); + AssertOp, true)); CurReg += NumRegs; } @@ -7328,8 +8368,7 @@ void TargetLowering::LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { - SDValue Res = LowerOperation(SDValue(N, 0), DAG); - if (Res.getNode()) + if (SDValue Res = LowerOperation(SDValue(N, 0), DAG)) Results.push_back(Res); } @@ -7346,8 +8385,11 @@ assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + // If this is an InlineAsm we have to match the registers required, not the + // notional registers required by the type. + RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, - V->getType()); + V->getType(), isABIRegCopy(V)); SDValue Chain = DAG.getEntryNode(); ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) == @@ -7377,6 +8419,173 @@ return true; } +using ArgCopyElisionMapTy = + DenseMap<const Argument *, + std::pair<const AllocaInst *, const StoreInst *>>; + +/// Scan the entry block of the function in FuncInfo for arguments that look +/// like copies into a local alloca. Record any copied arguments in +/// ArgCopyElisionCandidates. +static void +findArgumentCopyElisionCandidates(const DataLayout &DL, + FunctionLoweringInfo *FuncInfo, + ArgCopyElisionMapTy &ArgCopyElisionCandidates) { + // Record the state of every static alloca used in the entry block. 
Argument + // allocas are all used in the entry block, so we need approximately as many + // entries as we have arguments. + enum StaticAllocaInfo { Unknown, Clobbered, Elidable }; + SmallDenseMap<const AllocaInst *, StaticAllocaInfo, 8> StaticAllocas; + unsigned NumArgs = FuncInfo->Fn->arg_size(); + StaticAllocas.reserve(NumArgs * 2); + + auto GetInfoIfStaticAlloca = [&](const Value *V) -> StaticAllocaInfo * { + if (!V) + return nullptr; + V = V->stripPointerCasts(); + const auto *AI = dyn_cast<AllocaInst>(V); + if (!AI || !AI->isStaticAlloca() || !FuncInfo->StaticAllocaMap.count(AI)) + return nullptr; + auto Iter = StaticAllocas.insert({AI, Unknown}); + return &Iter.first->second; + }; + + // Look for stores of arguments to static allocas. Look through bitcasts and + // GEPs to handle type coercions, as long as the alloca is fully initialized + // by the store. Any non-store use of an alloca escapes it and any subsequent + // unanalyzed store might write it. + // FIXME: Handle structs initialized with multiple stores. + for (const Instruction &I : FuncInfo->Fn->getEntryBlock()) { + // Look for stores, and handle non-store uses conservatively. + const auto *SI = dyn_cast<StoreInst>(&I); + if (!SI) { + // We will look through cast uses, so ignore them completely. + if (I.isCast()) + continue; + // Ignore debug info intrinsics, they don't escape or store to allocas. + if (isa<DbgInfoIntrinsic>(I)) + continue; + // This is an unknown instruction. Assume it escapes or writes to all + // static alloca operands. + for (const Use &U : I.operands()) { + if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(U)) + *Info = StaticAllocaInfo::Clobbered; + } + continue; + } + + // If the stored value is a static alloca, mark it as escaped. + if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(SI->getValueOperand())) + *Info = StaticAllocaInfo::Clobbered; + + // Check if the destination is a static alloca. + const Value *Dst = SI->getPointerOperand()->stripPointerCasts(); + StaticAllocaInfo *Info = GetInfoIfStaticAlloca(Dst); + if (!Info) + continue; + const AllocaInst *AI = cast<AllocaInst>(Dst); + + // Skip allocas that have been initialized or clobbered. + if (*Info != StaticAllocaInfo::Unknown) + continue; + + // Check if the stored value is an argument, and that this store fully + // initializes the alloca. Don't elide copies from the same argument twice. + const Value *Val = SI->getValueOperand()->stripPointerCasts(); + const auto *Arg = dyn_cast<Argument>(Val); + if (!Arg || Arg->hasInAllocaAttr() || Arg->hasByValAttr() || + Arg->getType()->isEmptyTy() || + DL.getTypeStoreSize(Arg->getType()) != + DL.getTypeAllocSize(AI->getAllocatedType()) || + ArgCopyElisionCandidates.count(Arg)) { + *Info = StaticAllocaInfo::Clobbered; + continue; + } + + DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI << '\n'); + + // Mark this alloca and store for argument copy elision. + *Info = StaticAllocaInfo::Elidable; + ArgCopyElisionCandidates.insert({Arg, {AI, SI}}); + + // Stop scanning if we've seen all arguments. This will happen early in -O0 + // builds, which is useful, because -O0 builds have large entry blocks and + // many allocas. + if (ArgCopyElisionCandidates.size() == NumArgs) + break; + } +} + +/// Try to elide argument copies from memory into a local alloca. Succeeds if +/// ArgVal is a load from a suitable fixed stack object. 
+static void tryToElideArgumentCopy( + FunctionLoweringInfo *FuncInfo, SmallVectorImpl<SDValue> &Chains, + DenseMap<int, int> &ArgCopyElisionFrameIndexMap, + SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs, + ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg, + SDValue ArgVal, bool &ArgHasUses) { + // Check if this is a load from a fixed stack object. + auto *LNode = dyn_cast<LoadSDNode>(ArgVal); + if (!LNode) + return; + auto *FINode = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()); + if (!FINode) + return; + + // Check that the fixed stack object is the right size and alignment. + // Look at the alignment that the user wrote on the alloca instead of looking + // at the stack object. + auto ArgCopyIter = ArgCopyElisionCandidates.find(&Arg); + assert(ArgCopyIter != ArgCopyElisionCandidates.end()); + const AllocaInst *AI = ArgCopyIter->second.first; + int FixedIndex = FINode->getIndex(); + int &AllocaIndex = FuncInfo->StaticAllocaMap[AI]; + int OldIndex = AllocaIndex; + MachineFrameInfo &MFI = FuncInfo->MF->getFrameInfo(); + if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) { + DEBUG(dbgs() << " argument copy elision failed due to bad fixed stack " + "object size\n"); + return; + } + unsigned RequiredAlignment = AI->getAlignment(); + if (!RequiredAlignment) { + RequiredAlignment = FuncInfo->MF->getDataLayout().getABITypeAlignment( + AI->getAllocatedType()); + } + if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) { + DEBUG(dbgs() << " argument copy elision failed: alignment of alloca " + "greater than stack argument alignment (" + << RequiredAlignment << " vs " + << MFI.getObjectAlignment(FixedIndex) << ")\n"); + return; + } + + // Perform the elision. Delete the old stack object and replace its only use + // in the variable info map. Mark the stack object as mutable. + DEBUG({ + dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n' + << " Replacing frame index " << OldIndex << " with " << FixedIndex + << '\n'; + }); + MFI.RemoveStackObject(OldIndex); + MFI.setIsImmutableObjectIndex(FixedIndex, false); + AllocaIndex = FixedIndex; + ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex}); + Chains.push_back(ArgVal.getValue(1)); + + // Avoid emitting code for the store implementing the copy. + const StoreInst *SI = ArgCopyIter->second.second; + ElidedArgCopyInstrs.insert(SI); + + // Check for uses of the argument again so that we can avoid exporting ArgVal + // if it is't used by anything other than the store. + for (const Value *U : Arg.users()) { + if (U != SI) { + ArgHasUses = true; + break; + } + } +} + void SelectionDAGISel::LowerArguments(const Function &F) { SelectionDAG &DAG = SDB->DAG; SDLoc dl = SDB->getCurSDLoc(); @@ -7399,16 +8608,21 @@ Ins.push_back(RetArg); } + // Look for stores of arguments to static allocas. Mark such arguments with a + // flag to ask the target to give us the memory location of that argument if + // available. + ArgCopyElisionMapTy ArgCopyElisionCandidates; + findArgumentCopyElisionCandidates(DL, FuncInfo, ArgCopyElisionCandidates); + // Set up the incoming argument description vector. 
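findArgumentCopyElisionCandidates and tryToElideArgumentCopy, added above, boil down to a three-state analysis of the entry block: every static alloca starts as Unknown, the first full-sized store of an argument into it makes it Elidable, and any other instruction that touches it makes it Clobbered. A standalone sketch of that state machine over a toy instruction stream; the types here are stand-ins, not LLVM IR:

#include <cstdio>
#include <map>
#include <string>
#include <vector>

enum class State { Unknown, Clobbered, Elidable };

// Toy entry-block instruction: either a full-sized store of an argument into
// a static alloca, or some other instruction that merely mentions one.
struct Inst {
  bool IsFullArgStore;
  std::string Alloca;
};

static std::map<std::string, State>
scanEntryBlock(const std::vector<Inst> &Block) {
  std::map<std::string, State> States;
  for (const Inst &I : Block) {
    State &S = States.try_emplace(I.Alloca, State::Unknown).first->second;
    if (!I.IsFullArgStore)
      S = State::Clobbered;       // unknown use: assume it escapes or writes
    else if (S == State::Unknown)
      S = State::Elidable;        // first full store of an argument
    // otherwise: already initialized or clobbered, leave the state alone
  }
  return States;
}

int main() {
  // %a is only ever initialized from an argument; %b is touched by something
  // else first, so its copy cannot be elided.
  std::vector<Inst> Block = {{true, "a"}, {false, "b"}, {true, "b"}};
  for (const auto &KV : scanEntryBlock(Block))
    std::printf("%%%s: %s\n", KV.first.c_str(),
                KV.second == State::Elidable ? "elidable" : "clobbered");
}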
- unsigned Idx = 1; - for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); - I != E; ++I, ++Idx) { + for (const Argument &Arg : F.args()) { + unsigned ArgNo = Arg.getArgNo(); SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs); - bool isArgValueUsed = !I->use_empty(); + ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs); + bool isArgValueUsed = !Arg.use_empty(); unsigned PartBase = 0; - Type *FinalType = I->getType(); - if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) + Type *FinalType = Arg.getType(); + if (Arg.hasAttribute(Attribute::ByVal)) FinalType = cast<PointerType>(FinalType)->getElementType(); bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters( FinalType, F.getCallingConv(), F.isVarArg()); @@ -7417,19 +8631,39 @@ EVT VT = ValueVTs[Value]; Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); - - if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) + + // Certain targets (such as MIPS), may have a different ABI alignment + // for a type depending on the context. Give the target a chance to + // specify the alignment it wants. + unsigned OriginalAlignment = + TLI->getABIAlignmentForCallingConv(ArgTy, DL); + + if (Arg.hasAttribute(Attribute::ZExt)) Flags.setZExt(); - if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) + if (Arg.hasAttribute(Attribute::SExt)) Flags.setSExt(); - if (F.getAttributes().hasAttribute(Idx, Attribute::InReg)) + if (Arg.hasAttribute(Attribute::InReg)) { + // If we are using vectorcall calling convention, a structure that is + // passed InReg - is surely an HVA + if (F.getCallingConv() == CallingConv::X86_VectorCall && + isa<StructType>(Arg.getType())) { + // The first value of a structure is marked + if (0 == Value) + Flags.setHvaStart(); + Flags.setHva(); + } + // Set InReg Flag Flags.setInReg(); - if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet)) + } + if (Arg.hasAttribute(Attribute::StructRet)) Flags.setSRet(); - if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) + if (Arg.hasAttribute(Attribute::SwiftSelf)) + Flags.setSwiftSelf(); + if (Arg.hasAttribute(Attribute::SwiftError)) + Flags.setSwiftError(); + if (Arg.hasAttribute(Attribute::ByVal)) Flags.setByVal(); - if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) { + if (Arg.hasAttribute(Attribute::InAlloca)) { Flags.setInAlloca(); // Set the byval flag for CCAssignFn callbacks that don't know about // inalloca. This way we can know how many bytes we should've allocated @@ -7440,33 +8674,37 @@ } if (F.getCallingConv() == CallingConv::X86_INTR) { // IA Interrupt passes frame (1st parameter) by value in the stack. - if (Idx == 1) + if (ArgNo == 0) Flags.setByVal(); } if (Flags.isByVal() || Flags.isInAlloca()) { - PointerType *Ty = cast<PointerType>(I->getType()); + PointerType *Ty = cast<PointerType>(Arg.getType()); Type *ElementTy = Ty->getElementType(); Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. 
unsigned FrameAlign; - if (F.getParamAlignment(Idx)) - FrameAlign = F.getParamAlignment(Idx); + if (Arg.getParamAlignment()) + FrameAlign = Arg.getParamAlignment(); else FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL); Flags.setByValAlign(FrameAlign); } - if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) + if (Arg.hasAttribute(Attribute::Nest)) Flags.setNest(); if (NeedsRegBlock) Flags.setInConsecutiveRegs(); Flags.setOrigAlign(OriginalAlignment); - - MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); - unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT); + if (ArgCopyElisionCandidates.count(&Arg)) + Flags.setCopyElisionCandidate(); + + MVT RegisterVT = + TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT); + unsigned NumRegs = + TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed, - Idx-1, PartBase+i*RegisterVT.getStoreSize()); + ArgNo, PartBase+i*RegisterVT.getStoreSize()); if (NumRegs > 1 && i == 0) MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 @@ -7507,7 +8745,6 @@ // Set up the argument values. unsigned i = 0; - Idx = 1; if (!FuncInfo->CanLowerReturn) { // Create a virtual register for the sret pointer, and put in a copy // from the sret argument into it. @@ -7516,7 +8753,7 @@ PointerType::getUnqual(F.getReturnType()), ValueVTs); MVT VT = ValueVTs[0].getSimpleVT(); MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); - ISD::NodeType AssertOp = ISD::DELETED_NODE; + Optional<ISD::NodeType> AssertOp = None; SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT, nullptr, AssertOp); @@ -7529,43 +8766,63 @@ DAG.setRoot(NewRoot); // i indexes lowered arguments. Bump it past the hidden sret argument. - // Idx indexes LLVM arguments. Don't touch it. ++i; } - for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; - ++I, ++Idx) { + SmallVector<SDValue, 4> Chains; + DenseMap<int, int> ArgCopyElisionFrameIndexMap; + for (const Argument &Arg : F.args()) { SmallVector<SDValue, 4> ArgValues; SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs); + ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) + continue; + + bool ArgHasUses = !Arg.use_empty(); + + // Elide the copying store if the target loaded this argument from a + // suitable fixed stack object. + if (Ins[i].Flags.isCopyElisionCandidate()) { + tryToElideArgumentCopy(FuncInfo, Chains, ArgCopyElisionFrameIndexMap, + ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg, + InVals[i], ArgHasUses); + } // If this argument is unused then remember its value. It is used to generate // debugging information. - if (I->use_empty() && NumValues) { - SDB->setUnusedArgValue(&*I, InVals[i]); + bool isSwiftErrorArg = + TLI->supportSwiftError() && + Arg.hasAttribute(Attribute::SwiftError); + if (!ArgHasUses && !isSwiftErrorArg) { + SDB->setUnusedArgValue(&Arg, InVals[i]); // Also remember any frame index for use in FastISel. 
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(InVals[i].getNode())) - FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); + FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex()); } for (unsigned Val = 0; Val != NumValues; ++Val) { EVT VT = ValueVTs[Val]; - MVT PartVT = TLI->getRegisterType(*CurDAG->getContext(), VT); - unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT); - - if (!I->use_empty()) { - ISD::NodeType AssertOp = ISD::DELETED_NODE; - if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) + MVT PartVT = + TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT); + unsigned NumParts = + TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT); + + // Even an apparant 'unused' swifterror argument needs to be returned. So + // we do generate a copy for it that can be used on return from the + // function. + if (ArgHasUses || isSwiftErrorArg) { + Optional<ISD::NodeType> AssertOp; + if (Arg.hasAttribute(Attribute::SExt)) AssertOp = ISD::AssertSext; - else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) + else if (Arg.hasAttribute(Attribute::ZExt)) AssertOp = ISD::AssertZext; - ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], - NumParts, PartVT, VT, - nullptr, AssertOp)); + ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, + PartVT, VT, nullptr, AssertOp, + true)); } i += NumParts; @@ -7578,18 +8835,34 @@ // Note down frame index. if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode())) - FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); + FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex()); SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues), SDB->getCurSDLoc()); - SDB->setValue(&*I, Res); + SDB->setValue(&Arg, Res); if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { + // We want to associate the argument with the frame index, among + // involved operands, that correspond to the lowest address. The + // getCopyFromParts function, called earlier, is swapping the order of + // the operands to BUILD_PAIR depending on endianness. The result of + // that swapping is that the least significant bits of the argument will + // be in the first operand of the BUILD_PAIR node, and the most + // significant bits will be in the second operand. + unsigned LowAddressOp = DAG.getDataLayout().isBigEndian() ? 1 : 0; if (LoadSDNode *LNode = - dyn_cast<LoadSDNode>(Res.getOperand(0).getNode())) + dyn_cast<LoadSDNode>(Res.getOperand(LowAddressOp).getNode())) if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) - FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); + FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex()); + } + + // Update the SwiftErrorVRegDefMap. + if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) { + unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, + FuncInfo->SwiftErrorArg, Reg); } // If this argument is live outside of the entry block, insert a copy from @@ -7601,18 +8874,36 @@ // uses with vregs. 
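The BUILD_PAIR handling above wants the frame index of the operand at the lowest address; since getCopyFromParts always leaves the least significant half in operand 0, the low address belongs to operand 0 on little-endian targets and to operand 1 on big-endian ones. A small standalone illustration of why byte order decides which half occupies the lower address:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // A 64-bit value viewed as two 32-bit halves in memory.
  uint64_t V = 0x1122334455667788ULL;
  uint32_t Halves[2];
  std::memcpy(Halves, &V, sizeof(V));

  // On a little-endian host the low half (0x55667788) sits at the lower
  // address (Halves[0]); on a big-endian host the high half does. That is
  // exactly the distinction LowAddressOp encodes when picking a BUILD_PAIR
  // operand.
  std::printf("at lower address: 0x%08x\n", Halves[0]);
}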
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { - FuncInfo->ValueMap[&*I] = Reg; + FuncInfo->ValueMap[&Arg] = Reg; continue; } } - if (!isOnlyUsedInEntryBlock(&*I, TM.Options.EnableFastISel)) { - FuncInfo->InitializeRegForValue(&*I); - SDB->CopyToExportRegsIfNeeded(&*I); - } - } + if (!isOnlyUsedInEntryBlock(&Arg, TM.Options.EnableFastISel)) { + FuncInfo->InitializeRegForValue(&Arg); + SDB->CopyToExportRegsIfNeeded(&Arg); + } + } + + if (!Chains.empty()) { + Chains.push_back(NewRoot); + NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); + } + + DAG.setRoot(NewRoot); assert(i == InVals.size() && "Argument register count mismatch!"); + // If any argument copy elisions occurred and we have debug info, update the + // stale frame indices used in the dbg.declare variable info table. + MachineFunction::VariableDbgInfoMapTy &DbgDeclareInfo = MF->getVariableDbgInfo(); + if (!DbgDeclareInfo.empty() && !ArgCopyElisionFrameIndexMap.empty()) { + for (MachineFunction::VariableDbgInfo &VI : DbgDeclareInfo) { + auto I = ArgCopyElisionFrameIndexMap.find(VI.Slot); + if (I != ArgCopyElisionFrameIndexMap.end()) + VI.Slot = I->second; + } + } + // Finally, if the target has anything special to do, allow it to do so. EmitFunctionEntryCode(); } @@ -7623,7 +8914,6 @@ /// directly add them, because expansion might result in multiple MBB's for one /// BB. As such, the start of the BB might correspond to a different MBB than /// the end. -/// void SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { const TerminatorInst *TI = LLVMBB->getTerminator(); @@ -7689,7 +8979,8 @@ EVT VT = ValueVTs[vti]; unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); for (unsigned i = 0, e = NumRegisters; i != e; ++i) - FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); + FuncInfo.PHINodesToUpdate.push_back( + std::make_pair(&*MBBI++, Reg + i)); Reg += NumRegisters; } } @@ -7739,38 +9030,32 @@ HasTailCall = true; } -bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters, - unsigned *TotalCases, unsigned First, - unsigned Last) { +uint64_t +SelectionDAGBuilder::getJumpTableRange(const CaseClusterVector &Clusters, + unsigned First, unsigned Last) const { assert(Last >= First); - assert(TotalCases[Last] >= TotalCases[First]); - - APInt LowCase = Clusters[First].Low->getValue(); - APInt HighCase = Clusters[Last].High->getValue(); + const APInt &LowCase = Clusters[First].Low->getValue(); + const APInt &HighCase = Clusters[Last].High->getValue(); assert(LowCase.getBitWidth() == HighCase.getBitWidth()); // FIXME: A range of consecutive cases has 100% density, but only requires one // comparison to lower. We should discriminate against such consecutive ranges // in jump tables. - uint64_t Diff = (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100); - uint64_t Range = Diff + 1; - + return (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100) + 1; +} + +uint64_t SelectionDAGBuilder::getJumpTableNumCases( + const SmallVectorImpl<unsigned> &TotalCases, unsigned First, + unsigned Last) const { + assert(Last >= First); + assert(TotalCases[Last] >= TotalCases[First]); uint64_t NumCases = TotalCases[Last] - (First == 0 ? 
0 : TotalCases[First - 1]); - - assert(NumCases < UINT64_MAX / 100); - assert(Range >= NumCases); - - return NumCases * 100 >= Range * MinJumpTableDensity; -} - -static inline bool areJTsAllowed(const TargetLowering &TLI) { - return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || - TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other); -} - -bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, + return NumCases; +} + +bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters, unsigned First, unsigned Last, const SwitchInst *SI, MachineBasicBlock *DefaultMBB, @@ -7789,12 +9074,12 @@ for (unsigned I = First; I <= Last; ++I) { assert(Clusters[I].Kind == CC_Range); Prob += Clusters[I].Prob; - APInt Low = Clusters[I].Low->getValue(); - APInt High = Clusters[I].High->getValue(); + const APInt &Low = Clusters[I].Low->getValue(); + const APInt &High = Clusters[I].High->getValue(); NumCmps += (Low == High) ? 1 : 2; if (I != First) { // Fill the gap between this and the previous cluster. - APInt PreviousHigh = Clusters[I - 1].High->getValue(); + const APInt &PreviousHigh = Clusters[I - 1].High->getValue(); assert(PreviousHigh.slt(Low)); uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1; for (uint64_t J = 0; J < Gap; J++) @@ -7806,10 +9091,11 @@ JTProbs[Clusters[I].MBB] += Clusters[I].Prob; } + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned NumDests = JTProbs.size(); - if (isSuitableForBitTests(NumDests, NumCmps, - Clusters[First].Low->getValue(), - Clusters[Last].High->getValue())) { + if (TLI.isSuitableForBitTests( + NumDests, NumCmps, Clusters[First].Low->getValue(), + Clusters[Last].High->getValue(), DAG.getDataLayout())) { // Clusters[First..Last] should be lowered as bit tests instead. return false; } @@ -7830,7 +9116,6 @@ } JumpTableMBB->normalizeSuccProbs(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding()) ->createJumpTableIndex(Table); @@ -7859,25 +9144,32 @@ #endif const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!areJTsAllowed(TLI)) + if (!TLI.areJTsAllowed(SI->getParent()->getParent())) return; const int64_t N = Clusters.size(); - const unsigned MinJumpTableSize = TLI.getMinimumJumpTableEntries(); + const unsigned MinJumpTableEntries = TLI.getMinimumJumpTableEntries(); + const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2; + + if (N < 2 || N < MinJumpTableEntries) + return; // TotalCases[i]: Total nbr of cases in Clusters[0..i]. SmallVector<unsigned, 8> TotalCases(N); - for (unsigned i = 0; i < N; ++i) { - APInt Hi = Clusters[i].High->getValue(); - APInt Lo = Clusters[i].Low->getValue(); + const APInt &Hi = Clusters[i].High->getValue(); + const APInt &Lo = Clusters[i].Low->getValue(); TotalCases[i] = (Hi - Lo).getLimitedValue() + 1; if (i != 0) TotalCases[i] += TotalCases[i - 1]; } - if (N >= MinJumpTableSize && isDense(Clusters, &TotalCases[0], 0, N - 1)) { - // Cheap case: the whole range might be suitable for jump table. + // Cheap case: the whole range may be suitable for jump table. 
+ uint64_t Range = getJumpTableRange(Clusters,0, N - 1); + uint64_t NumCases = getJumpTableNumCases(TotalCases, 0, N - 1); + assert(NumCases < UINT64_MAX / 100); + assert(Range >= NumCases); + if (TLI.isSuitableForJumpTable(SI, NumCases, Range)) { CaseCluster JTCluster; if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) { Clusters[0] = JTCluster; @@ -7901,14 +9193,23 @@ SmallVector<unsigned, 8> MinPartitions(N); // LastElement[i] is the last element of the partition starting at i. SmallVector<unsigned, 8> LastElement(N); - // NumTables[i]: nbr of >= MinJumpTableSize partitions from Clusters[i..N-1]. - SmallVector<unsigned, 8> NumTables(N); + // PartitionsScore[i] is used to break ties when choosing between two + // partitionings resulting in the same number of partitions. + SmallVector<unsigned, 8> PartitionsScore(N); + // For PartitionsScore, a small number of comparisons is considered as good as + // a jump table and a single comparison is considered better than a jump + // table. + enum PartitionScores : unsigned { + NoTable = 0, + Table = 1, + FewCases = 1, + SingleCase = 2 + }; // Base case: There is only one way to partition Clusters[N-1]. MinPartitions[N - 1] = 1; LastElement[N - 1] = N - 1; - assert(MinJumpTableSize > 1); - NumTables[N - 1] = 0; + PartitionsScore[N - 1] = PartitionScores::SingleCase; // Note: loop indexes are signed to avoid underflow. for (int64_t i = N - 2; i >= 0; i--) { @@ -7916,23 +9217,34 @@ // Baseline: Put Clusters[i] into a partition on its own. MinPartitions[i] = MinPartitions[i + 1] + 1; LastElement[i] = i; - NumTables[i] = NumTables[i + 1]; + PartitionsScore[i] = PartitionsScore[i + 1] + PartitionScores::SingleCase; // Search for a solution that results in fewer partitions. for (int64_t j = N - 1; j > i; j--) { // Try building a partition from Clusters[i..j]. - if (isDense(Clusters, &TotalCases[0], i, j)) { + uint64_t Range = getJumpTableRange(Clusters, i, j); + uint64_t NumCases = getJumpTableNumCases(TotalCases, i, j); + assert(NumCases < UINT64_MAX / 100); + assert(Range >= NumCases); + if (TLI.isSuitableForJumpTable(SI, NumCases, Range)) { unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); - bool IsTable = j - i + 1 >= MinJumpTableSize; - unsigned Tables = IsTable + (j == N - 1 ? 0 : NumTables[j + 1]); - - // If this j leads to fewer partitions, or same number of partitions - // with more lookup tables, it is a better partitioning. + unsigned Score = j == N - 1 ? 0 : PartitionsScore[j + 1]; + int64_t NumEntries = j - i + 1; + + if (NumEntries == 1) + Score += PartitionScores::SingleCase; + else if (NumEntries <= SmallNumberOfEntries) + Score += PartitionScores::FewCases; + else if (NumEntries >= MinJumpTableEntries) + Score += PartitionScores::Table; + + // If this leads to fewer partitions, or to the same number of + // partitions with better score, it is a better partitioning. 
if (NumPartitions < MinPartitions[i] || - (NumPartitions == MinPartitions[i] && Tables > NumTables[i])) { + (NumPartitions == MinPartitions[i] && Score > PartitionsScore[i])) { MinPartitions[i] = NumPartitions; LastElement[i] = j; - NumTables[i] = Tables; + PartitionsScore[i] = Score; } } } @@ -7947,7 +9259,7 @@ unsigned NumClusters = Last - First + 1; CaseCluster JTCluster; - if (NumClusters >= MinJumpTableSize && + if (NumClusters >= MinJumpTableEntries && buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) { Clusters[DstIndex++] = JTCluster; } else { @@ -7958,36 +9270,6 @@ Clusters.resize(DstIndex); } -bool SelectionDAGBuilder::rangeFitsInWord(const APInt &Low, const APInt &High) { - // FIXME: Using the pointer type doesn't seem ideal. - uint64_t BW = DAG.getDataLayout().getPointerSizeInBits(); - uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; - return Range <= BW; -} - -bool SelectionDAGBuilder::isSuitableForBitTests(unsigned NumDests, - unsigned NumCmps, - const APInt &Low, - const APInt &High) { - // FIXME: I don't think NumCmps is the correct metric: a single case and a - // range of cases both require only one branch to lower. Just looking at the - // number of clusters and destinations should be enough to decide whether to - // build bit tests. - - // To lower a range with bit tests, the range must fit the bitwidth of a - // machine word. - if (!rangeFitsInWord(Low, High)) - return false; - - // Decide whether it's profitable to lower this range with bit tests. Each - // destination requires a bit test and branch, and there is an overall range - // check branch. For a small number of clusters, separate comparisons might be - // cheaper, and for many destinations, splitting the range might be better. - return (NumDests == 1 && NumCmps >= 3) || - (NumDests == 2 && NumCmps >= 5) || - (NumDests == 3 && NumCmps >= 6); -} - bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, unsigned First, unsigned Last, const SwitchInst *SI, @@ -8009,16 +9291,17 @@ APInt High = Clusters[Last].High->getValue(); assert(Low.slt(High)); - if (!isSuitableForBitTests(NumDests, NumCmps, Low, High)) + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = DAG.getDataLayout(); + if (!TLI.isSuitableForBitTests(NumDests, NumCmps, Low, High, DL)) return false; APInt LowBound; APInt CmpRange; - const int BitWidth = DAG.getTargetLoweringInfo() - .getPointerTy(DAG.getDataLayout()) - .getSizeInBits(); - assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!"); + const int BitWidth = TLI.getPointerTy(DL).getSizeInBits(); + assert(TLI.rangeFitsInWord(Low, High, DL) && + "Case range must fit in bit mask!"); // Check if the clusters cover a contiguous range such that no value in the // range will jump to the default statement. @@ -8108,7 +9391,9 @@ // If target does not have legal shift left, do not emit bit tests at all. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT PTy = TLI.getPointerTy(DAG.getDataLayout()); + const DataLayout &DL = DAG.getDataLayout(); + + EVT PTy = TLI.getPointerTy(DL); if (!TLI.isOperationLegal(ISD::SHL, PTy)) return; @@ -8139,8 +9424,8 @@ // Try building a partition from Clusters[i..j]. // Check the range. - if (!rangeFitsInWord(Clusters[i].Low->getValue(), - Clusters[j].High->getValue())) + if (!TLI.rangeFitsInWord(Clusters[i].Low->getValue(), + Clusters[j].High->getValue(), DL)) continue; // Check nbr of destinations and cluster types. 
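findJumpTables, as modified above, runs a right-to-left dynamic program over the case clusters: MinPartitions[i] is the fewest partitions needed for Clusters[i..N-1], LastElement[i] marks where the first partition of that optimum ends, and PartitionsScore[i] breaks ties in favour of single comparisons, small partitions and genuine jump tables. A compact standalone sketch of the same recurrence, with an invented suitability test standing in for TLI.isSuitableForJumpTable and the tie-breaking score left out:

#include <cstdint>
#include <cstdio>
#include <vector>

struct Cluster { uint64_t Low, High; }; // inclusive case range, one target

// Stand-in for TLI.isSuitableForJumpTable: enough cases and dense enough.
static bool suitableForJumpTable(uint64_t NumCases, uint64_t Range) {
  return NumCases >= 4 && NumCases * 100 >= Range * 40; // at least 40% dense
}

// Fewest partitions of Clusters[0..N-1]; each partition is either a lone
// cluster or a jump table, mirroring the recurrence above.
static unsigned minPartitions(const std::vector<Cluster> &C) {
  if (C.empty())
    return 0;
  int64_t N = C.size();
  std::vector<unsigned> MinParts(N);
  MinParts[N - 1] = 1; // base case: the last cluster stands alone
  for (int64_t i = N - 2; i >= 0; --i) {
    // Baseline: put Clusters[i] into a partition of its own.
    MinParts[i] = MinParts[i + 1] + 1;
    // Try to end the first partition at every later cluster j.
    for (int64_t j = N - 1; j > i; --j) {
      uint64_t Range = C[j].High - C[i].Low + 1;
      uint64_t NumCases = 0;
      for (int64_t k = i; k <= j; ++k)
        NumCases += C[k].High - C[k].Low + 1;
      if (!suitableForJumpTable(NumCases, Range))
        continue;
      unsigned Parts = 1 + (j == N - 1 ? 0 : MinParts[j + 1]);
      if (Parts < MinParts[i])
        MinParts[i] = Parts;
    }
  }
  return MinParts[0];
}

int main() {
  std::vector<Cluster> C = {{0, 0}, {1, 1}, {2, 2}, {3, 3}, {100, 100}};
  std::printf("partitions: %u\n", minPartitions(C)); // 2: table for 0..3, then 100
}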
@@ -8385,8 +9670,8 @@ } // The false probability is the sum of all unhandled cases. - CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Prob, - UnhandledProbs); + CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, + getCurSDLoc(), I->Prob, UnhandledProbs); if (CurMBB == SwitchMBB) visitSwitchCase(CB, SwitchMBB); @@ -8442,7 +9727,7 @@ I++; } - for (;;) { + while (true) { // Our binary search tree differs from a typical BST in that ours can have up // to three values in each leaf. The pivot selection above doesn't take that // into account, which means the tree might require more nodes and be less @@ -8537,7 +9822,7 @@ // Create the CaseBlock record that will be used to lower the branch. CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB, - LeftProb, RightProb); + getCurSDLoc(), LeftProb, RightProb); if (W.MBB == SwitchMBB) visitSwitchCase(CB, SwitchMBB); @@ -8640,7 +9925,8 @@ WorkList.pop_back(); unsigned NumClusters = W.LastCluster - W.FirstCluster + 1; - if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None) { + if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None && + !DefaultMBB->getParent()->getFunction()->optForMinSize()) { // For optimized builds, lower large range as a balanced binary tree. splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB); continue;
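The final hunks of this file thread getCurSDLoc() through the CaseBlock constructors and make the balanced-binary-tree split conditional on the function not being optimized for minimum size. A minimal sketch of that dispatch predicate, with OptLevel and the boolean parameters standing in for TM.getOptLevel(), CodeGenOpt::None and Function::optForMinSize():

// Sketch of the switch-lowering dispatch condition added above; the types
// here are placeholders for the SelectionDAG/TargetMachine ones.
#include <cstddef>
#include <cstdio>

enum class OptLevel { None, Default };

// Split a switch work item into a balanced binary tree of range checks only
// when there are more than three clusters, optimization is enabled, and the
// function is not being optimized for minimum size.
bool shouldSplitSwitchWorkItem(std::size_t NumClusters, OptLevel Level,
                               bool OptForMinSize) {
  return NumClusters > 3 && Level != OptLevel::None && !OptForMinSize;
}

int main() {
  std::printf("%d\n", shouldSplitSwitchWorkItem(8, OptLevel::Default, false)); // 1
  std::printf("%d\n", shouldSplitSwitchWorkItem(8, OptLevel::Default, true));  // 0
  return 0;
}

The exemption presumably reflects that the tree's extra compare-and-branch nodes cost code size, so minsize builds fall back to the simpler lowering paths.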
--- a/lib/Transforms/IPO/MergeFunctions.cpp Thu Nov 30 20:04:56 2017 +0900 +++ b/lib/Transforms/IPO/MergeFunctions.cpp Sun Dec 03 20:09:16 2017 +0900 @@ -89,29 +89,45 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/Hashing.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" #include "llvm/IR/ValueMap.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Utils/FunctionComparator.h" +#include <algorithm> +#include <cassert> +#include <iterator> +#include <set> +#include <utility> #include <vector> using namespace llvm; @@ -120,7 +136,6 @@ STATISTIC(NumFunctionsMerged, "Number of functions merged"); STATISTIC(NumThunksWritten, "Number of thunks generated"); -STATISTIC(NumAliasesWritten, "Number of aliases generated"); STATISTIC(NumDoubleWeak, "Number of new functions created"); static cl::opt<unsigned> NumFunctionsForSanityCheck( @@ -130,334 +145,37 @@ "'0' disables this check. Works only with '-debug' key."), cl::init(0), cl::Hidden); -namespace { - -/// GlobalNumberState assigns an integer to each global value in the program, -/// which is used by the comparison routine to order references to globals. This -/// state must be preserved throughout the pass, because Functions and other -/// globals need to maintain their relative order. Globals are assigned a number -/// when they are first visited. This order is deterministic, and so the -/// assigned numbers are as well. When two functions are merged, neither number -/// is updated. If the symbols are weak, this would be incorrect. If they are -/// strong, then one will be replaced at all references to the other, and so -/// direct callsites will now see one or the other symbol, and no update is -/// necessary. Note that if we were guaranteed unique names, we could just -/// compare those, but this would not work for stripped bitcodes or for those -/// few symbols without a name. -class GlobalNumberState { - struct Config : ValueMapConfig<GlobalValue*> { - enum { FollowRAUW = false }; - }; - // Each GlobalValue is mapped to an identifier. The Config ensures when RAUW - // occurs, the mapping does not change. Tracking changes is unnecessary, and - // also problematic for weak symbols (which may be overwritten). 
- typedef ValueMap<GlobalValue *, uint64_t, Config> ValueNumberMap; - ValueNumberMap GlobalNumbers; - // The next unused serial number to assign to a global. - uint64_t NextNumber; - public: - GlobalNumberState() : GlobalNumbers(), NextNumber(0) {} - uint64_t getNumber(GlobalValue* Global) { - ValueNumberMap::iterator MapIter; - bool Inserted; - std::tie(MapIter, Inserted) = GlobalNumbers.insert({Global, NextNumber}); - if (Inserted) - NextNumber++; - return MapIter->second; - } - void clear() { - GlobalNumbers.clear(); - } -}; - -/// FunctionComparator - Compares two functions to determine whether or not -/// they will generate machine code with the same behaviour. DataLayout is -/// used if available. The comparator always fails conservatively (erring on the -/// side of claiming that two functions are different). -class FunctionComparator { -public: - FunctionComparator(const Function *F1, const Function *F2, - GlobalNumberState* GN) - : FnL(F1), FnR(F2), GlobalNumbers(GN) {} - - /// Test whether the two functions have equivalent behaviour. - int compare(); - /// Hash a function. Equivalent functions will have the same hash, and unequal - /// functions will have different hashes with high probability. - typedef uint64_t FunctionHash; - static FunctionHash functionHash(Function &); - -private: - /// Test whether two basic blocks have equivalent behaviour. - int cmpBasicBlocks(const BasicBlock *BBL, const BasicBlock *BBR); +// Under option -mergefunc-preserve-debug-info we: +// - Do not create a new function for a thunk. +// - Retain the debug info for a thunk's parameters (and associated +// instructions for the debug info) from the entry block. +// Note: -debug will display the algorithm at work. +// - Create debug-info for the call (to the shared implementation) made by +// a thunk and its return value. +// - Erase the rest of the function, retaining the (minimally sized) entry +// block to create a thunk. +// - Preserve a thunk's call site to point to the thunk even when both occur +// within the same translation unit, to aid debugability. Note that this +// behaviour differs from the underlying -mergefunc implementation which +// modifies the thunk's call site to point to the shared implementation +// when both occur within the same translation unit. +static cl::opt<bool> + MergeFunctionsPDI("mergefunc-preserve-debug-info", cl::Hidden, + cl::init(false), + cl::desc("Preserve debug info in thunk when mergefunc " + "transformations are made.")); - /// Constants comparison. - /// Its analog to lexicographical comparison between hypothetical numbers - /// of next format: - /// <bitcastability-trait><raw-bit-contents> - /// - /// 1. Bitcastability. - /// Check whether L's type could be losslessly bitcasted to R's type. - /// On this stage method, in case when lossless bitcast is not possible - /// method returns -1 or 1, thus also defining which type is greater in - /// context of bitcastability. - /// Stage 0: If types are equal in terms of cmpTypes, then we can go straight - /// to the contents comparison. - /// If types differ, remember types comparison result and check - /// whether we still can bitcast types. - /// Stage 1: Types that satisfies isFirstClassType conditions are always - /// greater then others. - /// Stage 2: Vector is greater then non-vector. - /// If both types are vectors, then vector with greater bitwidth is - /// greater. 
- /// If both types are vectors with the same bitwidth, then types - /// are bitcastable, and we can skip other stages, and go to contents - /// comparison. - /// Stage 3: Pointer types are greater than non-pointers. If both types are - /// pointers of the same address space - go to contents comparison. - /// Different address spaces: pointer with greater address space is - /// greater. - /// Stage 4: Types are neither vectors, nor pointers. And they differ. - /// We don't know how to bitcast them. So, we better don't do it, - /// and return types comparison result (so it determines the - /// relationship among constants we don't know how to bitcast). - /// - /// Just for clearance, let's see how the set of constants could look - /// on single dimension axis: - /// - /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors] - /// Where: NFCT - Not a FirstClassType - /// FCT - FirstClassTyp: - /// - /// 2. Compare raw contents. - /// It ignores types on this stage and only compares bits from L and R. - /// Returns 0, if L and R has equivalent contents. - /// -1 or 1 if values are different. - /// Pretty trivial: - /// 2.1. If contents are numbers, compare numbers. - /// Ints with greater bitwidth are greater. Ints with same bitwidths - /// compared by their contents. - /// 2.2. "And so on". Just to avoid discrepancies with comments - /// perhaps it would be better to read the implementation itself. - /// 3. And again about overall picture. Let's look back at how the ordered set - /// of constants will look like: - /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors] - /// - /// Now look, what could be inside [FCT, "others"], for example: - /// [FCT, "others"] = - /// [ - /// [double 0.1], [double 1.23], - /// [i32 1], [i32 2], - /// { double 1.0 }, ; StructTyID, NumElements = 1 - /// { i32 1 }, ; StructTyID, NumElements = 1 - /// { double 1, i32 1 }, ; StructTyID, NumElements = 2 - /// { i32 1, double 1 } ; StructTyID, NumElements = 2 - /// ] - /// - /// Let's explain the order. Float numbers will be less than integers, just - /// because of cmpType terms: FloatTyID < IntegerTyID. - /// Floats (with same fltSemantics) are sorted according to their value. - /// Then you can see integers, and they are, like a floats, - /// could be easy sorted among each others. - /// The structures. Structures are grouped at the tail, again because of their - /// TypeID: StructTyID > IntegerTyID > FloatTyID. - /// Structures with greater number of elements are greater. Structures with - /// greater elements going first are greater. - /// The same logic with vectors, arrays and other possible complex types. - /// - /// Bitcastable constants. - /// Let's assume, that some constant, belongs to some group of - /// "so-called-equal" values with different types, and at the same time - /// belongs to another group of constants with equal types - /// and "really" equal values. - /// - /// Now, prove that this is impossible: - /// - /// If constant A with type TyA is bitcastable to B with type TyB, then: - /// 1. All constants with equal types to TyA, are bitcastable to B. Since - /// those should be vectors (if TyA is vector), pointers - /// (if TyA is pointer), or else (if TyA equal to TyB), those types should - /// be equal to TyB. - /// 2. All constants with non-equal, but bitcastable types to TyA, are - /// bitcastable to B. - /// Once again, just because we allow it to vectors and pointers only. - /// This statement could be expanded as below: - /// 2.1. 
All vectors with equal bitwidth to vector A, has equal bitwidth to - /// vector B, and thus bitcastable to B as well. - /// 2.2. All pointers of the same address space, no matter what they point to, - /// bitcastable. So if C is pointer, it could be bitcasted to A and to B. - /// So any constant equal or bitcastable to A is equal or bitcastable to B. - /// QED. - /// - /// In another words, for pointers and vectors, we ignore top-level type and - /// look at their particular properties (bit-width for vectors, and - /// address space for pointers). - /// If these properties are equal - compare their contents. - int cmpConstants(const Constant *L, const Constant *R); - - /// Compares two global values by number. Uses the GlobalNumbersState to - /// identify the same gobals across function calls. - int cmpGlobalValues(GlobalValue *L, GlobalValue *R); - - /// Assign or look up previously assigned numbers for the two values, and - /// return whether the numbers are equal. Numbers are assigned in the order - /// visited. - /// Comparison order: - /// Stage 0: Value that is function itself is always greater then others. - /// If left and right values are references to their functions, then - /// they are equal. - /// Stage 1: Constants are greater than non-constants. - /// If both left and right are constants, then the result of - /// cmpConstants is used as cmpValues result. - /// Stage 2: InlineAsm instances are greater than others. If both left and - /// right are InlineAsm instances, InlineAsm* pointers casted to - /// integers and compared as numbers. - /// Stage 3: For all other cases we compare order we meet these values in - /// their functions. If right value was met first during scanning, - /// then left value is greater. - /// In another words, we compare serial numbers, for more details - /// see comments for sn_mapL and sn_mapR. - int cmpValues(const Value *L, const Value *R); - - /// Compare two Instructions for equivalence, similar to - /// Instruction::isSameOperationAs but with modifications to the type - /// comparison. - /// Stages are listed in "most significant stage first" order: - /// On each stage below, we do comparison between some left and right - /// operation parts. If parts are non-equal, we assign parts comparison - /// result to the operation comparison result and exit from method. - /// Otherwise we proceed to the next stage. - /// Stages: - /// 1. Operations opcodes. Compared as numbers. - /// 2. Number of operands. - /// 3. Operation types. Compared with cmpType method. - /// 4. Compare operation subclass optional data as stream of bytes: - /// just convert it to integers and call cmpNumbers. - /// 5. Compare in operation operand types with cmpType in - /// most significant operand first order. - /// 6. Last stage. Check operations for some specific attributes. - /// For example, for Load it would be: - /// 6.1.Load: volatile (as boolean flag) - /// 6.2.Load: alignment (as integer numbers) - /// 6.3.Load: synch-scope (as integer numbers) - /// 6.4.Load: range metadata (as integer numbers) - /// On this stage its better to see the code, since its not more than 10-15 - /// strings for particular instruction, and could change sometimes. - int cmpOperations(const Instruction *L, const Instruction *R) const; - - /// Compare two GEPs for equivalent pointer arithmetic. - /// Parts to be compared for each comparison stage, - /// most significant stage first: - /// 1. Address space. As numbers. - /// 2. 
Constant offset, (using GEPOperator::accumulateConstantOffset method). - /// 3. Pointer operand type (using cmpType method). - /// 4. Number of operands. - /// 5. Compare operands, using cmpValues method. - int cmpGEPs(const GEPOperator *GEPL, const GEPOperator *GEPR); - int cmpGEPs(const GetElementPtrInst *GEPL, const GetElementPtrInst *GEPR) { - return cmpGEPs(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR)); - } - - /// cmpType - compares two types, - /// defines total ordering among the types set. - /// - /// Return values: - /// 0 if types are equal, - /// -1 if Left is less than Right, - /// +1 if Left is greater than Right. - /// - /// Description: - /// Comparison is broken onto stages. Like in lexicographical comparison - /// stage coming first has higher priority. - /// On each explanation stage keep in mind total ordering properties. - /// - /// 0. Before comparison we coerce pointer types of 0 address space to - /// integer. - /// We also don't bother with same type at left and right, so - /// just return 0 in this case. - /// - /// 1. If types are of different kind (different type IDs). - /// Return result of type IDs comparison, treating them as numbers. - /// 2. If types are integers, check that they have the same width. If they - /// are vectors, check that they have the same count and subtype. - /// 3. Types have the same ID, so check whether they are one of: - /// * Void - /// * Float - /// * Double - /// * X86_FP80 - /// * FP128 - /// * PPC_FP128 - /// * Label - /// * Metadata - /// We can treat these types as equal whenever their IDs are same. - /// 4. If Left and Right are pointers, return result of address space - /// comparison (numbers comparison). We can treat pointer types of same - /// address space as equal. - /// 5. If types are complex. - /// Then both Left and Right are to be expanded and their element types will - /// be checked with the same way. If we get Res != 0 on some stage, return it. - /// Otherwise return 0. - /// 6. For all other cases put llvm_unreachable. - int cmpTypes(Type *TyL, Type *TyR) const; - - int cmpNumbers(uint64_t L, uint64_t R) const; - int cmpAPInts(const APInt &L, const APInt &R) const; - int cmpAPFloats(const APFloat &L, const APFloat &R) const; - int cmpInlineAsm(const InlineAsm *L, const InlineAsm *R) const; - int cmpMem(StringRef L, StringRef R) const; - int cmpAttrs(const AttributeSet L, const AttributeSet R) const; - int cmpRangeMetadata(const MDNode* L, const MDNode* R) const; - int cmpOperandBundlesSchema(const Instruction *L, const Instruction *R) const; - - // The two functions undergoing comparison. - const Function *FnL, *FnR; - - /// Assign serial numbers to values from left function, and values from - /// right function. - /// Explanation: - /// Being comparing functions we need to compare values we meet at left and - /// right sides. - /// Its easy to sort things out for external values. It just should be - /// the same value at left and right. - /// But for local values (those were introduced inside function body) - /// we have to ensure they were introduced at exactly the same place, - /// and plays the same role. - /// Let's assign serial number to each value when we meet it first time. - /// Values that were met at same place will be with same serial numbers. - /// In this case it would be good to explain few points about values assigned - /// to BBs and other ways of implementation (see below). - /// - /// 1. Safety of BB reordering. - /// It's safe to change the order of BasicBlocks in function. 
- /// Relationship with other functions and serial numbering will not be - /// changed in this case. - /// As follows from FunctionComparator::compare(), we do CFG walk: we start - /// from the entry, and then take each terminator. So it doesn't matter how in - /// fact BBs are ordered in function. And since cmpValues are called during - /// this walk, the numbering depends only on how BBs located inside the CFG. - /// So the answer is - yes. We will get the same numbering. - /// - /// 2. Impossibility to use dominance properties of values. - /// If we compare two instruction operands: first is usage of local - /// variable AL from function FL, and second is usage of local variable AR - /// from FR, we could compare their origins and check whether they are - /// defined at the same place. - /// But, we are still not able to compare operands of PHI nodes, since those - /// could be operands from further BBs we didn't scan yet. - /// So it's impossible to use dominance properties in general. - DenseMap<const Value*, int> sn_mapL, sn_mapR; - - // The global state we will use - GlobalNumberState* GlobalNumbers; -}; +namespace { class FunctionNode { mutable AssertingVH<Function> F; FunctionComparator::FunctionHash Hash; + public: // Note the hash is recalculated potentially multiple times, but it is cheap. FunctionNode(Function *F) : F(F), Hash(FunctionComparator::functionHash(*F)) {} + Function *getFunc() const { return F; } FunctionComparator::FunctionHash getHash() const { return Hash; } @@ -469,894 +187,17 @@ void release() { F = nullptr; } }; -} // end anonymous namespace - -int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const { - if (L < R) return -1; - if (L > R) return 1; - return 0; -} - -int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const { - if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth())) - return Res; - if (L.ugt(R)) return 1; - if (R.ugt(L)) return -1; - return 0; -} - -int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const { - // Floats are ordered first by semantics (i.e. float, double, half, etc.), - // then by value interpreted as a bitstring (aka APInt). - const fltSemantics &SL = L.getSemantics(), &SR = R.getSemantics(); - if (int Res = cmpNumbers(APFloat::semanticsPrecision(SL), - APFloat::semanticsPrecision(SR))) - return Res; - if (int Res = cmpNumbers(APFloat::semanticsMaxExponent(SL), - APFloat::semanticsMaxExponent(SR))) - return Res; - if (int Res = cmpNumbers(APFloat::semanticsMinExponent(SL), - APFloat::semanticsMinExponent(SR))) - return Res; - if (int Res = cmpNumbers(APFloat::semanticsSizeInBits(SL), - APFloat::semanticsSizeInBits(SR))) - return Res; - return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt()); -} - -int FunctionComparator::cmpMem(StringRef L, StringRef R) const { - // Prevent heavy comparison, compare sizes first. - if (int Res = cmpNumbers(L.size(), R.size())) - return Res; - - // Compare strings lexicographically only when it is necessary: only when - // strings are equal in size. 
- return L.compare(R); -} - -int FunctionComparator::cmpAttrs(const AttributeSet L, - const AttributeSet R) const { - if (int Res = cmpNumbers(L.getNumSlots(), R.getNumSlots())) - return Res; - - for (unsigned i = 0, e = L.getNumSlots(); i != e; ++i) { - AttributeSet::iterator LI = L.begin(i), LE = L.end(i), RI = R.begin(i), - RE = R.end(i); - for (; LI != LE && RI != RE; ++LI, ++RI) { - Attribute LA = *LI; - Attribute RA = *RI; - if (LA < RA) - return -1; - if (RA < LA) - return 1; - } - if (LI != LE) - return 1; - if (RI != RE) - return -1; - } - return 0; -} - -int FunctionComparator::cmpRangeMetadata(const MDNode* L, - const MDNode* R) const { - if (L == R) - return 0; - if (!L) - return -1; - if (!R) - return 1; - // Range metadata is a sequence of numbers. Make sure they are the same - // sequence. - // TODO: Note that as this is metadata, it is possible to drop and/or merge - // this data when considering functions to merge. Thus this comparison would - // return 0 (i.e. equivalent), but merging would become more complicated - // because the ranges would need to be unioned. It is not likely that - // functions differ ONLY in this metadata if they are actually the same - // function semantically. - if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands())) - return Res; - for (size_t I = 0; I < L->getNumOperands(); ++I) { - ConstantInt* LLow = mdconst::extract<ConstantInt>(L->getOperand(I)); - ConstantInt* RLow = mdconst::extract<ConstantInt>(R->getOperand(I)); - if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue())) - return Res; - } - return 0; -} - -int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L, - const Instruction *R) const { - ImmutableCallSite LCS(L); - ImmutableCallSite RCS(R); - - assert(LCS && RCS && "Must be calls or invokes!"); - assert(LCS.isCall() == RCS.isCall() && "Can't compare otherwise!"); - - if (int Res = - cmpNumbers(LCS.getNumOperandBundles(), RCS.getNumOperandBundles())) - return Res; - - for (unsigned i = 0, e = LCS.getNumOperandBundles(); i != e; ++i) { - auto OBL = LCS.getOperandBundleAt(i); - auto OBR = RCS.getOperandBundleAt(i); - - if (int Res = OBL.getTagName().compare(OBR.getTagName())) - return Res; - - if (int Res = cmpNumbers(OBL.Inputs.size(), OBR.Inputs.size())) - return Res; - } - - return 0; -} - -/// Constants comparison: -/// 1. Check whether type of L constant could be losslessly bitcasted to R -/// type. -/// 2. Compare constant contents. -/// For more details see declaration comments. -int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) { - - Type *TyL = L->getType(); - Type *TyR = R->getType(); - - // Check whether types are bitcastable. This part is just re-factored - // Type::canLosslesslyBitCastTo method, but instead of returning true/false, - // we also pack into result which type is "less" for us. - int TypesRes = cmpTypes(TyL, TyR); - if (TypesRes != 0) { - // Types are different, but check whether we can bitcast them. - if (!TyL->isFirstClassType()) { - if (TyR->isFirstClassType()) - return -1; - // Neither TyL nor TyR are values of first class type. Return the result - // of comparing the types - return TypesRes; - } - if (!TyR->isFirstClassType()) { - if (TyL->isFirstClassType()) - return 1; - return TypesRes; - } - - // Vector -> Vector conversions are always lossless if the two vector types - // have the same size, otherwise not. 
- unsigned TyLWidth = 0; - unsigned TyRWidth = 0; - - if (auto *VecTyL = dyn_cast<VectorType>(TyL)) - TyLWidth = VecTyL->getBitWidth(); - if (auto *VecTyR = dyn_cast<VectorType>(TyR)) - TyRWidth = VecTyR->getBitWidth(); - - if (TyLWidth != TyRWidth) - return cmpNumbers(TyLWidth, TyRWidth); - - // Zero bit-width means neither TyL nor TyR are vectors. - if (!TyLWidth) { - PointerType *PTyL = dyn_cast<PointerType>(TyL); - PointerType *PTyR = dyn_cast<PointerType>(TyR); - if (PTyL && PTyR) { - unsigned AddrSpaceL = PTyL->getAddressSpace(); - unsigned AddrSpaceR = PTyR->getAddressSpace(); - if (int Res = cmpNumbers(AddrSpaceL, AddrSpaceR)) - return Res; - } - if (PTyL) - return 1; - if (PTyR) - return -1; - - // TyL and TyR aren't vectors, nor pointers. We don't know how to - // bitcast them. - return TypesRes; - } - } - - // OK, types are bitcastable, now check constant contents. - - if (L->isNullValue() && R->isNullValue()) - return TypesRes; - if (L->isNullValue() && !R->isNullValue()) - return 1; - if (!L->isNullValue() && R->isNullValue()) - return -1; - - auto GlobalValueL = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(L)); - auto GlobalValueR = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(R)); - if (GlobalValueL && GlobalValueR) { - return cmpGlobalValues(GlobalValueL, GlobalValueR); - } - - if (int Res = cmpNumbers(L->getValueID(), R->getValueID())) - return Res; - - if (const auto *SeqL = dyn_cast<ConstantDataSequential>(L)) { - const auto *SeqR = cast<ConstantDataSequential>(R); - // This handles ConstantDataArray and ConstantDataVector. Note that we - // compare the two raw data arrays, which might differ depending on the host - // endianness. This isn't a problem though, because the endiness of a module - // will affect the order of the constants, but this order is the same - // for a given input module and host platform. 
- return cmpMem(SeqL->getRawDataValues(), SeqR->getRawDataValues()); - } - - switch (L->getValueID()) { - case Value::UndefValueVal: - case Value::ConstantTokenNoneVal: - return TypesRes; - case Value::ConstantIntVal: { - const APInt &LInt = cast<ConstantInt>(L)->getValue(); - const APInt &RInt = cast<ConstantInt>(R)->getValue(); - return cmpAPInts(LInt, RInt); - } - case Value::ConstantFPVal: { - const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF(); - const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF(); - return cmpAPFloats(LAPF, RAPF); - } - case Value::ConstantArrayVal: { - const ConstantArray *LA = cast<ConstantArray>(L); - const ConstantArray *RA = cast<ConstantArray>(R); - uint64_t NumElementsL = cast<ArrayType>(TyL)->getNumElements(); - uint64_t NumElementsR = cast<ArrayType>(TyR)->getNumElements(); - if (int Res = cmpNumbers(NumElementsL, NumElementsR)) - return Res; - for (uint64_t i = 0; i < NumElementsL; ++i) { - if (int Res = cmpConstants(cast<Constant>(LA->getOperand(i)), - cast<Constant>(RA->getOperand(i)))) - return Res; - } - return 0; - } - case Value::ConstantStructVal: { - const ConstantStruct *LS = cast<ConstantStruct>(L); - const ConstantStruct *RS = cast<ConstantStruct>(R); - unsigned NumElementsL = cast<StructType>(TyL)->getNumElements(); - unsigned NumElementsR = cast<StructType>(TyR)->getNumElements(); - if (int Res = cmpNumbers(NumElementsL, NumElementsR)) - return Res; - for (unsigned i = 0; i != NumElementsL; ++i) { - if (int Res = cmpConstants(cast<Constant>(LS->getOperand(i)), - cast<Constant>(RS->getOperand(i)))) - return Res; - } - return 0; - } - case Value::ConstantVectorVal: { - const ConstantVector *LV = cast<ConstantVector>(L); - const ConstantVector *RV = cast<ConstantVector>(R); - unsigned NumElementsL = cast<VectorType>(TyL)->getNumElements(); - unsigned NumElementsR = cast<VectorType>(TyR)->getNumElements(); - if (int Res = cmpNumbers(NumElementsL, NumElementsR)) - return Res; - for (uint64_t i = 0; i < NumElementsL; ++i) { - if (int Res = cmpConstants(cast<Constant>(LV->getOperand(i)), - cast<Constant>(RV->getOperand(i)))) - return Res; - } - return 0; - } - case Value::ConstantExprVal: { - const ConstantExpr *LE = cast<ConstantExpr>(L); - const ConstantExpr *RE = cast<ConstantExpr>(R); - unsigned NumOperandsL = LE->getNumOperands(); - unsigned NumOperandsR = RE->getNumOperands(); - if (int Res = cmpNumbers(NumOperandsL, NumOperandsR)) - return Res; - for (unsigned i = 0; i < NumOperandsL; ++i) { - if (int Res = cmpConstants(cast<Constant>(LE->getOperand(i)), - cast<Constant>(RE->getOperand(i)))) - return Res; - } - return 0; - } - case Value::BlockAddressVal: { - const BlockAddress *LBA = cast<BlockAddress>(L); - const BlockAddress *RBA = cast<BlockAddress>(R); - if (int Res = cmpValues(LBA->getFunction(), RBA->getFunction())) - return Res; - if (LBA->getFunction() == RBA->getFunction()) { - // They are BBs in the same function. Order by which comes first in the - // BB order of the function. This order is deterministic. - Function* F = LBA->getFunction(); - BasicBlock *LBB = LBA->getBasicBlock(); - BasicBlock *RBB = RBA->getBasicBlock(); - if (LBB == RBB) - return 0; - for(BasicBlock &BB : F->getBasicBlockList()) { - if (&BB == LBB) { - assert(&BB != RBB); - return -1; - } - if (&BB == RBB) - return 1; - } - llvm_unreachable("Basic Block Address does not point to a basic block in " - "its function."); - return -1; - } else { - // cmpValues said the functions are the same. 
So because they aren't - // literally the same pointer, they must respectively be the left and - // right functions. - assert(LBA->getFunction() == FnL && RBA->getFunction() == FnR); - // cmpValues will tell us if these are equivalent BasicBlocks, in the - // context of their respective functions. - return cmpValues(LBA->getBasicBlock(), RBA->getBasicBlock()); - } - } - default: // Unknown constant, abort. - DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n"); - llvm_unreachable("Constant ValueID not recognized."); - return -1; - } -} - -int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue* R) { - return cmpNumbers(GlobalNumbers->getNumber(L), GlobalNumbers->getNumber(R)); -} - -/// cmpType - compares two types, -/// defines total ordering among the types set. -/// See method declaration comments for more details. -int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const { - PointerType *PTyL = dyn_cast<PointerType>(TyL); - PointerType *PTyR = dyn_cast<PointerType>(TyR); - - const DataLayout &DL = FnL->getParent()->getDataLayout(); - if (PTyL && PTyL->getAddressSpace() == 0) - TyL = DL.getIntPtrType(TyL); - if (PTyR && PTyR->getAddressSpace() == 0) - TyR = DL.getIntPtrType(TyR); - - if (TyL == TyR) - return 0; - - if (int Res = cmpNumbers(TyL->getTypeID(), TyR->getTypeID())) - return Res; - - switch (TyL->getTypeID()) { - default: - llvm_unreachable("Unknown type!"); - // Fall through in Release mode. - case Type::IntegerTyID: - return cmpNumbers(cast<IntegerType>(TyL)->getBitWidth(), - cast<IntegerType>(TyR)->getBitWidth()); - case Type::VectorTyID: { - VectorType *VTyL = cast<VectorType>(TyL), *VTyR = cast<VectorType>(TyR); - if (int Res = cmpNumbers(VTyL->getNumElements(), VTyR->getNumElements())) - return Res; - return cmpTypes(VTyL->getElementType(), VTyR->getElementType()); - } - // TyL == TyR would have returned true earlier, because types are uniqued. 
- case Type::VoidTyID: - case Type::FloatTyID: - case Type::DoubleTyID: - case Type::X86_FP80TyID: - case Type::FP128TyID: - case Type::PPC_FP128TyID: - case Type::LabelTyID: - case Type::MetadataTyID: -#ifndef noCbC - case Type::__CodeTyID: -#endif - return 0; - - case Type::PointerTyID: { - assert(PTyL && PTyR && "Both types must be pointers here."); - return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace()); - } - - case Type::StructTyID: { - StructType *STyL = cast<StructType>(TyL); - StructType *STyR = cast<StructType>(TyR); - if (STyL->getNumElements() != STyR->getNumElements()) - return cmpNumbers(STyL->getNumElements(), STyR->getNumElements()); - - if (STyL->isPacked() != STyR->isPacked()) - return cmpNumbers(STyL->isPacked(), STyR->isPacked()); - - for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) { - if (int Res = cmpTypes(STyL->getElementType(i), STyR->getElementType(i))) - return Res; - } - return 0; - } - - case Type::FunctionTyID: { - FunctionType *FTyL = cast<FunctionType>(TyL); - FunctionType *FTyR = cast<FunctionType>(TyR); - if (FTyL->getNumParams() != FTyR->getNumParams()) - return cmpNumbers(FTyL->getNumParams(), FTyR->getNumParams()); - - if (FTyL->isVarArg() != FTyR->isVarArg()) - return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg()); - - if (int Res = cmpTypes(FTyL->getReturnType(), FTyR->getReturnType())) - return Res; - - for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) { - if (int Res = cmpTypes(FTyL->getParamType(i), FTyR->getParamType(i))) - return Res; - } - return 0; - } - - case Type::ArrayTyID: { - ArrayType *ATyL = cast<ArrayType>(TyL); - ArrayType *ATyR = cast<ArrayType>(TyR); - if (ATyL->getNumElements() != ATyR->getNumElements()) - return cmpNumbers(ATyL->getNumElements(), ATyR->getNumElements()); - return cmpTypes(ATyL->getElementType(), ATyR->getElementType()); - } - } -} - -// Determine whether the two operations are the same except that pointer-to-A -// and pointer-to-B are equivalent. This should be kept in sync with -// Instruction::isSameOperationAs. -// Read method declaration comments for more details. -int FunctionComparator::cmpOperations(const Instruction *L, - const Instruction *R) const { - // Differences from Instruction::isSameOperationAs: - // * replace type comparison with calls to isEquivalentType. - // * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top - // * because of the above, we don't test for the tail bit on calls later on - if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode())) - return Res; - - if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands())) - return Res; - - if (int Res = cmpTypes(L->getType(), R->getType())) - return Res; - - if (int Res = cmpNumbers(L->getRawSubclassOptionalData(), - R->getRawSubclassOptionalData())) - return Res; - - if (const AllocaInst *AI = dyn_cast<AllocaInst>(L)) { - if (int Res = cmpTypes(AI->getAllocatedType(), - cast<AllocaInst>(R)->getAllocatedType())) - return Res; - if (int Res = - cmpNumbers(AI->getAlignment(), cast<AllocaInst>(R)->getAlignment())) - return Res; - } - - // We have two instructions of identical opcode and #operands. Check to see - // if all operands are the same type - for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) { - if (int Res = - cmpTypes(L->getOperand(i)->getType(), R->getOperand(i)->getType())) - return Res; - } - - // Check special state that is a part of some instructions. 
- if (const LoadInst *LI = dyn_cast<LoadInst>(L)) { - if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile())) - return Res; - if (int Res = - cmpNumbers(LI->getAlignment(), cast<LoadInst>(R)->getAlignment())) - return Res; - if (int Res = - cmpNumbers(LI->getOrdering(), cast<LoadInst>(R)->getOrdering())) - return Res; - if (int Res = - cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope())) - return Res; - return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range), - cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range)); - } - if (const StoreInst *SI = dyn_cast<StoreInst>(L)) { - if (int Res = - cmpNumbers(SI->isVolatile(), cast<StoreInst>(R)->isVolatile())) - return Res; - if (int Res = - cmpNumbers(SI->getAlignment(), cast<StoreInst>(R)->getAlignment())) - return Res; - if (int Res = - cmpNumbers(SI->getOrdering(), cast<StoreInst>(R)->getOrdering())) - return Res; - return cmpNumbers(SI->getSynchScope(), cast<StoreInst>(R)->getSynchScope()); - } - if (const CmpInst *CI = dyn_cast<CmpInst>(L)) - return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate()); - if (const CallInst *CI = dyn_cast<CallInst>(L)) { - if (int Res = cmpNumbers(CI->getCallingConv(), - cast<CallInst>(R)->getCallingConv())) - return Res; - if (int Res = - cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes())) - return Res; - if (int Res = cmpOperandBundlesSchema(CI, R)) - return Res; - return cmpRangeMetadata( - CI->getMetadata(LLVMContext::MD_range), - cast<CallInst>(R)->getMetadata(LLVMContext::MD_range)); - } - if (const InvokeInst *II = dyn_cast<InvokeInst>(L)) { - if (int Res = cmpNumbers(II->getCallingConv(), - cast<InvokeInst>(R)->getCallingConv())) - return Res; - if (int Res = - cmpAttrs(II->getAttributes(), cast<InvokeInst>(R)->getAttributes())) - return Res; - if (int Res = cmpOperandBundlesSchema(II, R)) - return Res; - return cmpRangeMetadata( - II->getMetadata(LLVMContext::MD_range), - cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range)); - } - if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) { - ArrayRef<unsigned> LIndices = IVI->getIndices(); - ArrayRef<unsigned> RIndices = cast<InsertValueInst>(R)->getIndices(); - if (int Res = cmpNumbers(LIndices.size(), RIndices.size())) - return Res; - for (size_t i = 0, e = LIndices.size(); i != e; ++i) { - if (int Res = cmpNumbers(LIndices[i], RIndices[i])) - return Res; - } - } - if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(L)) { - ArrayRef<unsigned> LIndices = EVI->getIndices(); - ArrayRef<unsigned> RIndices = cast<ExtractValueInst>(R)->getIndices(); - if (int Res = cmpNumbers(LIndices.size(), RIndices.size())) - return Res; - for (size_t i = 0, e = LIndices.size(); i != e; ++i) { - if (int Res = cmpNumbers(LIndices[i], RIndices[i])) - return Res; - } - } - if (const FenceInst *FI = dyn_cast<FenceInst>(L)) { - if (int Res = - cmpNumbers(FI->getOrdering(), cast<FenceInst>(R)->getOrdering())) - return Res; - return cmpNumbers(FI->getSynchScope(), cast<FenceInst>(R)->getSynchScope()); - } - - if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) { - if (int Res = cmpNumbers(CXI->isVolatile(), - cast<AtomicCmpXchgInst>(R)->isVolatile())) - return Res; - if (int Res = cmpNumbers(CXI->isWeak(), - cast<AtomicCmpXchgInst>(R)->isWeak())) - return Res; - if (int Res = cmpNumbers(CXI->getSuccessOrdering(), - cast<AtomicCmpXchgInst>(R)->getSuccessOrdering())) - return Res; - if (int Res = cmpNumbers(CXI->getFailureOrdering(), - 
cast<AtomicCmpXchgInst>(R)->getFailureOrdering())) - return Res; - return cmpNumbers(CXI->getSynchScope(), - cast<AtomicCmpXchgInst>(R)->getSynchScope()); - } - if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) { - if (int Res = cmpNumbers(RMWI->getOperation(), - cast<AtomicRMWInst>(R)->getOperation())) - return Res; - if (int Res = cmpNumbers(RMWI->isVolatile(), - cast<AtomicRMWInst>(R)->isVolatile())) - return Res; - if (int Res = cmpNumbers(RMWI->getOrdering(), - cast<AtomicRMWInst>(R)->getOrdering())) - return Res; - return cmpNumbers(RMWI->getSynchScope(), - cast<AtomicRMWInst>(R)->getSynchScope()); - } - return 0; -} - -// Determine whether two GEP operations perform the same underlying arithmetic. -// Read method declaration comments for more details. -int FunctionComparator::cmpGEPs(const GEPOperator *GEPL, - const GEPOperator *GEPR) { - - unsigned int ASL = GEPL->getPointerAddressSpace(); - unsigned int ASR = GEPR->getPointerAddressSpace(); - - if (int Res = cmpNumbers(ASL, ASR)) - return Res; - - // When we have target data, we can reduce the GEP down to the value in bytes - // added to the address. - const DataLayout &DL = FnL->getParent()->getDataLayout(); - unsigned BitWidth = DL.getPointerSizeInBits(ASL); - APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0); - if (GEPL->accumulateConstantOffset(DL, OffsetL) && - GEPR->accumulateConstantOffset(DL, OffsetR)) - return cmpAPInts(OffsetL, OffsetR); - if (int Res = cmpTypes(GEPL->getSourceElementType(), - GEPR->getSourceElementType())) - return Res; - - if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands())) - return Res; - - for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) { - if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i))) - return Res; - } - - return 0; -} - -int FunctionComparator::cmpInlineAsm(const InlineAsm *L, - const InlineAsm *R) const { - // InlineAsm's are uniqued. If they are the same pointer, obviously they are - // the same, otherwise compare the fields. - if (L == R) - return 0; - if (int Res = cmpTypes(L->getFunctionType(), R->getFunctionType())) - return Res; - if (int Res = cmpMem(L->getAsmString(), R->getAsmString())) - return Res; - if (int Res = cmpMem(L->getConstraintString(), R->getConstraintString())) - return Res; - if (int Res = cmpNumbers(L->hasSideEffects(), R->hasSideEffects())) - return Res; - if (int Res = cmpNumbers(L->isAlignStack(), R->isAlignStack())) - return Res; - if (int Res = cmpNumbers(L->getDialect(), R->getDialect())) - return Res; - llvm_unreachable("InlineAsm blocks were not uniqued."); - return 0; -} - -/// Compare two values used by the two functions under pair-wise comparison. If -/// this is the first time the values are seen, they're added to the mapping so -/// that we will detect mismatches on next use. -/// See comments in declaration for more details. -int FunctionComparator::cmpValues(const Value *L, const Value *R) { - // Catch self-reference case. 
- if (L == FnL) { - if (R == FnR) - return 0; - return -1; - } - if (R == FnR) { - if (L == FnL) - return 0; - return 1; - } - - const Constant *ConstL = dyn_cast<Constant>(L); - const Constant *ConstR = dyn_cast<Constant>(R); - if (ConstL && ConstR) { - if (L == R) - return 0; - return cmpConstants(ConstL, ConstR); - } - - if (ConstL) - return 1; - if (ConstR) - return -1; - - const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L); - const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R); - - if (InlineAsmL && InlineAsmR) - return cmpInlineAsm(InlineAsmL, InlineAsmR); - if (InlineAsmL) - return 1; - if (InlineAsmR) - return -1; - - auto LeftSN = sn_mapL.insert(std::make_pair(L, sn_mapL.size())), - RightSN = sn_mapR.insert(std::make_pair(R, sn_mapR.size())); - - return cmpNumbers(LeftSN.first->second, RightSN.first->second); -} -// Test whether two basic blocks have equivalent behaviour. -int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL, - const BasicBlock *BBR) { - BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end(); - BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end(); - - do { - if (int Res = cmpValues(&*InstL, &*InstR)) - return Res; - - const GetElementPtrInst *GEPL = dyn_cast<GetElementPtrInst>(InstL); - const GetElementPtrInst *GEPR = dyn_cast<GetElementPtrInst>(InstR); - - if (GEPL && !GEPR) - return 1; - if (GEPR && !GEPL) - return -1; - - if (GEPL && GEPR) { - if (int Res = - cmpValues(GEPL->getPointerOperand(), GEPR->getPointerOperand())) - return Res; - if (int Res = cmpGEPs(GEPL, GEPR)) - return Res; - } else { - if (int Res = cmpOperations(&*InstL, &*InstR)) - return Res; - assert(InstL->getNumOperands() == InstR->getNumOperands()); - - for (unsigned i = 0, e = InstL->getNumOperands(); i != e; ++i) { - Value *OpL = InstL->getOperand(i); - Value *OpR = InstR->getOperand(i); - if (int Res = cmpValues(OpL, OpR)) - return Res; - // cmpValues should ensure this is true. - assert(cmpTypes(OpL->getType(), OpR->getType()) == 0); - } - } - - ++InstL, ++InstR; - } while (InstL != InstLE && InstR != InstRE); - - if (InstL != InstLE && InstR == InstRE) - return 1; - if (InstL == InstLE && InstR != InstRE) - return -1; - return 0; -} - -// Test whether the two functions have equivalent behaviour. -int FunctionComparator::compare() { - sn_mapL.clear(); - sn_mapR.clear(); - - if (int Res = cmpAttrs(FnL->getAttributes(), FnR->getAttributes())) - return Res; - - if (int Res = cmpNumbers(FnL->hasGC(), FnR->hasGC())) - return Res; - - if (FnL->hasGC()) { - if (int Res = cmpMem(FnL->getGC(), FnR->getGC())) - return Res; - } - - if (int Res = cmpNumbers(FnL->hasSection(), FnR->hasSection())) - return Res; - - if (FnL->hasSection()) { - if (int Res = cmpMem(FnL->getSection(), FnR->getSection())) - return Res; - } - - if (int Res = cmpNumbers(FnL->isVarArg(), FnR->isVarArg())) - return Res; - - // TODO: if it's internal and only used in direct calls, we could handle this - // case too. - if (int Res = cmpNumbers(FnL->getCallingConv(), FnR->getCallingConv())) - return Res; - - if (int Res = cmpTypes(FnL->getFunctionType(), FnR->getFunctionType())) - return Res; - - assert(FnL->arg_size() == FnR->arg_size() && - "Identically typed functions have different numbers of args!"); - - // Visit the arguments so that they get enumerated in the order they're - // passed in. 
- for (Function::const_arg_iterator ArgLI = FnL->arg_begin(), - ArgRI = FnR->arg_begin(), - ArgLE = FnL->arg_end(); - ArgLI != ArgLE; ++ArgLI, ++ArgRI) { - if (cmpValues(&*ArgLI, &*ArgRI) != 0) - llvm_unreachable("Arguments repeat!"); - } - - // We do a CFG-ordered walk since the actual ordering of the blocks in the - // linked list is immaterial. Our walk starts at the entry block for both - // functions, then takes each block from each terminator in order. As an - // artifact, this also means that unreachable blocks are ignored. - SmallVector<const BasicBlock *, 8> FnLBBs, FnRBBs; - SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F1. - - FnLBBs.push_back(&FnL->getEntryBlock()); - FnRBBs.push_back(&FnR->getEntryBlock()); - - VisitedBBs.insert(FnLBBs[0]); - while (!FnLBBs.empty()) { - const BasicBlock *BBL = FnLBBs.pop_back_val(); - const BasicBlock *BBR = FnRBBs.pop_back_val(); - - if (int Res = cmpValues(BBL, BBR)) - return Res; - - if (int Res = cmpBasicBlocks(BBL, BBR)) - return Res; - - const TerminatorInst *TermL = BBL->getTerminator(); - const TerminatorInst *TermR = BBR->getTerminator(); - - assert(TermL->getNumSuccessors() == TermR->getNumSuccessors()); - for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) { - if (!VisitedBBs.insert(TermL->getSuccessor(i)).second) - continue; - - FnLBBs.push_back(TermL->getSuccessor(i)); - FnRBBs.push_back(TermR->getSuccessor(i)); - } - } - return 0; -} - -namespace { -// Accumulate the hash of a sequence of 64-bit integers. This is similar to a -// hash of a sequence of 64bit ints, but the entire input does not need to be -// available at once. This interface is necessary for functionHash because it -// needs to accumulate the hash as the structure of the function is traversed -// without saving these values to an intermediate buffer. This form of hashing -// is not often needed, as usually the object to hash is just read from a -// buffer. -class HashAccumulator64 { - uint64_t Hash; -public: - // Initialize to random constant, so the state isn't zero. - HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; } - void add(uint64_t V) { - Hash = llvm::hashing::detail::hash_16_bytes(Hash, V); - } - // No finishing is required, because the entire hash value is used. - uint64_t getHash() { return Hash; } -}; -} // end anonymous namespace - -// A function hash is calculated by considering only the number of arguments and -// whether a function is varargs, the order of basic blocks (given by the -// successors of each basic block in depth first order), and the order of -// opcodes of each instruction within each of these basic blocks. This mirrors -// the strategy compare() uses to compare functions by walking the BBs in depth -// first order and comparing each instruction in sequence. Because this hash -// does not look at the operands, it is insensitive to things such as the -// target of calls and the constants used in the function, which makes it useful -// when possibly merging functions which are the same modulo constants and call -// targets. -FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) { - HashAccumulator64 H; - H.add(F.isVarArg()); - H.add(F.arg_size()); - - SmallVector<const BasicBlock *, 8> BBs; - SmallSet<const BasicBlock *, 16> VisitedBBs; - - // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(), - // accumulating the hash of the function "structure." 
(BB and opcode sequence) - BBs.push_back(&F.getEntryBlock()); - VisitedBBs.insert(BBs[0]); - while (!BBs.empty()) { - const BasicBlock *BB = BBs.pop_back_val(); - // This random value acts as a block header, as otherwise the partition of - // opcodes into BBs wouldn't affect the hash, only the order of the opcodes - H.add(45798); - for (auto &Inst : *BB) { - H.add(Inst.getOpcode()); - } - const TerminatorInst *Term = BB->getTerminator(); - for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { - if (!VisitedBBs.insert(Term->getSuccessor(i)).second) - continue; - BBs.push_back(Term->getSuccessor(i)); - } - } - return H.getHash(); -} - - -namespace { /// MergeFunctions finds functions which will generate identical machine code, /// by considering all pointer types to be equivalent. Once identified, /// MergeFunctions will fold them by replacing a call to one to a call to a /// bitcast of the other. -/// class MergeFunctions : public ModulePass { public: static char ID; + MergeFunctions() - : ModulePass(ID), FnTree(FunctionNodeCmp(&GlobalNumbers)), FNodesInTree(), - HasGlobalAliases(false) { + : ModulePass(ID), FnTree(FunctionNodeCmp(&GlobalNumbers)) { initializeMergeFunctionsPass(*PassRegistry::getPassRegistry()); } @@ -1367,8 +208,10 @@ // not need to become larger with another pointer. class FunctionNodeCmp { GlobalNumberState* GlobalNumbers; + public: FunctionNodeCmp(GlobalNumberState* GN) : GlobalNumbers(GN) {} + bool operator()(const FunctionNode &LHS, const FunctionNode &RHS) const { // Order first by hashes, then full function comparison. if (LHS.getHash() != RHS.getHash()) @@ -1377,17 +220,19 @@ return FCmp.compare() == -1; } }; - typedef std::set<FunctionNode, FunctionNodeCmp> FnTreeType; + using FnTreeType = std::set<FunctionNode, FunctionNodeCmp>; GlobalNumberState GlobalNumbers; /// A work queue of functions that may have been modified and should be /// analyzed again. - std::vector<WeakVH> Deferred; + std::vector<WeakTrackingVH> Deferred; +#ifndef NDEBUG /// Checks the rules of order relation introduced among functions set. /// Returns true, if sanity check has been passed, and false if failed. - bool doSanityCheck(std::vector<WeakVH> &Worklist); + bool doSanityCheck(std::vector<WeakTrackingVH> &Worklist); +#endif /// Insert a ComparableFunction into the FnTree, or merge it away if it's /// equal to one that's already present. @@ -1410,43 +255,50 @@ /// again. void mergeTwoFunctions(Function *F, Function *G); - /// Replace G with a thunk or an alias to F. Deletes G. - void writeThunkOrAlias(Function *F, Function *G); + /// Fill PDIUnrelatedWL with instructions from the entry block that are + /// unrelated to parameter related debug info. + void filterInstsUnrelatedToPDI(BasicBlock *GEntryBlock, + std::vector<Instruction *> &PDIUnrelatedWL); + + /// Erase the rest of the CFG (i.e. barring the entry block). + void eraseTail(Function *G); - /// Replace G with a simple tail call to bitcast(F). Also replace direct uses - /// of G with bitcast(F). Deletes G. + /// Erase the instructions in PDIUnrelatedWL as they are unrelated to the + /// parameter debug info, from the entry block. + void eraseInstsUnrelatedToPDI(std::vector<Instruction *> &PDIUnrelatedWL); + + /// Replace G with a simple tail call to bitcast(F). Also (unless + /// MergeFunctionsPDI holds) replace direct uses of G with bitcast(F), + /// delete G. void writeThunk(Function *F, Function *G); - /// Replace G with an alias to F. Deletes G. 
- void writeAlias(Function *F, Function *G); - /// Replace function F with function G in the function tree. void replaceFunctionInTree(const FunctionNode &FN, Function *G); /// The set of all distinct functions. Use the insert() and remove() methods /// to modify it. The map allows efficient lookup and deferring of Functions. FnTreeType FnTree; + // Map functions to the iterators of the FunctionNode which contains them // in the FnTree. This must be updated carefully whenever the FnTree is // modified, i.e. in insert(), remove(), and replaceFunctionInTree(), to avoid // dangling iterators into FnTree. The invariant that preserves this is that // there is exactly one mapping F -> FN for each FunctionNode FN in FnTree. ValueMap<Function*, FnTreeType::iterator> FNodesInTree; - - /// Whether or not the target supports global aliases. - bool HasGlobalAliases; }; } // end anonymous namespace char MergeFunctions::ID = 0; + INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false) ModulePass *llvm::createMergeFunctionsPass() { return new MergeFunctions(); } -bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { +#ifndef NDEBUG +bool MergeFunctions::doSanityCheck(std::vector<WeakTrackingVH> &Worklist) { if (const unsigned Max = NumFunctionsForSanityCheck) { unsigned TripleNumber = 0; bool Valid = true; @@ -1454,10 +306,12 @@ dbgs() << "MERGEFUNC-SANITY: Started for first " << Max << " functions.\n"; unsigned i = 0; - for (std::vector<WeakVH>::iterator I = Worklist.begin(), E = Worklist.end(); + for (std::vector<WeakTrackingVH>::iterator I = Worklist.begin(), + E = Worklist.end(); I != E && i < Max; ++I, ++i) { unsigned j = i; - for (std::vector<WeakVH>::iterator J = I; J != E && j < Max; ++J, ++j) { + for (std::vector<WeakTrackingVH>::iterator J = I; J != E && j < Max; + ++J, ++j) { Function *F1 = cast<Function>(*I); Function *F2 = cast<Function>(*J); int Res1 = FunctionComparator(F1, F2, &GlobalNumbers).compare(); @@ -1467,8 +321,7 @@ if (Res1 != -Res2) { dbgs() << "MERGEFUNC-SANITY: Non-symmetric; triple: " << TripleNumber << "\n"; - F1->dump(); - F2->dump(); + dbgs() << *F1 << '\n' << *F2 << '\n'; Valid = false; } @@ -1476,7 +329,7 @@ continue; unsigned k = j; - for (std::vector<WeakVH>::iterator K = J; K != E && k < Max; + for (std::vector<WeakTrackingVH>::iterator K = J; K != E && k < Max; ++k, ++K, ++TripleNumber) { if (K == J) continue; @@ -1503,9 +356,7 @@ << TripleNumber << "\n"; dbgs() << "Res1, Res3, Res4: " << Res1 << ", " << Res3 << ", " << Res4 << "\n"; - F1->dump(); - F2->dump(); - F3->dump(); + dbgs() << *F1 << '\n' << *F2 << '\n' << *F3 << '\n'; Valid = false; } } @@ -1517,8 +368,12 @@ } return true; } +#endif bool MergeFunctions::runOnModule(Module &M) { + if (skipModule(M)) + return false; + bool Changed = false; // All functions in the module, ordered by hash. Functions with a unique @@ -1544,12 +399,12 @@ // consider merging it. Otherwise it is dropped and never considered again. if ((I != S && std::prev(I)->first == I->first) || (std::next(I) != IE && std::next(I)->first == I->first) ) { - Deferred.push_back(WeakVH(I->second)); + Deferred.push_back(WeakTrackingVH(I->second)); } } do { - std::vector<WeakVH> Worklist; + std::vector<WeakTrackingVH> Worklist; Deferred.swap(Worklist); DEBUG(doSanityCheck(Worklist)); @@ -1557,28 +412,12 @@ DEBUG(dbgs() << "size of module: " << M.size() << '\n'); DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n'); - // Insert only strong functions and merge them. 
Strong function merging - // always deletes one of them. - for (std::vector<WeakVH>::iterator I = Worklist.begin(), - E = Worklist.end(); I != E; ++I) { - if (!*I) continue; - Function *F = cast<Function>(*I); - if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() && - !F->mayBeOverridden()) { - Changed |= insert(F); - } - } - - // Insert only weak functions and merge them. By doing these second we - // create thunks to the strong function when possible. When two weak - // functions are identical, we create a new strong function with two weak - // weak thunks to it which are identical but not mergable. - for (std::vector<WeakVH>::iterator I = Worklist.begin(), - E = Worklist.end(); I != E; ++I) { - if (!*I) continue; - Function *F = cast<Function>(*I); - if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() && - F->mayBeOverridden()) { + // Insert functions and merge them. + for (WeakTrackingVH &I : Worklist) { + if (!I) + continue; + Function *F = cast<Function>(I); + if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage()) { Changed |= insert(F); } } @@ -1611,19 +450,15 @@ // Transferring other attributes may help other optimizations, but that // should be done uniformly and not in this ad-hoc way. auto &Context = New->getContext(); - auto NewFuncAttrs = New->getAttributes(); - auto CallSiteAttrs = CS.getAttributes(); - - CallSiteAttrs = CallSiteAttrs.addAttributes( - Context, AttributeSet::ReturnIndex, NewFuncAttrs.getRetAttributes()); - - for (unsigned argIdx = 0; argIdx < CS.arg_size(); argIdx++) { - AttributeSet Attrs = NewFuncAttrs.getParamAttributes(argIdx); - if (Attrs.getNumSlots()) - CallSiteAttrs = CallSiteAttrs.addAttributes(Context, argIdx, Attrs); - } - - CS.setAttributes(CallSiteAttrs); + auto NewPAL = New->getAttributes(); + SmallVector<AttributeSet, 4> NewArgAttrs; + for (unsigned argIdx = 0; argIdx < CS.arg_size(); argIdx++) + NewArgAttrs.push_back(NewPAL.getParamAttributes(argIdx)); + // Don't transfer attributes from the function to the callee. Function + // attributes typically aren't relevant to the calling convention or ABI. + CS.setAttributes(AttributeList::get(Context, /*FnAttrs=*/AttributeSet(), + NewPAL.getRetAttributes(), + NewArgAttrs)); remove(CS.getInstruction()->getParent()->getParent()); U->set(BitcastNew); @@ -1631,23 +466,10 @@ } } -// Replace G with an alias to F if possible, or else a thunk to F. Deletes G. -void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) { - if (HasGlobalAliases && G->hasUnnamedAddr()) { - if (G->hasExternalLinkage() || G->hasLocalLinkage() || - G->hasWeakLinkage()) { - writeAlias(F, G); - return; - } - } - - writeThunk(F, G); -} - // Helper for writeThunk, // Selects proper bitcast operation, // but a bit simpler then CastInst::getCastOpcode. -static Value *createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) { +static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) { Type *SrcTy = V->getType(); if (SrcTy->isStructTy()) { assert(DestTy->isStructTy()); @@ -1672,72 +494,265 @@ return Builder.CreateBitCast(V, DestTy); } -// Replace G with a simple tail call to bitcast(F). Also replace direct uses -// of G with bitcast(F). Deletes G. +// Erase the instructions in PDIUnrelatedWL as they are unrelated to the +// parameter debug info, from the entry block. 
+void MergeFunctions::eraseInstsUnrelatedToPDI( + std::vector<Instruction *> &PDIUnrelatedWL) { + DEBUG(dbgs() << " Erasing instructions (in reverse order of appearance in " + "entry block) unrelated to parameter debug info from entry " + "block: {\n"); + while (!PDIUnrelatedWL.empty()) { + Instruction *I = PDIUnrelatedWL.back(); + DEBUG(dbgs() << " Deleting Instruction: "); + DEBUG(I->print(dbgs())); + DEBUG(dbgs() << "\n"); + I->eraseFromParent(); + PDIUnrelatedWL.pop_back(); + } + DEBUG(dbgs() << " } // Done erasing instructions unrelated to parameter " + "debug info from entry block. \n"); +} + +// Reduce G to its entry block. +void MergeFunctions::eraseTail(Function *G) { + std::vector<BasicBlock *> WorklistBB; + for (Function::iterator BBI = std::next(G->begin()), BBE = G->end(); + BBI != BBE; ++BBI) { + BBI->dropAllReferences(); + WorklistBB.push_back(&*BBI); + } + while (!WorklistBB.empty()) { + BasicBlock *BB = WorklistBB.back(); + BB->eraseFromParent(); + WorklistBB.pop_back(); + } +} + +// We are interested in the following instructions from the entry block as being +// related to parameter debug info: +// - @llvm.dbg.declare +// - stores from the incoming parameters to locations on the stack-frame +// - allocas that create these locations on the stack-frame +// - @llvm.dbg.value +// - the entry block's terminator +// The rest are unrelated to debug info for the parameters; fill up +// PDIUnrelatedWL with such instructions. +void MergeFunctions::filterInstsUnrelatedToPDI( + BasicBlock *GEntryBlock, std::vector<Instruction *> &PDIUnrelatedWL) { + std::set<Instruction *> PDIRelated; + for (BasicBlock::iterator BI = GEntryBlock->begin(), BIE = GEntryBlock->end(); + BI != BIE; ++BI) { + if (auto *DVI = dyn_cast<DbgValueInst>(&*BI)) { + DEBUG(dbgs() << " Deciding: "); + DEBUG(BI->print(dbgs())); + DEBUG(dbgs() << "\n"); + DILocalVariable *DILocVar = DVI->getVariable(); + if (DILocVar->isParameter()) { + DEBUG(dbgs() << " Include (parameter): "); + DEBUG(BI->print(dbgs())); + DEBUG(dbgs() << "\n"); + PDIRelated.insert(&*BI); + } else { + DEBUG(dbgs() << " Delete (!parameter): "); + DEBUG(BI->print(dbgs())); + DEBUG(dbgs() << "\n"); + } + } else if (auto *DDI = dyn_cast<DbgDeclareInst>(&*BI)) { + DEBUG(dbgs() << " Deciding: "); + DEBUG(BI->print(dbgs())); + DEBUG(dbgs() << "\n"); + DILocalVariable *DILocVar = DDI->getVariable(); + if (DILocVar->isParameter()) { + DEBUG(dbgs() << " Parameter: "); + DEBUG(DILocVar->print(dbgs())); + AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress()); + if (AI) { + DEBUG(dbgs() << " Processing alloca users: "); + DEBUG(dbgs() << "\n"); + for (User *U : AI->users()) { + if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + if (Value *Arg = SI->getValueOperand()) { + if (dyn_cast<Argument>(Arg)) { + DEBUG(dbgs() << " Include: "); + DEBUG(AI->print(dbgs())); + DEBUG(dbgs() << "\n"); + PDIRelated.insert(AI); + DEBUG(dbgs() << " Include (parameter): "); + DEBUG(SI->print(dbgs())); + DEBUG(dbgs() << "\n"); + PDIRelated.insert(SI); + DEBUG(dbgs() << " Include: "); + DEBUG(BI->print(dbgs())); + DEBUG(dbgs() << "\n"); + PDIRelated.insert(&*BI); + } else { + DEBUG(dbgs() << " Delete (!parameter): "); + DEBUG(SI->print(dbgs())); + DEBUG(dbgs() << "\n"); + } + } + } else { + DEBUG(dbgs() << " Defer: "); + DEBUG(U->print(dbgs())); + DEBUG(dbgs() << "\n"); + } + } + } else { + DEBUG(dbgs() << " Delete (alloca NULL): "); + DEBUG(BI->print(dbgs())); + DEBUG(dbgs() << "\n"); + } + } else { + DEBUG(dbgs() << " Delete (!parameter): "); + DEBUG(BI->print(dbgs())); + 
DEBUG(dbgs() << "\n"); + } + } else if (dyn_cast<TerminatorInst>(BI) == GEntryBlock->getTerminator()) { + DEBUG(dbgs() << " Will Include Terminator: "); + DEBUG(BI->print(dbgs())); + DEBUG(dbgs() << "\n"); + PDIRelated.insert(&*BI); + } else { + DEBUG(dbgs() << " Defer: "); + DEBUG(BI->print(dbgs())); + DEBUG(dbgs() << "\n"); + } + } + DEBUG(dbgs() + << " Report parameter debug info related/related instructions: {\n"); + for (BasicBlock::iterator BI = GEntryBlock->begin(), BE = GEntryBlock->end(); + BI != BE; ++BI) { + + Instruction *I = &*BI; + if (PDIRelated.find(I) == PDIRelated.end()) { + DEBUG(dbgs() << " !PDIRelated: "); + DEBUG(I->print(dbgs())); + DEBUG(dbgs() << "\n"); + PDIUnrelatedWL.push_back(I); + } else { + DEBUG(dbgs() << " PDIRelated: "); + DEBUG(I->print(dbgs())); + DEBUG(dbgs() << "\n"); + } + } + DEBUG(dbgs() << " }\n"); +} + +// Replace G with a simple tail call to bitcast(F). Also (unless +// MergeFunctionsPDI holds) replace direct uses of G with bitcast(F), +// delete G. Under MergeFunctionsPDI, we use G itself for creating +// the thunk as we preserve the debug info (and associated instructions) +// from G's entry block pertaining to G's incoming arguments which are +// passed on as corresponding arguments in the call that G makes to F. +// For better debugability, under MergeFunctionsPDI, we do not modify G's +// call sites to point to F even when within the same translation unit. void MergeFunctions::writeThunk(Function *F, Function *G) { - if (!G->mayBeOverridden()) { - // Redirect direct callers of G to F. - replaceDirectCallers(G, F); + if (!G->isInterposable() && !MergeFunctionsPDI) { + if (G->hasGlobalUnnamedAddr()) { + // G might have been a key in our GlobalNumberState, and it's illegal + // to replace a key in ValueMap<GlobalValue *> with a non-global. + GlobalNumbers.erase(G); + // If G's address is not significant, replace it entirely. + Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType()); + G->replaceAllUsesWith(BitcastF); + } else { + // Redirect direct callers of G to F. (See note on MergeFunctionsPDI + // above). + replaceDirectCallers(G, F); + } } // If G was internal then we may have replaced all uses of G with F. If so, - // stop here and delete G. There's no need for a thunk. - if (G->hasLocalLinkage() && G->use_empty()) { + // stop here and delete G. There's no need for a thunk. (See note on + // MergeFunctionsPDI above). + if (G->hasLocalLinkage() && G->use_empty() && !MergeFunctionsPDI) { G->eraseFromParent(); return; } - Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "", - G->getParent()); - BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG); - IRBuilder<false> Builder(BB); + // Don't merge tiny functions using a thunk, since it can just end up + // making the function larger. 
+ if (F->size() == 1) { + if (F->front().size() <= 2) { + DEBUG(dbgs() << "writeThunk: " << F->getName() + << " is too small to bother creating a thunk for\n"); + return; + } + } + BasicBlock *GEntryBlock = nullptr; + std::vector<Instruction *> PDIUnrelatedWL; + BasicBlock *BB = nullptr; + Function *NewG = nullptr; + if (MergeFunctionsPDI) { + DEBUG(dbgs() << "writeThunk: (MergeFunctionsPDI) Do not create a new " + "function as thunk; retain original: " + << G->getName() << "()\n"); + GEntryBlock = &G->getEntryBlock(); + DEBUG(dbgs() << "writeThunk: (MergeFunctionsPDI) filter parameter related " + "debug info for " + << G->getName() << "() {\n"); + filterInstsUnrelatedToPDI(GEntryBlock, PDIUnrelatedWL); + GEntryBlock->getTerminator()->eraseFromParent(); + BB = GEntryBlock; + } else { + NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "", + G->getParent()); + BB = BasicBlock::Create(F->getContext(), "", NewG); + } + + IRBuilder<> Builder(BB); + Function *H = MergeFunctionsPDI ? G : NewG; SmallVector<Value *, 16> Args; unsigned i = 0; FunctionType *FFTy = F->getFunctionType(); - for (Argument & AI : NewG->args()) { + for (Argument &AI : H->args()) { Args.push_back(createCast(Builder, &AI, FFTy->getParamType(i))); ++i; } CallInst *CI = Builder.CreateCall(F, Args); + ReturnInst *RI = nullptr; CI->setTailCall(); CI->setCallingConv(F->getCallingConv()); CI->setAttributes(F->getAttributes()); - if (NewG->getReturnType()->isVoidTy()) { - Builder.CreateRetVoid(); + if (H->getReturnType()->isVoidTy()) { + RI = Builder.CreateRetVoid(); } else { - Builder.CreateRet(createCast(Builder, CI, NewG->getReturnType())); + RI = Builder.CreateRet(createCast(Builder, CI, H->getReturnType())); } - NewG->copyAttributesFrom(G); - NewG->takeName(G); - removeUsers(G); - G->replaceAllUsesWith(NewG); - G->eraseFromParent(); - - DEBUG(dbgs() << "writeThunk: " << NewG->getName() << '\n'); - ++NumThunksWritten; -} + if (MergeFunctionsPDI) { + DISubprogram *DIS = G->getSubprogram(); + if (DIS) { + DebugLoc CIDbgLoc = DebugLoc::get(DIS->getScopeLine(), 0, DIS); + DebugLoc RIDbgLoc = DebugLoc::get(DIS->getScopeLine(), 0, DIS); + CI->setDebugLoc(CIDbgLoc); + RI->setDebugLoc(RIDbgLoc); + } else { + DEBUG(dbgs() << "writeThunk: (MergeFunctionsPDI) No DISubprogram for " + << G->getName() << "()\n"); + } + eraseTail(G); + eraseInstsUnrelatedToPDI(PDIUnrelatedWL); + DEBUG(dbgs() << "} // End of parameter related debug info filtering for: " + << G->getName() << "()\n"); + } else { + NewG->copyAttributesFrom(G); + NewG->takeName(G); + removeUsers(G); + G->replaceAllUsesWith(NewG); + G->eraseFromParent(); + } -// Replace G with an alias to F and delete G. -void MergeFunctions::writeAlias(Function *F, Function *G) { - auto *GA = GlobalAlias::create(G->getLinkage(), "", F); - F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); - GA->takeName(G); - GA->setVisibility(G->getVisibility()); - removeUsers(G); - G->replaceAllUsesWith(GA); - G->eraseFromParent(); - - DEBUG(dbgs() << "writeAlias: " << GA->getName() << '\n'); - ++NumAliasesWritten; + DEBUG(dbgs() << "writeThunk: " << H->getName() << '\n'); + ++NumThunksWritten; } // Merge two equivalent functions. Upon completion, Function G is deleted. void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { - if (F->mayBeOverridden()) { - assert(G->mayBeOverridden()); + if (F->isInterposable()) { + assert(G->isInterposable()); // Make them both thunks to the same internal function. 
Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "", @@ -1749,19 +764,14 @@ unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment()); - if (HasGlobalAliases) { - writeAlias(F, G); - writeAlias(F, H); - } else { - writeThunk(F, G); - writeThunk(F, H); - } + writeThunk(F, G); + writeThunk(F, H); F->setAlignment(MaxAlignment); F->setLinkage(GlobalValue::PrivateLinkage); ++NumDoubleWeak; } else { - writeThunkOrAlias(F, G); + writeThunk(F, G); } ++NumFunctionsMerged; @@ -1802,36 +812,20 @@ const FunctionNode &OldF = *Result.first; - // Don't merge tiny functions, since it can just end up making the function - // larger. - // FIXME: Should still merge them if they are unnamed_addr and produce an - // alias. - if (NewFunction->size() == 1) { - if (NewFunction->front().size() <= 2) { - DEBUG(dbgs() << NewFunction->getName() - << " is to small to bother merging\n"); - return false; - } - } - // Impose a total order (by name) on the replacement of functions. This is // important when operating on more than one module independently to prevent // cycles of thunks calling each other when the modules are linked together. // - // When one function is weak and the other is strong there is an order imposed - // already. We process strong functions before weak functions. - if ((OldF.getFunc()->mayBeOverridden() && NewFunction->mayBeOverridden()) || - (!OldF.getFunc()->mayBeOverridden() && !NewFunction->mayBeOverridden())) - if (OldF.getFunc()->getName() > NewFunction->getName()) { - // Swap the two functions. - Function *F = OldF.getFunc(); - replaceFunctionInTree(*Result.first, NewFunction); - NewFunction = F; - assert(OldF.getFunc() != F && "Must have swapped the functions."); - } - - // Never thunk a strong function to a weak function. - assert(!OldF.getFunc()->mayBeOverridden() || NewFunction->mayBeOverridden()); + // First of all, we process strong functions before weak functions. + if ((OldF.getFunc()->isInterposable() && !NewFunction->isInterposable()) || + (OldF.getFunc()->isInterposable() == NewFunction->isInterposable() && + OldF.getFunc()->getName() > NewFunction->getName())) { + // Swap the two functions. + Function *F = OldF.getFunc(); + replaceFunctionInTree(*Result.first, NewFunction); + NewFunction = F; + assert(OldF.getFunc() != F && "Must have swapped the functions."); + } DEBUG(dbgs() << " " << OldF.getFunc()->getName() << " == " << NewFunction->getName() << '\n');
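
The hunks above fold the old writeThunkOrAlias/writeAlias pair into a single writeThunk path. Below is a minimal standalone sketch (not from this changeset) of the thunk shape that path builds when MergeFunctionsPDI is off; it uses only calls that appear in the hunk, the helper name buildThunkSketch is hypothetical, and it assumes F and G already share an identical FunctionType so no createCast-style conversion is needed.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"

// Sketch only: reduce G to a tail call that forwards to the canonical F.
static void buildThunkSketch(llvm::Function *F, llvm::Function *G) {
  using namespace llvm;
  Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
                                    G->getParent());
  BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG);
  IRBuilder<> Builder(BB);

  // Forward every incoming argument unchanged (identical signatures assumed).
  SmallVector<Value *, 16> Args;
  for (Argument &AI : NewG->args())
    Args.push_back(&AI);

  CallInst *CI = Builder.CreateCall(F, Args);
  CI->setTailCall();
  CI->setCallingConv(F->getCallingConv());
  if (NewG->getReturnType()->isVoidTy())
    Builder.CreateRetVoid();
  else
    Builder.CreateRet(CI);

  // G's name and remaining uses move to the thunk; G itself goes away.
  NewG->takeName(G);
  G->replaceAllUsesWith(NewG);
  G->eraseFromParent();
}

Under MergeFunctionsPDI the pass instead reuses G's own entry block for the forwarding call, which is why the filterInstsUnrelatedToPDI and eraseTail hunks above keep only the parameter-related debug instructions before emitting the call.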
--- a/lib/Transforms/Scalar/SROA.cpp Thu Nov 30 20:04:56 2017 +0900 +++ b/lib/Transforms/Scalar/SROA.cpp Sun Dec 03 20:09:16 2017 +0900 @@ -24,39 +24,76 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/SROA.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/PtrUseVisitor.h" -#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/ConstantFolder.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/GlobalAlias.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/TimeValue.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" - -#if __cplusplus >= 201103L && !defined(NDEBUG) -// We only use this for a debug check in C++11 +#include <algorithm> +#include <cassert> +#include <chrono> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <iterator> +#include <string> +#include <tuple> +#include <utility> +#include <vector> + +#ifndef NDEBUG +// We only use this for a debug check. #include <random> #endif @@ -87,43 +124,30 @@ cl::Hidden); namespace { -/// \brief A custom IRBuilder inserter which prefixes all names if they are -/// preserved. -template <bool preserveNames = true> -class IRBuilderPrefixedInserter - : public IRBuilderDefaultInserter<preserveNames> { + +/// \brief A custom IRBuilder inserter which prefixes all names, but only in +/// Assert builds. +class IRBuilderPrefixedInserter : public IRBuilderDefaultInserter { std::string Prefix; + const Twine getNameWithPrefix(const Twine &Name) const { + return Name.isTriviallyEmpty() ? Name : Prefix + Name; + } + public: void SetNamePrefix(const Twine &P) { Prefix = P.str(); } protected: void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB, BasicBlock::iterator InsertPt) const { - IRBuilderDefaultInserter<preserveNames>::InsertHelper( - I, Name.isTriviallyEmpty() ? 
Name : Prefix + Name, BB, InsertPt); + IRBuilderDefaultInserter::InsertHelper(I, getNameWithPrefix(Name), BB, + InsertPt); } }; -// Specialization for not preserving the name is trivial. -template <> -class IRBuilderPrefixedInserter<false> - : public IRBuilderDefaultInserter<false> { -public: - void SetNamePrefix(const Twine &P) {} -}; - -/// \brief Provide a typedef for IRBuilder that drops names in release builds. -#ifndef NDEBUG -typedef llvm::IRBuilder<true, ConstantFolder, IRBuilderPrefixedInserter<true>> - IRBuilderTy; -#else -typedef llvm::IRBuilder<false, ConstantFolder, IRBuilderPrefixedInserter<false>> - IRBuilderTy; -#endif -} - -namespace { +/// \brief Provide a type for IRBuilder that drops names in release builds. +using IRBuilderTy = IRBuilder<ConstantFolder, IRBuilderPrefixedInserter>; + /// \brief A used slice of an alloca. /// /// This structure represents a slice of an alloca used by some instruction. It @@ -132,17 +156,18 @@ /// or not when forming partitions of the alloca. class Slice { /// \brief The beginning offset of the range. - uint64_t BeginOffset; + uint64_t BeginOffset = 0; /// \brief The ending offset, not included in the range. - uint64_t EndOffset; + uint64_t EndOffset = 0; /// \brief Storage for both the use of this slice and whether it can be /// split. PointerIntPair<Use *, 1, bool> UseAndIsSplittable; public: - Slice() : BeginOffset(), EndOffset() {} + Slice() = default; + Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable) : BeginOffset(BeginOffset), EndOffset(EndOffset), UseAndIsSplittable(U, IsSplittable) {} @@ -192,12 +217,15 @@ } bool operator!=(const Slice &RHS) const { return !operator==(RHS); } }; + } // end anonymous namespace namespace llvm { + template <typename T> struct isPodLike; template <> struct isPodLike<Slice> { static const bool value = true; }; -} + +} // end namespace llvm /// \brief Representation of the alloca slices. /// @@ -219,13 +247,15 @@ /// \brief Support for iterating over the slices. /// @{ - typedef SmallVectorImpl<Slice>::iterator iterator; - typedef iterator_range<iterator> range; + using iterator = SmallVectorImpl<Slice>::iterator; + using range = iterator_range<iterator>; + iterator begin() { return Slices.begin(); } iterator end() { return Slices.end(); } - typedef SmallVectorImpl<Slice>::const_iterator const_iterator; - typedef iterator_range<const_iterator> const_range; + using const_iterator = SmallVectorImpl<Slice>::const_iterator; + using const_range = iterator_range<const_iterator>; + const_iterator begin() const { return Slices.begin(); } const_iterator end() const { return Slices.end(); } /// @} @@ -276,6 +306,7 @@ private: template <typename DerivedT, typename RetT = void> class BuilderBase; class SliceBuilder; + friend class AllocaSlices::SliceBuilder; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -332,13 +363,13 @@ friend class AllocaSlices; friend class AllocaSlices::partition_iterator; - typedef AllocaSlices::iterator iterator; + using iterator = AllocaSlices::iterator; /// \brief The beginning and ending offsets of the alloca for this /// partition. uint64_t BeginOffset, EndOffset; - /// \brief The start end end iterators of this partition. + /// \brief The start and end iterators of this partition. iterator SI, SJ; /// \brief A collection of split slice tails overlapping the partition. @@ -415,12 +446,12 @@ /// \brief We also need to keep track of the maximum split end offset seen. /// FIXME: Do we really? 
- uint64_t MaxSplitSliceEndOffset; + uint64_t MaxSplitSliceEndOffset = 0; /// \brief Sets the partition to be empty at given iterator, and sets the /// end iterator. partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE) - : P(SI), SE(SE), MaxSplitSliceEndOffset(0) { + : P(SI), SE(SE) { // If not already at the end, advance our state to form the initial // partition. if (SI != SE) @@ -444,20 +475,21 @@ // Remove the uses which have ended in the prior partition. This // cannot change the max split slice end because we just checked that // the prior partition ended prior to that max. - P.SplitTails.erase( - std::remove_if( - P.SplitTails.begin(), P.SplitTails.end(), - [&](Slice *S) { return S->endOffset() <= P.EndOffset; }), - P.SplitTails.end()); - assert(std::any_of(P.SplitTails.begin(), P.SplitTails.end(), - [&](Slice *S) { - return S->endOffset() == MaxSplitSliceEndOffset; - }) && + P.SplitTails.erase(llvm::remove_if(P.SplitTails, + [&](Slice *S) { + return S->endOffset() <= + P.EndOffset; + }), + P.SplitTails.end()); + assert(llvm::any_of(P.SplitTails, + [&](Slice *S) { + return S->endOffset() == MaxSplitSliceEndOffset; + }) && "Could not find the current max split slice offset!"); - assert(std::all_of(P.SplitTails.begin(), P.SplitTails.end(), - [&](Slice *S) { - return S->endOffset() <= MaxSplitSliceEndOffset; - }) && + assert(llvm::all_of(P.SplitTails, + [&](Slice *S) { + return S->endOffset() <= MaxSplitSliceEndOffset; + }) && "Max split slice end offset is not actually the max!"); } } @@ -621,7 +653,8 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> { friend class PtrUseVisitor<SliceBuilder>; friend class InstVisitor<SliceBuilder>; - typedef PtrUseVisitor<SliceBuilder> Base; + + using Base = PtrUseVisitor<SliceBuilder>; const uint64_t AllocSize; AllocaSlices &AS; @@ -694,7 +727,7 @@ // langref in a very strict sense. If we ever want to enable // SROAStrictInbounds, this code should be factored cleanly into // PtrUseVisitor, but it is easier to experiment with SROAStrictInbounds - // by writing out the code here where we have tho underlying allocation + // by writing out the code here where we have the underlying allocation // size readily available. APInt GEPOffset = Offset; const DataLayout &DL = GEPI.getModule()->getDataLayout(); @@ -706,7 +739,7 @@ break; // Handle a struct index, which adds its field offset to the pointer. - if (StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = GTI.getStructTypeOrNull()) { unsigned ElementIdx = OpC->getZExtValue(); const StructLayout *SL = DL.getStructLayout(STy); GEPOffset += @@ -1009,15 +1042,14 @@ return; } - Slices.erase(std::remove_if(Slices.begin(), Slices.end(), - [](const Slice &S) { - return S.isDead(); - }), - Slices.end()); - -#if __cplusplus >= 201103L && !defined(NDEBUG) + Slices.erase( + llvm::remove_if(Slices, [](const Slice &S) { return S.isDead(); }), + Slices.end()); + +#ifndef NDEBUG if (SROARandomShuffleSlices) { - std::mt19937 MT(static_cast<unsigned>(sys::TimeValue::now().msec())); + std::mt19937 MT(static_cast<unsigned>( + std::chrono::system_clock::now().time_since_epoch().count())); std::shuffle(Slices.begin(), Slices.end(), MT); } #endif @@ -1169,6 +1201,8 @@ if (!HaveLoad) return false; + const DataLayout &DL = PN.getModule()->getDataLayout(); + // We can only transform this if it is safe to push the loads into the // predecessor blocks. 
The only thing to watch out for is that we can't put // a possibly trapping load in the predecessor if it is a critical edge. @@ -1190,7 +1224,7 @@ // If this pointer is always safe to load, or if we can prove that there // is already a load in the block, then we can move the load to the pred // block. - if (isSafeToLoadUnconditionally(InVal, MaxAlign, TI)) + if (isSafeToLoadUnconditionally(InVal, MaxAlign, DL, TI)) continue; return false; @@ -1258,18 +1292,19 @@ static bool isSafeSelectToSpeculate(SelectInst &SI) { Value *TValue = SI.getTrueValue(); Value *FValue = SI.getFalseValue(); + const DataLayout &DL = SI.getModule()->getDataLayout(); for (User *U : SI.users()) { LoadInst *LI = dyn_cast<LoadInst>(U); if (!LI || !LI->isSimple()) return false; - // Both operands to the select need to be dereferencable, either + // Both operands to the select need to be dereferenceable, either // absolutely (e.g. allocas) or at this point because we can see other // accesses to it. - if (!isSafeToLoadUnconditionally(TValue, LI->getAlignment(), LI)) + if (!isSafeToLoadUnconditionally(TValue, LI->getAlignment(), DL, LI)) return false; - if (!isSafeToLoadUnconditionally(FValue, LI->getAlignment(), LI)) + if (!isSafeToLoadUnconditionally(FValue, LI->getAlignment(), DL, LI)) return false; } @@ -1562,7 +1597,7 @@ if (Operator::getOpcode(Ptr) == Instruction::BitCast) { Ptr = cast<Operator>(Ptr)->getOperand(0); } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) { - if (GA->mayBeOverridden()) + if (GA->isInterposable()) break; Ptr = GA->getAliasee(); } else { @@ -1645,10 +1680,21 @@ OldTy = OldTy->getScalarType(); NewTy = NewTy->getScalarType(); if (NewTy->isPointerTy() || OldTy->isPointerTy()) { - if (NewTy->isPointerTy() && OldTy->isPointerTy()) - return true; - if (NewTy->isIntegerTy() || OldTy->isIntegerTy()) - return true; + if (NewTy->isPointerTy() && OldTy->isPointerTy()) { + return cast<PointerType>(NewTy)->getPointerAddressSpace() == + cast<PointerType>(OldTy)->getPointerAddressSpace(); + } + + // We can convert integers to integral pointers, but not to non-integral + // pointers. + if (OldTy->isIntegerTy()) + return !DL.isNonIntegralPointerType(NewTy); + + // We can convert integral pointers to integers, but non-integral pointers + // need to remain pointers. + if (!DL.isNonIntegralPointerType(OldTy)) + return NewTy->isIntegerTy(); + return false; } @@ -1674,8 +1720,7 @@ // See if we need inttoptr for this type pair. A cast involving both scalars // and vectors requires and additional bitcast. - if (OldTy->getScalarType()->isIntegerTy() && - NewTy->getScalarType()->isPointerTy()) { + if (OldTy->isIntOrIntVectorTy() && NewTy->isPtrOrPtrVectorTy()) { // Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8* if (OldTy->isVectorTy() && !NewTy->isVectorTy()) return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)), @@ -1691,8 +1736,7 @@ // See if we need ptrtoint for this type pair. A cast involving both scalars // and vectors requires and additional bitcast. - if (OldTy->getScalarType()->isPointerTy() && - NewTy->getScalarType()->isIntegerTy()) { + if (OldTy->isPtrOrPtrVectorTy() && NewTy->isIntOrIntVectorTy()) { // Expand <2 x i8*> to i128 --> <2 x i8*> to <2 x i64> to i128 if (OldTy->isVectorTy() && !NewTy->isVectorTy()) return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)), @@ -1823,11 +1867,12 @@ // do that until all the backends are known to produce good code for all // integer vector types. 
if (!HaveCommonEltTy) { - CandidateTys.erase(std::remove_if(CandidateTys.begin(), CandidateTys.end(), - [](VectorType *VTy) { - return !VTy->getElementType()->isIntegerTy(); - }), - CandidateTys.end()); + CandidateTys.erase( + llvm::remove_if(CandidateTys, + [](VectorType *VTy) { + return !VTy->getElementType()->isIntegerTy(); + }), + CandidateTys.end()); // If there were no integer vector types, give up. if (CandidateTys.empty()) @@ -1836,6 +1881,7 @@ // Rank the remaining candidate vector types. This is easy because we know // they're all integer vectors. We sort by ascending number of elements. auto RankVectorTypes = [&DL](VectorType *RHSTy, VectorType *LHSTy) { + (void)DL; assert(DL.getTypeSizeInBits(RHSTy) == DL.getTypeSizeInBits(LHSTy) && "Cannot have vector types of different sizes!"); assert(RHSTy->getElementType()->isIntegerTy() && @@ -2153,8 +2199,9 @@ class llvm::sroa::AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> { // Befriend the base class so it can delegate to private visit methods. - friend class llvm::InstVisitor<AllocaSliceRewriter, bool>; - typedef llvm::InstVisitor<AllocaSliceRewriter, bool> Base; + friend class InstVisitor<AllocaSliceRewriter, bool>; + + using Base = InstVisitor<AllocaSliceRewriter, bool>; const DataLayout &DL; AllocaSlices &AS; @@ -2184,20 +2231,22 @@ // The original offset of the slice currently being rewritten relative to // the original alloca. - uint64_t BeginOffset, EndOffset; + uint64_t BeginOffset = 0; + uint64_t EndOffset = 0; + // The new offsets of the slice currently being rewritten relative to the // original alloca. uint64_t NewBeginOffset, NewEndOffset; uint64_t SliceSize; - bool IsSplittable; - bool IsSplit; - Use *OldUse; - Instruction *OldPtr; + bool IsSplittable = false; + bool IsSplit = false; + Use *OldUse = nullptr; + Instruction *OldPtr = nullptr; // Track post-rewrite users which are PHI nodes and Selects. - SmallPtrSetImpl<PHINode *> &PHIUsers; - SmallPtrSetImpl<SelectInst *> &SelectUsers; + SmallSetVector<PHINode *, 8> &PHIUsers; + SmallSetVector<SelectInst *, 8> &SelectUsers; // Utility IR builder, whose name prefix is setup for each visited use, and // the insertion point is set to point to the user. @@ -2209,8 +2258,8 @@ uint64_t NewAllocaBeginOffset, uint64_t NewAllocaEndOffset, bool IsIntegerPromotable, VectorType *PromotableVecTy, - SmallPtrSetImpl<PHINode *> &PHIUsers, - SmallPtrSetImpl<SelectInst *> &SelectUsers) + SmallSetVector<PHINode *, 8> &PHIUsers, + SmallSetVector<SelectInst *, 8> &SelectUsers) : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI), NewAllocaBeginOffset(NewAllocaBeginOffset), NewAllocaEndOffset(NewAllocaEndOffset), @@ -2223,8 +2272,7 @@ VecTy(PromotableVecTy), ElementTy(VecTy ? VecTy->getElementType() : nullptr), ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy) / 8 : 0), - BeginOffset(), EndOffset(), IsSplittable(), IsSplit(), OldUse(), - OldPtr(), PHIUsers(PHIUsers), SelectUsers(SelectUsers), + PHIUsers(PHIUsers), SelectUsers(SelectUsers), IRB(NewAI.getContext(), ConstantFolder()) { if (VecTy) { assert((DL.getTypeSizeInBits(ElementTy) % 8) == 0 && @@ -2305,7 +2353,8 @@ #endif return getAdjustedPtr(IRB, DL, &NewAI, - APInt(DL.getPointerSizeInBits(), Offset), PointerTy, + APInt(DL.getPointerTypeSizeInBits(PointerTy), Offset), + PointerTy, #ifndef NDEBUG Twine(OldName) + "." #else @@ -2380,6 +2429,8 @@ Value *OldOp = LI.getOperand(0); assert(OldOp == OldPtr); + unsigned AS = LI.getPointerAddressSpace(); + Type *TargetTy = IsSplit ? 
Type::getIntNTy(LI.getContext(), SliceSize * 8) : LI.getType(); const bool IsLoadPastEnd = DL.getTypeStoreSize(TargetTy) > SliceSize; @@ -2397,7 +2448,22 @@ LoadInst *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), LI.isVolatile(), LI.getName()); if (LI.isVolatile()) - NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope()); + NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); + + // Any !nonnull metadata or !range metadata on the old load is also valid + // on the new load. This is even true in some cases even when the loads + // are different types, for example by mapping !nonnull metadata to + // !range metadata by modeling the null pointer constant converted to the + // integer type. + // FIXME: Add support for range metadata here. Currently the utilities + // for this don't propagate range metadata in trivial cases from one + // integer load to another, don't handle non-addrspace-0 null pointers + // correctly, and don't have any support for mapping ranges as the + // integer type becomes winder or narrower. + if (MDNode *N = LI.getMetadata(LLVMContext::MD_nonnull)) + copyNonnullMetadata(LI, N, *NewLI); + + // Try to preserve nonnull metadata V = NewLI; // If this is an integer load past the end of the slice (which means the @@ -2412,12 +2478,12 @@ "endian_shift"); } } else { - Type *LTy = TargetTy->getPointerTo(); + Type *LTy = TargetTy->getPointerTo(AS); LoadInst *NewLI = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy), getSliceAlign(TargetTy), LI.isVolatile(), LI.getName()); if (LI.isVolatile()) - NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope()); + NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); V = NewLI; IsPtrAdjusted = true; @@ -2440,12 +2506,12 @@ // the computed value, and then replace the placeholder with LI, leaving // LI only used for this computation. Value *Placeholder = - new LoadInst(UndefValue::get(LI.getType()->getPointerTo())); + new LoadInst(UndefValue::get(LI.getType()->getPointerTo(AS))); V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset, "insert"); LI.replaceAllUsesWith(V); Placeholder->replaceAllUsesWith(&LI); - delete Placeholder; + Placeholder->deleteValue(); } else { LI.replaceAllUsesWith(V); } @@ -2494,8 +2560,8 @@ } V = convertValue(DL, IRB, V, NewAllocaTy); StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); + Store->copyMetadata(SI, LLVMContext::MD_mem_parallel_loop_access); Pass.DeadInsts.insert(&SI); - (void)Store; DEBUG(dbgs() << " to: " << *Store << "\n"); return true; } @@ -2553,12 +2619,14 @@ NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(), SI.isVolatile()); } else { - Value *NewPtr = getNewAllocaSlicePtr(IRB, V->getType()->getPointerTo()); + unsigned AS = SI.getPointerAddressSpace(); + Value *NewPtr = getNewAllocaSlicePtr(IRB, V->getType()->getPointerTo(AS)); NewSI = IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(V->getType()), SI.isVolatile()); } + NewSI->copyMetadata(SI, LLVMContext::MD_mem_parallel_loop_access); if (SI.isVolatile()) - NewSI->setAtomic(SI.getOrdering(), SI.getSynchScope()); + NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID()); Pass.DeadInsts.insert(&SI); deleteIfTriviallyDead(OldOp); @@ -2886,6 +2954,17 @@ // Record this instruction for deletion. Pass.DeadInsts.insert(&II); + // Lifetime intrinsics are only promotable if they cover the whole alloca. + // Therefore, we drop lifetime intrinsics which don't cover the whole + // alloca. 
+ // (In theory, intrinsics which partially cover an alloca could be + // promoted, but PromoteMemToReg doesn't handle that case.) + // FIXME: Check whether the alloca is promotable before dropping the + // lifetime intrinsics? + if (NewBeginOffset != NewAllocaBeginOffset || + NewEndOffset != NewAllocaEndOffset) + return true; + ConstantInt *Size = ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()), NewEndOffset - NewBeginOffset); @@ -2898,6 +2977,7 @@ (void)New; DEBUG(dbgs() << " to: " << *New << "\n"); + return true; } @@ -2957,6 +3037,7 @@ }; namespace { + /// \brief Visitor to rewrite aggregate loads and stores as scalar. /// /// This pass aggressively rewrites all aggregate loads and stores on @@ -2964,7 +3045,7 @@ /// with scalar loads and stores. class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> { // Befriend the base class so it can delegate to private visit methods. - friend class llvm::InstVisitor<AggLoadStoreRewriter, bool>; + friend class InstVisitor<AggLoadStoreRewriter, bool>; /// Queue of pointer uses to analyze and potentially rewrite. SmallVector<Use *, 8> Queue; @@ -3007,12 +3088,15 @@ protected: /// The builder used to form new instructions. IRBuilderTy IRB; + /// The indices which to be used with insert- or extractvalue to select the /// appropriate value within the aggregate. SmallVector<unsigned, 4> Indices; + /// The indices to a GEP instruction which will move Ptr to the correct slot /// within the aggregate. SmallVector<Value *, 4> GEPIndices; + /// The base pointer of the original op, used as a base for GEPing the /// split operations. Value *Ptr; @@ -3115,9 +3199,14 @@ void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) { assert(Ty->isSingleValueType()); // Extract the single value and store it using the indices. - Value *Store = IRB.CreateStore( - IRB.CreateExtractValue(Agg, Indices, Name + ".extract"), - IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep")); + // + // The gep and extractvalue values are factored out of the CreateStore + // call to make the output independent of the argument evaluation order. + Value *ExtractValue = + IRB.CreateExtractValue(Agg, Indices, Name + ".extract"); + Value *InBoundsGEP = + IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep"); + Value *Store = IRB.CreateStore(ExtractValue, InBoundsGEP); (void)Store; DEBUG(dbgs() << " to: " << *Store << "\n"); } @@ -3158,7 +3247,8 @@ return false; } }; -} + +} // end anonymous namespace /// \brief Strip aggregate type wrapping. /// @@ -3212,20 +3302,11 @@ return nullptr; if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) { - // We can't partition pointers... - if (SeqTy->isPointerTy()) - return nullptr; - Type *ElementTy = SeqTy->getElementType(); uint64_t ElementSize = DL.getTypeAllocSize(ElementTy); uint64_t NumSkippedElements = Offset / ElementSize; - if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) { - if (NumSkippedElements >= ArrTy->getNumElements()) - return nullptr; - } else if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy)) { - if (NumSkippedElements >= VecTy->getNumElements()) - return nullptr; - } + if (NumSkippedElements >= SeqTy->getNumElements()) + return nullptr; Offset -= NumSkippedElements * ElementSize; // First check if we need to recurse. 
@@ -3372,11 +3453,15 @@ for (auto &P : AS.partitions()) { for (Slice &S : P) { Instruction *I = cast<Instruction>(S.getUse()->getUser()); - if (!S.isSplittable() ||S.endOffset() <= P.endOffset()) { - // If this was a load we have to track that it can't participate in any - // pre-splitting! + if (!S.isSplittable() || S.endOffset() <= P.endOffset()) { + // If this is a load we have to track that it can't participate in any + // pre-splitting. If this is a store of a load we have to track that + // that load also can't participate in any pre-splitting. if (auto *LI = dyn_cast<LoadInst>(I)) UnsplittableLoads.insert(LI); + else if (auto *SI = dyn_cast<StoreInst>(I)) + if (auto *LI = dyn_cast<LoadInst>(SI->getValueOperand())) + UnsplittableLoads.insert(LI); continue; } assert(P.endOffset() > S.beginOffset() && @@ -3403,9 +3488,9 @@ } Loads.push_back(LI); - } else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser())) { - if (!SI || - S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex())) + } else if (auto *SI = dyn_cast<StoreInst>(I)) { + if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex())) + // Skip stores *of* pointers. FIXME: This shouldn't even be possible! continue; auto *StoredLoad = dyn_cast<LoadInst>(SI->getValueOperand()); if (!StoredLoad || !StoredLoad->isSimple()) @@ -3455,63 +3540,62 @@ // match relative to their starting offset. We have to verify this prior to // any rewriting. Stores.erase( - std::remove_if(Stores.begin(), Stores.end(), - [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) { - // Lookup the load we are storing in our map of split - // offsets. - auto *LI = cast<LoadInst>(SI->getValueOperand()); - // If it was completely unsplittable, then we're done, - // and this store can't be pre-split. - if (UnsplittableLoads.count(LI)) - return true; - - auto LoadOffsetsI = SplitOffsetsMap.find(LI); - if (LoadOffsetsI == SplitOffsetsMap.end()) - return false; // Unrelated loads are definitely safe. - auto &LoadOffsets = LoadOffsetsI->second; - - // Now lookup the store's offsets. - auto &StoreOffsets = SplitOffsetsMap[SI]; - - // If the relative offsets of each split in the load and - // store match exactly, then we can split them and we - // don't need to remove them here. - if (LoadOffsets.Splits == StoreOffsets.Splits) - return false; - - DEBUG(dbgs() - << " Mismatched splits for load and store:\n" - << " " << *LI << "\n" - << " " << *SI << "\n"); - - // We've found a store and load that we need to split - // with mismatched relative splits. Just give up on them - // and remove both instructions from our list of - // candidates. - UnsplittableLoads.insert(LI); - return true; - }), + llvm::remove_if(Stores, + [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) { + // Lookup the load we are storing in our map of split + // offsets. + auto *LI = cast<LoadInst>(SI->getValueOperand()); + // If it was completely unsplittable, then we're done, + // and this store can't be pre-split. + if (UnsplittableLoads.count(LI)) + return true; + + auto LoadOffsetsI = SplitOffsetsMap.find(LI); + if (LoadOffsetsI == SplitOffsetsMap.end()) + return false; // Unrelated loads are definitely safe. + auto &LoadOffsets = LoadOffsetsI->second; + + // Now lookup the store's offsets. + auto &StoreOffsets = SplitOffsetsMap[SI]; + + // If the relative offsets of each split in the load and + // store match exactly, then we can split them and we + // don't need to remove them here. 
+ if (LoadOffsets.Splits == StoreOffsets.Splits) + return false; + + DEBUG(dbgs() + << " Mismatched splits for load and store:\n" + << " " << *LI << "\n" + << " " << *SI << "\n"); + + // We've found a store and load that we need to split + // with mismatched relative splits. Just give up on them + // and remove both instructions from our list of + // candidates. + UnsplittableLoads.insert(LI); + return true; + }), Stores.end()); // Now we have to go *back* through all the stores, because a later store may // have caused an earlier store's load to become unsplittable and if it is // unsplittable for the later store, then we can't rely on it being split in // the earlier store either. - Stores.erase(std::remove_if(Stores.begin(), Stores.end(), - [&UnsplittableLoads](StoreInst *SI) { - auto *LI = - cast<LoadInst>(SI->getValueOperand()); - return UnsplittableLoads.count(LI); - }), + Stores.erase(llvm::remove_if(Stores, + [&UnsplittableLoads](StoreInst *SI) { + auto *LI = + cast<LoadInst>(SI->getValueOperand()); + return UnsplittableLoads.count(LI); + }), Stores.end()); // Once we've established all the loads that can't be split for some reason, // filter any that made it into our list out. - Loads.erase(std::remove_if(Loads.begin(), Loads.end(), - [&UnsplittableLoads](LoadInst *LI) { - return UnsplittableLoads.count(LI); - }), + Loads.erase(llvm::remove_if(Loads, + [&UnsplittableLoads](LoadInst *LI) { + return UnsplittableLoads.count(LI); + }), Loads.end()); - // If no loads or stores are left, there is no pre-splitting to be done for // this alloca. if (Loads.empty() && Stores.empty()) @@ -3562,13 +3646,15 @@ int Idx = 0, Size = Offsets.Splits.size(); for (;;) { auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8); - auto *PartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace()); + auto AS = LI->getPointerAddressSpace(); + auto *PartPtrTy = PartTy->getPointerTo(AS); LoadInst *PLoad = IRB.CreateAlignedLoad( getAdjustedPtr(IRB, DL, BasePtr, - APInt(DL.getPointerSizeInBits(), PartOffset), + APInt(DL.getPointerSizeInBits(AS), PartOffset), PartPtrTy, BasePtr->getName() + "."), getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false, LI->getName()); + PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access); // Append this load onto the list of split loads so we can find it later // to rewrite the stores. @@ -3616,12 +3702,14 @@ auto *PartPtrTy = PLoad->getType()->getPointerTo(SI->getPointerAddressSpace()); + auto AS = SI->getPointerAddressSpace(); StoreInst *PStore = IRB.CreateAlignedStore( - PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr, - APInt(DL.getPointerSizeInBits(), PartOffset), - PartPtrTy, StoreBasePtr->getName() + "."), + PLoad, + getAdjustedPtr(IRB, DL, StoreBasePtr, + APInt(DL.getPointerSizeInBits(AS), PartOffset), + PartPtrTy, StoreBasePtr->getName() + "."), getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false); - (void)PStore; + PStore->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access); DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n"); } @@ -3688,7 +3776,8 @@ int Idx = 0, Size = Offsets.Splits.size(); for (;;) { auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8); - auto *PartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace()); + auto *LoadPartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace()); + auto *StorePartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace()); // Either lookup a split load or create one. 
LoadInst *PLoad; @@ -3696,20 +3785,23 @@ PLoad = (*SplitLoads)[Idx]; } else { IRB.SetInsertPoint(LI); + auto AS = LI->getPointerAddressSpace(); PLoad = IRB.CreateAlignedLoad( getAdjustedPtr(IRB, DL, LoadBasePtr, - APInt(DL.getPointerSizeInBits(), PartOffset), - PartPtrTy, LoadBasePtr->getName() + "."), + APInt(DL.getPointerSizeInBits(AS), PartOffset), + LoadPartPtrTy, LoadBasePtr->getName() + "."), getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false, LI->getName()); } // And store this partition. IRB.SetInsertPoint(SI); + auto AS = SI->getPointerAddressSpace(); StoreInst *PStore = IRB.CreateAlignedStore( - PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr, - APInt(DL.getPointerSizeInBits(), PartOffset), - PartPtrTy, StoreBasePtr->getName() + "."), + PLoad, + getAdjustedPtr(IRB, DL, StoreBasePtr, + APInt(DL.getPointerSizeInBits(AS), PartOffset), + StorePartPtrTy, StoreBasePtr->getName() + "."), getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false); // Now build a new slice for the alloca. @@ -3769,9 +3861,8 @@ } // Remove the killed slices that have ben pre-split. - AS.erase(std::remove_if(AS.begin(), AS.end(), [](const Slice &S) { - return S.isDead(); - }), AS.end()); + AS.erase(llvm::remove_if(AS, [](const Slice &S) { return S.isDead(); }), + AS.end()); // Insert our new slices. This will sort and merge them into the sorted // sequence. @@ -3786,8 +3877,8 @@ // Finally, don't try to promote any allocas that new require re-splitting. // They have already been added to the worklist above. PromotableAllocas.erase( - std::remove_if( - PromotableAllocas.begin(), PromotableAllocas.end(), + llvm::remove_if( + PromotableAllocas, [&](AllocaInst *AI) { return ResplitPromotableAllocas.count(AI); }), PromotableAllocas.end()); @@ -3859,7 +3950,7 @@ if (Alignment <= DL.getABITypeAlignment(SliceTy)) Alignment = 0; NewAI = new AllocaInst( - SliceTy, nullptr, Alignment, + SliceTy, AI.getType()->getAddressSpace(), nullptr, Alignment, AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()), &AI); ++NumNewAllocas; } @@ -3873,8 +3964,8 @@ // fact scheduled for promotion. unsigned PPWOldSize = PostPromotionWorklist.size(); unsigned NumUses = 0; - SmallPtrSet<PHINode *, 8> PHIUsers; - SmallPtrSet<SelectInst *, 8> SelectUsers; + SmallSetVector<PHINode *, 8> PHIUsers; + SmallSetVector<SelectInst *, 8> SelectUsers; AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(), P.endOffset(), IsIntegerPromotable, VecTy, @@ -3890,24 +3981,20 @@ } NumAllocaPartitionUses += NumUses; - MaxUsesPerAllocaPartition = - std::max<unsigned>(NumUses, MaxUsesPerAllocaPartition); + MaxUsesPerAllocaPartition.updateMax(NumUses); // Now that we've processed all the slices in the new partition, check if any // PHIs or Selects would block promotion. - for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(), - E = PHIUsers.end(); - I != E; ++I) - if (!isSafePHIToSpeculate(**I)) { + for (PHINode *PHI : PHIUsers) + if (!isSafePHIToSpeculate(*PHI)) { Promotable = false; PHIUsers.clear(); SelectUsers.clear(); break; } - for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(), - E = SelectUsers.end(); - I != E; ++I) - if (!isSafeSelectToSpeculate(**I)) { + + for (SelectInst *Sel : SelectUsers) + if (!isSafeSelectToSpeculate(*Sel)) { Promotable = false; PHIUsers.clear(); SelectUsers.clear(); @@ -3929,15 +4016,19 @@ Worklist.insert(NewAI); } } else { + // Drop any post-promotion work items if promotion didn't happen. 
+ while (PostPromotionWorklist.size() > PPWOldSize) + PostPromotionWorklist.pop_back(); + + // We couldn't promote and we didn't create a new partition, nothing + // happened. + if (NewAI == &AI) + return nullptr; + // If we can't promote the alloca, iterate on it to check for new // refinements exposed by splitting the current alloca. Don't iterate on an // alloca which didn't actually change and didn't get promoted. - if (NewAI != &AI) - Worklist.insert(NewAI); - - // Drop any post-promotion work items if promotion didn't happen. - while (PostPromotionWorklist.size() > PPWOldSize) - PostPromotionWorklist.pop_back(); + Worklist.insert(NewAI); } return NewAI; @@ -3980,16 +4071,16 @@ if (!IsSorted) std::sort(AS.begin(), AS.end()); - /// \brief Describes the allocas introduced by rewritePartition - /// in order to migrate the debug info. - struct Piece { + /// Describes the allocas introduced by rewritePartition in order to migrate + /// the debug info. + struct Fragment { AllocaInst *Alloca; uint64_t Offset; uint64_t Size; - Piece(AllocaInst *AI, uint64_t O, uint64_t S) + Fragment(AllocaInst *AI, uint64_t O, uint64_t S) : Alloca(AI), Offset(O), Size(S) {} }; - SmallVector<Piece, 4> Pieces; + SmallVector<Fragment, 4> Fragments; // Rewrite each partition. for (auto &P : AS.partitions()) { @@ -4000,52 +4091,58 @@ uint64_t AllocaSize = DL.getTypeSizeInBits(NewAI->getAllocatedType()); // Don't include any padding. uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte); - Pieces.push_back(Piece(NewAI, P.beginOffset() * SizeOfByte, Size)); + Fragments.push_back(Fragment(NewAI, P.beginOffset() * SizeOfByte, Size)); } } ++NumPartitions; } NumAllocaPartitions += NumPartitions; - MaxPartitionsPerAlloca = - std::max<unsigned>(NumPartitions, MaxPartitionsPerAlloca); + MaxPartitionsPerAlloca.updateMax(NumPartitions); // Migrate debug information from the old alloca to the new alloca(s) // and the individual partitions. - if (DbgDeclareInst *DbgDecl = FindAllocaDbgDeclare(&AI)) { - auto *Var = DbgDecl->getVariable(); - auto *Expr = DbgDecl->getExpression(); + TinyPtrVector<DbgInfoIntrinsic *> DbgDeclares = FindDbgAddrUses(&AI); + if (!DbgDeclares.empty()) { + auto *Var = DbgDeclares.front()->getVariable(); + auto *Expr = DbgDeclares.front()->getExpression(); DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false); uint64_t AllocaSize = DL.getTypeSizeInBits(AI.getAllocatedType()); - for (auto Piece : Pieces) { - // Create a piece expression describing the new partition or reuse AI's + for (auto Fragment : Fragments) { + // Create a fragment expression describing the new partition or reuse AI's // expression if there is only one partition. - auto *PieceExpr = Expr; - if (Piece.Size < AllocaSize || Expr->isBitPiece()) { + auto *FragmentExpr = Expr; + if (Fragment.Size < AllocaSize || Expr->isFragment()) { // If this alloca is already a scalar replacement of a larger aggregate, - // Piece.Offset describes the offset inside the scalar. - uint64_t Offset = Expr->isBitPiece() ? Expr->getBitPieceOffset() : 0; - uint64_t Start = Offset + Piece.Offset; - uint64_t Size = Piece.Size; - if (Expr->isBitPiece()) { - uint64_t AbsEnd = Expr->getBitPieceOffset() + Expr->getBitPieceSize(); + // Fragment.Offset describes the offset inside the scalar. + auto ExprFragment = Expr->getFragmentInfo(); + uint64_t Offset = ExprFragment ? 
ExprFragment->OffsetInBits : 0; + uint64_t Start = Offset + Fragment.Offset; + uint64_t Size = Fragment.Size; + if (ExprFragment) { + uint64_t AbsEnd = + ExprFragment->OffsetInBits + ExprFragment->SizeInBits; if (Start >= AbsEnd) // No need to describe a SROAed padding. continue; Size = std::min(Size, AbsEnd - Start); } - PieceExpr = DIB.createBitPieceExpression(Start, Size); - } else { - assert(Pieces.size() == 1 && - "partition is as large as original alloca"); + // The new, smaller fragment is stenciled out from the old fragment. + if (auto OrigFragment = FragmentExpr->getFragmentInfo()) { + assert(Start >= OrigFragment->OffsetInBits && + "new fragment is outside of original fragment"); + Start -= OrigFragment->OffsetInBits; + } + FragmentExpr = + DIExpression::createFragmentExpression(Expr, Start, Size); } - // Remove any existing dbg.declare intrinsic describing the same alloca. - if (DbgDeclareInst *OldDDI = FindAllocaDbgDeclare(Piece.Alloca)) - OldDDI->eraseFromParent(); - - DIB.insertDeclare(Piece.Alloca, Var, PieceExpr, DbgDecl->getDebugLoc(), - &AI); + // Remove any existing intrinsics describing the same alloca. + for (DbgInfoIntrinsic *OldDII : FindDbgAddrUses(Fragment.Alloca)) + OldDII->eraseFromParent(); + + DIB.insertDeclare(Fragment.Alloca, Var, FragmentExpr, + DbgDeclares.front()->getDebugLoc(), &AI); } } return Changed; @@ -4150,6 +4247,15 @@ Instruction *I = DeadInsts.pop_back_val(); DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n"); + // If the instruction is an alloca, find the possible dbg.declare connected + // to it, and remove it too. We must do this before calling RAUW or we will + // not be able to find it. + if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) { + DeletedAllocas.insert(AI); + for (DbgInfoIntrinsic *OldDII : FindDbgAddrUses(AI)) + OldDII->eraseFromParent(); + } + I->replaceAllUsesWith(UndefValue::get(I->getType())); for (Use &Operand : I->operands()) @@ -4160,12 +4266,6 @@ DeadInsts.insert(U); } - if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) { - DeletedAllocas.insert(AI); - if (DbgDeclareInst *DbgDecl = FindAllocaDbgDeclare(AI)) - DbgDecl->eraseFromParent(); - } - ++NumDeleted; I->eraseFromParent(); } @@ -4183,7 +4283,7 @@ NumPromoted += PromotableAllocas.size(); DEBUG(dbgs() << "Promoting allocas with mem2reg...\n"); - PromoteMemToReg(PromotableAllocas, *DT, nullptr, AC); + PromoteMemToReg(PromotableAllocas, *DT, AC); PromotableAllocas.clear(); return true; } @@ -4218,9 +4318,7 @@ auto IsInSet = [&](AllocaInst *AI) { return DeletedAllocas.count(AI); }; Worklist.remove_if(IsInSet); PostPromotionWorklist.remove_if(IsInSet); - PromotableAllocas.erase(std::remove_if(PromotableAllocas.begin(), - PromotableAllocas.end(), - IsInSet), + PromotableAllocas.erase(llvm::remove_if(PromotableAllocas, IsInSet), PromotableAllocas.end()); DeletedAllocas.clear(); } @@ -4232,14 +4330,18 @@ PostPromotionWorklist.clear(); } while (!Worklist.empty()); - // FIXME: Even when promoting allocas we should preserve some abstract set of - // CFG-specific analyses. - return Changed ? 
PreservedAnalyses::none() : PreservedAnalyses::all(); + if (!Changed) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserveSet<CFGAnalyses>(); + PA.preserve<GlobalsAA>(); + return PA; } -PreservedAnalyses SROA::run(Function &F, AnalysisManager<Function> *AM) { - return runImpl(F, AM->getResult<DominatorTreeAnalysis>(F), - AM->getResult<AssumptionAnalysis>(F)); +PreservedAnalyses SROA::run(Function &F, FunctionAnalysisManager &AM) { + return runImpl(F, AM.getResult<DominatorTreeAnalysis>(F), + AM.getResult<AssumptionAnalysis>(F)); } /// A legacy pass for the legacy pass manager that wraps the \c SROA pass. @@ -4251,17 +4353,21 @@ SROA Impl; public: + static char ID; + SROALegacyPass() : FunctionPass(ID) { initializeSROALegacyPassPass(*PassRegistry::getPassRegistry()); } + #ifndef noCbC SROALegacyPass(bool forCbC) : FunctionPass(ID) { onlyForCbC = forCbC; initializeSROALegacyPassPass(*PassRegistry::getPassRegistry()); } #endif + bool runOnFunction(Function &F) override { - if (skipOptnoneFunction(F)) + if (skipFunction(F)) return false; auto PA = Impl.runImpl( @@ -4269,6 +4375,7 @@ getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F)); return !PA.areAllPreserved(); } + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<DominatorTreeWrapperPass>(); @@ -4276,12 +4383,12 @@ AU.setPreservesCFG(); } - const char *getPassName() const override { return "SROA"; } - static char ID; #ifndef noCbC bool onlyForCbC; bool isOnlyForCbC() { return onlyForCbC; } #endif + + StringRef getPassName() const override { return "SROA"; } }; char SROALegacyPass::ID = 0;
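
The SROA hunks above port the pass to the new pass manager: run() now takes a FunctionAnalysisManager by reference and, instead of returning PreservedAnalyses::none() whenever anything changed, reports that CFG analyses and GlobalsAA survive. The following is a minimal sketch (not from this changeset) of driving that entry point; the name runSROAOnFunction and the surrounding setup are illustrative, assuming only the function-level analyses that PassBuilder registers are required.

#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/Scalar/SROA.h"

// Sketch only: run SROA on one function through the new pass manager.
static void runSROAOnFunction(llvm::Function &F) {
  using namespace llvm;
  PassBuilder PB;
  FunctionAnalysisManager FAM;
  // Registers DominatorTreeAnalysis and AssumptionAnalysis, which
  // SROA::run() fetches from the manager as shown above.
  PB.registerFunctionAnalyses(FAM);

  FunctionPassManager FPM;
  FPM.addPass(SROA());
  PreservedAnalyses PA = FPM.run(F, FAM);
  (void)PA; // CFG analyses and GlobalsAA stay valid even when SROA changed F.
}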