Mercurial > hg > CbC > CbC_llvm
diff lib/Analysis/InlineCost.cpp @ 120:1172e4bd9c6f
update 4.0.0
author | mir3636 |
---|---|
date | Fri, 25 Nov 2016 19:14:25 +0900 |
parents | 7d135dc70f03 |
children | 803732b1fca8 |
line wrap: on
line diff
--- a/lib/Analysis/InlineCost.cpp Tue Jan 26 22:56:36 2016 +0900 +++ b/lib/Analysis/InlineCost.cpp Fri Nov 25 19:14:25 2016 +0900 @@ -21,6 +21,7 @@ #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" @@ -39,18 +40,7 @@ STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed"); -// Threshold to use when optsize is specified (and there is no -// -inline-threshold). -const int OptSizeThreshold = 75; - -// Threshold to use when -Oz is specified (and there is no -inline-threshold). -const int OptMinSizeThreshold = 25; - -// Threshold to use when -O[34] is specified (and there is no -// -inline-threshold). -const int OptAggressiveThreshold = 275; - -static cl::opt<int> DefaultInlineThreshold( +static cl::opt<int> InlineThreshold( "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore, cl::desc("Control the amount of inlining to perform (default = 225)")); @@ -65,6 +55,11 @@ "inlinecold-threshold", cl::Hidden, cl::init(225), cl::desc("Threshold for inlining functions with cold attribute")); +static cl::opt<int> + HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000), + cl::ZeroOrMore, + cl::desc("Threshold for hot callsites ")); + namespace { class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { @@ -74,17 +69,23 @@ /// The TargetTransformInfo available for this compilation. const TargetTransformInfo &TTI; - /// The cache of @llvm.assume intrinsics. - AssumptionCacheTracker *ACT; + /// Getter for the cache of @llvm.assume intrinsics. + std::function<AssumptionCache &(Function &)> &GetAssumptionCache; - // The called function. + /// Profile summary information. + ProfileSummaryInfo *PSI; + + /// The called function. Function &F; - // The candidate callsite being analyzed. 
Please do not use this to do - // analysis in the caller function; we want the inline cost query to be - // easily cacheable. Instead, use the cover function paramHasAttr. + /// The candidate callsite being analyzed. Please do not use this to do + /// analysis in the caller function; we want the inline cost query to be + /// easily cacheable. Instead, use the cover function paramHasAttr. CallSite CandidateCS; + /// Tunable parameters that control the analysis. + const InlineParams &Params; + int Threshold; int Cost; @@ -103,26 +104,26 @@ int FiftyPercentVectorBonus, TenPercentVectorBonus; int VectorBonus; - // While we walk the potentially-inlined instructions, we build up and - // maintain a mapping of simplified values specific to this callsite. The - // idea is to propagate any special information we have about arguments to - // this call through the inlinable section of the function, and account for - // likely simplifications post-inlining. The most important aspect we track - // is CFG altering simplifications -- when we prove a basic block dead, that - // can cause dramatic shifts in the cost of inlining a function. + /// While we walk the potentially-inlined instructions, we build up and + /// maintain a mapping of simplified values specific to this callsite. The + /// idea is to propagate any special information we have about arguments to + /// this call through the inlinable section of the function, and account for + /// likely simplifications post-inlining. The most important aspect we track + /// is CFG altering simplifications -- when we prove a basic block dead, that + /// can cause dramatic shifts in the cost of inlining a function. DenseMap<Value *, Constant *> SimplifiedValues; - // Keep track of the values which map back (through function arguments) to - // allocas on the caller stack which could be simplified through SROA. 
+ /// Keep track of the values which map back (through function arguments) to + /// allocas on the caller stack which could be simplified through SROA. DenseMap<Value *, Value *> SROAArgValues; - // The mapping of caller Alloca values to their accumulated cost savings. If - // we have to disable SROA for one of the allocas, this tells us how much - // cost must be added. + /// The mapping of caller Alloca values to their accumulated cost savings. If + /// we have to disable SROA for one of the allocas, this tells us how much + /// cost must be added. DenseMap<Value *, int> SROAArgCosts; - // Keep track of values which map to a pointer base and constant offset. - DenseMap<Value *, std::pair<Value *, APInt> > ConstantOffsetPtrs; + /// Keep track of values which map to a pointer base and constant offset. + DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs; // Custom simplification helper routines. bool isAllocaDerivedArg(Value *V); @@ -143,7 +144,7 @@ /// attributes since these can be more precise than the ones on the callee /// itself. bool paramHasAttr(Argument *A, Attribute::AttrKind Attr); - + /// Return true if the given value is known non null within the callee if /// inlined through this particular callsite. bool isKnownNonNullInCallee(Value *V); @@ -154,14 +155,20 @@ /// analysis. void updateThreshold(CallSite CS, Function &Callee); + /// Return true if size growth is allowed when inlining the callee at CS. + bool allowSizeGrowth(CallSite CS); + // Custom analysis routines. bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues); // Disable several entry points to the visitor so we don't accidentally use // them by declaring but not defining them here. 
- void visit(Module *); void visit(Module &); - void visit(Function *); void visit(Function &); - void visit(BasicBlock *); void visit(BasicBlock &); + void visit(Module *); + void visit(Module &); + void visit(Function *); + void visit(Function &); + void visit(BasicBlock *); + void visit(BasicBlock &); // Provide base case for our instruction visit. bool visitInstruction(Instruction &I); @@ -193,9 +200,12 @@ bool visitUnreachableInst(UnreachableInst &I); public: - CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT, - Function &Callee, int Threshold, CallSite CSArg) - : TTI(TTI), ACT(ACT), F(Callee), CandidateCS(CSArg), Threshold(Threshold), + CallAnalyzer(const TargetTransformInfo &TTI, + std::function<AssumptionCache &(Function &)> &GetAssumptionCache, + ProfileSummaryInfo *PSI, Function &Callee, CallSite CSArg, + const InlineParams &Params) + : TTI(TTI), GetAssumptionCache(GetAssumptionCache), PSI(PSI), F(Callee), + CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold), Cost(0), IsCallerRecursive(false), IsRecursiveCall(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), @@ -304,7 +314,8 @@ OpC = dyn_cast<ConstantInt>(SimpleOp); if (!OpC) return false; - if (OpC->isZero()) continue; + if (OpC->isZero()) + continue; // Handle a struct index, which adds its field offset to the pointer. if (StructType *STy = dyn_cast<StructType>(*GTI)) { @@ -322,13 +333,14 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) { // Check whether inlining will turn a dynamic alloca into a static - // alloca, and handle that case. + // alloca and handle that case. 
if (I.isArrayAllocation()) { - if (Constant *Size = SimplifiedValues.lookup(I.getArraySize())) { - ConstantInt *AllocSize = dyn_cast<ConstantInt>(Size); - assert(AllocSize && "Allocation size not a constant int?"); + Constant *Size = SimplifiedValues.lookup(I.getArraySize()); + if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) { + const DataLayout &DL = F.getParent()->getDataLayout(); Type *Ty = I.getAllocatedType(); - AllocatedSize += Ty->getPrimitiveSizeInBits() * AllocSize->getZExtValue(); + AllocatedSize = SaturatingMultiplyAdd( + AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize); return Base::visitAlloca(I); } } @@ -337,7 +349,7 @@ if (I.isStaticAlloca()) { const DataLayout &DL = F.getParent()->getDataLayout(); Type *Ty = I.getAllocatedType(); - AllocatedSize += DL.getTypeAllocSize(Ty); + AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize); } // We will happily inline static alloca instructions. @@ -368,8 +380,8 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { Value *SROAArg; DenseMap<Value *, int>::iterator CostIt; - bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(), - SROAArg, CostIt); + bool SROACandidate = + lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt); // Try to fold GEPs of constant-offset call site argument pointers. This // requires target data and inbounds GEPs. @@ -425,8 +437,8 @@ } // Track base/offsets through casts - std::pair<Value *, APInt> BaseAndOffset - = ConstantOffsetPtrs.lookup(I.getOperand(0)); + std::pair<Value *, APInt> BaseAndOffset = + ConstantOffsetPtrs.lookup(I.getOperand(0)); // Casts don't change the offset, just wrap it up. 
if (BaseAndOffset.first) ConstantOffsetPtrs[&I] = BaseAndOffset; @@ -457,8 +469,8 @@ unsigned IntegerSize = I.getType()->getScalarSizeInBits(); const DataLayout &DL = F.getParent()->getDataLayout(); if (IntegerSize >= DL.getPointerSizeInBits()) { - std::pair<Value *, APInt> BaseAndOffset - = ConstantOffsetPtrs.lookup(I.getOperand(0)); + std::pair<Value *, APInt> BaseAndOffset = + ConstantOffsetPtrs.lookup(I.getOperand(0)); if (BaseAndOffset.first) ConstantOffsetPtrs[&I] = BaseAndOffset; } @@ -547,7 +559,7 @@ bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) { unsigned ArgNo = A->getArgNo(); - return CandidateCS.paramHasAttr(ArgNo+1, Attr); + return CandidateCS.paramHasAttr(ArgNo + 1, Attr); } bool CallAnalyzer::isKnownNonNullInCallee(Value *V) { @@ -559,7 +571,7 @@ if (Argument *A = dyn_cast<Argument>(V)) if (paramHasAttr(A, Attribute::NonNull)) return true; - + // Is this an alloca in the caller? This is distinct from the attribute case // above because attributes aren't updated within the inliner itself and we // always want to catch the alloca derived case. @@ -568,58 +580,89 @@ // alloca-derived value and null. Note that this fires regardless of // SROA firing. return true; - + return false; } +bool CallAnalyzer::allowSizeGrowth(CallSite CS) { + // If the normal destination of the invoke or the parent block of the call + // site is unreachable-terminated, there is little point in inlining this + // unless there is literally zero cost. + // FIXME: Note that it is possible that an unreachable-terminated block has a + // hot entry. For example, in below scenario inlining hot_call_X() may be + // beneficial : + // main() { + // hot_call_1(); + // ... + // hot_call_N() + // exit(0); + // } + // For now, we are not handling this corner case here as it is rare in real + // code. In future, we should elaborate this based on BPI and BFI in more + // general threshold adjusting heuristics in updateThreshold(). 
+ Instruction *Instr = CS.getInstruction(); + if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) { + if (isa<UnreachableInst>(II->getNormalDest()->getTerminator())) + return false; + } else if (isa<UnreachableInst>(Instr->getParent()->getTerminator())) + return false; + + return true; +} + void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { - // If -inline-threshold is not given, listen to the optsize attribute when it - // would decrease the threshold. + // If no size growth is allowed for this inlining, set Threshold to 0. + if (!allowSizeGrowth(CS)) { + Threshold = 0; + return; + } + Function *Caller = CS.getCaller(); - // FIXME: Use Function::optForSize() - bool OptSize = Caller->hasFnAttribute(Attribute::OptimizeForSize); + // return min(A, B) if B is valid. + auto MinIfValid = [](int A, Optional<int> B) { + return B ? std::min(A, B.getValue()) : A; + }; - if (!(DefaultInlineThreshold.getNumOccurrences() > 0) && OptSize && - OptSizeThreshold < Threshold) - Threshold = OptSizeThreshold; + // return max(A, B) if B is valid. + auto MaxIfValid = [](int A, Optional<int> B) { + return B ? std::max(A, B.getValue()) : A; + }; - // If profile information is available, use that to adjust threshold of hot - // and cold functions. - // FIXME: The heuristic used below for determining hotness and coldness are - // based on preliminary SPEC tuning and may not be optimal. Replace this with - // a well-tuned heuristic based on *callsite* hotness and not callee hotness. - uint64_t FunctionCount = 0, MaxFunctionCount = 0; - bool HasPGOCounts = false; - if (Callee.getEntryCount() && Callee.getParent()->getMaximumFunctionCount()) { - HasPGOCounts = true; - FunctionCount = Callee.getEntryCount().getValue(); - MaxFunctionCount = Callee.getParent()->getMaximumFunctionCount().getValue(); + // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available + // and reduce the threshold if the caller has the necessary attribute. 
+ if (Caller->optForMinSize()) + Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold); + else if (Caller->optForSize()) + Threshold = MinIfValid(Threshold, Params.OptSizeThreshold); + + bool HotCallsite = false; + uint64_t TotalWeight; + if (CS.getInstruction()->extractProfTotalWeight(TotalWeight) && + PSI->isHotCount(TotalWeight)) { + HotCallsite = true; } // Listen to the inlinehint attribute or profile based hotness information // when it would increase the threshold and the caller does not need to // minimize its size. - bool InlineHint = - Callee.hasFnAttribute(Attribute::InlineHint) || - (HasPGOCounts && - FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount)); - if (InlineHint && HintThreshold > Threshold && !Caller->optForMinSize()) - Threshold = HintThreshold; + bool InlineHint = Callee.hasFnAttribute(Attribute::InlineHint) || + PSI->isFunctionEntryHot(&Callee); + if (InlineHint && !Caller->optForMinSize()) + Threshold = MaxIfValid(Threshold, Params.HintThreshold); + + if (HotCallsite && !Caller->optForMinSize()) + Threshold = MaxIfValid(Threshold, Params.HotCallSiteThreshold); - // Listen to the cold attribute or profile based coldness information - // when it would decrease the threshold. - bool ColdCallee = - Callee.hasFnAttribute(Attribute::Cold) || - (HasPGOCounts && - FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount)); - // Command line argument for DefaultInlineThreshold will override the default - // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold, - // do not use the default cold threshold even if it is smaller. - if ((DefaultInlineThreshold.getNumOccurrences() == 0 || - ColdThreshold.getNumOccurrences() > 0) && - ColdCallee && ColdThreshold < Threshold) - Threshold = ColdThreshold; + bool ColdCallee = PSI->isFunctionEntryCold(&Callee); + // For cold callees, use the ColdThreshold knob if it is available and reduces + // the threshold. 
+ if (ColdCallee) + Threshold = MinIfValid(Threshold, Params.ColdThreshold); + + // Finally, take the target-specific inlining threshold multiplier into + // account. + Threshold *= TTI.getInliningThresholdMultiplier(); } bool CallAnalyzer::visitCmpInst(CmpInst &I) { @@ -633,7 +676,8 @@ RHS = SimpleRHS; if (Constant *CLHS = dyn_cast<Constant>(LHS)) { if (Constant *CRHS = dyn_cast<Constant>(RHS)) - if (Constant *C = ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) { + if (Constant *C = + ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) { SimplifiedValues[&I] = C; return true; } @@ -794,8 +838,8 @@ if (!InsertedC) InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand()); if (AggC && InsertedC) { - SimplifiedValues[&I] = ConstantExpr::getInsertValue(AggC, InsertedC, - I.getIndices()); + SimplifiedValues[&I] = + ConstantExpr::getInsertValue(AggC, InsertedC, I.getIndices()); return true; } @@ -820,8 +864,8 @@ // Try to re-map the arguments to constants. SmallVector<Constant *, 4> ConstantArgs; ConstantArgs.reserve(CS.arg_size()); - for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); - I != E; ++I) { + for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; + ++I) { Constant *C = dyn_cast<Constant>(*I); if (!C) C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I)); @@ -845,8 +889,7 @@ ExposesReturnsTwice = true; return false; } - if (CS.isCall() && - cast<CallInst>(CS.getInstruction())->cannotDuplicate()) + if (CS.isCall() && cast<CallInst>(CS.getInstruction())->cannotDuplicate()) ContainsNoDuplicateCall = true; if (Function *F = CS.getCalledFunction()) { @@ -861,6 +904,11 @@ default: return Base::visitCallSite(CS); + case Intrinsic::load_relative: + // This is normally lowered to 4 LLVM instructions. 
+ Cost += 3 * InlineConstants::InstrCost; + return false; + case Intrinsic::memset: case Intrinsic::memcpy: case Intrinsic::memmove: @@ -912,7 +960,9 @@ // during devirtualization and so we want to give it a hefty bonus for // inlining, but cap that bonus in the event that inlining wouldn't pan // out. Pretend to inline the function, with a custom threshold. - CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS); + auto IndirectCallParams = Params; + IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold; + CallAnalyzer CA(TTI, GetAssumptionCache, PSI, *F, CS, IndirectCallParams); if (CA.analyzeCall(CS)) { // We were able to inline the indirect call! Subtract the cost from the // threshold to get the bonus we want to apply, but don't go below zero. @@ -1019,7 +1069,6 @@ return false; } - /// \brief Analyze a basic block for its contribution to the inline cost. /// /// This method walks the analyzer over every instruction in the given basic @@ -1125,7 +1174,7 @@ } else if (Operator::getOpcode(V) == Instruction::BitCast) { V = cast<Operator>(V)->getOperand(0); } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { - if (GA->mayBeOverridden()) + if (GA->isInterposable()) break; V = GA->getAliasee(); } else { @@ -1206,24 +1255,15 @@ Cost -= InlineConstants::InstrCost; } } - + // The call instruction also disappears after inlining. + Cost -= InlineConstants::InstrCost + InlineConstants::CallPenalty; + // If there is only one call of the function, and it has internal linkage, // the cost of inlining it drops dramatically. 
- bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() && - &F == CS.getCalledFunction(); + bool OnlyOneCallAndLocalLinkage = + F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction(); if (OnlyOneCallAndLocalLinkage) - Cost += InlineConstants::LastCallToStaticBonus; - - // If the instruction after the call, or if the normal destination of the - // invoke is an unreachable instruction, the function is noreturn. As such, - // there is little point in inlining this unless there is literally zero - // cost. - Instruction *Instr = CS.getInstruction(); - if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) { - if (isa<UnreachableInst>(II->getNormalDest()->begin())) - Threshold = 0; - } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr))) - Threshold = 0; + Cost -= InlineConstants::LastCallToStaticBonus; // If this function uses the coldcc calling convention, prefer not to inline // it. @@ -1278,7 +1318,7 @@ // the ephemeral values multiple times (and they're completely determined by // the callee, so this is purely duplicate work). SmallPtrSet<const Value *, 32> EphValues; - CodeMetrics::collectEphemeralValues(&F, &ACT->getAssumptionCache(F), EphValues); + CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues); // The worklist of live basic blocks in the callee *after* inlining. We avoid // adding basic blocks of the callee which can be proven to be dead for this @@ -1288,7 +1328,8 @@ // accomplish this, prioritizing for small iterations because we exit after // crossing our threshold, we use a small-size optimized SetVector. typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>, - SmallPtrSet<BasicBlock *, 16> > BBSetVector; + SmallPtrSet<BasicBlock *, 16>> + BBSetVector; BBSetVector BBWorklist; BBWorklist.insert(&F.getEntryBlock()); // Note that we *must not* cache the size, this loop grows the worklist. @@ -1313,20 +1354,8 @@ // Analyze the cost of this block. 
If we blow through the threshold, this // returns false, and we can bail on out. - if (!analyzeBlock(BB, EphValues)) { - if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || - HasIndirectBr || HasFrameEscape) - return false; - - // If the caller is a recursive function then we don't want to inline - // functions which allocate a lot of stack space because it would increase - // the caller stack usage dramatically. - if (IsCallerRecursive && - AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) - return false; - - break; - } + if (!analyzeBlock(BB, EphValues)) + return false; TerminatorInst *TI = BB->getTerminator(); @@ -1335,16 +1364,16 @@ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { if (BI->isConditional()) { Value *Cond = BI->getCondition(); - if (ConstantInt *SimpleCond - = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) { + if (ConstantInt *SimpleCond = + dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) { BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0)); continue; } } } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { Value *Cond = SI->getCondition(); - if (ConstantInt *SimpleCond - = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) { + if (ConstantInt *SimpleCond = + dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) { BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor()); continue; } @@ -1381,12 +1410,12 @@ else if (NumVectorInstructions <= NumInstructions / 2) Threshold -= (FiftyPercentVectorBonus - TenPercentVectorBonus); - return Cost <= std::max(0, Threshold); + return Cost < std::max(1, Threshold); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// \brief Dump stats about this call's analysis. 
-void CallAnalyzer::dump() { +LLVM_DUMP_METHOD void CallAnalyzer::dump() { #define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n" DEBUG_PRINT_STAT(NumConstantArgs); DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs); @@ -1406,7 +1435,7 @@ /// \brief Test that two functions either have or have not the given attribute /// at the same time. -template<typename AttrKind> +template <typename AttrKind> static bool attributeMatches(Function *F1, Function *F2, AttrKind Attr) { return F1->getFnAttribute(Attr) == F2->getFnAttribute(Attr); } @@ -1420,30 +1449,19 @@ AttributeFuncs::areInlineCompatible(*Caller, *Callee); } -InlineCost llvm::getInlineCost(CallSite CS, int DefaultThreshold, - TargetTransformInfo &CalleeTTI, - AssumptionCacheTracker *ACT) { - return getInlineCost(CS, CS.getCalledFunction(), DefaultThreshold, CalleeTTI, - ACT); +InlineCost llvm::getInlineCost( + CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, + std::function<AssumptionCache &(Function &)> &GetAssumptionCache, + ProfileSummaryInfo *PSI) { + return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI, + GetAssumptionCache, PSI); } -int llvm::computeThresholdFromOptLevels(unsigned OptLevel, - unsigned SizeOptLevel) { - if (OptLevel > 2) - return OptAggressiveThreshold; - if (SizeOptLevel == 1) // -Os - return OptSizeThreshold; - if (SizeOptLevel == 2) // -Oz - return OptMinSizeThreshold; - return DefaultInlineThreshold; -} - -int llvm::getDefaultInlineThreshold() { return DefaultInlineThreshold; } - -InlineCost llvm::getInlineCost(CallSite CS, Function *Callee, - int DefaultThreshold, - TargetTransformInfo &CalleeTTI, - AssumptionCacheTracker *ACT) { +InlineCost llvm::getInlineCost( + CallSite CS, Function *Callee, const InlineParams &Params, + TargetTransformInfo &CalleeTTI, + std::function<AssumptionCache &(Function &)> &GetAssumptionCache, + ProfileSummaryInfo *PSI) { // Cannot inline indirect calls. 
if (!Callee) @@ -1466,17 +1484,18 @@ if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone)) return llvm::InlineCost::getNever(); - // Don't inline functions which can be redefined at link-time to mean - // something else. Don't inline functions marked noinline or call sites - // marked noinline. - if (Callee->mayBeOverridden() || - Callee->hasFnAttribute(Attribute::NoInline) || CS.isNoInline()) + // Don't inline functions which can be interposed at link-time. Don't inline + // functions marked noinline or call sites marked noinline. + // Note: inlining non-exact non-interposable functions is fine, since we know + // we have *a* correct implementation of the source level function. + if (Callee->isInterposable() || Callee->hasFnAttribute(Attribute::NoInline) || + CS.isNoInline()) return llvm::InlineCost::getNever(); DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() - << "...\n"); + << "...\n"); - CallAnalyzer CA(CalleeTTI, ACT, *Callee, DefaultThreshold, CS); + CallAnalyzer CA(CalleeTTI, GetAssumptionCache, PSI, *Callee, CS, Params); bool ShouldInline = CA.analyzeCall(CS); DEBUG(CA.dump()); @@ -1524,3 +1543,67 @@ return true; } + +// APIs to create InlineParams based on command line flags and/or other +// parameters. + +InlineParams llvm::getInlineParams(int Threshold) { + InlineParams Params; + + // This field is the threshold to use for a callee by default. This is + // derived from one or more of: + // * optimization or size-optimization levels, + // * a value passed to createFunctionInliningPass function, or + // * the -inline-threshold flag. + // If the -inline-threshold flag is explicitly specified, that is used + // irrespective of anything else. + if (InlineThreshold.getNumOccurrences() > 0) + Params.DefaultThreshold = InlineThreshold; + else + Params.DefaultThreshold = Threshold; + + // Set the HintThreshold knob from the -inlinehint-threshold.
+ Params.HintThreshold = HintThreshold; + + // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold. + Params.HotCallSiteThreshold = HotCallSiteThreshold; + + // Set the OptMinSizeThreshold and OptSizeThreshold params only if the + // -inline-threshold commandline option is not explicitly given. If that + // option is present, then its value applies even for callees with size and + // minsize attributes. + // + // If the -inline-threshold is not specified, set the ColdThreshold from the + // -inlinecold-threshold even if it is not explicitly passed. If + // -inline-threshold is specified, then -inlinecold-threshold needs to be + // explicitly specified to set the ColdThreshold knob. + if (InlineThreshold.getNumOccurrences() == 0) { + Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold; + Params.OptSizeThreshold = InlineConstants::OptSizeThreshold; + Params.ColdThreshold = ColdThreshold; + } else if (ColdThreshold.getNumOccurrences() > 0) { + Params.ColdThreshold = ColdThreshold; + } + return Params; +} + +InlineParams llvm::getInlineParams() { + return getInlineParams(InlineThreshold); +} + +// Compute the default threshold for inlining based on the opt level and the +// size opt level. +static int computeThresholdFromOptLevels(unsigned OptLevel, + unsigned SizeOptLevel) { + if (OptLevel > 2) + return InlineConstants::OptAggressiveThreshold; + if (SizeOptLevel == 1) // -Os + return InlineConstants::OptSizeThreshold; + if (SizeOptLevel == 2) // -Oz + return InlineConstants::OptMinSizeThreshold; + return InlineThreshold; +} + +InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) { + return getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel)); +}