Mercurial > hg > CbC > CbC_llvm
diff lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp @ 95:afa8332a0e37 LLVM3.8
LLVM 3.8
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 13 Oct 2015 17:48:58 +0900 |
parents | 54457678186b |
children | 7d135dc70f03 |
line wrap: on
line diff
--- a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp Wed Feb 18 14:56:07 2015 +0900 +++ b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp Tue Oct 13 17:48:58 2015 +0900 @@ -10,34 +10,54 @@ // When a load/store accesses the generic address space, checks whether the // address is casted from a non-generic address space. If so, remove this // addrspacecast because accessing non-generic address spaces is typically -// faster. Besides seeking addrspacecasts, this optimization also traces into -// the base pointer of a GEP. +// faster. Besides removing addrspacecasts directly used by loads/stores, this +// optimization also recursively traces into a GEP's pointer operand and a +// bitcast's source to find more eliminable addrspacecasts. // // For instance, the code below loads a float from an array allocated in // addrspace(3). // -// %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]* -// %1 = gep [10 x float]* %0, i64 0, i64 %i -// %2 = load float* %1 ; emits ld.f32 +// %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]* +// %1 = gep [10 x float]* %0, i64 0, i64 %i +// %2 = bitcast float* %1 to i32* +// %3 = load i32* %2 ; emits ld.u32 // -// First, function hoistAddrSpaceCastFromGEP reorders the addrspacecast -// and the GEP to expose more optimization opportunities to function +// First, function hoistAddrSpaceCastFrom reorders the addrspacecast, the GEP, +// and the bitcast to expose more optimization opportunities to function // optimizeMemoryInst. The intermediate code looks like: // -// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i -// %1 = addrspacecast float addrspace(3)* %0 to float* -// %2 = load float* %1 ; still emits ld.f32, but will be optimized shortly +// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i +// %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)* +// %2 = addrspacecast i32 addrspace(3)* %1 to i32* +// %3 = load i32* %2 ; still emits ld.u32, but will be optimized shortly // // Then, function optimizeMemoryInstruction detects a load from addrspacecast'ed // generic pointers, and folds the load and the addrspacecast into a load from // the original address space. The final code looks like: // -// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i -// %2 = load float addrspace(3)* %0 ; emits ld.shared.f32 +// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i +// %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)* +// %3 = load i32 addrspace(3)* %1 ; emits ld.shared.f32 // // This pass may remove an addrspacecast in a different BB. Therefore, we // implement it as a FunctionPass. // +// TODO: +// The current implementation doesn't handle PHINodes. Eliminating +// addrspacecasts used by PHINodes is trickier because PHINodes can introduce +// loops in data flow. For example, +// +// %generic.input = addrspacecast float addrspace(3)* %input to float* +// loop: +// %y = phi [ %generic.input, %y2 ] +// %y2 = getelementptr %y, 1 +// %v = load %y2 +// br ..., label %loop, ... +// +// Marking %y2 shared depends on marking %y shared, but %y also data-flow +// depends on %y2. We probably need an iterative fix-point algorithm on handle +// this case. +// //===----------------------------------------------------------------------===// #include "NVPTX.h" @@ -62,17 +82,30 @@ public: static char ID; NVPTXFavorNonGenericAddrSpaces() : FunctionPass(ID) {} - bool runOnFunction(Function &F) override; +private: /// Optimizes load/store instructions. Idx is the index of the pointer operand /// (0 for load, and 1 for store). Returns true if it changes anything. bool optimizeMemoryInstruction(Instruction *I, unsigned Idx); - /// Transforms "gep (addrspacecast X), indices" into "addrspacecast (gep X, - /// indices)". This reordering exposes to optimizeMemoryInstruction more - /// optimization opportunities on loads and stores. Returns true if it changes - /// the program. - bool hoistAddrSpaceCastFromGEP(GEPOperator *GEP); + /// Recursively traces into a GEP's pointer operand or a bitcast's source to + /// find an eliminable addrspacecast, and hoists that addrspacecast to the + /// outermost level. For example, this function transforms + /// bitcast(gep(gep(addrspacecast(X)))) + /// to + /// addrspacecast(bitcast(gep(gep(X)))). + /// + /// This reordering exposes to optimizeMemoryInstruction more + /// optimization opportunities on loads and stores. + /// + /// If this function successfully hoists an eliminable addrspacecast or V is + /// already such an addrspacecast, it returns the transformed value (which is + /// guaranteed to be an addrspacecast); otherwise, it returns nullptr. + Value *hoistAddrSpaceCastFrom(Value *V, int Depth = 0); + /// Helper function for GEPs. + Value *hoistAddrSpaceCastFromGEP(GEPOperator *GEP, int Depth); + /// Helper function for bitcasts. + Value *hoistAddrSpaceCastFromBitCast(BitCastOperator *BC, int Depth); }; } @@ -85,11 +118,12 @@ "Remove unnecessary non-generic-to-generic addrspacecasts", false, false) -// Decides whether removing Cast is valid and beneficial. Cast can be an -// instruction or a constant expression. -static bool IsEliminableAddrSpaceCast(Operator *Cast) { - // Returns false if not even an addrspacecast. - if (Cast->getOpcode() != Instruction::AddrSpaceCast) +// Decides whether V is an addrspacecast and shortcutting V in load/store is +// valid and beneficial. +static bool isEliminableAddrSpaceCast(Value *V) { + // Returns false if V is not even an addrspacecast. + Operator *Cast = dyn_cast<Operator>(V); + if (Cast == nullptr || Cast->getOpcode() != Instruction::AddrSpaceCast) return false; Value *Src = Cast->getOperand(0); @@ -108,48 +142,109 @@ DestTy->getAddressSpace() == AddressSpace::ADDRESS_SPACE_GENERIC); } -bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP( - GEPOperator *GEP) { - Operator *Cast = dyn_cast<Operator>(GEP->getPointerOperand()); - if (!Cast) - return false; +Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP( + GEPOperator *GEP, int Depth) { + Value *NewOperand = + hoistAddrSpaceCastFrom(GEP->getPointerOperand(), Depth + 1); + if (NewOperand == nullptr) + return nullptr; - if (!IsEliminableAddrSpaceCast(Cast)) - return false; + // hoistAddrSpaceCastFrom returns an eliminable addrspacecast or nullptr. + assert(isEliminableAddrSpaceCast(NewOperand)); + Operator *Cast = cast<Operator>(NewOperand); SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end()); + Value *NewASC; if (Instruction *GEPI = dyn_cast<Instruction>(GEP)) { - // %1 = gep (addrspacecast X), indices + // GEP = gep (addrspacecast X), indices // => - // %0 = gep X, indices - // %1 = addrspacecast %0 - GetElementPtrInst *NewGEPI = GetElementPtrInst::Create(Cast->getOperand(0), - Indices, - GEP->getName(), - GEPI); - NewGEPI->setIsInBounds(GEP->isInBounds()); - GEP->replaceAllUsesWith( - new AddrSpaceCastInst(NewGEPI, GEP->getType(), "", GEPI)); + // NewGEP = gep X, indices + // NewASC = addrspacecast NewGEP + GetElementPtrInst *NewGEP = GetElementPtrInst::Create( + GEP->getSourceElementType(), Cast->getOperand(0), Indices, + "", GEPI); + NewGEP->setIsInBounds(GEP->isInBounds()); + NewASC = new AddrSpaceCastInst(NewGEP, GEP->getType(), "", GEPI); + NewASC->takeName(GEP); + // Without RAUWing GEP, the compiler would visit GEP again and emit + // redundant instructions. This is exercised in test @rauw in + // access-non-generic.ll. + GEP->replaceAllUsesWith(NewASC); } else { // GEP is a constant expression. - Constant *NewGEPCE = ConstantExpr::getGetElementPtr( - cast<Constant>(Cast->getOperand(0)), - Indices, - GEP->isInBounds()); - GEP->replaceAllUsesWith( - ConstantExpr::getAddrSpaceCast(NewGEPCE, GEP->getType())); + Constant *NewGEP = ConstantExpr::getGetElementPtr( + GEP->getSourceElementType(), cast<Constant>(Cast->getOperand(0)), + Indices, GEP->isInBounds()); + NewASC = ConstantExpr::getAddrSpaceCast(NewGEP, GEP->getType()); } + return NewASC; +} - return true; +Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromBitCast( + BitCastOperator *BC, int Depth) { + Value *NewOperand = hoistAddrSpaceCastFrom(BC->getOperand(0), Depth + 1); + if (NewOperand == nullptr) + return nullptr; + + // hoistAddrSpaceCastFrom returns an eliminable addrspacecast or nullptr. + assert(isEliminableAddrSpaceCast(NewOperand)); + Operator *Cast = cast<Operator>(NewOperand); + + // Cast = addrspacecast Src + // BC = bitcast Cast + // => + // Cast' = bitcast Src + // BC' = addrspacecast Cast' + Value *Src = Cast->getOperand(0); + Type *TypeOfNewCast = + PointerType::get(BC->getType()->getPointerElementType(), + Src->getType()->getPointerAddressSpace()); + Value *NewBC; + if (BitCastInst *BCI = dyn_cast<BitCastInst>(BC)) { + Value *NewCast = new BitCastInst(Src, TypeOfNewCast, "", BCI); + NewBC = new AddrSpaceCastInst(NewCast, BC->getType(), "", BCI); + NewBC->takeName(BC); + // Without RAUWing BC, the compiler would visit BC again and emit + // redundant instructions. This is exercised in test @rauw in + // access-non-generic.ll. + BC->replaceAllUsesWith(NewBC); + } else { + // BC is a constant expression. + Constant *NewCast = + ConstantExpr::getBitCast(cast<Constant>(Src), TypeOfNewCast); + NewBC = ConstantExpr::getAddrSpaceCast(NewCast, BC->getType()); + } + return NewBC; +} + +Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFrom(Value *V, + int Depth) { + // Returns V if V is already an eliminable addrspacecast. + if (isEliminableAddrSpaceCast(V)) + return V; + + // Limit the depth to prevent this recursive function from running too long. + const int MaxDepth = 20; + if (Depth >= MaxDepth) + return nullptr; + + // If V is a GEP or bitcast, hoist the addrspacecast if any from its pointer + // operand. This enables optimizeMemoryInstruction to shortcut addrspacecasts + // that are not directly used by the load/store. + if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) + return hoistAddrSpaceCastFromGEP(GEP, Depth); + + if (BitCastOperator *BC = dyn_cast<BitCastOperator>(V)) + return hoistAddrSpaceCastFromBitCast(BC, Depth); + + return nullptr; } bool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI, unsigned Idx) { - // If the pointer operand is a GEP, hoist the addrspacecast if any from the - // GEP to expose more optimization opportunites. - if (GEPOperator *GEP = dyn_cast<GEPOperator>(MI->getOperand(Idx))) { - hoistAddrSpaceCastFromGEP(GEP); - } + Value *NewOperand = hoistAddrSpaceCastFrom(MI->getOperand(Idx)); + if (NewOperand == nullptr) + return false; // load/store (addrspacecast X) => load/store X if shortcutting the // addrspacecast is valid and can improve performance. @@ -161,14 +256,10 @@ // %2 = load float addrspace(3)* %0 // // Note: the addrspacecast can also be a constant expression. - if (Operator *Cast = dyn_cast<Operator>(MI->getOperand(Idx))) { - if (IsEliminableAddrSpaceCast(Cast)) { - MI->setOperand(Idx, Cast->getOperand(0)); - return true; - } - } - - return false; + assert(isEliminableAddrSpaceCast(NewOperand)); + Operator *ASC = dyn_cast<Operator>(NewOperand); + MI->setOperand(Idx, ASC->getOperand(0)); + return true; } bool NVPTXFavorNonGenericAddrSpaces::runOnFunction(Function &F) {