comparison lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp @ 95:afa8332a0e37 (branch LLVM 3.8)

author    Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date      Tue, 13 Oct 2015 17:48:58 +0900
parents   54457678186b
children  7d135dc70f03
comparing 84:f3e34b893a5f with 95:afa8332a0e37 (the listing below shows the file as of 95:afa8332a0e37)
//===----------------------------------------------------------------------===//
//
// When a load/store accesses the generic address space, checks whether the
// address is cast from a non-generic address space. If so, remove this
// addrspacecast because accessing non-generic address spaces is typically
// faster. Besides removing addrspacecasts directly used by loads/stores, this
// optimization also recursively traces into a GEP's pointer operand and a
// bitcast's source to find more eliminable addrspacecasts.
//
// For instance, the code below loads a float from an array allocated in
// addrspace(3).
//
//   %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
//   %1 = gep [10 x float]* %0, i64 0, i64 %i
//   %2 = bitcast float* %1 to i32*
//   %3 = load i32* %2 ; emits ld.u32
//
// First, function hoistAddrSpaceCastFrom reorders the addrspacecast, the GEP,
// and the bitcast to expose more optimization opportunities to function
// optimizeMemoryInstruction. The intermediate code looks like:
//
//   %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
//   %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)*
//   %2 = addrspacecast i32 addrspace(3)* %1 to i32*
//   %3 = load i32* %2 ; still emits ld.u32, but will be optimized shortly
//
// Then, function optimizeMemoryInstruction detects a load from addrspacecast'ed
// generic pointers, and folds the load and the addrspacecast into a load from
// the original address space. The final code looks like:
//
//   %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
//   %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)*
//   %3 = load i32 addrspace(3)* %1 ; emits ld.shared.u32
//
// This pass may remove an addrspacecast in a different BB. Therefore, we
// implement it as a FunctionPass.
//
// TODO:
// The current implementation doesn't handle PHINodes. Eliminating
// addrspacecasts used by PHINodes is trickier because PHINodes can introduce
// loops in data flow. For example,
//
//     %generic.input = addrspacecast float addrspace(3)* %input to float*
//   loop:
//     %y = phi float* [ %generic.input, %entry ], [ %y2, %loop ]
//     %y2 = getelementptr %y, 1
//     %v = load %y2
//     br ..., label %loop, ...
//
// Marking %y2 shared depends on marking %y shared, but %y also data-flow
// depends on %y2. We probably need an iterative fixed-point algorithm to
// handle this case.
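//
// One possible shape for that fixed point (purely illustrative; the worklist
// and helper named below are hypothetical, not part of this pass): keep
// retrying unresolved generic pointers until a full sweep changes nothing.
//
//   bool Changed = true;
//   while (Changed) {
//     Changed = false;
//     for (Value *V : GenericPointers)      // hypothetical worklist
//       Changed |= tryResolveAddrSpace(V);  // hypothetical helper
//   }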
//
//===----------------------------------------------------------------------===//

#include "NVPTX.h"
#include "llvm/IR/Function.h"
...
/// \brief NVPTXFavorNonGenericAddrSpaces
class NVPTXFavorNonGenericAddrSpaces : public FunctionPass {
public:
  static char ID;
  NVPTXFavorNonGenericAddrSpaces() : FunctionPass(ID) {}
  bool runOnFunction(Function &F) override;

private:
  /// Optimizes load/store instructions. Idx is the index of the pointer
  /// operand (0 for load, and 1 for store). Returns true if it changes
  /// anything.
  bool optimizeMemoryInstruction(Instruction *I, unsigned Idx);
  /// Recursively traces into a GEP's pointer operand or a bitcast's source to
  /// find an eliminable addrspacecast, and hoists that addrspacecast to the
  /// outermost level. For example, this function transforms
  ///   bitcast(gep(gep(addrspacecast(X))))
  /// to
  ///   addrspacecast(bitcast(gep(gep(X)))).
  ///
  /// This reordering exposes more load/store optimization opportunities to
  /// optimizeMemoryInstruction.
  ///
  /// If this function successfully hoists an eliminable addrspacecast or V is
  /// already such an addrspacecast, it returns the transformed value (which is
  /// guaranteed to be an addrspacecast); otherwise, it returns nullptr.
  Value *hoistAddrSpaceCastFrom(Value *V, int Depth = 0);
  /// Helper function for GEPs.
  Value *hoistAddrSpaceCastFromGEP(GEPOperator *GEP, int Depth);
  /// Helper function for bitcasts.
  Value *hoistAddrSpaceCastFromBitCast(BitCastOperator *BC, int Depth);
};
}

char NVPTXFavorNonGenericAddrSpaces::ID = 0;

...
}
INITIALIZE_PASS(NVPTXFavorNonGenericAddrSpaces, "nvptx-favor-non-generic",
                "Remove unnecessary non-generic-to-generic addrspacecasts",
                false, false)

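// Illustrative usage note (an assumption, not in the original file): given
// the registration above, the legacy pass manager can presumably run this
// pass in isolation, e.g.
//
//   opt -nvptx-favor-non-generic -S access-non-generic.ll
//
// which is how the @rauw test referenced in the comments below would
// exercise it.
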
// Decides whether V is an addrspacecast and shortcutting V in load/store is
// valid and beneficial.
static bool isEliminableAddrSpaceCast(Value *V) {
  // Returns false if V is not even an addrspacecast.
  Operator *Cast = dyn_cast<Operator>(V);
  if (Cast == nullptr || Cast->getOpcode() != Instruction::AddrSpaceCast)
    return false;

  Value *Src = Cast->getOperand(0);
  PointerType *SrcTy = cast<PointerType>(Src->getType());
  PointerType *DestTy = cast<PointerType>(Cast->getType());
  ...
  // generic address space.
  return (SrcTy->getAddressSpace() != AddressSpace::ADDRESS_SPACE_GENERIC &&
          DestTy->getAddressSpace() == AddressSpace::ADDRESS_SPACE_GENERIC);
}
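
// For concreteness (illustrative IR, mirroring the examples in the file
// header): only the first cast below satisfies this predicate, because it
// goes from a non-generic address space into the generic one.
//
//   %g = addrspacecast float addrspace(3)* %s to float*  ; eliminable
//   %s2 = addrspacecast float* %g to float addrspace(3)* ; not eliminable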

Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(
    GEPOperator *GEP, int Depth) {
  Value *NewOperand =
      hoistAddrSpaceCastFrom(GEP->getPointerOperand(), Depth + 1);
  if (NewOperand == nullptr)
    return nullptr;

  // hoistAddrSpaceCastFrom returns an eliminable addrspacecast or nullptr.
  assert(isEliminableAddrSpaceCast(NewOperand));
  Operator *Cast = cast<Operator>(NewOperand);

  SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
  Value *NewASC;
  if (Instruction *GEPI = dyn_cast<Instruction>(GEP)) {
    // GEP = gep (addrspacecast X), indices
    // =>
    // NewGEP = gep X, indices
    // NewASC = addrspacecast NewGEP
    GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
        GEP->getSourceElementType(), Cast->getOperand(0), Indices, "", GEPI);
    NewGEP->setIsInBounds(GEP->isInBounds());
    NewASC = new AddrSpaceCastInst(NewGEP, GEP->getType(), "", GEPI);
    NewASC->takeName(GEP);
    // Without RAUWing GEP, the compiler would visit GEP again and emit
    // redundant instructions. This is exercised in test @rauw in
    // access-non-generic.ll.
    GEP->replaceAllUsesWith(NewASC);
  } else {
    // GEP is a constant expression.
    Constant *NewGEP = ConstantExpr::getGetElementPtr(
        GEP->getSourceElementType(), cast<Constant>(Cast->getOperand(0)),
        Indices, GEP->isInBounds());
    NewASC = ConstantExpr::getAddrSpaceCast(NewGEP, GEP->getType());
  }
  return NewASC;
}

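// The else-branch matters when the GEP is itself a constant expression, e.g.
// (schematic IR in the shorthand style of the file header, reusing @a from
// that example):
//
//   load float* gep (addrspacecast @a), i64 0, i64 0
//     ->
//   load float* addrspacecast (gep @a, i64 0, i64 0)
//
// No new instructions are created in that case; the rewritten form is built
// entirely out of constant expressions.
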
Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromBitCast(
    BitCastOperator *BC, int Depth) {
  Value *NewOperand = hoistAddrSpaceCastFrom(BC->getOperand(0), Depth + 1);
  if (NewOperand == nullptr)
    return nullptr;

  // hoistAddrSpaceCastFrom returns an eliminable addrspacecast or nullptr.
  assert(isEliminableAddrSpaceCast(NewOperand));
  Operator *Cast = cast<Operator>(NewOperand);

  // Cast = addrspacecast Src
  // BC = bitcast Cast
  //   =>
  // Cast' = bitcast Src
  // BC' = addrspacecast Cast'
  Value *Src = Cast->getOperand(0);
  Type *TypeOfNewCast =
      PointerType::get(BC->getType()->getPointerElementType(),
                       Src->getType()->getPointerAddressSpace());
  Value *NewBC;
  if (BitCastInst *BCI = dyn_cast<BitCastInst>(BC)) {
    Value *NewCast = new BitCastInst(Src, TypeOfNewCast, "", BCI);
    NewBC = new AddrSpaceCastInst(NewCast, BC->getType(), "", BCI);
    NewBC->takeName(BC);
    // Without RAUWing BC, the compiler would visit BC again and emit
    // redundant instructions. This is exercised in test @rauw in
    // access-non-generic.ll.
    BC->replaceAllUsesWith(NewBC);
  } else {
    // BC is a constant expression.
    Constant *NewCast =
        ConstantExpr::getBitCast(cast<Constant>(Src), TypeOfNewCast);
    NewBC = ConstantExpr::getAddrSpaceCast(NewCast, BC->getType());
  }
  return NewBC;
}
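
// A worked example for TypeOfNewCast (illustrative): if Src has type
// float addrspace(3)* and BC produces i32*, then TypeOfNewCast is
// i32 addrspace(3)*, i.e. BC's pointee type combined with Src's address
// space. The rebuilt bitcast thus stays within addrspace(3), and only the
// final addrspacecast crosses into the generic space.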

Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFrom(Value *V,
                                                              int Depth) {
  // Returns V if V is already an eliminable addrspacecast.
  if (isEliminableAddrSpaceCast(V))
    return V;

  // Limit the depth to prevent this recursive function from running too long.
  const int MaxDepth = 20;
  if (Depth >= MaxDepth)
    return nullptr;

  // If V is a GEP or bitcast, hoist the addrspacecast if any from its pointer
  // operand. This enables optimizeMemoryInstruction to shortcut addrspacecasts
  // that are not directly used by the load/store.
  if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
    return hoistAddrSpaceCastFromGEP(GEP, Depth);

  if (BitCastOperator *BC = dyn_cast<BitCastOperator>(V))
    return hoistAddrSpaceCastFromBitCast(BC, Depth);

  return nullptr;
}

bool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI,
                                                               unsigned Idx) {
  Value *NewOperand = hoistAddrSpaceCastFrom(MI->getOperand(Idx));
  if (NewOperand == nullptr)
    return false;

  // load/store (addrspacecast X) => load/store X if shortcutting the
  // addrspacecast is valid and can improve performance.
  //
  // e.g.,
  //   %1 = addrspacecast float addrspace(3)* %0 to float*
  //   %2 = load float* %1
  //     ->
  //   %2 = load float addrspace(3)* %0
  //
  // Note: the addrspacecast can also be a constant expression.
  assert(isEliminableAddrSpaceCast(NewOperand));
  Operator *ASC = cast<Operator>(NewOperand);
  MI->setOperand(Idx, ASC->getOperand(0));
  return true;
}
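
// The store case (Idx == 1) is symmetric; schematically (illustrative IR):
//
//   %1 = addrspacecast float addrspace(3)* %0 to float*
//   store float %v, float* %1
//     ->
//   store float %v, float addrspace(3)* %0 ; presumably emits st.shared.f32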

bool NVPTXFavorNonGenericAddrSpaces::runOnFunction(Function &F) {
  if (DisableFavorNonGeneric)
    return false;
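
// The remainder of runOnFunction lies outside this hunk. A minimal sketch of
// the driver loop, consistent with the doc comment on
// optimizeMemoryInstruction (pointer operand index 0 for loads, 1 for
// stores); the actual body may differ:
//
//   bool Changed = false;
//   for (BasicBlock &B : F) {
//     for (Instruction &I : B) {
//       if (isa<LoadInst>(I))
//         Changed |= optimizeMemoryInstruction(&I, 0);
//       else if (isa<StoreInst>(I))
//         Changed |= optimizeMemoryInstruction(&I, 1);
//     }
//   }
//   return Changed;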