Mercurial > hg > CbC > CbC_llvm
comparison lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp @ 95:afa8332a0e37 LLVM3.8
LLVM 3.8
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 13 Oct 2015 17:48:58 +0900 |
parents | 54457678186b |
children | 7d135dc70f03 |
comparison
equal
deleted
inserted
replaced
84:f3e34b893a5f | 95:afa8332a0e37 |
---|---|
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // When a load/store accesses the generic address space, checks whether the | 10 // When a load/store accesses the generic address space, checks whether the |
11 // address is casted from a non-generic address space. If so, remove this | 11 // address is casted from a non-generic address space. If so, remove this |
12 // addrspacecast because accessing non-generic address spaces is typically | 12 // addrspacecast because accessing non-generic address spaces is typically |
13 // faster. Besides seeking addrspacecasts, this optimization also traces into | 13 // faster. Besides removing addrspacecasts directly used by loads/stores, this |
14 // the base pointer of a GEP. | 14 // optimization also recursively traces into a GEP's pointer operand and a |
15 // bitcast's source to find more eliminable addrspacecasts. | |
15 // | 16 // |
16 // For instance, the code below loads a float from an array allocated in | 17 // For instance, the code below loads a float from an array allocated in |
17 // addrspace(3). | 18 // addrspace(3). |
18 // | 19 // |
19 // %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]* | 20 // %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]* |
20 // %1 = gep [10 x float]* %0, i64 0, i64 %i | 21 // %1 = gep [10 x float]* %0, i64 0, i64 %i |
21 // %2 = load float* %1 ; emits ld.f32 | 22 // %2 = bitcast float* %1 to i32* |
22 // | 23 // %3 = load i32* %2 ; emits ld.u32 |
23 // First, function hoistAddrSpaceCastFromGEP reorders the addrspacecast | 24 // |
24 // and the GEP to expose more optimization opportunities to function | 25 // First, function hoistAddrSpaceCastFrom reorders the addrspacecast, the GEP, |
26 // and the bitcast to expose more optimization opportunities to function | |
25 // optimizeMemoryInst. The intermediate code looks like: | 27 // optimizeMemoryInst. The intermediate code looks like: |
26 // | 28 // |
27 // %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i | 29 // %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i |
28 // %1 = addrspacecast float addrspace(3)* %0 to float* | 30 // %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)* |
29 // %2 = load float* %1 ; still emits ld.f32, but will be optimized shortly | 31 // %2 = addrspacecast i32 addrspace(3)* %1 to i32* |
32 // %3 = load i32* %2 ; still emits ld.u32, but will be optimized shortly | |
30 // | 33 // |
31 // Then, function optimizeMemoryInstruction detects a load from addrspacecast'ed | 34 // Then, function optimizeMemoryInstruction detects a load from addrspacecast'ed |
32 // generic pointers, and folds the load and the addrspacecast into a load from | 35 // generic pointers, and folds the load and the addrspacecast into a load from |
33 // the original address space. The final code looks like: | 36 // the original address space. The final code looks like: |
34 // | 37 // |
35 // %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i | 38 // %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i |
36 // %2 = load float addrspace(3)* %0 ; emits ld.shared.f32 | 39 // %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)* |
40 // %3 = load i32 addrspace(3)* %1 ; emits ld.shared.u32 | |
37 // | 41 // |
38 // This pass may remove an addrspacecast in a different BB. Therefore, we | 42 // This pass may remove an addrspacecast in a different BB. Therefore, we |
39 // implement it as a FunctionPass. | 43 // implement it as a FunctionPass. |
44 // | |
45 // TODO: | |
46 // The current implementation doesn't handle PHINodes. Eliminating | |
47 // addrspacecasts used by PHINodes is trickier because PHINodes can introduce | |
48 // loops in data flow. For example, | |
49 // | |
50 // %generic.input = addrspacecast float addrspace(3)* %input to float* | |
51 // loop: | |
52 // %y = phi [ %generic.input, %y2 ] | |
53 // %y2 = getelementptr %y, 1 | |
54 // %v = load %y2 | |
55 // br ..., label %loop, ... | |
56 // | |
57 // Marking %y2 shared depends on marking %y shared, but %y also data-flow | |
58 // depends on %y2. We probably need an iterative fix-point algorithm to handle | |
59 // this case. | |
40 // | 60 // |
41 //===----------------------------------------------------------------------===// | 61 //===----------------------------------------------------------------------===// |
42 | 62 |
43 #include "NVPTX.h" | 63 #include "NVPTX.h" |
44 #include "llvm/IR/Function.h" | 64 #include "llvm/IR/Function.h" |
60 /// \brief NVPTXFavorNonGenericAddrSpaces | 80 /// \brief NVPTXFavorNonGenericAddrSpaces |
61 class NVPTXFavorNonGenericAddrSpaces : public FunctionPass { | 81 class NVPTXFavorNonGenericAddrSpaces : public FunctionPass { |
62 public: | 82 public: |
63 static char ID; | 83 static char ID; |
64 NVPTXFavorNonGenericAddrSpaces() : FunctionPass(ID) {} | 84 NVPTXFavorNonGenericAddrSpaces() : FunctionPass(ID) {} |
65 | |
66 bool runOnFunction(Function &F) override; | 85 bool runOnFunction(Function &F) override; |
67 | 86 |
87 private: | |
68 /// Optimizes load/store instructions. Idx is the index of the pointer operand | 88 /// Optimizes load/store instructions. Idx is the index of the pointer operand |
69 /// (0 for load, and 1 for store). Returns true if it changes anything. | 89 /// (0 for load, and 1 for store). Returns true if it changes anything. |
70 bool optimizeMemoryInstruction(Instruction *I, unsigned Idx); | 90 bool optimizeMemoryInstruction(Instruction *I, unsigned Idx); |
71 /// Transforms "gep (addrspacecast X), indices" into "addrspacecast (gep X, | 91 /// Recursively traces into a GEP's pointer operand or a bitcast's source to |
72 /// indices)". This reordering exposes to optimizeMemoryInstruction more | 92 /// find an eliminable addrspacecast, and hoists that addrspacecast to the |
73 /// optimization opportunities on loads and stores. Returns true if it changes | 93 /// outermost level. For example, this function transforms |
74 /// the program. | 94 /// bitcast(gep(gep(addrspacecast(X)))) |
75 bool hoistAddrSpaceCastFromGEP(GEPOperator *GEP); | 95 /// to |
96 /// addrspacecast(bitcast(gep(gep(X)))). | |
97 /// | |
98 /// This reordering exposes to optimizeMemoryInstruction more | |
99 /// optimization opportunities on loads and stores. | |
100 /// | |
101 /// If this function successfully hoists an eliminable addrspacecast or V is | |
102 /// already such an addrspacecast, it returns the transformed value (which is | |
103 /// guaranteed to be an addrspacecast); otherwise, it returns nullptr. | |
104 Value *hoistAddrSpaceCastFrom(Value *V, int Depth = 0); | |
105 /// Helper function for GEPs. | |
106 Value *hoistAddrSpaceCastFromGEP(GEPOperator *GEP, int Depth); | |
107 /// Helper function for bitcasts. | |
108 Value *hoistAddrSpaceCastFromBitCast(BitCastOperator *BC, int Depth); | |
76 }; | 109 }; |
77 } | 110 } |
78 | 111 |
79 char NVPTXFavorNonGenericAddrSpaces::ID = 0; | 112 char NVPTXFavorNonGenericAddrSpaces::ID = 0; |
80 | 113 |
83 } | 116 } |
84 INITIALIZE_PASS(NVPTXFavorNonGenericAddrSpaces, "nvptx-favor-non-generic", | 117 INITIALIZE_PASS(NVPTXFavorNonGenericAddrSpaces, "nvptx-favor-non-generic", |
85 "Remove unnecessary non-generic-to-generic addrspacecasts", | 118 "Remove unnecessary non-generic-to-generic addrspacecasts", |
86 false, false) | 119 false, false) |
87 | 120 |
88 // Decides whether removing Cast is valid and beneficial. Cast can be an | 121 // Decides whether V is an addrspacecast and shortcutting V in load/store is |
89 // instruction or a constant expression. | 122 // valid and beneficial. |
90 static bool IsEliminableAddrSpaceCast(Operator *Cast) { | 123 static bool isEliminableAddrSpaceCast(Value *V) { |
91 // Returns false if not even an addrspacecast. | 124 // Returns false if V is not even an addrspacecast. |
92 if (Cast->getOpcode() != Instruction::AddrSpaceCast) | 125 Operator *Cast = dyn_cast<Operator>(V); |
126 if (Cast == nullptr || Cast->getOpcode() != Instruction::AddrSpaceCast) | |
93 return false; | 127 return false; |
94 | 128 |
95 Value *Src = Cast->getOperand(0); | 129 Value *Src = Cast->getOperand(0); |
96 PointerType *SrcTy = cast<PointerType>(Src->getType()); | 130 PointerType *SrcTy = cast<PointerType>(Src->getType()); |
97 PointerType *DestTy = cast<PointerType>(Cast->getType()); | 131 PointerType *DestTy = cast<PointerType>(Cast->getType()); |
106 // generic address space. | 140 // generic address space. |
107 return (SrcTy->getAddressSpace() != AddressSpace::ADDRESS_SPACE_GENERIC && | 141 return (SrcTy->getAddressSpace() != AddressSpace::ADDRESS_SPACE_GENERIC && |
108 DestTy->getAddressSpace() == AddressSpace::ADDRESS_SPACE_GENERIC); | 142 DestTy->getAddressSpace() == AddressSpace::ADDRESS_SPACE_GENERIC); |
109 } | 143 } |
110 | 144 |
111 bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP( | 145 Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP( |
112 GEPOperator *GEP) { | 146 GEPOperator *GEP, int Depth) { |
113 Operator *Cast = dyn_cast<Operator>(GEP->getPointerOperand()); | 147 Value *NewOperand = |
114 if (!Cast) | 148 hoistAddrSpaceCastFrom(GEP->getPointerOperand(), Depth + 1); |
115 return false; | 149 if (NewOperand == nullptr) |
116 | 150 return nullptr; |
117 if (!IsEliminableAddrSpaceCast(Cast)) | 151 |
118 return false; | 152 // hoistAddrSpaceCastFrom returns an eliminable addrspacecast or nullptr. |
153 assert(isEliminableAddrSpaceCast(NewOperand)); | |
154 Operator *Cast = cast<Operator>(NewOperand); | |
119 | 155 |
120 SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end()); | 156 SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end()); |
157 Value *NewASC; | |
121 if (Instruction *GEPI = dyn_cast<Instruction>(GEP)) { | 158 if (Instruction *GEPI = dyn_cast<Instruction>(GEP)) { |
122 // %1 = gep (addrspacecast X), indices | 159 // GEP = gep (addrspacecast X), indices |
123 // => | 160 // => |
124 // %0 = gep X, indices | 161 // NewGEP = gep X, indices |
125 // %1 = addrspacecast %0 | 162 // NewASC = addrspacecast NewGEP |
126 GetElementPtrInst *NewGEPI = GetElementPtrInst::Create(Cast->getOperand(0), | 163 GetElementPtrInst *NewGEP = GetElementPtrInst::Create( |
127 Indices, | 164 GEP->getSourceElementType(), Cast->getOperand(0), Indices, |
128 GEP->getName(), | 165 "", GEPI); |
129 GEPI); | 166 NewGEP->setIsInBounds(GEP->isInBounds()); |
130 NewGEPI->setIsInBounds(GEP->isInBounds()); | 167 NewASC = new AddrSpaceCastInst(NewGEP, GEP->getType(), "", GEPI); |
131 GEP->replaceAllUsesWith( | 168 NewASC->takeName(GEP); |
132 new AddrSpaceCastInst(NewGEPI, GEP->getType(), "", GEPI)); | 169 // Without RAUWing GEP, the compiler would visit GEP again and emit |
170 // redundant instructions. This is exercised in test @rauw in | |
171 // access-non-generic.ll. | |
172 GEP->replaceAllUsesWith(NewASC); | |
133 } else { | 173 } else { |
134 // GEP is a constant expression. | 174 // GEP is a constant expression. |
135 Constant *NewGEPCE = ConstantExpr::getGetElementPtr( | 175 Constant *NewGEP = ConstantExpr::getGetElementPtr( |
136 cast<Constant>(Cast->getOperand(0)), | 176 GEP->getSourceElementType(), cast<Constant>(Cast->getOperand(0)), |
137 Indices, | 177 Indices, GEP->isInBounds()); |
138 GEP->isInBounds()); | 178 NewASC = ConstantExpr::getAddrSpaceCast(NewGEP, GEP->getType()); |
139 GEP->replaceAllUsesWith( | |
140 ConstantExpr::getAddrSpaceCast(NewGEPCE, GEP->getType())); | |
141 } | 179 } |
142 | 180 return NewASC; |
143 return true; | 181 } |
182 | |
183 Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromBitCast( | |
184 BitCastOperator *BC, int Depth) { | |
185 Value *NewOperand = hoistAddrSpaceCastFrom(BC->getOperand(0), Depth + 1); | |
186 if (NewOperand == nullptr) | |
187 return nullptr; | |
188 | |
189 // hoistAddrSpaceCastFrom returns an eliminable addrspacecast or nullptr. | |
190 assert(isEliminableAddrSpaceCast(NewOperand)); | |
191 Operator *Cast = cast<Operator>(NewOperand); | |
192 | |
193 // Cast = addrspacecast Src | |
194 // BC = bitcast Cast | |
195 // => | |
196 // Cast' = bitcast Src | |
197 // BC' = addrspacecast Cast' | |
198 Value *Src = Cast->getOperand(0); | |
199 Type *TypeOfNewCast = | |
200 PointerType::get(BC->getType()->getPointerElementType(), | |
201 Src->getType()->getPointerAddressSpace()); | |
202 Value *NewBC; | |
203 if (BitCastInst *BCI = dyn_cast<BitCastInst>(BC)) { | |
204 Value *NewCast = new BitCastInst(Src, TypeOfNewCast, "", BCI); | |
205 NewBC = new AddrSpaceCastInst(NewCast, BC->getType(), "", BCI); | |
206 NewBC->takeName(BC); | |
207 // Without RAUWing BC, the compiler would visit BC again and emit | |
208 // redundant instructions. This is exercised in test @rauw in | |
209 // access-non-generic.ll. | |
210 BC->replaceAllUsesWith(NewBC); | |
211 } else { | |
212 // BC is a constant expression. | |
213 Constant *NewCast = | |
214 ConstantExpr::getBitCast(cast<Constant>(Src), TypeOfNewCast); | |
215 NewBC = ConstantExpr::getAddrSpaceCast(NewCast, BC->getType()); | |
216 } | |
217 return NewBC; | |
218 } | |
219 | |
220 Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFrom(Value *V, | |
221 int Depth) { | |
222 // Returns V if V is already an eliminable addrspacecast. | |
223 if (isEliminableAddrSpaceCast(V)) | |
224 return V; | |
225 | |
226 // Limit the depth to prevent this recursive function from running too long. | |
227 const int MaxDepth = 20; | |
228 if (Depth >= MaxDepth) | |
229 return nullptr; | |
230 | |
231 // If V is a GEP or bitcast, hoist the addrspacecast if any from its pointer | |
232 // operand. This enables optimizeMemoryInstruction to shortcut addrspacecasts | |
233 // that are not directly used by the load/store. | |
234 if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) | |
235 return hoistAddrSpaceCastFromGEP(GEP, Depth); | |
236 | |
237 if (BitCastOperator *BC = dyn_cast<BitCastOperator>(V)) | |
238 return hoistAddrSpaceCastFromBitCast(BC, Depth); | |
239 | |
240 return nullptr; | |
144 } | 241 } |
145 | 242 |
146 bool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI, | 243 bool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI, |
147 unsigned Idx) { | 244 unsigned Idx) { |
148 // If the pointer operand is a GEP, hoist the addrspacecast if any from the | 245 Value *NewOperand = hoistAddrSpaceCastFrom(MI->getOperand(Idx)); |
149 // GEP to expose more optimization opportunities. | 246 return false; |
150 if (GEPOperator *GEP = dyn_cast<GEPOperator>(MI->getOperand(Idx))) { | 247 return false; |
151 hoistAddrSpaceCastFromGEP(GEP); | |
152 } | |
153 | 248 |
154 // load/store (addrspacecast X) => load/store X if shortcutting the | 249 // load/store (addrspacecast X) => load/store X if shortcutting the |
155 // addrspacecast is valid and can improve performance. | 250 // addrspacecast is valid and can improve performance. |
156 // | 251 // |
157 // e.g., | 252 // e.g., |
159 // %2 = load float* %1 | 254 // %2 = load float* %1 |
160 // -> | 255 // -> |
161 // %2 = load float addrspace(3)* %0 | 256 // %2 = load float addrspace(3)* %0 |
162 // | 257 // |
163 // Note: the addrspacecast can also be a constant expression. | 258 // Note: the addrspacecast can also be a constant expression. |
164 if (Operator *Cast = dyn_cast<Operator>(MI->getOperand(Idx))) { | 259 assert(isEliminableAddrSpaceCast(NewOperand)); |
165 if (IsEliminableAddrSpaceCast(Cast)) { | 260 Operator *ASC = dyn_cast<Operator>(NewOperand); |
166 MI->setOperand(Idx, Cast->getOperand(0)); | 261 MI->setOperand(Idx, ASC->getOperand(0)); |
167 return true; | 262 return true; |
168 } | |
169 } | |
170 | |
171 return false; | |
172 } | 263 } |
173 | 264 |
174 bool NVPTXFavorNonGenericAddrSpaces::runOnFunction(Function &F) { | 265 bool NVPTXFavorNonGenericAddrSpaces::runOnFunction(Function &F) { |
175 if (DisableFavorNonGeneric) | 266 if (DisableFavorNonGeneric) |
176 return false; | 267 return false; |