Mercurial > hg > CbC > CbC_llvm
comparison lib/Target/NVPTX/NVPTXTargetMachine.cpp @ 120:1172e4bd9c6f
update 4.0.0
author | mir3636 |
---|---|
date | Fri, 25 Nov 2016 19:14:25 +0900 |
parents | afa8332a0e37 |
children | 803732b1fca8 |
comparison
equal
deleted
inserted
replaced
101:34baf5011add | 120:1172e4bd9c6f |
---|---|
18 #include "NVPTXLowerAggrCopies.h" | 18 #include "NVPTXLowerAggrCopies.h" |
19 #include "NVPTXTargetObjectFile.h" | 19 #include "NVPTXTargetObjectFile.h" |
20 #include "NVPTXTargetTransformInfo.h" | 20 #include "NVPTXTargetTransformInfo.h" |
21 #include "llvm/Analysis/Passes.h" | 21 #include "llvm/Analysis/Passes.h" |
22 #include "llvm/CodeGen/AsmPrinter.h" | 22 #include "llvm/CodeGen/AsmPrinter.h" |
23 #include "llvm/CodeGen/MachineFunctionAnalysis.h" | |
24 #include "llvm/CodeGen/MachineModuleInfo.h" | 23 #include "llvm/CodeGen/MachineModuleInfo.h" |
25 #include "llvm/CodeGen/Passes.h" | 24 #include "llvm/CodeGen/Passes.h" |
25 #include "llvm/CodeGen/TargetPassConfig.h" | |
26 #include "llvm/IR/DataLayout.h" | 26 #include "llvm/IR/DataLayout.h" |
27 #include "llvm/IR/IRPrintingPasses.h" | 27 #include "llvm/IR/IRPrintingPasses.h" |
28 #include "llvm/IR/LegacyPassManager.h" | 28 #include "llvm/IR/LegacyPassManager.h" |
29 #include "llvm/IR/Verifier.h" | 29 #include "llvm/IR/Verifier.h" |
30 #include "llvm/MC/MCAsmInfo.h" | 30 #include "llvm/MC/MCAsmInfo.h" |
42 #include "llvm/Target/TargetMachine.h" | 42 #include "llvm/Target/TargetMachine.h" |
43 #include "llvm/Target/TargetOptions.h" | 43 #include "llvm/Target/TargetOptions.h" |
44 #include "llvm/Target/TargetRegisterInfo.h" | 44 #include "llvm/Target/TargetRegisterInfo.h" |
45 #include "llvm/Target/TargetSubtargetInfo.h" | 45 #include "llvm/Target/TargetSubtargetInfo.h" |
46 #include "llvm/Transforms/Scalar.h" | 46 #include "llvm/Transforms/Scalar.h" |
47 #include "llvm/Transforms/Scalar/GVN.h" | |
48 #include "llvm/Transforms/Vectorize.h" | |
47 | 49 |
48 using namespace llvm; | 50 using namespace llvm; |
49 | 51 |
52 // LSV is still relatively new; this switch lets us turn it off in case we | |
53 // encounter (or suspect) a bug. | |
54 static cl::opt<bool> | |
55 DisableLoadStoreVectorizer("disable-nvptx-load-store-vectorizer", | |
56 cl::desc("Disable load/store vectorizer"), | |
57 cl::init(false), cl::Hidden); | |
58 | |
50 namespace llvm { | 59 namespace llvm { |
60 void initializeNVVMIntrRangePass(PassRegistry&); | |
51 void initializeNVVMReflectPass(PassRegistry&); | 61 void initializeNVVMReflectPass(PassRegistry&); |
52 void initializeGenericToNVVMPass(PassRegistry&); | 62 void initializeGenericToNVVMPass(PassRegistry&); |
53 void initializeNVPTXAllocaHoistingPass(PassRegistry &); | 63 void initializeNVPTXAllocaHoistingPass(PassRegistry &); |
54 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); | 64 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); |
55 void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); | 65 void initializeNVPTXInferAddressSpacesPass(PassRegistry &); |
56 void initializeNVPTXLowerAggrCopiesPass(PassRegistry &); | 66 void initializeNVPTXLowerAggrCopiesPass(PassRegistry &); |
57 void initializeNVPTXLowerKernelArgsPass(PassRegistry &); | 67 void initializeNVPTXLowerArgsPass(PassRegistry &); |
58 void initializeNVPTXLowerAllocaPass(PassRegistry &); | 68 void initializeNVPTXLowerAllocaPass(PassRegistry &); |
59 } | 69 } |
60 | 70 |
61 extern "C" void LLVMInitializeNVPTXTarget() { | 71 extern "C" void LLVMInitializeNVPTXTarget() { |
62 // Register the target. | 72 // Register the target. |
63 RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32); | 73 RegisterTargetMachine<NVPTXTargetMachine32> X(getTheNVPTXTarget32()); |
64 RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64); | 74 RegisterTargetMachine<NVPTXTargetMachine64> Y(getTheNVPTXTarget64()); |
65 | 75 |
66 // FIXME: This pass is really intended to be invoked during IR optimization, | 76 // FIXME: This pass is really intended to be invoked during IR optimization, |
67 // but it's very NVPTX-specific. | 77 // but it's very NVPTX-specific. |
68 PassRegistry &PR = *PassRegistry::getPassRegistry(); | 78 PassRegistry &PR = *PassRegistry::getPassRegistry(); |
69 initializeNVVMReflectPass(PR); | 79 initializeNVVMReflectPass(PR); |
80 initializeNVVMIntrRangePass(PR); | |
70 initializeGenericToNVVMPass(PR); | 81 initializeGenericToNVVMPass(PR); |
71 initializeNVPTXAllocaHoistingPass(PR); | 82 initializeNVPTXAllocaHoistingPass(PR); |
72 initializeNVPTXAssignValidGlobalNamesPass(PR); | 83 initializeNVPTXAssignValidGlobalNamesPass(PR); |
73 initializeNVPTXFavorNonGenericAddrSpacesPass(PR); | 84 initializeNVPTXInferAddressSpacesPass(PR); |
74 initializeNVPTXLowerKernelArgsPass(PR); | 85 initializeNVPTXLowerArgsPass(PR); |
75 initializeNVPTXLowerAllocaPass(PR); | 86 initializeNVPTXLowerAllocaPass(PR); |
76 initializeNVPTXLowerAggrCopiesPass(PR); | 87 initializeNVPTXLowerAggrCopiesPass(PR); |
77 } | 88 } |
78 | 89 |
79 static std::string computeDataLayout(bool is64Bit) { | 90 static std::string computeDataLayout(bool is64Bit) { |
88 } | 99 } |
89 | 100 |
90 NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT, | 101 NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT, |
91 StringRef CPU, StringRef FS, | 102 StringRef CPU, StringRef FS, |
92 const TargetOptions &Options, | 103 const TargetOptions &Options, |
93 Reloc::Model RM, CodeModel::Model CM, | 104 Optional<Reloc::Model> RM, |
105 CodeModel::Model CM, | |
94 CodeGenOpt::Level OL, bool is64bit) | 106 CodeGenOpt::Level OL, bool is64bit) |
95 : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, RM, | 107 // The pic relocation model is used regardless of what the client has |
96 CM, OL), | 108 // specified, as it is the only relocation model currently supported. |
97 is64bit(is64bit), TLOF(make_unique<NVPTXTargetObjectFile>()), | 109 : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, |
110 Reloc::PIC_, CM, OL), | |
111 is64bit(is64bit), | |
112 TLOF(make_unique<NVPTXTargetObjectFile>()), | |
98 Subtarget(TT, CPU, FS, *this) { | 113 Subtarget(TT, CPU, FS, *this) { |
99 if (TT.getOS() == Triple::NVCL) | 114 if (TT.getOS() == Triple::NVCL) |
100 drvInterface = NVPTX::NVCL; | 115 drvInterface = NVPTX::NVCL; |
101 else | 116 else |
102 drvInterface = NVPTX::CUDA; | 117 drvInterface = NVPTX::CUDA; |
108 void NVPTXTargetMachine32::anchor() {} | 123 void NVPTXTargetMachine32::anchor() {} |
109 | 124 |
110 NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT, | 125 NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT, |
111 StringRef CPU, StringRef FS, | 126 StringRef CPU, StringRef FS, |
112 const TargetOptions &Options, | 127 const TargetOptions &Options, |
113 Reloc::Model RM, CodeModel::Model CM, | 128 Optional<Reloc::Model> RM, |
129 CodeModel::Model CM, | |
114 CodeGenOpt::Level OL) | 130 CodeGenOpt::Level OL) |
115 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} | 131 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} |
116 | 132 |
117 void NVPTXTargetMachine64::anchor() {} | 133 void NVPTXTargetMachine64::anchor() {} |
118 | 134 |
119 NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT, | 135 NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT, |
120 StringRef CPU, StringRef FS, | 136 StringRef CPU, StringRef FS, |
121 const TargetOptions &Options, | 137 const TargetOptions &Options, |
122 Reloc::Model RM, CodeModel::Model CM, | 138 Optional<Reloc::Model> RM, |
139 CodeModel::Model CM, | |
123 CodeGenOpt::Level OL) | 140 CodeGenOpt::Level OL) |
124 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} | 141 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} |
125 | 142 |
126 namespace { | 143 namespace { |
127 class NVPTXPassConfig : public TargetPassConfig { | 144 class NVPTXPassConfig : public TargetPassConfig { |
141 FunctionPass *createTargetRegisterAllocator(bool) override; | 158 FunctionPass *createTargetRegisterAllocator(bool) override; |
142 void addFastRegAlloc(FunctionPass *RegAllocPass) override; | 159 void addFastRegAlloc(FunctionPass *RegAllocPass) override; |
143 void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; | 160 void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; |
144 | 161 |
145 private: | 162 private: |
146 // if the opt level is aggressive, add GVN; otherwise, add EarlyCSE. | 163 // If the opt level is aggressive, add GVN; otherwise, add EarlyCSE. This |
164 // function is only called in opt mode. | |
147 void addEarlyCSEOrGVNPass(); | 165 void addEarlyCSEOrGVNPass(); |
166 | |
167 // Add passes that propagate special memory spaces. | |
168 void addAddressSpaceInferencePasses(); | |
169 | |
170 // Add passes that perform straight-line scalar optimizations. | |
171 void addStraightLineScalarOptimizationPasses(); | |
148 }; | 172 }; |
149 } // end anonymous namespace | 173 } // end anonymous namespace |
150 | 174 |
151 TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { | 175 TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { |
152 NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM); | 176 return new NVPTXPassConfig(this, PM); |
153 return PassConfig; | 177 } |
178 | |
179 void NVPTXTargetMachine::addEarlyAsPossiblePasses(PassManagerBase &PM) { | |
180 PM.add(createNVVMReflectPass()); | |
181 PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion())); | |
154 } | 182 } |
155 | 183 |
156 TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() { | 184 TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() { |
157 return TargetIRAnalysis([this](const Function &F) { | 185 return TargetIRAnalysis([this](const Function &F) { |
158 return TargetTransformInfo(NVPTXTTIImpl(this, F)); | 186 return TargetTransformInfo(NVPTXTTIImpl(this, F)); |
164 addPass(createGVNPass()); | 192 addPass(createGVNPass()); |
165 else | 193 else |
166 addPass(createEarlyCSEPass()); | 194 addPass(createEarlyCSEPass()); |
167 } | 195 } |
168 | 196 |
169 void NVPTXPassConfig::addIRPasses() { | 197 void NVPTXPassConfig::addAddressSpaceInferencePasses() { |
170 // The following passes are known to not play well with virtual regs hanging | 198 // NVPTXLowerArgs emits alloca for byval parameters which can often |
171 // around after register allocation (which in our case, is *all* registers). | |
172 // We explicitly disable them here. We do, however, need some functionality | |
173 // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the | |
174 // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp). | |
175 disablePass(&PrologEpilogCodeInserterID); | |
176 disablePass(&MachineCopyPropagationID); | |
177 disablePass(&TailDuplicateID); | |
178 | |
179 addPass(createNVVMReflectPass()); | |
180 addPass(createNVPTXImageOptimizerPass()); | |
181 addPass(createNVPTXAssignValidGlobalNamesPass()); | |
182 addPass(createGenericToNVVMPass()); | |
183 | |
184 // === Propagate special address spaces === | |
185 addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine())); | |
186 // NVPTXLowerKernelArgs emits alloca for byval parameters which can often | |
187 // be eliminated by SROA. | 199 // be eliminated by SROA. |
188 addPass(createSROAPass()); | 200 addPass(createSROAPass()); |
189 addPass(createNVPTXLowerAllocaPass()); | 201 addPass(createNVPTXLowerAllocaPass()); |
190 addPass(createNVPTXFavorNonGenericAddrSpacesPass()); | 202 addPass(createNVPTXInferAddressSpacesPass()); |
191 // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leaves | 203 } |
192 // them unused. We could remove dead code in an ad-hoc manner, but that | 204 |
193 // requires manual work and might be error-prone. | 205 void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() { |
194 addPass(createDeadCodeEliminationPass()); | |
195 | |
196 // === Straight-line scalar optimizations === | |
197 addPass(createSeparateConstOffsetFromGEPPass()); | 206 addPass(createSeparateConstOffsetFromGEPPass()); |
198 addPass(createSpeculativeExecutionPass()); | 207 addPass(createSpeculativeExecutionPass()); |
199 // ReassociateGEPs exposes more opportunities for SLSR. See | 208 // ReassociateGEPs exposes more opportunities for SLSR. See |
200 // the example in reassociate-geps-and-slsr.ll. | 209 // the example in reassociate-geps-and-slsr.ll. |
201 addPass(createStraightLineStrengthReducePass()); | 210 addPass(createStraightLineStrengthReducePass()); |
206 // Run NaryReassociate after EarlyCSE/GVN to be more effective. | 215 // Run NaryReassociate after EarlyCSE/GVN to be more effective. |
207 addPass(createNaryReassociatePass()); | 216 addPass(createNaryReassociatePass()); |
208 // NaryReassociate on GEPs creates redundant common expressions, so run | 217 // NaryReassociate on GEPs creates redundant common expressions, so run |
209 // EarlyCSE after it. | 218 // EarlyCSE after it. |
210 addPass(createEarlyCSEPass()); | 219 addPass(createEarlyCSEPass()); |
220 } | |
221 | |
222 void NVPTXPassConfig::addIRPasses() { | |
223 // The following passes are known to not play well with virtual regs hanging | |
224 // around after register allocation (which in our case, is *all* registers). | |
225 // We explicitly disable them here. We do, however, need some functionality | |
226 // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the | |
227 // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp). | |
228 disablePass(&PrologEpilogCodeInserterID); | |
229 disablePass(&MachineCopyPropagationID); | |
230 disablePass(&TailDuplicateID); | |
231 disablePass(&StackMapLivenessID); | |
232 disablePass(&LiveDebugValuesID); | |
233 disablePass(&PostRASchedulerID); | |
234 disablePass(&FuncletLayoutID); | |
235 disablePass(&PatchableFunctionID); | |
236 | |
237 // NVVMReflectPass is added in addEarlyAsPossiblePasses, so hopefully running | |
238 // it here does nothing. But since we need it for correctness when lowering | |
239 // to NVPTX, run it here too, in case whoever built our pass pipeline didn't | |
240 // call addEarlyAsPossiblePasses. | |
241 addPass(createNVVMReflectPass()); | |
242 | |
243 if (getOptLevel() != CodeGenOpt::None) | |
244 addPass(createNVPTXImageOptimizerPass()); | |
245 addPass(createNVPTXAssignValidGlobalNamesPass()); | |
246 addPass(createGenericToNVVMPass()); | |
247 | |
248 // NVPTXLowerArgs is required for correctness and should be run right | |
249 // before the address space inference passes. | |
250 addPass(createNVPTXLowerArgsPass(&getNVPTXTargetMachine())); | |
251 if (getOptLevel() != CodeGenOpt::None) { | |
252 addAddressSpaceInferencePasses(); | |
253 if (!DisableLoadStoreVectorizer) | |
254 addPass(createLoadStoreVectorizerPass()); | |
255 addStraightLineScalarOptimizationPasses(); | |
256 } | |
211 | 257 |
212 // === LSR and other generic IR passes === | 258 // === LSR and other generic IR passes === |
213 TargetPassConfig::addIRPasses(); | 259 TargetPassConfig::addIRPasses(); |
214 // EarlyCSE is not always strong enough to clean up what LSR produces. For | 260 // EarlyCSE is not always strong enough to clean up what LSR produces. For |
215 // example, GVN can combine | 261 // example, GVN can combine |
221 // | 267 // |
222 // %0 = shl nsw %a, 2 | 268 // %0 = shl nsw %a, 2 |
223 // %1 = shl %a, 2 | 269 // %1 = shl %a, 2 |
224 // | 270 // |
225 // but EarlyCSE can do neither of them. | 271 // but EarlyCSE can do neither of them. |
226 addEarlyCSEOrGVNPass(); | 272 if (getOptLevel() != CodeGenOpt::None) |
273 addEarlyCSEOrGVNPass(); | |
227 } | 274 } |
228 | 275 |
229 bool NVPTXPassConfig::addInstSelector() { | 276 bool NVPTXPassConfig::addInstSelector() { |
230 const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl(); | 277 const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl(); |
231 | 278 |
239 return false; | 286 return false; |
240 } | 287 } |
241 | 288 |
242 void NVPTXPassConfig::addPostRegAlloc() { | 289 void NVPTXPassConfig::addPostRegAlloc() { |
243 addPass(createNVPTXPrologEpilogPass(), false); | 290 addPass(createNVPTXPrologEpilogPass(), false); |
244 // NVPTXPrologEpilogPass calculates frame object offset and replaces frame | 291 if (getOptLevel() != CodeGenOpt::None) { |
245 // index with VRFrame register. NVPTXPeephole needs to be run after that and | 292 // NVPTXPrologEpilogPass calculates frame object offset and replaces frame |
246 // will replace VRFrame with VRFrameLocal when possible. | 293 // index with VRFrame register. NVPTXPeephole needs to be run after that and |
247 addPass(createNVPTXPeephole()); | 294 // will replace VRFrame with VRFrameLocal when possible. |
295 addPass(createNVPTXPeephole()); | |
296 } | |
248 } | 297 } |
249 | 298 |
250 FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) { | 299 FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) { |
251 return nullptr; // No reg alloc | 300 return nullptr; // No reg alloc |
252 } | 301 } |