comparison lib/Target/NVPTX/NVPTXTargetMachine.cpp @ 120:1172e4bd9c6f

update 4.0.0
author mir3636
date Fri, 25 Nov 2016 19:14:25 +0900
parents afa8332a0e37
children 803732b1fca8
comparison
equal deleted inserted replaced
101:34baf5011add 120:1172e4bd9c6f
18 #include "NVPTXLowerAggrCopies.h" 18 #include "NVPTXLowerAggrCopies.h"
19 #include "NVPTXTargetObjectFile.h" 19 #include "NVPTXTargetObjectFile.h"
20 #include "NVPTXTargetTransformInfo.h" 20 #include "NVPTXTargetTransformInfo.h"
21 #include "llvm/Analysis/Passes.h" 21 #include "llvm/Analysis/Passes.h"
22 #include "llvm/CodeGen/AsmPrinter.h" 22 #include "llvm/CodeGen/AsmPrinter.h"
23 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
24 #include "llvm/CodeGen/MachineModuleInfo.h" 23 #include "llvm/CodeGen/MachineModuleInfo.h"
25 #include "llvm/CodeGen/Passes.h" 24 #include "llvm/CodeGen/Passes.h"
25 #include "llvm/CodeGen/TargetPassConfig.h"
26 #include "llvm/IR/DataLayout.h" 26 #include "llvm/IR/DataLayout.h"
27 #include "llvm/IR/IRPrintingPasses.h" 27 #include "llvm/IR/IRPrintingPasses.h"
28 #include "llvm/IR/LegacyPassManager.h" 28 #include "llvm/IR/LegacyPassManager.h"
29 #include "llvm/IR/Verifier.h" 29 #include "llvm/IR/Verifier.h"
30 #include "llvm/MC/MCAsmInfo.h" 30 #include "llvm/MC/MCAsmInfo.h"
42 #include "llvm/Target/TargetMachine.h" 42 #include "llvm/Target/TargetMachine.h"
43 #include "llvm/Target/TargetOptions.h" 43 #include "llvm/Target/TargetOptions.h"
44 #include "llvm/Target/TargetRegisterInfo.h" 44 #include "llvm/Target/TargetRegisterInfo.h"
45 #include "llvm/Target/TargetSubtargetInfo.h" 45 #include "llvm/Target/TargetSubtargetInfo.h"
46 #include "llvm/Transforms/Scalar.h" 46 #include "llvm/Transforms/Scalar.h"
47 #include "llvm/Transforms/Scalar/GVN.h"
48 #include "llvm/Transforms/Vectorize.h"
47 49
48 using namespace llvm; 50 using namespace llvm;
49 51
52 // LSV is still relatively new; this switch lets us turn it off in case we
53 // encounter (or suspect) a bug.
54 static cl::opt<bool>
55 DisableLoadStoreVectorizer("disable-nvptx-load-store-vectorizer",
56 cl::desc("Disable load/store vectorizer"),
57 cl::init(false), cl::Hidden);
58
50 namespace llvm { 59 namespace llvm {
60 void initializeNVVMIntrRangePass(PassRegistry&);
51 void initializeNVVMReflectPass(PassRegistry&); 61 void initializeNVVMReflectPass(PassRegistry&);
52 void initializeGenericToNVVMPass(PassRegistry&); 62 void initializeGenericToNVVMPass(PassRegistry&);
53 void initializeNVPTXAllocaHoistingPass(PassRegistry &); 63 void initializeNVPTXAllocaHoistingPass(PassRegistry &);
54 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); 64 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
55 void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); 65 void initializeNVPTXInferAddressSpacesPass(PassRegistry &);
56 void initializeNVPTXLowerAggrCopiesPass(PassRegistry &); 66 void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
57 void initializeNVPTXLowerKernelArgsPass(PassRegistry &); 67 void initializeNVPTXLowerArgsPass(PassRegistry &);
58 void initializeNVPTXLowerAllocaPass(PassRegistry &); 68 void initializeNVPTXLowerAllocaPass(PassRegistry &);
59 } 69 }
60 70
61 extern "C" void LLVMInitializeNVPTXTarget() { 71 extern "C" void LLVMInitializeNVPTXTarget() {
62 // Register the target. 72 // Register the target.
63 RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32); 73 RegisterTargetMachine<NVPTXTargetMachine32> X(getTheNVPTXTarget32());
64 RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64); 74 RegisterTargetMachine<NVPTXTargetMachine64> Y(getTheNVPTXTarget64());
65 75
66 // FIXME: This pass is really intended to be invoked during IR optimization, 76 // FIXME: This pass is really intended to be invoked during IR optimization,
67 // but it's very NVPTX-specific. 77 // but it's very NVPTX-specific.
68 PassRegistry &PR = *PassRegistry::getPassRegistry(); 78 PassRegistry &PR = *PassRegistry::getPassRegistry();
69 initializeNVVMReflectPass(PR); 79 initializeNVVMReflectPass(PR);
80 initializeNVVMIntrRangePass(PR);
70 initializeGenericToNVVMPass(PR); 81 initializeGenericToNVVMPass(PR);
71 initializeNVPTXAllocaHoistingPass(PR); 82 initializeNVPTXAllocaHoistingPass(PR);
72 initializeNVPTXAssignValidGlobalNamesPass(PR); 83 initializeNVPTXAssignValidGlobalNamesPass(PR);
73 initializeNVPTXFavorNonGenericAddrSpacesPass(PR); 84 initializeNVPTXInferAddressSpacesPass(PR);
74 initializeNVPTXLowerKernelArgsPass(PR); 85 initializeNVPTXLowerArgsPass(PR);
75 initializeNVPTXLowerAllocaPass(PR); 86 initializeNVPTXLowerAllocaPass(PR);
76 initializeNVPTXLowerAggrCopiesPass(PR); 87 initializeNVPTXLowerAggrCopiesPass(PR);
77 } 88 }
78 89
79 static std::string computeDataLayout(bool is64Bit) { 90 static std::string computeDataLayout(bool is64Bit) {
88 } 99 }
89 100
90 NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT, 101 NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
91 StringRef CPU, StringRef FS, 102 StringRef CPU, StringRef FS,
92 const TargetOptions &Options, 103 const TargetOptions &Options,
93 Reloc::Model RM, CodeModel::Model CM, 104 Optional<Reloc::Model> RM,
105 CodeModel::Model CM,
94 CodeGenOpt::Level OL, bool is64bit) 106 CodeGenOpt::Level OL, bool is64bit)
95 : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, RM, 107 // The pic relocation model is used regardless of what the client has
96 CM, OL), 108 // specified, as it is the only relocation model currently supported.
97 is64bit(is64bit), TLOF(make_unique<NVPTXTargetObjectFile>()), 109 : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options,
110 Reloc::PIC_, CM, OL),
111 is64bit(is64bit),
112 TLOF(make_unique<NVPTXTargetObjectFile>()),
98 Subtarget(TT, CPU, FS, *this) { 113 Subtarget(TT, CPU, FS, *this) {
99 if (TT.getOS() == Triple::NVCL) 114 if (TT.getOS() == Triple::NVCL)
100 drvInterface = NVPTX::NVCL; 115 drvInterface = NVPTX::NVCL;
101 else 116 else
102 drvInterface = NVPTX::CUDA; 117 drvInterface = NVPTX::CUDA;
108 void NVPTXTargetMachine32::anchor() {} 123 void NVPTXTargetMachine32::anchor() {}
109 124
110 NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT, 125 NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT,
111 StringRef CPU, StringRef FS, 126 StringRef CPU, StringRef FS,
112 const TargetOptions &Options, 127 const TargetOptions &Options,
113 Reloc::Model RM, CodeModel::Model CM, 128 Optional<Reloc::Model> RM,
129 CodeModel::Model CM,
114 CodeGenOpt::Level OL) 130 CodeGenOpt::Level OL)
115 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} 131 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
116 132
117 void NVPTXTargetMachine64::anchor() {} 133 void NVPTXTargetMachine64::anchor() {}
118 134
119 NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT, 135 NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT,
120 StringRef CPU, StringRef FS, 136 StringRef CPU, StringRef FS,
121 const TargetOptions &Options, 137 const TargetOptions &Options,
122 Reloc::Model RM, CodeModel::Model CM, 138 Optional<Reloc::Model> RM,
139 CodeModel::Model CM,
123 CodeGenOpt::Level OL) 140 CodeGenOpt::Level OL)
124 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} 141 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
125 142
126 namespace { 143 namespace {
127 class NVPTXPassConfig : public TargetPassConfig { 144 class NVPTXPassConfig : public TargetPassConfig {
141 FunctionPass *createTargetRegisterAllocator(bool) override; 158 FunctionPass *createTargetRegisterAllocator(bool) override;
142 void addFastRegAlloc(FunctionPass *RegAllocPass) override; 159 void addFastRegAlloc(FunctionPass *RegAllocPass) override;
143 void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; 160 void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
144 161
145 private: 162 private:
146 // if the opt level is aggressive, add GVN; otherwise, add EarlyCSE. 163 // If the opt level is aggressive, add GVN; otherwise, add EarlyCSE. This
164 // function is only called in opt mode.
147 void addEarlyCSEOrGVNPass(); 165 void addEarlyCSEOrGVNPass();
166
167 // Add passes that propagate special memory spaces.
168 void addAddressSpaceInferencePasses();
169
170 // Add passes that perform straight-line scalar optimizations.
171 void addStraightLineScalarOptimizationPasses();
148 }; 172 };
149 } // end anonymous namespace 173 } // end anonymous namespace
150 174
151 TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { 175 TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
152 NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM); 176 return new NVPTXPassConfig(this, PM);
153 return PassConfig; 177 }
178
179 void NVPTXTargetMachine::addEarlyAsPossiblePasses(PassManagerBase &PM) {
180 PM.add(createNVVMReflectPass());
181 PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion()));
154 } 182 }
155 183
156 TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() { 184 TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
157 return TargetIRAnalysis([this](const Function &F) { 185 return TargetIRAnalysis([this](const Function &F) {
158 return TargetTransformInfo(NVPTXTTIImpl(this, F)); 186 return TargetTransformInfo(NVPTXTTIImpl(this, F));
164 addPass(createGVNPass()); 192 addPass(createGVNPass());
165 else 193 else
166 addPass(createEarlyCSEPass()); 194 addPass(createEarlyCSEPass());
167 } 195 }
168 196
169 void NVPTXPassConfig::addIRPasses() { 197 void NVPTXPassConfig::addAddressSpaceInferencePasses() {
170 // The following passes are known to not play well with virtual regs hanging 198 // NVPTXLowerArgs emits alloca for byval parameters which can often
171 // around after register allocation (which in our case, is *all* registers).
172 // We explicitly disable them here. We do, however, need some functionality
173 // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
174 // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
175 disablePass(&PrologEpilogCodeInserterID);
176 disablePass(&MachineCopyPropagationID);
177 disablePass(&TailDuplicateID);
178
179 addPass(createNVVMReflectPass());
180 addPass(createNVPTXImageOptimizerPass());
181 addPass(createNVPTXAssignValidGlobalNamesPass());
182 addPass(createGenericToNVVMPass());
183
184 // === Propagate special address spaces ===
185 addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine()));
186 // NVPTXLowerKernelArgs emits alloca for byval parameters which can often
187 // be eliminated by SROA. 199 // be eliminated by SROA.
188 addPass(createSROAPass()); 200 addPass(createSROAPass());
189 addPass(createNVPTXLowerAllocaPass()); 201 addPass(createNVPTXLowerAllocaPass());
190 addPass(createNVPTXFavorNonGenericAddrSpacesPass()); 202 addPass(createNVPTXInferAddressSpacesPass());
191 // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave 203 }
192 // them unused. We could remove dead code in an ad-hoc manner, but that 204
193 // requires manual work and might be error-prone. 205 void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() {
194 addPass(createDeadCodeEliminationPass());
195
196 // === Straight-line scalar optimizations ===
197 addPass(createSeparateConstOffsetFromGEPPass()); 206 addPass(createSeparateConstOffsetFromGEPPass());
198 addPass(createSpeculativeExecutionPass()); 207 addPass(createSpeculativeExecutionPass());
199 // ReassociateGEPs exposes more opportunities for SLSR. See 208 // ReassociateGEPs exposes more opportunities for SLSR. See
200 // the example in reassociate-geps-and-slsr.ll. 209 // the example in reassociate-geps-and-slsr.ll.
201 addPass(createStraightLineStrengthReducePass()); 210 addPass(createStraightLineStrengthReducePass());
206 // Run NaryReassociate after EarlyCSE/GVN to be more effective. 215 // Run NaryReassociate after EarlyCSE/GVN to be more effective.
207 addPass(createNaryReassociatePass()); 216 addPass(createNaryReassociatePass());
208 // NaryReassociate on GEPs creates redundant common expressions, so run 217 // NaryReassociate on GEPs creates redundant common expressions, so run
209 // EarlyCSE after it. 218 // EarlyCSE after it.
210 addPass(createEarlyCSEPass()); 219 addPass(createEarlyCSEPass());
220 }
221
222 void NVPTXPassConfig::addIRPasses() {
223 // The following passes are known to not play well with virtual regs hanging
224 // around after register allocation (which in our case, is *all* registers).
225 // We explicitly disable them here. We do, however, need some functionality
226 // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
227 // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
228 disablePass(&PrologEpilogCodeInserterID);
229 disablePass(&MachineCopyPropagationID);
230 disablePass(&TailDuplicateID);
231 disablePass(&StackMapLivenessID);
232 disablePass(&LiveDebugValuesID);
233 disablePass(&PostRASchedulerID);
234 disablePass(&FuncletLayoutID);
235 disablePass(&PatchableFunctionID);
236
237 // NVVMReflectPass is added in addEarlyAsPossiblePasses, so hopefully running
238 // it here does nothing. But since we need it for correctness when lowering
239 // to NVPTX, run it here too, in case whoever built our pass pipeline didn't
240 // call addEarlyAsPossiblePasses.
241 addPass(createNVVMReflectPass());
242
243 if (getOptLevel() != CodeGenOpt::None)
244 addPass(createNVPTXImageOptimizerPass());
245 addPass(createNVPTXAssignValidGlobalNamesPass());
246 addPass(createGenericToNVVMPass());
247
248 // NVPTXLowerArgs is required for correctness and should be run right
249 // before the address space inference passes.
250 addPass(createNVPTXLowerArgsPass(&getNVPTXTargetMachine()));
251 if (getOptLevel() != CodeGenOpt::None) {
252 addAddressSpaceInferencePasses();
253 if (!DisableLoadStoreVectorizer)
254 addPass(createLoadStoreVectorizerPass());
255 addStraightLineScalarOptimizationPasses();
256 }
211 257
212 // === LSR and other generic IR passes === 258 // === LSR and other generic IR passes ===
213 TargetPassConfig::addIRPasses(); 259 TargetPassConfig::addIRPasses();
214 // EarlyCSE is not always strong enough to clean up what LSR produces. For 260 // EarlyCSE is not always strong enough to clean up what LSR produces. For
215 // example, GVN can combine 261 // example, GVN can combine
221 // 267 //
222 // %0 = shl nsw %a, 2 268 // %0 = shl nsw %a, 2
223 // %1 = shl %a, 2 269 // %1 = shl %a, 2
224 // 270 //
225 // but EarlyCSE can do neither of them. 271 // but EarlyCSE can do neither of them.
226 addEarlyCSEOrGVNPass(); 272 if (getOptLevel() != CodeGenOpt::None)
273 addEarlyCSEOrGVNPass();
227 } 274 }
228 275
229 bool NVPTXPassConfig::addInstSelector() { 276 bool NVPTXPassConfig::addInstSelector() {
230 const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl(); 277 const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
231 278
239 return false; 286 return false;
240 } 287 }
241 288
242 void NVPTXPassConfig::addPostRegAlloc() { 289 void NVPTXPassConfig::addPostRegAlloc() {
243 addPass(createNVPTXPrologEpilogPass(), false); 290 addPass(createNVPTXPrologEpilogPass(), false);
244 // NVPTXPrologEpilogPass calculates frame object offset and replaces frame 291 if (getOptLevel() != CodeGenOpt::None) {
245 // index with VRFrame register. NVPTXPeephole needs to be run after that and 292 // NVPTXPrologEpilogPass calculates frame object offset and replaces frame
246 // will replace VRFrame with VRFrameLocal when possible. 293 // index with VRFrame register. NVPTXPeephole needs to be run after that and
247 addPass(createNVPTXPeephole()); 294 // will replace VRFrame with VRFrameLocal when possible.
295 addPass(createNVPTXPeephole());
296 }
248 } 297 }
249 298
250 FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) { 299 FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
251 return nullptr; // No reg alloc 300 return nullptr; // No reg alloc
252 } 301 }