Mercurial > hg > CbC > CbC_llvm
comparison lib/Target/NVPTX/NVPTXTargetMachine.cpp @ 95:afa8332a0e37 LLVM3.8
LLVM 3.8
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 13 Oct 2015 17:48:58 +0900 |
parents | 60c9769439b8 |
children | 1172e4bd9c6f |
comparison
equal
deleted
inserted
replaced
84:f3e34b893a5f | 95:afa8332a0e37 |
---|---|
48 using namespace llvm; | 48 using namespace llvm; |
49 | 49 |
50 namespace llvm { | 50 namespace llvm { |
51 void initializeNVVMReflectPass(PassRegistry&); | 51 void initializeNVVMReflectPass(PassRegistry&); |
52 void initializeGenericToNVVMPass(PassRegistry&); | 52 void initializeGenericToNVVMPass(PassRegistry&); |
53 void initializeNVPTXAllocaHoistingPass(PassRegistry &); | |
53 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); | 54 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); |
54 void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); | 55 void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); |
55 void initializeNVPTXLowerStructArgsPass(PassRegistry &); | 56 void initializeNVPTXLowerAggrCopiesPass(PassRegistry &); |
57 void initializeNVPTXLowerKernelArgsPass(PassRegistry &); | |
58 void initializeNVPTXLowerAllocaPass(PassRegistry &); | |
56 } | 59 } |
57 | 60 |
58 extern "C" void LLVMInitializeNVPTXTarget() { | 61 extern "C" void LLVMInitializeNVPTXTarget() { |
59 // Register the target. | 62 // Register the target. |
60 RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32); | 63 RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32); |
61 RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64); | 64 RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64); |
62 | 65 |
63 // FIXME: This pass is really intended to be invoked during IR optimization, | 66 // FIXME: This pass is really intended to be invoked during IR optimization, |
64 // but it's very NVPTX-specific. | 67 // but it's very NVPTX-specific. |
65 initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); | 68 PassRegistry &PR = *PassRegistry::getPassRegistry(); |
66 initializeGenericToNVVMPass(*PassRegistry::getPassRegistry()); | 69 initializeNVVMReflectPass(PR); |
67 initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry()); | 70 initializeGenericToNVVMPass(PR); |
68 initializeNVPTXFavorNonGenericAddrSpacesPass( | 71 initializeNVPTXAllocaHoistingPass(PR); |
69 *PassRegistry::getPassRegistry()); | 72 initializeNVPTXAssignValidGlobalNamesPass(PR); |
70 initializeNVPTXLowerStructArgsPass(*PassRegistry::getPassRegistry()); | 73 initializeNVPTXFavorNonGenericAddrSpacesPass(PR); |
74 initializeNVPTXLowerKernelArgsPass(PR); | |
75 initializeNVPTXLowerAllocaPass(PR); | |
76 initializeNVPTXLowerAggrCopiesPass(PR); | |
71 } | 77 } |
72 | 78 |
73 static std::string computeDataLayout(bool is64Bit) { | 79 static std::string computeDataLayout(bool is64Bit) { |
74 std::string Ret = "e"; | 80 std::string Ret = "e"; |
75 | 81 |
79 Ret += "-i64:64-v16:16-v32:32-n16:32:64"; | 85 Ret += "-i64:64-v16:16-v32:32-n16:32:64"; |
80 | 86 |
81 return Ret; | 87 return Ret; |
82 } | 88 } |
83 | 89 |
84 NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT, | 90 NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT, |
85 StringRef CPU, StringRef FS, | 91 StringRef CPU, StringRef FS, |
86 const TargetOptions &Options, | 92 const TargetOptions &Options, |
87 Reloc::Model RM, CodeModel::Model CM, | 93 Reloc::Model RM, CodeModel::Model CM, |
88 CodeGenOpt::Level OL, bool is64bit) | 94 CodeGenOpt::Level OL, bool is64bit) |
89 : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), | 95 : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, RM, |
90 TLOF(make_unique<NVPTXTargetObjectFile>()), | 96 CM, OL), |
91 DL(computeDataLayout(is64bit)), | 97 is64bit(is64bit), TLOF(make_unique<NVPTXTargetObjectFile>()), |
92 Subtarget(TT, CPU, FS, *this, is64bit) { | 98 Subtarget(TT, CPU, FS, *this) { |
99 if (TT.getOS() == Triple::NVCL) | |
100 drvInterface = NVPTX::NVCL; | |
101 else | |
102 drvInterface = NVPTX::CUDA; | |
93 initAsmInfo(); | 103 initAsmInfo(); |
94 } | 104 } |
95 | 105 |
96 NVPTXTargetMachine::~NVPTXTargetMachine() {} | 106 NVPTXTargetMachine::~NVPTXTargetMachine() {} |
97 | 107 |
98 void NVPTXTargetMachine32::anchor() {} | 108 void NVPTXTargetMachine32::anchor() {} |
99 | 109 |
100 NVPTXTargetMachine32::NVPTXTargetMachine32( | 110 NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT, |
101 const Target &T, StringRef TT, StringRef CPU, StringRef FS, | 111 StringRef CPU, StringRef FS, |
102 const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, | 112 const TargetOptions &Options, |
103 CodeGenOpt::Level OL) | 113 Reloc::Model RM, CodeModel::Model CM, |
114 CodeGenOpt::Level OL) | |
104 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} | 115 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} |
105 | 116 |
106 void NVPTXTargetMachine64::anchor() {} | 117 void NVPTXTargetMachine64::anchor() {} |
107 | 118 |
108 NVPTXTargetMachine64::NVPTXTargetMachine64( | 119 NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT, |
109 const Target &T, StringRef TT, StringRef CPU, StringRef FS, | 120 StringRef CPU, StringRef FS, |
110 const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, | 121 const TargetOptions &Options, |
111 CodeGenOpt::Level OL) | 122 Reloc::Model RM, CodeModel::Model CM, |
123 CodeGenOpt::Level OL) | |
112 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} | 124 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} |
113 | 125 |
114 namespace { | 126 namespace { |
115 class NVPTXPassConfig : public TargetPassConfig { | 127 class NVPTXPassConfig : public TargetPassConfig { |
116 public: | 128 public: |
127 void addMachineSSAOptimization() override; | 139 void addMachineSSAOptimization() override; |
128 | 140 |
129 FunctionPass *createTargetRegisterAllocator(bool) override; | 141 FunctionPass *createTargetRegisterAllocator(bool) override; |
130 void addFastRegAlloc(FunctionPass *RegAllocPass) override; | 142 void addFastRegAlloc(FunctionPass *RegAllocPass) override; |
131 void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; | 143 void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; |
144 | |
145 private: | |
146 // if the opt level is aggressive, add GVN; otherwise, add EarlyCSE. | |
147 void addEarlyCSEOrGVNPass(); | |
132 }; | 148 }; |
133 } // end anonymous namespace | 149 } // end anonymous namespace |
134 | 150 |
135 TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { | 151 TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { |
136 NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM); | 152 NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM); |
137 return PassConfig; | 153 return PassConfig; |
138 } | 154 } |
139 | 155 |
140 TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() { | 156 TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() { |
141 return TargetIRAnalysis( | 157 return TargetIRAnalysis([this](const Function &F) { |
142 [this](Function &) { return TargetTransformInfo(NVPTXTTIImpl(this)); }); | 158 return TargetTransformInfo(NVPTXTTIImpl(this, F)); |
159 }); | |
160 } | |
161 | |
162 void NVPTXPassConfig::addEarlyCSEOrGVNPass() { | |
163 if (getOptLevel() == CodeGenOpt::Aggressive) | |
164 addPass(createGVNPass()); | |
165 else | |
166 addPass(createEarlyCSEPass()); | |
143 } | 167 } |
144 | 168 |
145 void NVPTXPassConfig::addIRPasses() { | 169 void NVPTXPassConfig::addIRPasses() { |
146 // The following passes are known to not play well with virtual regs hanging | 170 // The following passes are known to not play well with virtual regs hanging |
147 // around after register allocation (which in our case, is *all* registers). | 171 // around after register allocation (which in our case, is *all* registers). |
148 // We explicitly disable them here. We do, however, need some functionality | 172 // We explicitly disable them here. We do, however, need some functionality |
149 // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the | 173 // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the |
150 // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp). | 174 // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp). |
151 disablePass(&PrologEpilogCodeInserterID); | 175 disablePass(&PrologEpilogCodeInserterID); |
152 disablePass(&MachineCopyPropagationID); | 176 disablePass(&MachineCopyPropagationID); |
153 disablePass(&BranchFolderPassID); | |
154 disablePass(&TailDuplicateID); | 177 disablePass(&TailDuplicateID); |
155 | 178 |
179 addPass(createNVVMReflectPass()); | |
156 addPass(createNVPTXImageOptimizerPass()); | 180 addPass(createNVPTXImageOptimizerPass()); |
157 TargetPassConfig::addIRPasses(); | |
158 addPass(createNVPTXAssignValidGlobalNamesPass()); | 181 addPass(createNVPTXAssignValidGlobalNamesPass()); |
159 addPass(createGenericToNVVMPass()); | 182 addPass(createGenericToNVVMPass()); |
183 | |
184 // === Propagate special address spaces === | |
185 addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine())); | |
186 // NVPTXLowerKernelArgs emits alloca for byval parameters which can often | |
187 // be eliminated by SROA. | |
188 addPass(createSROAPass()); | |
189 addPass(createNVPTXLowerAllocaPass()); | |
160 addPass(createNVPTXFavorNonGenericAddrSpacesPass()); | 190 addPass(createNVPTXFavorNonGenericAddrSpacesPass()); |
191 // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leaves | |
192 // them unused. We could remove dead code in an ad-hoc manner, but that | |
193 // requires manual work and might be error-prone. | |
194 addPass(createDeadCodeEliminationPass()); | |
195 | |
196 // === Straight-line scalar optimizations === | |
197 addPass(createSeparateConstOffsetFromGEPPass()); | |
198 addPass(createSpeculativeExecutionPass()); | |
199 // ReassociateGEPs exposes more opportunities for SLSR. See | |
200 // the example in reassociate-geps-and-slsr.ll. | |
161 addPass(createStraightLineStrengthReducePass()); | 201 addPass(createStraightLineStrengthReducePass()); |
162 addPass(createSeparateConstOffsetFromGEPPass()); | 202 // SeparateConstOffsetFromGEP and SLSR create common expressions which GVN or |
163 // The SeparateConstOffsetFromGEP pass creates variadic bases that can be used | 203 // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE |
164 // by multiple GEPs. Run GVN or EarlyCSE to really reuse them. GVN generates | 204 // for some of our benchmarks. |
165 // significantly better code than EarlyCSE for some of our benchmarks. | 205 addEarlyCSEOrGVNPass(); |
166 if (getOptLevel() == CodeGenOpt::Aggressive) | 206 // Run NaryReassociate after EarlyCSE/GVN to be more effective. |
167 addPass(createGVNPass()); | 207 addPass(createNaryReassociatePass()); |
168 else | 208 // NaryReassociate on GEPs creates redundant common expressions, so run |
169 addPass(createEarlyCSEPass()); | 209 // EarlyCSE after it. |
170 // Both FavorNonGenericAddrSpaces and SeparateConstOffsetFromGEP may leave | 210 addPass(createEarlyCSEPass()); |
171 // some dead code. We could remove dead code in an ad-hoc manner, but that | 211 |
172 // requires manual work and might be error-prone. | 212 // === LSR and other generic IR passes === |
213 TargetPassConfig::addIRPasses(); | |
214 // EarlyCSE is not always strong enough to clean up what LSR produces. For | |
215 // example, GVN can combine | |
173 // | 216 // |
174 // The FavorNonGenericAddrSpaces pass shortcuts unnecessary addrspacecasts, | 217 // %0 = add %a, %b |
175 // and leave them unused. | 218 // %1 = add %b, %a |
176 // | 219 // |
177 // SeparateConstOffsetFromGEP rebuilds a new index from the old index, and the | 220 // and |
178 // old index and some of its intermediate results may become unused. | 221 // |
179 addPass(createDeadCodeEliminationPass()); | 222 // %0 = shl nsw %a, 2 |
223 // %1 = shl %a, 2 | |
224 // | |
225 // but EarlyCSE can do neither of them. | |
226 addEarlyCSEOrGVNPass(); | |
180 } | 227 } |
181 | 228 |
182 bool NVPTXPassConfig::addInstSelector() { | 229 bool NVPTXPassConfig::addInstSelector() { |
183 const NVPTXSubtarget &ST = | 230 const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl(); |
184 getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>(); | |
185 | 231 |
186 addPass(createLowerAggrCopies()); | 232 addPass(createLowerAggrCopies()); |
187 addPass(createAllocaHoisting()); | 233 addPass(createAllocaHoisting()); |
188 addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel())); | 234 addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel())); |
189 | 235 |
193 return false; | 239 return false; |
194 } | 240 } |
195 | 241 |
196 void NVPTXPassConfig::addPostRegAlloc() { | 242 void NVPTXPassConfig::addPostRegAlloc() { |
197 addPass(createNVPTXPrologEpilogPass(), false); | 243 addPass(createNVPTXPrologEpilogPass(), false); |
244 // NVPTXPrologEpilogPass calculates frame object offset and replaces frame | |
245 // index with VRFrame register. NVPTXPeephole needs to be run after that and | |
246 // will replace VRFrame with VRFrameLocal when possible. | |
247 addPass(createNVPTXPeephole()); | |
198 } | 248 } |
199 | 249 |
200 FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) { | 250 FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) { |
201 return nullptr; // No reg alloc | 251 return nullptr; // No reg alloc |
202 } | 252 } |