comparison lib/Target/NVPTX/NVPTXTargetMachine.cpp @ 95:afa8332a0e37 LLVM3.8

LLVM 3.8
author Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date Tue, 13 Oct 2015 17:48:58 +0900
parents 60c9769439b8
children 1172e4bd9c6f
comparison
equal deleted inserted replaced
84:f3e34b893a5f 95:afa8332a0e37
48 using namespace llvm; 48 using namespace llvm;
49 49
50 namespace llvm { 50 namespace llvm {
51 void initializeNVVMReflectPass(PassRegistry&); 51 void initializeNVVMReflectPass(PassRegistry&);
52 void initializeGenericToNVVMPass(PassRegistry&); 52 void initializeGenericToNVVMPass(PassRegistry&);
53 void initializeNVPTXAllocaHoistingPass(PassRegistry &);
53 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); 54 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
54 void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); 55 void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
55 void initializeNVPTXLowerStructArgsPass(PassRegistry &); 56 void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
57 void initializeNVPTXLowerKernelArgsPass(PassRegistry &);
58 void initializeNVPTXLowerAllocaPass(PassRegistry &);
56 } 59 }
57 60
58 extern "C" void LLVMInitializeNVPTXTarget() { 61 extern "C" void LLVMInitializeNVPTXTarget() {
59 // Register the target. 62 // Register the target.
60 RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32); 63 RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32);
61 RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64); 64 RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64);
62 65
63 // FIXME: This pass is really intended to be invoked during IR optimization, 66 // FIXME: This pass is really intended to be invoked during IR optimization,
64 // but it's very NVPTX-specific. 67 // but it's very NVPTX-specific.
65 initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); 68 PassRegistry &PR = *PassRegistry::getPassRegistry();
66 initializeGenericToNVVMPass(*PassRegistry::getPassRegistry()); 69 initializeNVVMReflectPass(PR);
67 initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry()); 70 initializeGenericToNVVMPass(PR);
68 initializeNVPTXFavorNonGenericAddrSpacesPass( 71 initializeNVPTXAllocaHoistingPass(PR);
69 *PassRegistry::getPassRegistry()); 72 initializeNVPTXAssignValidGlobalNamesPass(PR);
70 initializeNVPTXLowerStructArgsPass(*PassRegistry::getPassRegistry()); 73 initializeNVPTXFavorNonGenericAddrSpacesPass(PR);
74 initializeNVPTXLowerKernelArgsPass(PR);
75 initializeNVPTXLowerAllocaPass(PR);
76 initializeNVPTXLowerAggrCopiesPass(PR);
71 } 77 }
72 78
73 static std::string computeDataLayout(bool is64Bit) { 79 static std::string computeDataLayout(bool is64Bit) {
74 std::string Ret = "e"; 80 std::string Ret = "e";
75 81
79 Ret += "-i64:64-v16:16-v32:32-n16:32:64"; 85 Ret += "-i64:64-v16:16-v32:32-n16:32:64";
80 86
81 return Ret; 87 return Ret;
82 } 88 }
83 89
84 NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT, 90 NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
85 StringRef CPU, StringRef FS, 91 StringRef CPU, StringRef FS,
86 const TargetOptions &Options, 92 const TargetOptions &Options,
87 Reloc::Model RM, CodeModel::Model CM, 93 Reloc::Model RM, CodeModel::Model CM,
88 CodeGenOpt::Level OL, bool is64bit) 94 CodeGenOpt::Level OL, bool is64bit)
89 : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), 95 : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, RM,
90 TLOF(make_unique<NVPTXTargetObjectFile>()), 96 CM, OL),
91 DL(computeDataLayout(is64bit)), 97 is64bit(is64bit), TLOF(make_unique<NVPTXTargetObjectFile>()),
92 Subtarget(TT, CPU, FS, *this, is64bit) { 98 Subtarget(TT, CPU, FS, *this) {
99 if (TT.getOS() == Triple::NVCL)
100 drvInterface = NVPTX::NVCL;
101 else
102 drvInterface = NVPTX::CUDA;
93 initAsmInfo(); 103 initAsmInfo();
94 } 104 }
95 105
96 NVPTXTargetMachine::~NVPTXTargetMachine() {} 106 NVPTXTargetMachine::~NVPTXTargetMachine() {}
97 107
98 void NVPTXTargetMachine32::anchor() {} 108 void NVPTXTargetMachine32::anchor() {}
99 109
100 NVPTXTargetMachine32::NVPTXTargetMachine32( 110 NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT,
101 const Target &T, StringRef TT, StringRef CPU, StringRef FS, 111 StringRef CPU, StringRef FS,
102 const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, 112 const TargetOptions &Options,
103 CodeGenOpt::Level OL) 113 Reloc::Model RM, CodeModel::Model CM,
114 CodeGenOpt::Level OL)
104 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} 115 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
105 116
106 void NVPTXTargetMachine64::anchor() {} 117 void NVPTXTargetMachine64::anchor() {}
107 118
108 NVPTXTargetMachine64::NVPTXTargetMachine64( 119 NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT,
109 const Target &T, StringRef TT, StringRef CPU, StringRef FS, 120 StringRef CPU, StringRef FS,
110 const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, 121 const TargetOptions &Options,
111 CodeGenOpt::Level OL) 122 Reloc::Model RM, CodeModel::Model CM,
123 CodeGenOpt::Level OL)
112 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} 124 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
113 125
114 namespace { 126 namespace {
115 class NVPTXPassConfig : public TargetPassConfig { 127 class NVPTXPassConfig : public TargetPassConfig {
116 public: 128 public:
127 void addMachineSSAOptimization() override; 139 void addMachineSSAOptimization() override;
128 140
129 FunctionPass *createTargetRegisterAllocator(bool) override; 141 FunctionPass *createTargetRegisterAllocator(bool) override;
130 void addFastRegAlloc(FunctionPass *RegAllocPass) override; 142 void addFastRegAlloc(FunctionPass *RegAllocPass) override;
131 void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; 143 void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
144
145 private:
146 // if the opt level is aggressive, add GVN; otherwise, add EarlyCSE.
147 void addEarlyCSEOrGVNPass();
132 }; 148 };
133 } // end anonymous namespace 149 } // end anonymous namespace
134 150
135 TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { 151 TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
136 NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM); 152 NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM);
137 return PassConfig; 153 return PassConfig;
138 } 154 }
139 155
140 TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() { 156 TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
141 return TargetIRAnalysis( 157 return TargetIRAnalysis([this](const Function &F) {
142 [this](Function &) { return TargetTransformInfo(NVPTXTTIImpl(this)); }); 158 return TargetTransformInfo(NVPTXTTIImpl(this, F));
159 });
160 }
161
162 void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
163 if (getOptLevel() == CodeGenOpt::Aggressive)
164 addPass(createGVNPass());
165 else
166 addPass(createEarlyCSEPass());
143 } 167 }
144 168
145 void NVPTXPassConfig::addIRPasses() { 169 void NVPTXPassConfig::addIRPasses() {
146 // The following passes are known to not play well with virtual regs hanging 170 // The following passes are known to not play well with virtual regs hanging
147 // around after register allocation (which in our case, is *all* registers). 171 // around after register allocation (which in our case, is *all* registers).
148 // We explicitly disable them here. We do, however, need some functionality 172 // We explicitly disable them here. We do, however, need some functionality
149 // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the 173 // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
150 // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp). 174 // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
151 disablePass(&PrologEpilogCodeInserterID); 175 disablePass(&PrologEpilogCodeInserterID);
152 disablePass(&MachineCopyPropagationID); 176 disablePass(&MachineCopyPropagationID);
153 disablePass(&BranchFolderPassID);
154 disablePass(&TailDuplicateID); 177 disablePass(&TailDuplicateID);
155 178
179 addPass(createNVVMReflectPass());
156 addPass(createNVPTXImageOptimizerPass()); 180 addPass(createNVPTXImageOptimizerPass());
157 TargetPassConfig::addIRPasses();
158 addPass(createNVPTXAssignValidGlobalNamesPass()); 181 addPass(createNVPTXAssignValidGlobalNamesPass());
159 addPass(createGenericToNVVMPass()); 182 addPass(createGenericToNVVMPass());
183
184 // === Propagate special address spaces ===
185 addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine()));
186 // NVPTXLowerKernelArgs emits alloca for byval parameters which can often
187 // be eliminated by SROA.
188 addPass(createSROAPass());
189 addPass(createNVPTXLowerAllocaPass());
160 addPass(createNVPTXFavorNonGenericAddrSpacesPass()); 190 addPass(createNVPTXFavorNonGenericAddrSpacesPass());
191 // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leaves
192 // them unused. We could remove dead code in an ad-hoc manner, but that
193 // requires manual work and might be error-prone.
194 addPass(createDeadCodeEliminationPass());
195
196 // === Straight-line scalar optimizations ===
197 addPass(createSeparateConstOffsetFromGEPPass());
198 addPass(createSpeculativeExecutionPass());
199 // ReassociateGEPs exposes more opportunities for SLSR. See
200 // the example in reassociate-geps-and-slsr.ll.
161 addPass(createStraightLineStrengthReducePass()); 201 addPass(createStraightLineStrengthReducePass());
162 addPass(createSeparateConstOffsetFromGEPPass()); 202 // SeparateConstOffsetFromGEP and SLSR create common expressions which GVN or
163 // The SeparateConstOffsetFromGEP pass creates variadic bases that can be used 203 // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE
164 // by multiple GEPs. Run GVN or EarlyCSE to really reuse them. GVN generates 204 // for some of our benchmarks.
165 // significantly better code than EarlyCSE for some of our benchmarks. 205 addEarlyCSEOrGVNPass();
166 if (getOptLevel() == CodeGenOpt::Aggressive) 206 // Run NaryReassociate after EarlyCSE/GVN to be more effective.
167 addPass(createGVNPass()); 207 addPass(createNaryReassociatePass());
168 else 208 // NaryReassociate on GEPs creates redundant common expressions, so run
169 addPass(createEarlyCSEPass()); 209 // EarlyCSE after it.
170 // Both FavorNonGenericAddrSpaces and SeparateConstOffsetFromGEP may leave 210 addPass(createEarlyCSEPass());
171 // some dead code. We could remove dead code in an ad-hoc manner, but that 211
172 // requires manual work and might be error-prone. 212 // === LSR and other generic IR passes ===
213 TargetPassConfig::addIRPasses();
214 // EarlyCSE is not always strong enough to clean up what LSR produces. For
215 // example, GVN can combine
173 // 216 //
174 // The FavorNonGenericAddrSpaces pass shortcuts unnecessary addrspacecasts, 217 // %0 = add %a, %b
175 // and leave them unused. 218 // %1 = add %b, %a
176 // 219 //
177 // SeparateConstOffsetFromGEP rebuilds a new index from the old index, and the 220 // and
178 // old index and some of its intermediate results may become unused. 221 //
179 addPass(createDeadCodeEliminationPass()); 222 // %0 = shl nsw %a, 2
223 // %1 = shl %a, 2
224 //
225 // but EarlyCSE can do neither of them.
226 addEarlyCSEOrGVNPass();
180 } 227 }
181 228
182 bool NVPTXPassConfig::addInstSelector() { 229 bool NVPTXPassConfig::addInstSelector() {
183 const NVPTXSubtarget &ST = 230 const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
184 getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>();
185 231
186 addPass(createLowerAggrCopies()); 232 addPass(createLowerAggrCopies());
187 addPass(createAllocaHoisting()); 233 addPass(createAllocaHoisting());
188 addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel())); 234 addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
189 235
193 return false; 239 return false;
194 } 240 }
195 241
196 void NVPTXPassConfig::addPostRegAlloc() { 242 void NVPTXPassConfig::addPostRegAlloc() {
197 addPass(createNVPTXPrologEpilogPass(), false); 243 addPass(createNVPTXPrologEpilogPass(), false);
244 // NVPTXPrologEpilogPass calculates frame object offset and replaces frame
245 // index with VRFrame register. NVPTXPeephole needs to be run after that and
246 // will replace VRFrame with VRFrameLocal when possible.
247 addPass(createNVPTXPeephole());
198 } 248 }
199 249
200 FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) { 250 FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
201 return nullptr; // No reg alloc 251 return nullptr; // No reg alloc
202 } 252 }