Mercurial > hg > CbC > CbC_llvm
comparison lib/Target/NVPTX/NVPTXTargetMachine.cpp @ 77:54457678186b LLVM3.6
LLVM 3.6
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 08 Sep 2014 22:06:00 +0900 |
parents | 95c75e76d11b |
children | 60c9769439b8 |
comparison
equal
deleted
inserted
replaced
34:e874dbf0ad9d | 77:54457678186b |
---|---|
14 #include "NVPTXTargetMachine.h" | 14 #include "NVPTXTargetMachine.h" |
15 #include "MCTargetDesc/NVPTXMCAsmInfo.h" | 15 #include "MCTargetDesc/NVPTXMCAsmInfo.h" |
16 #include "NVPTX.h" | 16 #include "NVPTX.h" |
17 #include "NVPTXAllocaHoisting.h" | 17 #include "NVPTXAllocaHoisting.h" |
18 #include "NVPTXLowerAggrCopies.h" | 18 #include "NVPTXLowerAggrCopies.h" |
19 #include "NVPTXSplitBBatBar.h" | |
20 #include "llvm/ADT/OwningPtr.h" | |
21 #include "llvm/Analysis/Passes.h" | 19 #include "llvm/Analysis/Passes.h" |
22 #include "llvm/Analysis/Verifier.h" | |
23 #include "llvm/Assembly/PrintModulePass.h" | |
24 #include "llvm/CodeGen/AsmPrinter.h" | 20 #include "llvm/CodeGen/AsmPrinter.h" |
25 #include "llvm/CodeGen/MachineFunctionAnalysis.h" | 21 #include "llvm/CodeGen/MachineFunctionAnalysis.h" |
26 #include "llvm/CodeGen/MachineModuleInfo.h" | 22 #include "llvm/CodeGen/MachineModuleInfo.h" |
27 #include "llvm/CodeGen/Passes.h" | 23 #include "llvm/CodeGen/Passes.h" |
28 #include "llvm/IR/DataLayout.h" | 24 #include "llvm/IR/DataLayout.h" |
25 #include "llvm/IR/IRPrintingPasses.h" | |
26 #include "llvm/IR/Verifier.h" | |
29 #include "llvm/MC/MCAsmInfo.h" | 27 #include "llvm/MC/MCAsmInfo.h" |
30 #include "llvm/MC/MCInstrInfo.h" | 28 #include "llvm/MC/MCInstrInfo.h" |
31 #include "llvm/MC/MCStreamer.h" | 29 #include "llvm/MC/MCStreamer.h" |
32 #include "llvm/MC/MCSubtargetInfo.h" | 30 #include "llvm/MC/MCSubtargetInfo.h" |
33 #include "llvm/PassManager.h" | 31 #include "llvm/PassManager.h" |
48 using namespace llvm; | 46 using namespace llvm; |
49 | 47 |
50 namespace llvm { | 48 namespace llvm { |
51 void initializeNVVMReflectPass(PassRegistry&); | 49 void initializeNVVMReflectPass(PassRegistry&); |
52 void initializeGenericToNVVMPass(PassRegistry&); | 50 void initializeGenericToNVVMPass(PassRegistry&); |
51 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); | |
52 void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); | |
53 } | 53 } |
54 | 54 |
55 extern "C" void LLVMInitializeNVPTXTarget() { | 55 extern "C" void LLVMInitializeNVPTXTarget() { |
56 // Register the target. | 56 // Register the target. |
57 RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32); | 57 RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32); |
59 | 59 |
60 // FIXME: This pass is really intended to be invoked during IR optimization, | 60 // FIXME: This pass is really intended to be invoked during IR optimization, |
61 // but it's very NVPTX-specific. | 61 // but it's very NVPTX-specific. |
62 initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); | 62 initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); |
63 initializeGenericToNVVMPass(*PassRegistry::getPassRegistry()); | 63 initializeGenericToNVVMPass(*PassRegistry::getPassRegistry()); |
64 } | 64 initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry()); |
65 | 65 initializeNVPTXFavorNonGenericAddrSpacesPass( |
66 NVPTXTargetMachine::NVPTXTargetMachine( | 66 *PassRegistry::getPassRegistry()); |
67 const Target &T, StringRef TT, StringRef CPU, StringRef FS, | 67 } |
68 const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, | 68 |
69 CodeGenOpt::Level OL, bool is64bit) | 69 NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT, |
70 StringRef CPU, StringRef FS, | |
71 const TargetOptions &Options, | |
72 Reloc::Model RM, CodeModel::Model CM, | |
73 CodeGenOpt::Level OL, bool is64bit) | |
70 : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), | 74 : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), |
71 Subtarget(TT, CPU, FS, is64bit), DL(Subtarget.getDataLayout()), | 75 Subtarget(TT, CPU, FS, *this, is64bit) { |
72 InstrInfo(*this), TLInfo(*this), TSInfo(*this), | |
73 FrameLowering( | |
74 *this, is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ { | |
75 initAsmInfo(); | 76 initAsmInfo(); |
76 } | 77 } |
77 | 78 |
78 void NVPTXTargetMachine32::anchor() {} | 79 void NVPTXTargetMachine32::anchor() {} |
79 | 80 |
99 | 100 |
100 NVPTXTargetMachine &getNVPTXTargetMachine() const { | 101 NVPTXTargetMachine &getNVPTXTargetMachine() const { |
101 return getTM<NVPTXTargetMachine>(); | 102 return getTM<NVPTXTargetMachine>(); |
102 } | 103 } |
103 | 104 |
104 virtual void addIRPasses(); | 105 void addIRPasses() override; |
105 virtual bool addInstSelector(); | 106 bool addInstSelector() override; |
106 virtual bool addPreRegAlloc(); | 107 bool addPreRegAlloc() override; |
107 virtual bool addPostRegAlloc(); | 108 bool addPostRegAlloc() override; |
108 | 109 void addMachineSSAOptimization() override; |
109 virtual FunctionPass *createTargetRegisterAllocator(bool) LLVM_OVERRIDE; | 110 |
110 virtual void addFastRegAlloc(FunctionPass *RegAllocPass); | 111 FunctionPass *createTargetRegisterAllocator(bool) override; |
111 virtual void addOptimizedRegAlloc(FunctionPass *RegAllocPass); | 112 void addFastRegAlloc(FunctionPass *RegAllocPass) override; |
113 void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; | |
112 }; | 114 }; |
113 } // end anonymous namespace | 115 } // end anonymous namespace |
114 | 116 |
115 TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { | 117 TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { |
116 NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM); | 118 NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM); |
126 disablePass(&PrologEpilogCodeInserterID); | 128 disablePass(&PrologEpilogCodeInserterID); |
127 disablePass(&MachineCopyPropagationID); | 129 disablePass(&MachineCopyPropagationID); |
128 disablePass(&BranchFolderPassID); | 130 disablePass(&BranchFolderPassID); |
129 disablePass(&TailDuplicateID); | 131 disablePass(&TailDuplicateID); |
130 | 132 |
133 addPass(createNVPTXImageOptimizerPass()); | |
131 TargetPassConfig::addIRPasses(); | 134 TargetPassConfig::addIRPasses(); |
135 addPass(createNVPTXAssignValidGlobalNamesPass()); | |
132 addPass(createGenericToNVVMPass()); | 136 addPass(createGenericToNVVMPass()); |
137 addPass(createNVPTXFavorNonGenericAddrSpacesPass()); | |
138 addPass(createSeparateConstOffsetFromGEPPass()); | |
139 // The SeparateConstOffsetFromGEP pass creates variadic bases that can be used | |
140 // by multiple GEPs. Run GVN or EarlyCSE to really reuse them. GVN generates | |
141 // significantly better code than EarlyCSE for some of our benchmarks. | |
142 if (getOptLevel() == CodeGenOpt::Aggressive) | |
143 addPass(createGVNPass()); | |
144 else | |
145 addPass(createEarlyCSEPass()); | |
146 // Both FavorNonGenericAddrSpaces and SeparateConstOffsetFromGEP may leave | |
147 // some dead code. We could remove dead code in an ad-hoc manner, but that | |
148 // requires manual work and might be error-prone. | |
149 // | |
150 // The FavorNonGenericAddrSpaces pass shortcuts unnecessary addrspacecasts, | |
151 // and leave them unused. | |
152 // | |
153 // SeparateConstOffsetFromGEP rebuilds a new index from the old index, and the | |
154 // old index and some of its intermediate results may become unused. | |
155 addPass(createDeadCodeEliminationPass()); | |
133 } | 156 } |
134 | 157 |
135 bool NVPTXPassConfig::addInstSelector() { | 158 bool NVPTXPassConfig::addInstSelector() { |
159 const NVPTXSubtarget &ST = | |
160 getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>(); | |
161 | |
136 addPass(createLowerAggrCopies()); | 162 addPass(createLowerAggrCopies()); |
137 addPass(createSplitBBatBarPass()); | |
138 addPass(createAllocaHoisting()); | 163 addPass(createAllocaHoisting()); |
139 addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel())); | 164 addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel())); |
165 | |
166 if (!ST.hasImageHandles()) | |
167 addPass(createNVPTXReplaceImageHandlesPass()); | |
168 | |
140 return false; | 169 return false; |
141 } | 170 } |
142 | 171 |
143 bool NVPTXPassConfig::addPreRegAlloc() { return false; } | 172 bool NVPTXPassConfig::addPreRegAlloc() { return false; } |
144 bool NVPTXPassConfig::addPostRegAlloc() { | 173 bool NVPTXPassConfig::addPostRegAlloc() { |
145 addPass(createNVPTXPrologEpilogPass()); | 174 addPass(createNVPTXPrologEpilogPass()); |
146 return false; | 175 return false; |
147 } | 176 } |
148 | 177 |
149 FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) { | 178 FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) { |
150 return 0; // No reg alloc | 179 return nullptr; // No reg alloc |
151 } | 180 } |
152 | 181 |
153 void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { | 182 void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { |
154 assert(!RegAllocPass && "NVPTX uses no regalloc!"); | 183 assert(!RegAllocPass && "NVPTX uses no regalloc!"); |
155 addPass(&PHIEliminationID); | 184 addPass(&PHIEliminationID); |
177 // FIXME: Needs physical registers | 206 // FIXME: Needs physical registers |
178 //addPass(&PostRAMachineLICMID); | 207 //addPass(&PostRAMachineLICMID); |
179 | 208 |
180 printAndVerify("After StackSlotColoring"); | 209 printAndVerify("After StackSlotColoring"); |
181 } | 210 } |
211 | |
212 void NVPTXPassConfig::addMachineSSAOptimization() { | |
213 // Pre-ra tail duplication. | |
214 if (addPass(&EarlyTailDuplicateID)) | |
215 printAndVerify("After Pre-RegAlloc TailDuplicate"); | |
216 | |
217 // Optimize PHIs before DCE: removing dead PHI cycles may make more | |
218 // instructions dead. | |
219 addPass(&OptimizePHIsID); | |
220 | |
221 // This pass merges large allocas. StackSlotColoring is a different pass | |
222 // which merges spill slots. | |
223 addPass(&StackColoringID); | |
224 | |
225 // If the target requests it, assign local variables to stack slots relative | |
226 // to one another and simplify frame index references where possible. | |
227 addPass(&LocalStackSlotAllocationID); | |
228 | |
229 // With optimization, dead code should already be eliminated. However | |
230 // there is one known exception: lowered code for arguments that are only | |
231 // used by tail calls, where the tail calls reuse the incoming stack | |
232 // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). | |
233 addPass(&DeadMachineInstructionElimID); | |
234 printAndVerify("After codegen DCE pass"); | |
235 | |
236 // Allow targets to insert passes that improve instruction level parallelism, | |
237 // like if-conversion. Such passes will typically need dominator trees and | |
238 // loop info, just like LICM and CSE below. | |
239 if (addILPOpts()) | |
240 printAndVerify("After ILP optimizations"); | |
241 | |
242 addPass(&MachineLICMID); | |
243 addPass(&MachineCSEID); | |
244 | |
245 addPass(&MachineSinkingID); | |
246 printAndVerify("After Machine LICM, CSE and Sinking passes"); | |
247 | |
248 addPass(&PeepholeOptimizerID); | |
249 printAndVerify("After codegen peephole optimization pass"); | |
250 } |