comparison lib/Target/NVPTX/NVPTXTargetMachine.cpp @ 77:54457678186b LLVM3.6

LLVM 3.6
author Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date Mon, 08 Sep 2014 22:06:00 +0900
parents 95c75e76d11b
children 60c9769439b8
comparison
equal deleted inserted replaced
34:e874dbf0ad9d 77:54457678186b
14 #include "NVPTXTargetMachine.h" 14 #include "NVPTXTargetMachine.h"
15 #include "MCTargetDesc/NVPTXMCAsmInfo.h" 15 #include "MCTargetDesc/NVPTXMCAsmInfo.h"
16 #include "NVPTX.h" 16 #include "NVPTX.h"
17 #include "NVPTXAllocaHoisting.h" 17 #include "NVPTXAllocaHoisting.h"
18 #include "NVPTXLowerAggrCopies.h" 18 #include "NVPTXLowerAggrCopies.h"
19 #include "NVPTXSplitBBatBar.h"
20 #include "llvm/ADT/OwningPtr.h"
21 #include "llvm/Analysis/Passes.h" 19 #include "llvm/Analysis/Passes.h"
22 #include "llvm/Analysis/Verifier.h"
23 #include "llvm/Assembly/PrintModulePass.h"
24 #include "llvm/CodeGen/AsmPrinter.h" 20 #include "llvm/CodeGen/AsmPrinter.h"
25 #include "llvm/CodeGen/MachineFunctionAnalysis.h" 21 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
26 #include "llvm/CodeGen/MachineModuleInfo.h" 22 #include "llvm/CodeGen/MachineModuleInfo.h"
27 #include "llvm/CodeGen/Passes.h" 23 #include "llvm/CodeGen/Passes.h"
28 #include "llvm/IR/DataLayout.h" 24 #include "llvm/IR/DataLayout.h"
25 #include "llvm/IR/IRPrintingPasses.h"
26 #include "llvm/IR/Verifier.h"
29 #include "llvm/MC/MCAsmInfo.h" 27 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCInstrInfo.h" 28 #include "llvm/MC/MCInstrInfo.h"
31 #include "llvm/MC/MCStreamer.h" 29 #include "llvm/MC/MCStreamer.h"
32 #include "llvm/MC/MCSubtargetInfo.h" 30 #include "llvm/MC/MCSubtargetInfo.h"
33 #include "llvm/PassManager.h" 31 #include "llvm/PassManager.h"
48 using namespace llvm; 46 using namespace llvm;
49 47
50 namespace llvm { 48 namespace llvm {
51 void initializeNVVMReflectPass(PassRegistry&); 49 void initializeNVVMReflectPass(PassRegistry&);
52 void initializeGenericToNVVMPass(PassRegistry&); 50 void initializeGenericToNVVMPass(PassRegistry&);
51 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
52 void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
53 } 53 }
54 54
55 extern "C" void LLVMInitializeNVPTXTarget() { 55 extern "C" void LLVMInitializeNVPTXTarget() {
56 // Register the target. 56 // Register the target.
57 RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32); 57 RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32);
59 59
60 // FIXME: This pass is really intended to be invoked during IR optimization, 60 // FIXME: This pass is really intended to be invoked during IR optimization,
61 // but it's very NVPTX-specific. 61 // but it's very NVPTX-specific.
62 initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); 62 initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
63 initializeGenericToNVVMPass(*PassRegistry::getPassRegistry()); 63 initializeGenericToNVVMPass(*PassRegistry::getPassRegistry());
64 } 64 initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry());
65 65 initializeNVPTXFavorNonGenericAddrSpacesPass(
66 NVPTXTargetMachine::NVPTXTargetMachine( 66 *PassRegistry::getPassRegistry());
67 const Target &T, StringRef TT, StringRef CPU, StringRef FS, 67 }
68 const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, 68
69 CodeGenOpt::Level OL, bool is64bit) 69 NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT,
70 StringRef CPU, StringRef FS,
71 const TargetOptions &Options,
72 Reloc::Model RM, CodeModel::Model CM,
73 CodeGenOpt::Level OL, bool is64bit)
70 : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), 74 : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
71 Subtarget(TT, CPU, FS, is64bit), DL(Subtarget.getDataLayout()), 75 Subtarget(TT, CPU, FS, *this, is64bit) {
72 InstrInfo(*this), TLInfo(*this), TSInfo(*this),
73 FrameLowering(
74 *this, is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {
75 initAsmInfo(); 76 initAsmInfo();
76 } 77 }
77 78
78 void NVPTXTargetMachine32::anchor() {} 79 void NVPTXTargetMachine32::anchor() {}
79 80
99 100
100 NVPTXTargetMachine &getNVPTXTargetMachine() const { 101 NVPTXTargetMachine &getNVPTXTargetMachine() const {
101 return getTM<NVPTXTargetMachine>(); 102 return getTM<NVPTXTargetMachine>();
102 } 103 }
103 104
104 virtual void addIRPasses(); 105 void addIRPasses() override;
105 virtual bool addInstSelector(); 106 bool addInstSelector() override;
106 virtual bool addPreRegAlloc(); 107 bool addPreRegAlloc() override;
107 virtual bool addPostRegAlloc(); 108 bool addPostRegAlloc() override;
108 109 void addMachineSSAOptimization() override;
109 virtual FunctionPass *createTargetRegisterAllocator(bool) LLVM_OVERRIDE; 110
110 virtual void addFastRegAlloc(FunctionPass *RegAllocPass); 111 FunctionPass *createTargetRegisterAllocator(bool) override;
111 virtual void addOptimizedRegAlloc(FunctionPass *RegAllocPass); 112 void addFastRegAlloc(FunctionPass *RegAllocPass) override;
113 void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
112 }; 114 };
113 } // end anonymous namespace 115 } // end anonymous namespace
114 116
115 TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { 117 TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
116 NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM); 118 NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM);
126 disablePass(&PrologEpilogCodeInserterID); 128 disablePass(&PrologEpilogCodeInserterID);
127 disablePass(&MachineCopyPropagationID); 129 disablePass(&MachineCopyPropagationID);
128 disablePass(&BranchFolderPassID); 130 disablePass(&BranchFolderPassID);
129 disablePass(&TailDuplicateID); 131 disablePass(&TailDuplicateID);
130 132
133 addPass(createNVPTXImageOptimizerPass());
131 TargetPassConfig::addIRPasses(); 134 TargetPassConfig::addIRPasses();
135 addPass(createNVPTXAssignValidGlobalNamesPass());
132 addPass(createGenericToNVVMPass()); 136 addPass(createGenericToNVVMPass());
137 addPass(createNVPTXFavorNonGenericAddrSpacesPass());
138 addPass(createSeparateConstOffsetFromGEPPass());
139 // The SeparateConstOffsetFromGEP pass creates variadic bases that can be used
140 // by multiple GEPs. Run GVN or EarlyCSE to really reuse them. GVN generates
141 // significantly better code than EarlyCSE for some of our benchmarks.
142 if (getOptLevel() == CodeGenOpt::Aggressive)
143 addPass(createGVNPass());
144 else
145 addPass(createEarlyCSEPass());
146 // Both FavorNonGenericAddrSpaces and SeparateConstOffsetFromGEP may leave
147 // some dead code. We could remove dead code in an ad-hoc manner, but that
148 // requires manual work and might be error-prone.
149 //
150 // The FavorNonGenericAddrSpaces pass shortcuts unnecessary addrspacecasts,
151 // and leave them unused.
152 //
153 // SeparateConstOffsetFromGEP rebuilds a new index from the old index, and the
154 // old index and some of its intermediate results may become unused.
155 addPass(createDeadCodeEliminationPass());
133 } 156 }
134 157
135 bool NVPTXPassConfig::addInstSelector() { 158 bool NVPTXPassConfig::addInstSelector() {
159 const NVPTXSubtarget &ST =
160 getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>();
161
136 addPass(createLowerAggrCopies()); 162 addPass(createLowerAggrCopies());
137 addPass(createSplitBBatBarPass());
138 addPass(createAllocaHoisting()); 163 addPass(createAllocaHoisting());
139 addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel())); 164 addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
165
166 if (!ST.hasImageHandles())
167 addPass(createNVPTXReplaceImageHandlesPass());
168
140 return false; 169 return false;
141 } 170 }
142 171
143 bool NVPTXPassConfig::addPreRegAlloc() { return false; } 172 bool NVPTXPassConfig::addPreRegAlloc() { return false; }
144 bool NVPTXPassConfig::addPostRegAlloc() { 173 bool NVPTXPassConfig::addPostRegAlloc() {
145 addPass(createNVPTXPrologEpilogPass()); 174 addPass(createNVPTXPrologEpilogPass());
146 return false; 175 return false;
147 } 176 }
148 177
149 FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) { 178 FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
150 return 0; // No reg alloc 179 return nullptr; // No reg alloc
151 } 180 }
152 181
153 void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { 182 void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
154 assert(!RegAllocPass && "NVPTX uses no regalloc!"); 183 assert(!RegAllocPass && "NVPTX uses no regalloc!");
155 addPass(&PHIEliminationID); 184 addPass(&PHIEliminationID);
177 // FIXME: Needs physical registers 206 // FIXME: Needs physical registers
178 //addPass(&PostRAMachineLICMID); 207 //addPass(&PostRAMachineLICMID);
179 208
180 printAndVerify("After StackSlotColoring"); 209 printAndVerify("After StackSlotColoring");
181 } 210 }
211
212 void NVPTXPassConfig::addMachineSSAOptimization() {
213 // Pre-ra tail duplication.
214 if (addPass(&EarlyTailDuplicateID))
215 printAndVerify("After Pre-RegAlloc TailDuplicate");
216
217 // Optimize PHIs before DCE: removing dead PHI cycles may make more
218 // instructions dead.
219 addPass(&OptimizePHIsID);
220
221 // This pass merges large allocas. StackSlotColoring is a different pass
222 // which merges spill slots.
223 addPass(&StackColoringID);
224
225 // If the target requests it, assign local variables to stack slots relative
226 // to one another and simplify frame index references where possible.
227 addPass(&LocalStackSlotAllocationID);
228
229 // With optimization, dead code should already be eliminated. However
230 // there is one known exception: lowered code for arguments that are only
231 // used by tail calls, where the tail calls reuse the incoming stack
232 // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
233 addPass(&DeadMachineInstructionElimID);
234 printAndVerify("After codegen DCE pass");
235
236 // Allow targets to insert passes that improve instruction level parallelism,
237 // like if-conversion. Such passes will typically need dominator trees and
238 // loop info, just like LICM and CSE below.
239 if (addILPOpts())
240 printAndVerify("After ILP optimizations");
241
242 addPass(&MachineLICMID);
243 addPass(&MachineCSEID);
244
245 addPass(&MachineSinkingID);
246 printAndVerify("After Machine LICM, CSE and Sinking passes");
247
248 addPass(&PeepholeOptimizerID);
249 printAndVerify("After codegen peephole optimization pass");
250 }