Mercurial > hg > CbC > CbC_llvm
diff lib/Target/NVPTX/NVPTXTargetMachine.cpp @ 95:afa8332a0e37 LLVM3.8
LLVM 3.8
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 13 Oct 2015 17:48:58 +0900 |
parents | 60c9769439b8 |
children | 1172e4bd9c6f |
line wrap: on
line diff
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp Wed Feb 18 14:56:07 2015 +0900 +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp Tue Oct 13 17:48:58 2015 +0900 @@ -50,9 +50,12 @@ namespace llvm { void initializeNVVMReflectPass(PassRegistry&); void initializeGenericToNVVMPass(PassRegistry&); +void initializeNVPTXAllocaHoistingPass(PassRegistry &); void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); -void initializeNVPTXLowerStructArgsPass(PassRegistry &); +void initializeNVPTXLowerAggrCopiesPass(PassRegistry &); +void initializeNVPTXLowerKernelArgsPass(PassRegistry &); +void initializeNVPTXLowerAllocaPass(PassRegistry &); } extern "C" void LLVMInitializeNVPTXTarget() { @@ -62,12 +65,15 @@ // FIXME: This pass is really intended to be invoked during IR optimization, // but it's very NVPTX-specific. - initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); - initializeGenericToNVVMPass(*PassRegistry::getPassRegistry()); - initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry()); - initializeNVPTXFavorNonGenericAddrSpacesPass( - *PassRegistry::getPassRegistry()); - initializeNVPTXLowerStructArgsPass(*PassRegistry::getPassRegistry()); + PassRegistry &PR = *PassRegistry::getPassRegistry(); + initializeNVVMReflectPass(PR); + initializeGenericToNVVMPass(PR); + initializeNVPTXAllocaHoistingPass(PR); + initializeNVPTXAssignValidGlobalNamesPass(PR); + initializeNVPTXFavorNonGenericAddrSpacesPass(PR); + initializeNVPTXLowerKernelArgsPass(PR); + initializeNVPTXLowerAllocaPass(PR); + initializeNVPTXLowerAggrCopiesPass(PR); } static std::string computeDataLayout(bool is64Bit) { @@ -81,15 +87,19 @@ return Ret; } -NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT, +NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool is64bit) - 
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - TLOF(make_unique<NVPTXTargetObjectFile>()), - DL(computeDataLayout(is64bit)), - Subtarget(TT, CPU, FS, *this, is64bit) { + : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, RM, + CM, OL), + is64bit(is64bit), TLOF(make_unique<NVPTXTargetObjectFile>()), + Subtarget(TT, CPU, FS, *this) { + if (TT.getOS() == Triple::NVCL) + drvInterface = NVPTX::NVCL; + else + drvInterface = NVPTX::CUDA; initAsmInfo(); } @@ -97,18 +107,20 @@ void NVPTXTargetMachine32::anchor() {} -NVPTXTargetMachine32::NVPTXTargetMachine32( - const Target &T, StringRef TT, StringRef CPU, StringRef FS, - const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) +NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} void NVPTXTargetMachine64::anchor() {} -NVPTXTargetMachine64::NVPTXTargetMachine64( - const Target &T, StringRef TT, StringRef CPU, StringRef FS, - const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) +NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} namespace { @@ -129,6 +141,10 @@ FunctionPass *createTargetRegisterAllocator(bool) override; void addFastRegAlloc(FunctionPass *RegAllocPass) override; void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; + +private: + // if the opt level is aggressive, add GVN; otherwise, add EarlyCSE. 
+ void addEarlyCSEOrGVNPass(); }; } // end anonymous namespace @@ -138,8 +154,16 @@ } TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis( - [this](Function &) { return TargetTransformInfo(NVPTXTTIImpl(this)); }); + return TargetIRAnalysis([this](const Function &F) { + return TargetTransformInfo(NVPTXTTIImpl(this, F)); + }); +} + +void NVPTXPassConfig::addEarlyCSEOrGVNPass() { + if (getOptLevel() == CodeGenOpt::Aggressive) + addPass(createGVNPass()); + else + addPass(createEarlyCSEPass()); } void NVPTXPassConfig::addIRPasses() { @@ -150,38 +174,60 @@ // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp). disablePass(&PrologEpilogCodeInserterID); disablePass(&MachineCopyPropagationID); - disablePass(&BranchFolderPassID); disablePass(&TailDuplicateID); + addPass(createNVVMReflectPass()); addPass(createNVPTXImageOptimizerPass()); - TargetPassConfig::addIRPasses(); addPass(createNVPTXAssignValidGlobalNamesPass()); addPass(createGenericToNVVMPass()); + + // === Propagate special address spaces === + addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine())); + // NVPTXLowerKernelArgs emits alloca for byval parameters which can often + // be eliminated by SROA. + addPass(createSROAPass()); + addPass(createNVPTXLowerAllocaPass()); addPass(createNVPTXFavorNonGenericAddrSpacesPass()); - addPass(createStraightLineStrengthReducePass()); + // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave + // them unused. We could remove dead code in an ad-hoc manner, but that + // requires manual work and might be error-prone. + addPass(createDeadCodeEliminationPass()); + + // === Straight-line scalar optimizations === addPass(createSeparateConstOffsetFromGEPPass()); - // The SeparateConstOffsetFromGEP pass creates variadic bases that can be used - // by multiple GEPs. Run GVN or EarlyCSE to really reuse them. GVN generates - // significantly better code than EarlyCSE for some of our benchmarks. 
- if (getOptLevel() == CodeGenOpt::Aggressive) - addPass(createGVNPass()); - else - addPass(createEarlyCSEPass()); - // Both FavorNonGenericAddrSpaces and SeparateConstOffsetFromGEP may leave - // some dead code. We could remove dead code in an ad-hoc manner, but that - // requires manual work and might be error-prone. + addPass(createSpeculativeExecutionPass()); + // ReassociateGEPs exposes more opportunities for SLSR. See + // the example in reassociate-geps-and-slsr.ll. + addPass(createStraightLineStrengthReducePass()); + // SeparateConstOffsetFromGEP and SLSR create common expressions which GVN or + // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE + // for some of our benchmarks. + addEarlyCSEOrGVNPass(); + // Run NaryReassociate after EarlyCSE/GVN to be more effective. + addPass(createNaryReassociatePass()); + // NaryReassociate on GEPs creates redundant common expressions, so run + // EarlyCSE after it. + addPass(createEarlyCSEPass()); + + // === LSR and other generic IR passes === + TargetPassConfig::addIRPasses(); + // EarlyCSE is not always strong enough to clean up what LSR produces. For + // example, GVN can combine // - // The FavorNonGenericAddrSpaces pass shortcuts unnecessary addrspacecasts, - // and leave them unused. + // %0 = add %a, %b + // %1 = add %b, %a + // + // and // - // SeparateConstOffsetFromGEP rebuilds a new index from the old index, and the - // old index and some of its intermediate results may become unused. - addPass(createDeadCodeEliminationPass()); + // %0 = shl nsw %a, 2 + // %1 = shl %a, 2 + // + // but EarlyCSE can do neither of them. 
+ addEarlyCSEOrGVNPass(); } bool NVPTXPassConfig::addInstSelector() { - const NVPTXSubtarget &ST = - getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>(); + const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl(); addPass(createLowerAggrCopies()); addPass(createAllocaHoisting()); @@ -195,6 +241,10 @@ void NVPTXPassConfig::addPostRegAlloc() { addPass(createNVPTXPrologEpilogPass(), false); + // NVPTXPrologEpilogPass calculates frame object offset and replaces frame + // index with VRFrame register. NVPTXPeephole needs to be run after that and + // will replace VRFrame with VRFrameLocal when possible. + addPass(createNVPTXPeephole()); } FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {