diff lib/Target/NVPTX/NVPTXTargetMachine.cpp @ 120:1172e4bd9c6f
update 4.0.0
| author | mir3636 |
|---|---|
| date | Fri, 25 Nov 2016 19:14:25 +0900 |
| parents | afa8332a0e37 |
| children | 803732b1fca8 |
```diff
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp	Tue Jan 26 22:56:36 2016 +0900
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp	Fri Nov 25 19:14:25 2016 +0900
@@ -20,9 +20,9 @@
 #include "NVPTXTargetTransformInfo.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/IRPrintingPasses.h"
 #include "llvm/IR/LegacyPassManager.h"
@@ -44,34 +44,45 @@
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Vectorize.h"
 
 using namespace llvm;
 
+// LSV is still relatively new; this switch lets us turn it off in case we
+// encounter (or suspect) a bug.
+static cl::opt<bool>
+    DisableLoadStoreVectorizer("disable-nvptx-load-store-vectorizer",
+                               cl::desc("Disable load/store vectorizer"),
+                               cl::init(false), cl::Hidden);
+
 namespace llvm {
+void initializeNVVMIntrRangePass(PassRegistry&);
 void initializeNVVMReflectPass(PassRegistry&);
 void initializeGenericToNVVMPass(PassRegistry&);
 void initializeNVPTXAllocaHoistingPass(PassRegistry &);
 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
-void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
+void initializeNVPTXInferAddressSpacesPass(PassRegistry &);
 void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
-void initializeNVPTXLowerKernelArgsPass(PassRegistry &);
+void initializeNVPTXLowerArgsPass(PassRegistry &);
 void initializeNVPTXLowerAllocaPass(PassRegistry &);
 }
 
 extern "C" void LLVMInitializeNVPTXTarget() {
   // Register the target.
-  RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32);
-  RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64);
+  RegisterTargetMachine<NVPTXTargetMachine32> X(getTheNVPTXTarget32());
+  RegisterTargetMachine<NVPTXTargetMachine64> Y(getTheNVPTXTarget64());
 
   // FIXME: This pass is really intended to be invoked during IR optimization,
   // but it's very NVPTX-specific.
   PassRegistry &PR = *PassRegistry::getPassRegistry();
   initializeNVVMReflectPass(PR);
+  initializeNVVMIntrRangePass(PR);
   initializeGenericToNVVMPass(PR);
   initializeNVPTXAllocaHoistingPass(PR);
   initializeNVPTXAssignValidGlobalNamesPass(PR);
-  initializeNVPTXFavorNonGenericAddrSpacesPass(PR);
-  initializeNVPTXLowerKernelArgsPass(PR);
+  initializeNVPTXInferAddressSpacesPass(PR);
+  initializeNVPTXLowerArgsPass(PR);
   initializeNVPTXLowerAllocaPass(PR);
   initializeNVPTXLowerAggrCopiesPass(PR);
 }
@@ -90,11 +101,15 @@
 NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
                                        StringRef CPU, StringRef FS,
                                        const TargetOptions &Options,
-                                       Reloc::Model RM, CodeModel::Model CM,
+                                       Optional<Reloc::Model> RM,
+                                       CodeModel::Model CM,
                                        CodeGenOpt::Level OL, bool is64bit)
-    : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, RM,
-                        CM, OL),
-      is64bit(is64bit), TLOF(make_unique<NVPTXTargetObjectFile>()),
+    // The pic relocation model is used regardless of what the client has
+    // specified, as it is the only relocation model currently supported.
+    : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options,
+                        Reloc::PIC_, CM, OL),
+      is64bit(is64bit),
+      TLOF(make_unique<NVPTXTargetObjectFile>()),
       Subtarget(TT, CPU, FS, *this) {
   if (TT.getOS() == Triple::NVCL)
     drvInterface = NVPTX::NVCL;
@@ -110,7 +125,8 @@
 NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT,
                                            StringRef CPU, StringRef FS,
                                            const TargetOptions &Options,
-                                           Reloc::Model RM, CodeModel::Model CM,
+                                           Optional<Reloc::Model> RM,
+                                           CodeModel::Model CM,
                                            CodeGenOpt::Level OL)
     : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
 
@@ -119,7 +135,8 @@
 NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT,
                                            StringRef CPU, StringRef FS,
                                            const TargetOptions &Options,
-                                           Reloc::Model RM, CodeModel::Model CM,
+                                           Optional<Reloc::Model> RM,
+                                           CodeModel::Model CM,
                                            CodeGenOpt::Level OL)
     : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
 
@@ -143,14 +160,25 @@
   void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
 
 private:
-  // if the opt level is aggressive, add GVN; otherwise, add EarlyCSE.
+  // If the opt level is aggressive, add GVN; otherwise, add EarlyCSE. This
+  // function is only called in opt mode.
   void addEarlyCSEOrGVNPass();
+
+  // Add passes that propagate special memory spaces.
+  void addAddressSpaceInferencePasses();
+
+  // Add passes that perform straight-line scalar optimizations.
+  void addStraightLineScalarOptimizationPasses();
 };
 } // end anonymous namespace
 
 TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
-  NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM);
-  return PassConfig;
+  return new NVPTXPassConfig(this, PM);
+}
+
+void NVPTXTargetMachine::addEarlyAsPossiblePasses(PassManagerBase &PM) {
+  PM.add(createNVVMReflectPass());
+  PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion()));
 }
 
 TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
@@ -166,34 +194,15 @@
     addPass(createEarlyCSEPass());
 }
 
-void NVPTXPassConfig::addIRPasses() {
-  // The following passes are known to not play well with virtual regs hanging
-  // around after register allocation (which in our case, is *all* registers).
-  // We explicitly disable them here. We do, however, need some functionality
-  // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
-  // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
-  disablePass(&PrologEpilogCodeInserterID);
-  disablePass(&MachineCopyPropagationID);
-  disablePass(&TailDuplicateID);
-
-  addPass(createNVVMReflectPass());
-  addPass(createNVPTXImageOptimizerPass());
-  addPass(createNVPTXAssignValidGlobalNamesPass());
-  addPass(createGenericToNVVMPass());
-
-  // === Propagate special address spaces ===
-  addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine()));
-  // NVPTXLowerKernelArgs emits alloca for byval parameters which can often
+void NVPTXPassConfig::addAddressSpaceInferencePasses() {
+  // NVPTXLowerArgs emits alloca for byval parameters which can often
   // be eliminated by SROA.
   addPass(createSROAPass());
   addPass(createNVPTXLowerAllocaPass());
-  addPass(createNVPTXFavorNonGenericAddrSpacesPass());
-  // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave
-  // them unused. We could remove dead code in an ad-hoc manner, but that
-  // requires manual work and might be error-prone.
-  addPass(createDeadCodeEliminationPass());
+  addPass(createNVPTXInferAddressSpacesPass());
+}
 
-  // === Straight-line scalar optimizations ===
+void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() {
   addPass(createSeparateConstOffsetFromGEPPass());
   addPass(createSpeculativeExecutionPass());
   // ReassociateGEPs exposes more opportunites for SLSR. See
@@ -208,6 +217,43 @@
   // NaryReassociate on GEPs creates redundant common expressions, so run
   // EarlyCSE after it.
   addPass(createEarlyCSEPass());
+}
+
+void NVPTXPassConfig::addIRPasses() {
+  // The following passes are known to not play well with virtual regs hanging
+  // around after register allocation (which in our case, is *all* registers).
+  // We explicitly disable them here. We do, however, need some functionality
+  // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
+  // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
+  disablePass(&PrologEpilogCodeInserterID);
+  disablePass(&MachineCopyPropagationID);
+  disablePass(&TailDuplicateID);
+  disablePass(&StackMapLivenessID);
+  disablePass(&LiveDebugValuesID);
+  disablePass(&PostRASchedulerID);
+  disablePass(&FuncletLayoutID);
+  disablePass(&PatchableFunctionID);
+
+  // NVVMReflectPass is added in addEarlyAsPossiblePasses, so hopefully running
+  // it here does nothing. But since we need it for correctness when lowering
+  // to NVPTX, run it here too, in case whoever built our pass pipeline didn't
+  // call addEarlyAsPossiblePasses.
+  addPass(createNVVMReflectPass());
+
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(createNVPTXImageOptimizerPass());
+  addPass(createNVPTXAssignValidGlobalNamesPass());
+  addPass(createGenericToNVVMPass());
+
+  // NVPTXLowerArgs is required for correctness and should be run right
+  // before the address space inference passes.
+  addPass(createNVPTXLowerArgsPass(&getNVPTXTargetMachine()));
+  if (getOptLevel() != CodeGenOpt::None) {
+    addAddressSpaceInferencePasses();
+    if (!DisableLoadStoreVectorizer)
+      addPass(createLoadStoreVectorizerPass());
+    addStraightLineScalarOptimizationPasses();
+  }
 
   // === LSR and other generic IR passes ===
   TargetPassConfig::addIRPasses();
@@ -223,7 +269,8 @@
   //   %1 = shl %a, 2
   //
   // but EarlyCSE can do neither of them.
-  addEarlyCSEOrGVNPass();
+  if (getOptLevel() != CodeGenOpt::None)
+    addEarlyCSEOrGVNPass();
 }
 
 bool NVPTXPassConfig::addInstSelector() {
@@ -241,10 +288,12 @@
 
 void NVPTXPassConfig::addPostRegAlloc() {
   addPass(createNVPTXPrologEpilogPass(), false);
-  // NVPTXPrologEpilogPass calculates frame object offset and replace frame
-  // index with VRFrame register. NVPTXPeephole need to be run after that and
-  // will replace VRFrame with VRFrameLocal when possible.
-  addPass(createNVPTXPeephole());
+  if (getOptLevel() != CodeGenOpt::None) {
+    // NVPTXPrologEpilogPass calculates frame object offset and replace frame
+    // index with VRFrame register. NVPTXPeephole need to be run after that and
+    // will replace VRFrame with VRFrameLocal when possible.
+    addPass(createNVPTXPeephole());
+  }
 }
 
 FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
```
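For context: the new `addEarlyAsPossiblePasses` hook above is the entry point meant for front ends that build their own IR pipeline, so that `NVVMReflect` and `NVVMIntrRange` run before general optimization; the comment in `addIRPasses` only re-adds `NVVMReflect` as a safety net in case that call never happens. The sketch below shows how a driver might invoke the hook. The helper name `runEarlyNVPTXPasses` and the bare `legacy::PassManager` setup are illustrative assumptions; only the hook itself and the passes it schedules come from the diff.

```cpp
// Sketch only: assumes an NVPTX TargetMachine has already been created
// (e.g. via the TargetRegistry) and that `M` is the module about to be
// handed to the regular optimizer.
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Target/TargetMachine.h"

// Run the target's "early as possible" passes; for NVPTX this adds
// NVVMReflect and NVVMIntrRange, as defined in the diff above.
void runEarlyNVPTXPasses(llvm::TargetMachine &TM, llvm::Module &M) {
  llvm::legacy::PassManager PM;
  TM.addEarlyAsPossiblePasses(PM);
  PM.run(M);
}
```

Likewise, because `DisableLoadStoreVectorizer` is a regular `cl::opt`, the new load/store vectorizer can be switched off from command-line tools (for example `llc -disable-nvptx-load-store-vectorizer`) if a miscompile is suspected, which is the escape hatch the comment in the diff describes.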