Mercurial > hg > CbC > CbC_llvm
comparison lib/Passes/PassBuilder.cpp @ 134:3a76565eade5 LLVM5.0.1
update 5.0.1
author | mir3636 |
---|---|
date | Sat, 17 Feb 2018 09:57:20 +0900 |
parents | 803732b1fca8 |
children | c2174574ed3a |
comparison
equal
deleted
inserted
replaced
133:c60214abe0e8 | 134:3a76565eade5 |
---|---|
20 #include "llvm/Analysis/AliasAnalysis.h" | 20 #include "llvm/Analysis/AliasAnalysis.h" |
21 #include "llvm/Analysis/AliasAnalysisEvaluator.h" | 21 #include "llvm/Analysis/AliasAnalysisEvaluator.h" |
22 #include "llvm/Analysis/AssumptionCache.h" | 22 #include "llvm/Analysis/AssumptionCache.h" |
23 #include "llvm/Analysis/BasicAliasAnalysis.h" | 23 #include "llvm/Analysis/BasicAliasAnalysis.h" |
24 #include "llvm/Analysis/BlockFrequencyInfo.h" | 24 #include "llvm/Analysis/BlockFrequencyInfo.h" |
25 #include "llvm/Analysis/BlockFrequencyInfoImpl.h" | |
26 #include "llvm/Analysis/BranchProbabilityInfo.h" | 25 #include "llvm/Analysis/BranchProbabilityInfo.h" |
27 #include "llvm/Analysis/CFGPrinter.h" | 26 #include "llvm/Analysis/CFGPrinter.h" |
28 #include "llvm/Analysis/CFLAndersAliasAnalysis.h" | 27 #include "llvm/Analysis/CFLAndersAliasAnalysis.h" |
29 #include "llvm/Analysis/CFLSteensAliasAnalysis.h" | 28 #include "llvm/Analysis/CFLSteensAliasAnalysis.h" |
30 #include "llvm/Analysis/CGSCCPassManager.h" | 29 #include "llvm/Analysis/CGSCCPassManager.h" |
58 #include "llvm/IR/PassManager.h" | 57 #include "llvm/IR/PassManager.h" |
59 #include "llvm/IR/Verifier.h" | 58 #include "llvm/IR/Verifier.h" |
60 #include "llvm/Support/Debug.h" | 59 #include "llvm/Support/Debug.h" |
61 #include "llvm/Support/Regex.h" | 60 #include "llvm/Support/Regex.h" |
62 #include "llvm/Target/TargetMachine.h" | 61 #include "llvm/Target/TargetMachine.h" |
62 #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" | |
63 #include "llvm/Transforms/GCOVProfiler.h" | 63 #include "llvm/Transforms/GCOVProfiler.h" |
64 #include "llvm/Transforms/IPO/AlwaysInliner.h" | 64 #include "llvm/Transforms/IPO/AlwaysInliner.h" |
65 #include "llvm/Transforms/IPO/ArgumentPromotion.h" | 65 #include "llvm/Transforms/IPO/ArgumentPromotion.h" |
66 #include "llvm/Transforms/IPO/CalledValuePropagation.h" | 66 #include "llvm/Transforms/IPO/CalledValuePropagation.h" |
67 #include "llvm/Transforms/IPO/ConstantMerge.h" | 67 #include "llvm/Transforms/IPO/ConstantMerge.h" |
79 #include "llvm/Transforms/IPO/Internalize.h" | 79 #include "llvm/Transforms/IPO/Internalize.h" |
80 #include "llvm/Transforms/IPO/LowerTypeTests.h" | 80 #include "llvm/Transforms/IPO/LowerTypeTests.h" |
81 #include "llvm/Transforms/IPO/PartialInlining.h" | 81 #include "llvm/Transforms/IPO/PartialInlining.h" |
82 #include "llvm/Transforms/IPO/SCCP.h" | 82 #include "llvm/Transforms/IPO/SCCP.h" |
83 #include "llvm/Transforms/IPO/StripDeadPrototypes.h" | 83 #include "llvm/Transforms/IPO/StripDeadPrototypes.h" |
84 #include "llvm/Transforms/IPO/SyntheticCountsPropagation.h" | |
84 #include "llvm/Transforms/IPO/WholeProgramDevirt.h" | 85 #include "llvm/Transforms/IPO/WholeProgramDevirt.h" |
85 #include "llvm/Transforms/InstCombine/InstCombine.h" | 86 #include "llvm/Transforms/InstCombine/InstCombine.h" |
86 #include "llvm/Transforms/InstrProfiling.h" | 87 #include "llvm/Transforms/InstrProfiling.h" |
88 #include "llvm/Transforms/Instrumentation/BoundsChecking.h" | |
87 #include "llvm/Transforms/PGOInstrumentation.h" | 89 #include "llvm/Transforms/PGOInstrumentation.h" |
88 #include "llvm/Transforms/SampleProfile.h" | 90 #include "llvm/Transforms/SampleProfile.h" |
89 #include "llvm/Transforms/Scalar/ADCE.h" | 91 #include "llvm/Transforms/Scalar/ADCE.h" |
90 #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" | 92 #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" |
91 #include "llvm/Transforms/Scalar/BDCE.h" | 93 #include "llvm/Transforms/Scalar/BDCE.h" |
94 #include "llvm/Transforms/Scalar/CallSiteSplitting.h" | |
92 #include "llvm/Transforms/Scalar/ConstantHoisting.h" | 95 #include "llvm/Transforms/Scalar/ConstantHoisting.h" |
93 #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h" | 96 #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h" |
94 #include "llvm/Transforms/Scalar/DCE.h" | 97 #include "llvm/Transforms/Scalar/DCE.h" |
95 #include "llvm/Transforms/Scalar/DeadStoreElimination.h" | 98 #include "llvm/Transforms/Scalar/DeadStoreElimination.h" |
96 #include "llvm/Transforms/Scalar/DivRemPairs.h" | 99 #include "llvm/Transforms/Scalar/DivRemPairs.h" |
123 #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" | 126 #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" |
124 #include "llvm/Transforms/Scalar/NaryReassociate.h" | 127 #include "llvm/Transforms/Scalar/NaryReassociate.h" |
125 #include "llvm/Transforms/Scalar/NewGVN.h" | 128 #include "llvm/Transforms/Scalar/NewGVN.h" |
126 #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" | 129 #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" |
127 #include "llvm/Transforms/Scalar/Reassociate.h" | 130 #include "llvm/Transforms/Scalar/Reassociate.h" |
131 #include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h" | |
128 #include "llvm/Transforms/Scalar/SCCP.h" | 132 #include "llvm/Transforms/Scalar/SCCP.h" |
129 #include "llvm/Transforms/Scalar/SROA.h" | 133 #include "llvm/Transforms/Scalar/SROA.h" |
130 #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" | 134 #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" |
131 #include "llvm/Transforms/Scalar/SimplifyCFG.h" | 135 #include "llvm/Transforms/Scalar/SimplifyCFG.h" |
132 #include "llvm/Transforms/Scalar/Sink.h" | 136 #include "llvm/Transforms/Scalar/Sink.h" |
137 #include "llvm/Transforms/Scalar/SpeculateAroundPHIs.h" | |
133 #include "llvm/Transforms/Scalar/SpeculativeExecution.h" | 138 #include "llvm/Transforms/Scalar/SpeculativeExecution.h" |
134 #include "llvm/Transforms/Scalar/TailRecursionElimination.h" | 139 #include "llvm/Transforms/Scalar/TailRecursionElimination.h" |
135 #include "llvm/Transforms/Utils/AddDiscriminators.h" | 140 #include "llvm/Transforms/Utils/AddDiscriminators.h" |
136 #include "llvm/Transforms/Utils/BreakCriticalEdges.h" | 141 #include "llvm/Transforms/Utils/BreakCriticalEdges.h" |
142 #include "llvm/Transforms/Utils/EntryExitInstrumenter.h" | |
137 #include "llvm/Transforms/Utils/LCSSA.h" | 143 #include "llvm/Transforms/Utils/LCSSA.h" |
138 #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h" | 144 #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h" |
139 #include "llvm/Transforms/Utils/LoopSimplify.h" | 145 #include "llvm/Transforms/Utils/LoopSimplify.h" |
140 #include "llvm/Transforms/Utils/LowerInvoke.h" | 146 #include "llvm/Transforms/Utils/LowerInvoke.h" |
141 #include "llvm/Transforms/Utils/Mem2Reg.h" | 147 #include "llvm/Transforms/Utils/Mem2Reg.h" |
142 #include "llvm/Transforms/Utils/NameAnonGlobals.h" | 148 #include "llvm/Transforms/Utils/NameAnonGlobals.h" |
143 #include "llvm/Transforms/Utils/PredicateInfo.h" | |
144 #include "llvm/Transforms/Utils/SimplifyInstructions.h" | 149 #include "llvm/Transforms/Utils/SimplifyInstructions.h" |
145 #include "llvm/Transforms/Utils/SymbolRewriter.h" | 150 #include "llvm/Transforms/Utils/SymbolRewriter.h" |
146 #include "llvm/Transforms/Vectorize/LoopVectorize.h" | 151 #include "llvm/Transforms/Vectorize/LoopVectorize.h" |
147 #include "llvm/Transforms/Vectorize/SLPVectorizer.h" | 152 #include "llvm/Transforms/Vectorize/SLPVectorizer.h" |
148 | |
149 #include <type_traits> | |
150 | 153 |
151 using namespace llvm; | 154 using namespace llvm; |
152 | 155 |
153 static cl::opt<unsigned> MaxDevirtIterations("pm-max-devirt-iterations", | 156 static cl::opt<unsigned> MaxDevirtIterations("pm-max-devirt-iterations", |
154 cl::ReallyHidden, cl::init(4)); | 157 cl::ReallyHidden, cl::init(4)); |
171 cl::desc("Enable the GVN hoisting pass for the new PM (default = off)")); | 174 cl::desc("Enable the GVN hoisting pass for the new PM (default = off)")); |
172 | 175 |
173 static cl::opt<bool> EnableGVNSink( | 176 static cl::opt<bool> EnableGVNSink( |
174 "enable-npm-gvn-sink", cl::init(false), cl::Hidden, | 177 "enable-npm-gvn-sink", cl::init(false), cl::Hidden, |
175 cl::desc("Enable the GVN hoisting pass for the new PM (default = off)")); | 178 cl::desc("Enable the GVN hoisting pass for the new PM (default = off)")); |
179 | |
180 static cl::opt<bool> EnableSyntheticCounts( | |
181 "enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore, | |
182 cl::desc("Run synthetic function entry count generation " | |
183 "pass")); | |
176 | 184 |
177 static Regex DefaultAliasRegex( | 185 static Regex DefaultAliasRegex( |
178 "^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$"); | 186 "^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$"); |
179 | 187 |
180 static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) { | 188 static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) { |
354 | 362 |
355 // Optimize based on known information about branches, and cleanup afterward. | 363 // Optimize based on known information about branches, and cleanup afterward. |
356 FPM.addPass(JumpThreadingPass()); | 364 FPM.addPass(JumpThreadingPass()); |
357 FPM.addPass(CorrelatedValuePropagationPass()); | 365 FPM.addPass(CorrelatedValuePropagationPass()); |
358 FPM.addPass(SimplifyCFGPass()); | 366 FPM.addPass(SimplifyCFGPass()); |
367 if (Level == O3) | |
368 FPM.addPass(AggressiveInstCombinePass()); | |
359 FPM.addPass(InstCombinePass()); | 369 FPM.addPass(InstCombinePass()); |
360 | 370 |
361 if (!isOptimizingForSize(Level)) | 371 if (!isOptimizingForSize(Level)) |
362 FPM.addPass(LibCallsShrinkWrapPass()); | 372 FPM.addPass(LibCallsShrinkWrapPass()); |
363 | 373 |
408 C(LPM2, Level); | 418 C(LPM2, Level); |
409 | 419 |
410 // We provide the opt remark emitter pass for LICM to use. We only need to do | 420 // We provide the opt remark emitter pass for LICM to use. We only need to do |
411 // this once as it is immutable. | 421 // this once as it is immutable. |
412 FPM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); | 422 FPM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); |
413 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1))); | 423 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), DebugLogging)); |
414 FPM.addPass(SimplifyCFGPass()); | 424 FPM.addPass(SimplifyCFGPass()); |
415 FPM.addPass(InstCombinePass()); | 425 FPM.addPass(InstCombinePass()); |
416 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2))); | 426 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), DebugLogging)); |
417 | 427 |
418 // Eliminate redundancies. | 428 // Eliminate redundancies. |
419 if (Level != O1) { | 429 if (Level != O1) { |
420 // These passes add substantial compile time so skip them at O1. | 430 // These passes add substantial compile time so skip them at O1. |
421 FPM.addPass(MergedLoadStoreMotionPass()); | 431 FPM.addPass(MergedLoadStoreMotionPass()); |
446 // Re-consider control flow based optimizations after redundancy elimination, | 456 // Re-consider control flow based optimizations after redundancy elimination, |
447 // redo DCE, etc. | 457 // redo DCE, etc. |
448 FPM.addPass(JumpThreadingPass()); | 458 FPM.addPass(JumpThreadingPass()); |
449 FPM.addPass(CorrelatedValuePropagationPass()); | 459 FPM.addPass(CorrelatedValuePropagationPass()); |
450 FPM.addPass(DSEPass()); | 460 FPM.addPass(DSEPass()); |
451 FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass())); | 461 FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging)); |
452 | 462 |
453 for (auto &C : ScalarOptimizerLateEPCallbacks) | 463 for (auto &C : ScalarOptimizerLateEPCallbacks) |
454 C(FPM, Level); | 464 C(FPM, Level); |
455 | 465 |
456 // Finally, do an expensive DCE pass to catch all the dead code exposed by | 466 // Finally, do an expensive DCE pass to catch all the dead code exposed by |
506 | 516 |
507 if (RunProfileGen) { | 517 if (RunProfileGen) { |
508 MPM.addPass(PGOInstrumentationGen()); | 518 MPM.addPass(PGOInstrumentationGen()); |
509 | 519 |
510 FunctionPassManager FPM; | 520 FunctionPassManager FPM; |
511 FPM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass())); | 521 FPM.addPass( |
522 createFunctionToLoopPassAdaptor(LoopRotatePass(), DebugLogging)); | |
512 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); | 523 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); |
513 | 524 |
514 // Add the profile lowering pass. | 525 // Add the profile lowering pass. |
515 InstrProfOptions Options; | 526 InstrProfOptions Options; |
516 if (!ProfileGenFile.empty()) | 527 if (!ProfileGenFile.empty()) |
546 FunctionPassManager EarlyFPM(DebugLogging); | 557 FunctionPassManager EarlyFPM(DebugLogging); |
547 EarlyFPM.addPass(SimplifyCFGPass()); | 558 EarlyFPM.addPass(SimplifyCFGPass()); |
548 EarlyFPM.addPass(SROA()); | 559 EarlyFPM.addPass(SROA()); |
549 EarlyFPM.addPass(EarlyCSEPass()); | 560 EarlyFPM.addPass(EarlyCSEPass()); |
550 EarlyFPM.addPass(LowerExpectIntrinsicPass()); | 561 EarlyFPM.addPass(LowerExpectIntrinsicPass()); |
562 if (Level == O3) | |
563 EarlyFPM.addPass(CallSiteSplittingPass()); | |
564 | |
551 // In SamplePGO ThinLTO backend, we need instcombine before profile annotation | 565 // In SamplePGO ThinLTO backend, we need instcombine before profile annotation |
552 // to convert bitcast to direct calls so that they can be inlined during the | 566 // to convert bitcast to direct calls so that they can be inlined during the |
553 // profile annotation preparation step. | 567 // profile annotation preparation step. |
554 // More details about SamplePGO design can be found in: | 568 // More details about SamplePGO design can be found in: |
555 // https://research.google.com/pubs/pub45290.html | 569 // https://research.google.com/pubs/pub45290.html |
573 // removed. | 587 // removed. |
574 MPM.addPass(PGOIndirectCallPromotion(Phase == ThinLTOPhase::PostLink, | 588 MPM.addPass(PGOIndirectCallPromotion(Phase == ThinLTOPhase::PostLink, |
575 true)); | 589 true)); |
576 } | 590 } |
577 | 591 |
578 // Interprocedural constant propagation now that basic cleanup has occured | 592 // Interprocedural constant propagation now that basic cleanup has occurred |
579 // and prior to optimizing globals. | 593 // and prior to optimizing globals. |
580 // FIXME: This position in the pipeline hasn't been carefully considered in | 594 // FIXME: This position in the pipeline hasn't been carefully considered in |
581 // years, it should be re-analyzed. | 595 // years, it should be re-analyzed. |
582 MPM.addPass(IPSCCPPass()); | 596 MPM.addPass(IPSCCPPass()); |
583 | 597 |
613 (!PGOOpt->ProfileGenFile.empty() || !PGOOpt->ProfileUseFile.empty())) { | 627 (!PGOOpt->ProfileGenFile.empty() || !PGOOpt->ProfileUseFile.empty())) { |
614 addPGOInstrPasses(MPM, DebugLogging, Level, PGOOpt->RunProfileGen, | 628 addPGOInstrPasses(MPM, DebugLogging, Level, PGOOpt->RunProfileGen, |
615 PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile); | 629 PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile); |
616 MPM.addPass(PGOIndirectCallPromotion(false, false)); | 630 MPM.addPass(PGOIndirectCallPromotion(false, false)); |
617 } | 631 } |
632 | |
633 // Synthesize function entry counts for non-PGO compilation. | |
634 if (EnableSyntheticCounts && !PGOOpt) | |
635 MPM.addPass(SyntheticCountsPropagation()); | |
618 | 636 |
619 // Require the GlobalsAA analysis for the module so we can query it within | 637 // Require the GlobalsAA analysis for the module so we can query it within |
620 // the CGSCC pipeline. | 638 // the CGSCC pipeline. |
621 MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>()); | 639 MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>()); |
622 | 640 |
723 | 741 |
724 for (auto &C : VectorizerStartEPCallbacks) | 742 for (auto &C : VectorizerStartEPCallbacks) |
725 C(OptimizePM, Level); | 743 C(OptimizePM, Level); |
726 | 744 |
727 // First rotate loops that may have been un-rotated by prior passes. | 745 // First rotate loops that may have been un-rotated by prior passes. |
728 OptimizePM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass())); | 746 OptimizePM.addPass( |
747 createFunctionToLoopPassAdaptor(LoopRotatePass(), DebugLogging)); | |
729 | 748 |
730 // Distribute loops to allow partial vectorization. I.e. isolate dependences | 749 // Distribute loops to allow partial vectorization. I.e. isolate dependences |
731 // into separate loop that would otherwise inhibit vectorization. This is | 750 // into separate loop that would otherwise inhibit vectorization. This is |
732 // currently only performed for loops marked with the metadata | 751 // currently only performed for loops marked with the metadata |
733 // llvm.loop.distribute=true or when -enable-loop-distribute is specified. | 752 // llvm.loop.distribute=true or when -enable-loop-distribute is specified. |
741 OptimizePM.addPass(LoopLoadEliminationPass()); | 760 OptimizePM.addPass(LoopLoadEliminationPass()); |
742 | 761 |
743 // Cleanup after the loop optimization passes. | 762 // Cleanup after the loop optimization passes. |
744 OptimizePM.addPass(InstCombinePass()); | 763 OptimizePM.addPass(InstCombinePass()); |
745 | 764 |
746 | |
747 // Now that we've formed fast to execute loop structures, we do further | 765 // Now that we've formed fast to execute loop structures, we do further |
748 // optimizations. These are run afterward as they might block doing complex | 766 // optimizations. These are run afterward as they might block doing complex |
749 // analyses and transforms such as what are needed for loop vectorization. | 767 // analyses and transforms such as what are needed for loop vectorization. |
750 | 768 |
769 // Cleanup after loop vectorization, etc. Simplification passes like CVP and | |
770 // GVN, loop transforms, and others have already run, so it's now better to | |
771 // convert to more optimized IR using more aggressive simplify CFG options. | |
772 // The extra sinking transform can create larger basic blocks, so do this | |
773 // before SLP vectorization. | |
774 OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions(). | |
775 forwardSwitchCondToPhi(true). | |
776 convertSwitchToLookupTable(true). | |
777 needCanonicalLoops(false). | |
778 sinkCommonInsts(true))); | |
779 | |
751 // Optimize parallel scalar instruction chains into SIMD instructions. | 780 // Optimize parallel scalar instruction chains into SIMD instructions. |
752 OptimizePM.addPass(SLPVectorizerPass()); | 781 OptimizePM.addPass(SLPVectorizerPass()); |
753 | 782 |
754 // Cleanup after all of the vectorizers. | |
755 OptimizePM.addPass(SimplifyCFGPass()); | |
756 OptimizePM.addPass(InstCombinePass()); | 783 OptimizePM.addPass(InstCombinePass()); |
757 | 784 |
758 // Unroll small loops to hide loop backedge latency and saturate any parallel | 785 // Unroll small loops to hide loop backedge latency and saturate any parallel |
759 // execution resources of an out-of-order processor. We also then need to | 786 // execution resources of an out-of-order processor. We also then need to |
760 // clean up redundancies and loop invariant code. | 787 // clean up redundancies and loop invariant code. |
762 // combiner for cleanup here so that the unrolling and LICM can be pipelined | 789 // combiner for cleanup here so that the unrolling and LICM can be pipelined |
763 // across the loop nests. | 790 // across the loop nests. |
764 OptimizePM.addPass(LoopUnrollPass(Level)); | 791 OptimizePM.addPass(LoopUnrollPass(Level)); |
765 OptimizePM.addPass(InstCombinePass()); | 792 OptimizePM.addPass(InstCombinePass()); |
766 OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); | 793 OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); |
767 OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass())); | 794 OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging)); |
768 | 795 |
769 // Now that we've vectorized and unrolled loops, we may have more refined | 796 // Now that we've vectorized and unrolled loops, we may have more refined |
770 // alignment information, try to re-derive it here. | 797 // alignment information, try to re-derive it here. |
771 OptimizePM.addPass(AlignmentFromAssumptionsPass()); | 798 OptimizePM.addPass(AlignmentFromAssumptionsPass()); |
772 | 799 |
785 OptimizePM.addPass(DivRemPairsPass()); | 812 OptimizePM.addPass(DivRemPairsPass()); |
786 | 813 |
787 // LoopSink (and other loop passes since the last simplifyCFG) might have | 814 // LoopSink (and other loop passes since the last simplifyCFG) might have |
788 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. | 815 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. |
789 OptimizePM.addPass(SimplifyCFGPass()); | 816 OptimizePM.addPass(SimplifyCFGPass()); |
817 | |
818 // Optimize PHIs by speculating around them when profitable. Note that this | |
819 // pass needs to be run after any PRE or similar pass as it is essentially | |
820 // inserting redundancies into the program. This even includes SimplifyCFG. | |
821 OptimizePM.addPass(SpeculateAroundPHIsPass()); | |
790 | 822 |
791 // Add the core optimizing pipeline. | 823 // Add the core optimizing pipeline. |
792 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM))); | 824 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM))); |
793 | 825 |
794 // Now we need to do some global optimization transforms. | 826 // Now we need to do some global optimization transforms. |
809 ModulePassManager MPM(DebugLogging); | 841 ModulePassManager MPM(DebugLogging); |
810 | 842 |
811 // Force any function attributes we want the rest of the pipeline to observe. | 843 // Force any function attributes we want the rest of the pipeline to observe. |
812 MPM.addPass(ForceFunctionAttrsPass()); | 844 MPM.addPass(ForceFunctionAttrsPass()); |
813 | 845 |
846 // Apply module pipeline start EP callback. | |
847 for (auto &C : PipelineStartEPCallbacks) | |
848 C(MPM); | |
849 | |
814 if (PGOOpt && PGOOpt->SamplePGOSupport) | 850 if (PGOOpt && PGOOpt->SamplePGOSupport) |
815 MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); | 851 MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); |
816 | 852 |
817 // Add the core simplification pipeline. | 853 // Add the core simplification pipeline. |
818 MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::None, | 854 MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::None, |
834 // Force any function attributes we want the rest of the pipeline to observe. | 870 // Force any function attributes we want the rest of the pipeline to observe. |
835 MPM.addPass(ForceFunctionAttrsPass()); | 871 MPM.addPass(ForceFunctionAttrsPass()); |
836 | 872 |
837 if (PGOOpt && PGOOpt->SamplePGOSupport) | 873 if (PGOOpt && PGOOpt->SamplePGOSupport) |
838 MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); | 874 MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); |
875 | |
876 // Apply module pipeline start EP callback. | |
877 for (auto &C : PipelineStartEPCallbacks) | |
878 C(MPM); | |
839 | 879 |
840 // If we are planning to perform ThinLTO later, we don't bloat the code with | 880 // If we are planning to perform ThinLTO later, we don't bloat the code with |
841 // unrolling/vectorization/... now. Just simplify the module as much as we | 881 // unrolling/vectorization/... now. Just simplify the module as much as we |
842 // can. | 882 // can. |
843 MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::PreLink, | 883 MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::PreLink, |
913 // Do basic inference of function attributes from known properties of system | 953 // Do basic inference of function attributes from known properties of system |
914 // libraries and other oracles. | 954 // libraries and other oracles. |
915 MPM.addPass(InferFunctionAttrsPass()); | 955 MPM.addPass(InferFunctionAttrsPass()); |
916 | 956 |
917 if (Level > 1) { | 957 if (Level > 1) { |
958 FunctionPassManager EarlyFPM(DebugLogging); | |
959 EarlyFPM.addPass(CallSiteSplittingPass()); | |
960 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM))); | |
961 | |
918 // Indirect call promotion. This should promote all the targets that are | 962 // Indirect call promotion. This should promote all the targets that are |
919 // left by the earlier promotion pass that promotes intra-module targets. | 963 // left by the earlier promotion pass that promotes intra-module targets. |
920 // This two-step promotion is to save the compile time. For LTO, it should | 964 // This two-step promotion is to save the compile time. For LTO, it should |
921 // produce the same result as if we only do promotion here. | 965 // produce the same result as if we only do promotion here. |
922 MPM.addPass(PGOIndirectCallPromotion( | 966 MPM.addPass(PGOIndirectCallPromotion( |
923 true /* InLTO */, PGOOpt && !PGOOpt->SampleProfileFile.empty())); | 967 true /* InLTO */, PGOOpt && !PGOOpt->SampleProfileFile.empty())); |
924 | |
925 // Propagate constants at call sites into the functions they call. This | 968 // Propagate constants at call sites into the functions they call. This |
926 // opens opportunities for globalopt (and inlining) by substituting function | 969 // opens opportunities for globalopt (and inlining) by substituting function |
927 // pointers passed as arguments to direct uses of functions. | 970 // pointers passed as arguments to direct uses of functions. |
928 MPM.addPass(IPSCCPPass()); | 971 MPM.addPass(IPSCCPPass()); |
929 | 972 |
968 // Reduce the code after globalopt and ipsccp. Both can open up significant | 1011 // Reduce the code after globalopt and ipsccp. Both can open up significant |
969 // simplification opportunities, and both can propagate functions through | 1012 // simplification opportunities, and both can propagate functions through |
970 // function pointers. When this happens, we often have to resolve varargs | 1013 // function pointers. When this happens, we often have to resolve varargs |
971 // calls, etc, so let instcombine do this. | 1014 // calls, etc, so let instcombine do this. |
972 FunctionPassManager PeepholeFPM(DebugLogging); | 1015 FunctionPassManager PeepholeFPM(DebugLogging); |
1016 if (Level == O3) | |
1017 PeepholeFPM.addPass(AggressiveInstCombinePass()); | |
973 PeepholeFPM.addPass(InstCombinePass()); | 1018 PeepholeFPM.addPass(InstCombinePass()); |
974 invokePeepholeEPCallbacks(PeepholeFPM, Level); | 1019 invokePeepholeEPCallbacks(PeepholeFPM, Level); |
975 | 1020 |
976 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM))); | 1021 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM))); |
977 | 1022 |
1510 LoopPassManager LPM(DebugLogging); | 1555 LoopPassManager LPM(DebugLogging); |
1511 if (!parseLoopPassPipeline(LPM, InnerPipeline, VerifyEachPass, | 1556 if (!parseLoopPassPipeline(LPM, InnerPipeline, VerifyEachPass, |
1512 DebugLogging)) | 1557 DebugLogging)) |
1513 return false; | 1558 return false; |
1514 // Add the nested pass manager with the appropriate adaptor. | 1559 // Add the nested pass manager with the appropriate adaptor. |
1515 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM))); | 1560 FPM.addPass( |
1561 createFunctionToLoopPassAdaptor(std::move(LPM), DebugLogging)); | |
1516 return true; | 1562 return true; |
1517 } | 1563 } |
1518 if (auto Count = parseRepeatPassName(Name)) { | 1564 if (auto Count = parseRepeatPassName(Name)) { |
1519 FunctionPassManager NestedFPM(DebugLogging); | 1565 FunctionPassManager NestedFPM(DebugLogging); |
1520 if (!parseFunctionPassPipeline(NestedFPM, InnerPipeline, VerifyEachPass, | 1566 if (!parseFunctionPassPipeline(NestedFPM, InnerPipeline, VerifyEachPass, |