120
|
1 //===-LTOBackend.cpp - LLVM Link Time Optimizer Backend -------------------===//
|
|
2 //
|
|
3 // The LLVM Compiler Infrastructure
|
|
4 //
|
|
5 // This file is distributed under the University of Illinois Open Source
|
|
6 // License. See LICENSE.TXT for details.
|
|
7 //
|
|
8 //===----------------------------------------------------------------------===//
|
|
9 //
|
|
10 // This file implements the "backend" phase of LTO, i.e. it performs
|
|
11 // optimization and code generation on a loaded module. It is generally used
|
|
12 // internally by the LTO class but can also be used independently, for example
|
|
13 // to implement a standalone ThinLTO backend.
|
|
14 //
|
|
15 //===----------------------------------------------------------------------===//
|
|
16
|
|
17 #include "llvm/LTO/LTOBackend.h"
|
|
18 #include "llvm/Analysis/AliasAnalysis.h"
|
|
19 #include "llvm/Analysis/CGSCCPassManager.h"
|
|
20 #include "llvm/Analysis/LoopPassManager.h"
|
|
21 #include "llvm/Analysis/TargetLibraryInfo.h"
|
|
22 #include "llvm/Analysis/TargetTransformInfo.h"
|
|
23 #include "llvm/Bitcode/BitcodeReader.h"
|
|
24 #include "llvm/Bitcode/BitcodeWriter.h"
|
|
25 #include "llvm/IR/LegacyPassManager.h"
|
|
26 #include "llvm/IR/PassManager.h"
|
|
27 #include "llvm/IR/Verifier.h"
|
|
28 #include "llvm/LTO/LTO.h"
|
|
29 #include "llvm/LTO/legacy/UpdateCompilerUsed.h"
|
|
30 #include "llvm/MC/SubtargetFeature.h"
|
|
31 #include "llvm/Passes/PassBuilder.h"
|
|
32 #include "llvm/Support/Error.h"
|
|
33 #include "llvm/Support/FileSystem.h"
|
|
34 #include "llvm/Support/TargetRegistry.h"
|
|
35 #include "llvm/Support/ThreadPool.h"
|
|
36 #include "llvm/Target/TargetMachine.h"
|
|
37 #include "llvm/Transforms/IPO.h"
|
|
38 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
|
|
39 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
|
|
40 #include "llvm/Transforms/Utils/SplitModule.h"
|
|
41
|
|
42 using namespace llvm;
|
|
43 using namespace lto;
|
|
44
|
|
45 LLVM_ATTRIBUTE_NORETURN static void reportOpenError(StringRef Path, Twine Msg) {
|
|
46 errs() << "failed to open " << Path << ": " << Msg << '\n';
|
|
47 errs().flush();
|
|
48 exit(1);
|
|
49 }
|
|
50
|
|
51 Error Config::addSaveTemps(std::string OutputFileName,
|
|
52 bool UseInputModulePath) {
|
|
53 ShouldDiscardValueNames = false;
|
|
54
|
|
55 std::error_code EC;
|
|
56 ResolutionFile = llvm::make_unique<raw_fd_ostream>(
|
|
57 OutputFileName + "resolution.txt", EC, sys::fs::OpenFlags::F_Text);
|
|
58 if (EC)
|
|
59 return errorCodeToError(EC);
|
|
60
|
|
61 auto setHook = [&](std::string PathSuffix, ModuleHookFn &Hook) {
|
|
62 // Keep track of the hook provided by the linker, which also needs to run.
|
|
63 ModuleHookFn LinkerHook = Hook;
|
|
64 Hook = [=](unsigned Task, const Module &M) {
|
|
65 // If the linker's hook returned false, we need to pass that result
|
|
66 // through.
|
|
67 if (LinkerHook && !LinkerHook(Task, M))
|
|
68 return false;
|
|
69
|
|
70 std::string PathPrefix;
|
|
71 // If this is the combined module (not a ThinLTO backend compile) or the
|
|
72 // user hasn't requested using the input module's path, emit to a file
|
|
73 // named from the provided OutputFileName with the Task ID appended.
|
|
74 if (M.getModuleIdentifier() == "ld-temp.o" || !UseInputModulePath) {
|
|
75 PathPrefix = OutputFileName + utostr(Task);
|
|
76 } else
|
|
77 PathPrefix = M.getModuleIdentifier();
|
|
78 std::string Path = PathPrefix + "." + PathSuffix + ".bc";
|
|
79 std::error_code EC;
|
|
80 raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None);
|
|
81 // Because -save-temps is a debugging feature, we report the error
|
|
82 // directly and exit.
|
|
83 if (EC)
|
|
84 reportOpenError(Path, EC.message());
|
|
85 WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false);
|
|
86 return true;
|
|
87 };
|
|
88 };
|
|
89
|
|
90 setHook("0.preopt", PreOptModuleHook);
|
|
91 setHook("1.promote", PostPromoteModuleHook);
|
|
92 setHook("2.internalize", PostInternalizeModuleHook);
|
|
93 setHook("3.import", PostImportModuleHook);
|
|
94 setHook("4.opt", PostOptModuleHook);
|
|
95 setHook("5.precodegen", PreCodeGenModuleHook);
|
|
96
|
|
97 CombinedIndexHook = [=](const ModuleSummaryIndex &Index) {
|
|
98 std::string Path = OutputFileName + "index.bc";
|
|
99 std::error_code EC;
|
|
100 raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None);
|
|
101 // Because -save-temps is a debugging feature, we report the error
|
|
102 // directly and exit.
|
|
103 if (EC)
|
|
104 reportOpenError(Path, EC.message());
|
|
105 WriteIndexToFile(Index, OS);
|
|
106 return true;
|
|
107 };
|
|
108
|
|
109 return Error::success();
|
|
110 }
|
|
111
|
|
112 namespace {
|
|
113
|
|
114 std::unique_ptr<TargetMachine>
|
|
115 createTargetMachine(Config &Conf, StringRef TheTriple,
|
|
116 const Target *TheTarget) {
|
|
117 SubtargetFeatures Features;
|
|
118 Features.getDefaultSubtargetFeatures(Triple(TheTriple));
|
|
119 for (const std::string &A : Conf.MAttrs)
|
|
120 Features.AddFeature(A);
|
|
121
|
|
122 return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
|
|
123 TheTriple, Conf.CPU, Features.getString(), Conf.Options, Conf.RelocModel,
|
|
124 Conf.CodeModel, Conf.CGOptLevel));
|
|
125 }
|
|
126
|
|
127 static void runNewPMCustomPasses(Module &Mod, TargetMachine *TM,
|
|
128 std::string PipelineDesc,
|
|
129 std::string AAPipelineDesc,
|
|
130 bool DisableVerify) {
|
|
131 PassBuilder PB(TM);
|
|
132 AAManager AA;
|
|
133
|
|
134 // Parse a custom AA pipeline if asked to.
|
|
135 if (!AAPipelineDesc.empty())
|
|
136 if (!PB.parseAAPipeline(AA, AAPipelineDesc))
|
|
137 report_fatal_error("unable to parse AA pipeline description: " +
|
|
138 AAPipelineDesc);
|
|
139
|
|
140 LoopAnalysisManager LAM;
|
|
141 FunctionAnalysisManager FAM;
|
|
142 CGSCCAnalysisManager CGAM;
|
|
143 ModuleAnalysisManager MAM;
|
|
144
|
|
145 // Register the AA manager first so that our version is the one used.
|
|
146 FAM.registerPass([&] { return std::move(AA); });
|
|
147
|
|
148 // Register all the basic analyses with the managers.
|
|
149 PB.registerModuleAnalyses(MAM);
|
|
150 PB.registerCGSCCAnalyses(CGAM);
|
|
151 PB.registerFunctionAnalyses(FAM);
|
|
152 PB.registerLoopAnalyses(LAM);
|
|
153 PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
|
|
154
|
|
155 ModulePassManager MPM;
|
|
156
|
|
157 // Always verify the input.
|
|
158 MPM.addPass(VerifierPass());
|
|
159
|
|
160 // Now, add all the passes we've been requested to.
|
|
161 if (!PB.parsePassPipeline(MPM, PipelineDesc))
|
|
162 report_fatal_error("unable to parse pass pipeline description: " +
|
|
163 PipelineDesc);
|
|
164
|
|
165 if (!DisableVerify)
|
|
166 MPM.addPass(VerifierPass());
|
|
167 MPM.run(Mod, MAM);
|
|
168 }
|
|
169
|
|
170 static void runOldPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
|
|
171 bool IsThinLTO) {
|
|
172 legacy::PassManager passes;
|
|
173 passes.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
|
|
174
|
|
175 PassManagerBuilder PMB;
|
|
176 PMB.LibraryInfo = new TargetLibraryInfoImpl(Triple(TM->getTargetTriple()));
|
|
177 PMB.Inliner = createFunctionInliningPass();
|
|
178 // Unconditionally verify input since it is not verified before this
|
|
179 // point and has unknown origin.
|
|
180 PMB.VerifyInput = true;
|
|
181 PMB.VerifyOutput = !Conf.DisableVerify;
|
|
182 PMB.LoopVectorize = true;
|
|
183 PMB.SLPVectorize = true;
|
|
184 PMB.OptLevel = Conf.OptLevel;
|
|
185 if (IsThinLTO)
|
|
186 PMB.populateThinLTOPassManager(passes);
|
|
187 else
|
|
188 PMB.populateLTOPassManager(passes);
|
|
189 passes.run(Mod);
|
|
190 }
|
|
191
|
|
192 bool opt(Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
|
|
193 bool IsThinLTO) {
|
|
194 Mod.setDataLayout(TM->createDataLayout());
|
|
195 if (Conf.OptPipeline.empty())
|
|
196 runOldPMPasses(Conf, Mod, TM, IsThinLTO);
|
|
197 else
|
|
198 runNewPMCustomPasses(Mod, TM, Conf.OptPipeline, Conf.AAPipeline,
|
|
199 Conf.DisableVerify);
|
|
200 return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod);
|
|
201 }
|
|
202
|
|
203 void codegen(Config &Conf, TargetMachine *TM, AddStreamFn AddStream,
|
|
204 unsigned Task, Module &Mod) {
|
|
205 if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod))
|
|
206 return;
|
|
207
|
|
208 auto Stream = AddStream(Task);
|
|
209 legacy::PassManager CodeGenPasses;
|
|
210 if (TM->addPassesToEmitFile(CodeGenPasses, *Stream->OS,
|
|
211 TargetMachine::CGFT_ObjectFile))
|
|
212 report_fatal_error("Failed to setup codegen");
|
|
213 CodeGenPasses.run(Mod);
|
|
214 }
|
|
215
|
|
216 void splitCodeGen(Config &C, TargetMachine *TM, AddStreamFn AddStream,
|
|
217 unsigned ParallelCodeGenParallelismLevel,
|
|
218 std::unique_ptr<Module> Mod) {
|
|
219 ThreadPool CodegenThreadPool(ParallelCodeGenParallelismLevel);
|
|
220 unsigned ThreadCount = 0;
|
|
221 const Target *T = &TM->getTarget();
|
|
222
|
|
223 SplitModule(
|
|
224 std::move(Mod), ParallelCodeGenParallelismLevel,
|
|
225 [&](std::unique_ptr<Module> MPart) {
|
|
226 // We want to clone the module in a new context to multi-thread the
|
|
227 // codegen. We do it by serializing partition modules to bitcode
|
|
228 // (while still on the main thread, in order to avoid data races) and
|
|
229 // spinning up new threads which deserialize the partitions into
|
|
230 // separate contexts.
|
|
231 // FIXME: Provide a more direct way to do this in LLVM.
|
|
232 SmallString<0> BC;
|
|
233 raw_svector_ostream BCOS(BC);
|
|
234 WriteBitcodeToFile(MPart.get(), BCOS);
|
|
235
|
|
236 // Enqueue the task
|
|
237 CodegenThreadPool.async(
|
|
238 [&](const SmallString<0> &BC, unsigned ThreadId) {
|
|
239 LTOLLVMContext Ctx(C);
|
|
240 Expected<std::unique_ptr<Module>> MOrErr = parseBitcodeFile(
|
|
241 MemoryBufferRef(StringRef(BC.data(), BC.size()), "ld-temp.o"),
|
|
242 Ctx);
|
|
243 if (!MOrErr)
|
|
244 report_fatal_error("Failed to read bitcode");
|
|
245 std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get());
|
|
246
|
|
247 std::unique_ptr<TargetMachine> TM =
|
|
248 createTargetMachine(C, MPartInCtx->getTargetTriple(), T);
|
|
249
|
|
250 codegen(C, TM.get(), AddStream, ThreadId, *MPartInCtx);
|
|
251 },
|
|
252 // Pass BC using std::move to ensure that it get moved rather than
|
|
253 // copied into the thread's context.
|
|
254 std::move(BC), ThreadCount++);
|
|
255 },
|
|
256 false);
|
|
257
|
|
258 // Because the inner lambda (which runs in a worker thread) captures our local
|
|
259 // variables, we need to wait for the worker threads to terminate before we
|
|
260 // can leave the function scope.
|
|
261 CodegenThreadPool.wait();
|
|
262 }
|
|
263
|
|
264 Expected<const Target *> initAndLookupTarget(Config &C, Module &Mod) {
|
|
265 if (!C.OverrideTriple.empty())
|
|
266 Mod.setTargetTriple(C.OverrideTriple);
|
|
267 else if (Mod.getTargetTriple().empty())
|
|
268 Mod.setTargetTriple(C.DefaultTriple);
|
|
269
|
|
270 std::string Msg;
|
|
271 const Target *T = TargetRegistry::lookupTarget(Mod.getTargetTriple(), Msg);
|
|
272 if (!T)
|
|
273 return make_error<StringError>(Msg, inconvertibleErrorCode());
|
|
274 return T;
|
|
275 }
|
|
276
|
|
277 }
|
|
278
|
|
279 static void handleAsmUndefinedRefs(Module &Mod, TargetMachine &TM) {
|
|
280 // Collect the list of undefined symbols used in asm and update
|
|
281 // llvm.compiler.used to prevent optimization to drop these from the output.
|
|
282 StringSet<> AsmUndefinedRefs;
|
|
283 object::IRObjectFile::CollectAsmUndefinedRefs(
|
|
284 Triple(Mod.getTargetTriple()), Mod.getModuleInlineAsm(),
|
|
285 [&AsmUndefinedRefs](StringRef Name, object::BasicSymbolRef::Flags Flags) {
|
|
286 if (Flags & object::BasicSymbolRef::SF_Undefined)
|
|
287 AsmUndefinedRefs.insert(Name);
|
|
288 });
|
|
289 updateCompilerUsed(Mod, TM, AsmUndefinedRefs);
|
|
290 }
|
|
291
|
|
292 Error lto::backend(Config &C, AddStreamFn AddStream,
|
|
293 unsigned ParallelCodeGenParallelismLevel,
|
|
294 std::unique_ptr<Module> Mod) {
|
|
295 Expected<const Target *> TOrErr = initAndLookupTarget(C, *Mod);
|
|
296 if (!TOrErr)
|
|
297 return TOrErr.takeError();
|
|
298
|
|
299 std::unique_ptr<TargetMachine> TM =
|
|
300 createTargetMachine(C, Mod->getTargetTriple(), *TOrErr);
|
|
301
|
|
302 handleAsmUndefinedRefs(*Mod, *TM);
|
|
303
|
|
304 if (!C.CodeGenOnly)
|
|
305 if (!opt(C, TM.get(), 0, *Mod, /*IsThinLTO=*/false))
|
|
306 return Error::success();
|
|
307
|
|
308 if (ParallelCodeGenParallelismLevel == 1) {
|
|
309 codegen(C, TM.get(), AddStream, 0, *Mod);
|
|
310 } else {
|
|
311 splitCodeGen(C, TM.get(), AddStream, ParallelCodeGenParallelismLevel,
|
|
312 std::move(Mod));
|
|
313 }
|
|
314 return Error::success();
|
|
315 }
|
|
316
|
|
317 Error lto::thinBackend(Config &Conf, unsigned Task, AddStreamFn AddStream,
|
|
318 Module &Mod, ModuleSummaryIndex &CombinedIndex,
|
|
319 const FunctionImporter::ImportMapTy &ImportList,
|
|
320 const GVSummaryMapTy &DefinedGlobals,
|
|
321 MapVector<StringRef, MemoryBufferRef> &ModuleMap) {
|
|
322 Expected<const Target *> TOrErr = initAndLookupTarget(Conf, Mod);
|
|
323 if (!TOrErr)
|
|
324 return TOrErr.takeError();
|
|
325
|
|
326 std::unique_ptr<TargetMachine> TM =
|
|
327 createTargetMachine(Conf, Mod.getTargetTriple(), *TOrErr);
|
|
328
|
|
329 handleAsmUndefinedRefs(Mod, *TM);
|
|
330
|
|
331 if (Conf.CodeGenOnly) {
|
|
332 codegen(Conf, TM.get(), AddStream, Task, Mod);
|
|
333 return Error::success();
|
|
334 }
|
|
335
|
|
336 if (Conf.PreOptModuleHook && !Conf.PreOptModuleHook(Task, Mod))
|
|
337 return Error::success();
|
|
338
|
|
339 renameModuleForThinLTO(Mod, CombinedIndex);
|
|
340
|
|
341 thinLTOResolveWeakForLinkerModule(Mod, DefinedGlobals);
|
|
342
|
|
343 if (Conf.PostPromoteModuleHook && !Conf.PostPromoteModuleHook(Task, Mod))
|
|
344 return Error::success();
|
|
345
|
|
346 if (!DefinedGlobals.empty())
|
|
347 thinLTOInternalizeModule(Mod, DefinedGlobals);
|
|
348
|
|
349 if (Conf.PostInternalizeModuleHook &&
|
|
350 !Conf.PostInternalizeModuleHook(Task, Mod))
|
|
351 return Error::success();
|
|
352
|
|
353 auto ModuleLoader = [&](StringRef Identifier) {
|
|
354 assert(Mod.getContext().isODRUniquingDebugTypes() &&
|
|
355 "ODR Type uniquing should be enabled on the context");
|
|
356 return getLazyBitcodeModule(ModuleMap[Identifier], Mod.getContext(),
|
|
357 /*ShouldLazyLoadMetadata=*/true);
|
|
358 };
|
|
359
|
|
360 FunctionImporter Importer(CombinedIndex, ModuleLoader);
|
|
361 if (Error Err = Importer.importFunctions(Mod, ImportList).takeError())
|
|
362 return Err;
|
|
363
|
|
364 if (Conf.PostImportModuleHook && !Conf.PostImportModuleHook(Task, Mod))
|
|
365 return Error::success();
|
|
366
|
|
367 if (!opt(Conf, TM.get(), Task, Mod, /*IsThinLTO=*/true))
|
|
368 return Error::success();
|
|
369
|
|
370 codegen(Conf, TM.get(), AddStream, Task, Mod);
|
|
371 return Error::success();
|
|
372 }
|