diff lib/LTO/ThinLTOCodeGenerator.cpp @ 148:63bd29f05246

merged
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Wed, 14 Aug 2019 19:46:37 +0900
parents c2174574ed3a
children
line wrap: on
line diff
--- a/lib/LTO/ThinLTOCodeGenerator.cpp	Sun Dec 23 19:23:36 2018 +0900
+++ b/lib/LTO/ThinLTOCodeGenerator.cpp	Wed Aug 14 19:46:37 2019 +0900
@@ -1,9 +1,8 @@
 //===-ThinLTOCodeGenerator.cpp - LLVM Link Time Optimizer -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -23,15 +22,18 @@
 #include "llvm/Bitcode/BitcodeReader.h"
 #include "llvm/Bitcode/BitcodeWriter.h"
 #include "llvm/Bitcode/BitcodeWriterPass.h"
-#include "llvm/ExecutionEngine/ObjectMemoryBuffer.h"
+#include "llvm/Config/llvm-config.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DiagnosticPrinter.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Mangler.h"
+#include "llvm/IR/PassTimingInfo.h"
+#include "llvm/IR/RemarkStreamer.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/IRReader/IRReader.h"
 #include "llvm/LTO/LTO.h"
+#include "llvm/LTO/SummaryBasedOptimizations.h"
 #include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Object/IRObjectFile.h"
 #include "llvm/Support/CachePruning.h"
@@ -39,6 +41,7 @@
 #include "llvm/Support/Error.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/SHA1.h"
+#include "llvm/Support/SmallVectorMemoryBuffer.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/ThreadPool.h"
 #include "llvm/Support/Threading.h"
@@ -54,6 +57,12 @@
 
 #include <numeric>
 
+#if !defined(_MSC_VER) && !defined(__MINGW32__)
+#include <unistd.h>
+#else
+#include <io.h>
+#endif
+
 using namespace llvm;
 
 #define DEBUG_TYPE "thinlto"
@@ -61,8 +70,10 @@
 namespace llvm {
 // Flags -discard-value-names, defined in LTOCodeGenerator.cpp
 extern cl::opt<bool> LTODiscardValueNames;
-extern cl::opt<std::string> LTORemarksFilename;
-extern cl::opt<bool> LTOPassRemarksWithHotness;
+extern cl::opt<std::string> RemarksFilename;
+extern cl::opt<std::string> RemarksPasses;
+extern cl::opt<bool> RemarksWithHotness;
+extern cl::opt<std::string> RemarksFormat;
 }
 
 namespace {
@@ -78,7 +89,7 @@
   // User asked to save temps, let dump the bitcode file after import.
   std::string SaveTempPath = (TempDir + llvm::Twine(count) + Suffix).str();
   std::error_code EC;
-  raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None);
+  raw_fd_ostream OS(SaveTempPath, EC, sys::fs::OF_None);
   if (EC)
     report_fatal_error(Twine("Failed to open ") + SaveTempPath +
                        " to save optimized bitcode\n");
@@ -126,14 +137,13 @@
   }
 }
 
-static StringMap<MemoryBufferRef>
-generateModuleMap(const std::vector<ThinLTOBuffer> &Modules) {
-  StringMap<MemoryBufferRef> ModuleMap;
-  for (auto &ModuleBuffer : Modules) {
-    assert(ModuleMap.find(ModuleBuffer.getBufferIdentifier()) ==
-               ModuleMap.end() &&
+static StringMap<lto::InputFile *>
+generateModuleMap(std::vector<std::unique_ptr<lto::InputFile>> &Modules) {
+  StringMap<lto::InputFile *> ModuleMap;
+  for (auto &M : Modules) {
+    assert(ModuleMap.find(M->getName()) == ModuleMap.end() &&
            "Expect unique Buffer Identifier");
-    ModuleMap[ModuleBuffer.getBufferIdentifier()] = ModuleBuffer.getMemBuffer();
+    ModuleMap[M->getName()] = M.get();
   }
   return ModuleMap;
 }
@@ -166,18 +176,19 @@
   }
 }
 
-static std::unique_ptr<Module>
-loadModuleFromBuffer(const MemoryBufferRef &Buffer, LLVMContext &Context,
-                     bool Lazy, bool IsImporting) {
+static std::unique_ptr<Module> loadModuleFromInput(lto::InputFile *Input,
+                                                   LLVMContext &Context,
+                                                   bool Lazy,
+                                                   bool IsImporting) {
+  auto &Mod = Input->getSingleBitcodeModule();
   SMDiagnostic Err;
   Expected<std::unique_ptr<Module>> ModuleOrErr =
-      Lazy
-          ? getLazyBitcodeModule(Buffer, Context,
-                                 /* ShouldLazyLoadMetadata */ true, IsImporting)
-          : parseBitcodeFile(Buffer, Context);
+      Lazy ? Mod.getLazyModule(Context,
+                               /* ShouldLazyLoadMetadata */ true, IsImporting)
+           : Mod.parseModule(Context);
   if (!ModuleOrErr) {
     handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) {
-      SMDiagnostic Err = SMDiagnostic(Buffer.getBufferIdentifier(),
+      SMDiagnostic Err = SMDiagnostic(Mod.getModuleIdentifier(),
                                       SourceMgr::DK_Error, EIB.message());
       Err.print("ThinLTO", errs());
     });
@@ -185,16 +196,17 @@
   }
   if (!Lazy)
     verifyLoadedModule(*ModuleOrErr.get());
-  return std::move(ModuleOrErr.get());
+  return std::move(*ModuleOrErr);
 }
 
 static void
 crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index,
-                      StringMap<MemoryBufferRef> &ModuleMap,
+                      StringMap<lto::InputFile*> &ModuleMap,
                       const FunctionImporter::ImportMapTy &ImportList) {
   auto Loader = [&](StringRef Identifier) {
-    return loadModuleFromBuffer(ModuleMap[Identifier], TheModule.getContext(),
-                                /*Lazy=*/true, /*IsImporting*/ true);
+    auto &Input = ModuleMap[Identifier];
+    return loadModuleFromInput(Input, TheModule.getContext(),
+                               /*Lazy=*/true, /*IsImporting*/ true);
   };
 
   FunctionImporter Importer(Index, Loader);
@@ -239,6 +251,15 @@
   PM.run(TheModule);
 }
 
+static void
+addUsedSymbolToPreservedGUID(const lto::InputFile &File,
+                             DenseSet<GlobalValue::GUID> &PreservedGUID) {
+  for (const auto &Sym : File.symbols()) {
+    if (Sym.isUsed())
+      PreservedGUID.insert(GlobalValue::getGUID(Sym.getIRName()));
+  }
+}
+
 // Convert the PreservedSymbols map from "Name" based to "GUID" based.
 static DenseSet<GlobalValue::GUID>
 computeGUIDPreservedSymbols(const StringSet<> &PreservedSymbols,
@@ -267,14 +288,14 @@
     PM.add(createObjCARCContractPass());
 
     // Setup the codegen now.
-    if (TM.addPassesToEmitFile(PM, OS, TargetMachine::CGFT_ObjectFile,
+    if (TM.addPassesToEmitFile(PM, OS, nullptr, TargetMachine::CGFT_ObjectFile,
                                /* DisableVerify */ true))
       report_fatal_error("Failed to setup codegen");
 
     // Run codegen now. resulting binary is in OutputBuffer.
     PM.run(TheModule);
   }
-  return make_unique<ObjectMemoryBuffer>(std::move(OutputBuffer));
+  return make_unique<SmallVectorMemoryBuffer>(std::move(OutputBuffer));
 }
 
 /// Manage caching for a single Module.
@@ -290,8 +311,7 @@
       const FunctionImporter::ImportMapTy &ImportList,
       const FunctionImporter::ExportSetTy &ExportList,
       const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
-      const GVSummaryMapTy &DefinedFunctions,
-      const DenseSet<GlobalValue::GUID> &PreservedSymbols, unsigned OptLevel,
+      const GVSummaryMapTy &DefinedGVSummaries, unsigned OptLevel,
       bool Freestanding, const TargetMachineBuilder &TMBuilder) {
     if (CachePath.empty())
       return;
@@ -300,87 +320,26 @@
       // The module does not have an entry, it can't have a hash at all
       return;
 
-    // Compute the unique hash for this entry
-    // This is based on the current compiler version, the module itself, the
-    // export list, the hash for every single module in the import list, the
-    // list of ResolvedODR for the module, and the list of preserved symbols.
-
-    // Include the hash for the current module
-    auto ModHash = Index.getModuleHash(ModuleID);
-
-    if (all_of(ModHash, [](uint32_t V) { return V == 0; }))
+    if (all_of(Index.getModuleHash(ModuleID),
+               [](uint32_t V) { return V == 0; }))
       // No hash entry, no caching!
       return;
 
-    SHA1 Hasher;
-
-    // Include the parts of the LTO configuration that affect code generation.
-    auto AddString = [&](StringRef Str) {
-      Hasher.update(Str);
-      Hasher.update(ArrayRef<uint8_t>{0});
-    };
-    auto AddUnsigned = [&](unsigned I) {
-      uint8_t Data[4];
-      Data[0] = I;
-      Data[1] = I >> 8;
-      Data[2] = I >> 16;
-      Data[3] = I >> 24;
-      Hasher.update(ArrayRef<uint8_t>{Data, 4});
-    };
-
-    // Start with the compiler revision
-    Hasher.update(LLVM_VERSION_STRING);
-#ifdef LLVM_REVISION
-    Hasher.update(LLVM_REVISION);
-#endif
-
-    // Hash the optimization level and the target machine settings.
-    AddString(TMBuilder.MCpu);
-    // FIXME: Hash more of Options. For now all clients initialize Options from
-    // command-line flags (which is unsupported in production), but may set
-    // RelaxELFRelocations. The clang driver can also pass FunctionSections,
-    // DataSections and DebuggerTuning via command line flags.
-    AddUnsigned(TMBuilder.Options.RelaxELFRelocations);
-    AddUnsigned(TMBuilder.Options.FunctionSections);
-    AddUnsigned(TMBuilder.Options.DataSections);
-    AddUnsigned((unsigned)TMBuilder.Options.DebuggerTuning);
-    AddString(TMBuilder.MAttr);
-    if (TMBuilder.RelocModel)
-      AddUnsigned(*TMBuilder.RelocModel);
-    AddUnsigned(TMBuilder.CGOptLevel);
-    AddUnsigned(OptLevel);
-    AddUnsigned(Freestanding);
-
-    Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
-    for (auto F : ExportList)
-      // The export list can impact the internalization, be conservative here
-      Hasher.update(ArrayRef<uint8_t>((uint8_t *)&F, sizeof(F)));
-
-    // Include the hash for every module we import functions from
-    for (auto &Entry : ImportList) {
-      auto ModHash = Index.getModuleHash(Entry.first());
-      Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
-    }
-
-    // Include the hash for the resolved ODR.
-    for (auto &Entry : ResolvedODR) {
-      Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.first,
-                                      sizeof(GlobalValue::GUID)));
-      Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.second,
-                                      sizeof(GlobalValue::LinkageTypes)));
-    }
-
-    // Include the hash for the preserved symbols.
-    for (auto &Entry : PreservedSymbols) {
-      if (DefinedFunctions.count(Entry))
-        Hasher.update(
-            ArrayRef<uint8_t>((const uint8_t *)&Entry, sizeof(GlobalValue::GUID)));
-    }
+    llvm::lto::Config Conf;
+    Conf.OptLevel = OptLevel;
+    Conf.Options = TMBuilder.Options;
+    Conf.CPU = TMBuilder.MCpu;
+    Conf.MAttrs.push_back(TMBuilder.MAttr);
+    Conf.RelocModel = TMBuilder.RelocModel;
+    Conf.CGOptLevel = TMBuilder.CGOptLevel;
+    Conf.Freestanding = Freestanding;
+    SmallString<40> Key;
+    computeLTOCacheKey(Key, Conf, Index, ModuleID, ImportList, ExportList,
+                       ResolvedODR, DefinedGVSummaries);
 
     // This choice of file name allows the cache to be pruned (see pruneCache()
     // in include/llvm/Support/CachePruning.h).
-    sys::path::append(EntryPath, CachePath,
-                      "llvmcache-" + toHex(Hasher.result()));
+    sys::path::append(EntryPath, CachePath, "llvmcache-" + Key);
   }
 
   // Access the path to this entry in the cache.
@@ -390,7 +349,15 @@
   ErrorOr<std::unique_ptr<MemoryBuffer>> tryLoadingBuffer() {
     if (EntryPath.empty())
       return std::error_code();
-    return MemoryBuffer::getFile(EntryPath);
+    SmallString<64> ResultPath;
+    Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForRead(
+        Twine(EntryPath), sys::fs::OF_UpdateAtime, &ResultPath);
+    if (!FDOrErr)
+      return errorToErrorCode(FDOrErr.takeError());
+    ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = MemoryBuffer::getOpenFile(
+        *FDOrErr, EntryPath, /*FileSize=*/-1, /*RequiresNullTerminator=*/false);
+    sys::fs::closeFile(*FDOrErr);
+    return MBOrErr;
   }
 
   // Cache the Produced object file
@@ -400,9 +367,12 @@
 
     // Write to a temporary to avoid race condition
     SmallString<128> TempFilename;
+    SmallString<128> CachePath(EntryPath);
     int TempFD;
+    llvm::sys::path::remove_filename(CachePath);
+    sys::path::append(TempFilename, CachePath, "Thin-%%%%%%.tmp.o");
     std::error_code EC =
-        sys::fs::createTemporaryFile("Thin", "tmp.o", TempFD, TempFilename);
+      sys::fs::createUniqueFile(TempFilename, TempFD, TempFilename);
     if (EC) {
       errs() << "Error: " << EC.message() << "\n";
       report_fatal_error("ThinLTO: Can't get a temporary file");
@@ -411,22 +381,16 @@
       raw_fd_ostream OS(TempFD, /* ShouldClose */ true);
       OS << OutputBuffer.getBuffer();
     }
-    // Rename to final destination (hopefully race condition won't matter here)
+    // Rename temp file to final destination; rename is atomic
     EC = sys::fs::rename(TempFilename, EntryPath);
-    if (EC) {
+    if (EC)
       sys::fs::remove(TempFilename);
-      raw_fd_ostream OS(EntryPath, EC, sys::fs::F_None);
-      if (EC)
-        report_fatal_error(Twine("Failed to open ") + EntryPath +
-                           " to save cached entry\n");
-      OS << OutputBuffer.getBuffer();
-    }
   }
 };
 
 static std::unique_ptr<MemoryBuffer>
 ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
-                     StringMap<MemoryBufferRef> &ModuleMap, TargetMachine &TM,
+                     StringMap<lto::InputFile *> &ModuleMap, TargetMachine &TM,
                      const FunctionImporter::ImportMapTy &ImportList,
                      const FunctionImporter::ExportSetTy &ExportList,
                      const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
@@ -441,8 +405,8 @@
   if (!SingleModule) {
     promoteModule(TheModule, Index);
 
-    // Apply summary-based LinkOnce/Weak resolution decisions.
-    thinLTOResolveWeakForLinkerModule(TheModule, DefinedGlobals);
+    // Apply summary-based prevailing-symbol resolution decisions.
+    thinLTOResolvePrevailingInModule(TheModule, DefinedGlobals);
 
     // Save temps: after promotion.
     saveTempBitcode(TheModule, SaveTempsDir, count, ".1.promoted.bc");
@@ -478,21 +442,22 @@
       auto Index = buildModuleSummaryIndex(TheModule, nullptr, &PSI);
       WriteBitcodeToFile(TheModule, OS, true, &Index);
     }
-    return make_unique<ObjectMemoryBuffer>(std::move(OutputBuffer));
+    return make_unique<SmallVectorMemoryBuffer>(std::move(OutputBuffer));
   }
 
   return codegenModule(TheModule, TM);
 }
 
-/// Resolve LinkOnce/Weak symbols. Record resolutions in the \p ResolvedODR map
+/// Resolve prevailing symbols. Record resolutions in the \p ResolvedODR map
 /// for caching, and in the \p Index for application during the ThinLTO
 /// backends. This is needed for correctness for exported symbols (ensure
 /// at least one copy kept) and a compile-time optimization (to drop duplicate
 /// copies when possible).
-static void resolveWeakForLinkerInIndex(
+static void resolvePrevailingInIndex(
     ModuleSummaryIndex &Index,
     StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>>
-        &ResolvedODR) {
+        &ResolvedODR,
+    const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
 
   DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy;
   computePrevailingCopies(Index, PrevailingCopy);
@@ -511,7 +476,8 @@
     ResolvedODR[ModuleIdentifier][GUID] = NewLinkage;
   };
 
-  thinLTOResolveWeakForLinkerInIndex(Index, isPrevailing, recordNewLinkage);
+  thinLTOResolvePrevailingInIndex(Index, isPrevailing, recordNewLinkage,
+                                  GUIDPreservedSymbols);
 }
 
 // Initialize the TargetMachine builder for a given Triple
@@ -533,15 +499,14 @@
 } // end anonymous namespace
 
 void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) {
-  ThinLTOBuffer Buffer(Data, Identifier);
-  LLVMContext Context;
-  StringRef TripleStr;
-  ErrorOr<std::string> TripleOrErr = expectedToErrorOrAndEmitErrors(
-      Context, getBitcodeTargetTriple(Buffer.getMemBuffer()));
+  MemoryBufferRef Buffer(Data, Identifier);
 
-  if (TripleOrErr)
-    TripleStr = *TripleOrErr;
+  auto InputOrError = lto::InputFile::create(Buffer);
+  if (!InputOrError)
+    report_fatal_error("ThinLTO cannot create input file: " +
+                       toString(InputOrError.takeError()));
 
+  auto TripleStr = (*InputOrError)->getTargetTriple();
   Triple TheTriple(TripleStr);
 
   if (Modules.empty())
@@ -553,7 +518,7 @@
     initTMBuilder(TMBuilder, Triple(TMBuilder.TheTriple.merge(TheTriple)));
   }
 
-  Modules.push_back(Buffer);
+  Modules.emplace_back(std::move(*InputOrError));
 }
 
 void ThinLTOCodeGenerator::preserveSymbol(StringRef Name) {
@@ -592,11 +557,12 @@
  */
 std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() {
   std::unique_ptr<ModuleSummaryIndex> CombinedIndex =
-      llvm::make_unique<ModuleSummaryIndex>(/*IsPeformingAnalysis=*/false);
+      llvm::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);
   uint64_t NextModuleId = 0;
-  for (auto &ModuleBuffer : Modules) {
-    if (Error Err = readModuleSummaryIndex(ModuleBuffer.getMemBuffer(),
-                                           *CombinedIndex, NextModuleId++)) {
+  for (auto &Mod : Modules) {
+    auto &M = Mod->getSingleBitcodeModule();
+    if (Error Err =
+            M.readSummary(*CombinedIndex, Mod->getName(), NextModuleId++)) {
       // FIXME diagnose
       logAllUnhandledErrors(
           std::move(Err), errs(),
@@ -630,15 +596,16 @@
   auto isPrevailing = [&](GlobalValue::GUID G) {
     return PrevailingType::Unknown;
   };
-  computeDeadSymbols(Index, GUIDPreservedSymbols, isPrevailing);
+  computeDeadSymbolsWithConstProp(Index, GUIDPreservedSymbols, isPrevailing,
+                                  /* ImportEnabled = */ true);
 }
 
 /**
  * Perform promotion and renaming of exported internal functions.
  * Index is updated to reflect linkage changes from weak resolution.
  */
-void ThinLTOCodeGenerator::promote(Module &TheModule,
-                                   ModuleSummaryIndex &Index) {
+void ThinLTOCodeGenerator::promote(Module &TheModule, ModuleSummaryIndex &Index,
+                                   const lto::InputFile &File) {
   auto ModuleCount = Index.modulePaths().size();
   auto ModuleIdentifier = TheModule.getModuleIdentifier();
 
@@ -650,6 +617,9 @@
   auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
       PreservedSymbols, Triple(TheModule.getTargetTriple()));
 
+  // Add used symbol to the preserved symbols.
+  addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
+
   // Compute "dead" symbols, we don't want to import/export these!
   computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols);
 
@@ -659,11 +629,11 @@
   ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
                            ExportLists);
 
-  // Resolve LinkOnce/Weak symbols.
+  // Resolve prevailing symbols
   StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
-  resolveWeakForLinkerInIndex(Index, ResolvedODR);
+  resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols);
 
-  thinLTOResolveWeakForLinkerModule(
+  thinLTOResolvePrevailingInModule(
       TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]);
 
   // Promote the exported values in the index, so that they are promoted
@@ -677,7 +647,8 @@
  * Perform cross-module importing for the module identified by ModuleIdentifier.
  */
 void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
-                                             ModuleSummaryIndex &Index) {
+                                             ModuleSummaryIndex &Index,
+                                             const lto::InputFile &File) {
   auto ModuleMap = generateModuleMap(Modules);
   auto ModuleCount = Index.modulePaths().size();
 
@@ -689,6 +660,8 @@
   auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
       PreservedSymbols, Triple(TheModule.getTargetTriple()));
 
+  addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
+
   // Compute "dead" symbols, we don't want to import/export these!
   computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols);
 
@@ -706,54 +679,83 @@
  * Compute the list of summaries needed for importing into module.
  */
 void ThinLTOCodeGenerator::gatherImportedSummariesForModule(
-    StringRef ModulePath, ModuleSummaryIndex &Index,
-    std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) {
+    Module &TheModule, ModuleSummaryIndex &Index,
+    std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex,
+    const lto::InputFile &File) {
   auto ModuleCount = Index.modulePaths().size();
+  auto ModuleIdentifier = TheModule.getModuleIdentifier();
 
   // Collect for each module the list of function it defines (GUID -> Summary).
   StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
   Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
 
+  // Convert the preserved symbols set from string to GUID
+  auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
+      PreservedSymbols, Triple(TheModule.getTargetTriple()));
+
+  addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
+
+  // Compute "dead" symbols, we don't want to import/export these!
+  computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols);
+
   // Generate import/export list
   StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
   StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
   ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
                            ExportLists);
 
-  llvm::gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries,
-                                         ImportLists[ModulePath],
-                                         ModuleToSummariesForIndex);
+  llvm::gatherImportedSummariesForModule(
+      ModuleIdentifier, ModuleToDefinedGVSummaries,
+      ImportLists[ModuleIdentifier], ModuleToSummariesForIndex);
 }
 
 /**
  * Emit the list of files needed for importing into module.
  */
-void ThinLTOCodeGenerator::emitImports(StringRef ModulePath,
-                                       StringRef OutputName,
-                                       ModuleSummaryIndex &Index) {
+void ThinLTOCodeGenerator::emitImports(Module &TheModule, StringRef OutputName,
+                                       ModuleSummaryIndex &Index,
+                                       const lto::InputFile &File) {
   auto ModuleCount = Index.modulePaths().size();
+  auto ModuleIdentifier = TheModule.getModuleIdentifier();
 
   // Collect for each module the list of function it defines (GUID -> Summary).
   StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
   Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
 
+  // Convert the preserved symbols set from string to GUID
+  auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
+      PreservedSymbols, Triple(TheModule.getTargetTriple()));
+
+  addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
+
+  // Compute "dead" symbols, we don't want to import/export these!
+  computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols);
+
   // Generate import/export list
   StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
   StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
   ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
                            ExportLists);
 
+  std::map<std::string, GVSummaryMapTy> ModuleToSummariesForIndex;
+  llvm::gatherImportedSummariesForModule(
+      ModuleIdentifier, ModuleToDefinedGVSummaries,
+      ImportLists[ModuleIdentifier], ModuleToSummariesForIndex);
+
   std::error_code EC;
-  if ((EC = EmitImportsFiles(ModulePath, OutputName, ImportLists[ModulePath])))
+  if ((EC = EmitImportsFiles(ModuleIdentifier, OutputName,
+                             ModuleToSummariesForIndex)))
     report_fatal_error(Twine("Failed to open ") + OutputName +
                        " to save imports lists\n");
 }
 
 /**
- * Perform internalization. Index is updated to reflect linkage changes.
+ * Perform internalization. Runs promote and internalization together.
+ * Index is updated to reflect linkage changes.
  */
 void ThinLTOCodeGenerator::internalize(Module &TheModule,
-                                       ModuleSummaryIndex &Index) {
+                                       ModuleSummaryIndex &Index,
+                                       const lto::InputFile &File) {
   initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
   auto ModuleCount = Index.modulePaths().size();
   auto ModuleIdentifier = TheModule.getModuleIdentifier();
@@ -762,6 +764,8 @@
   auto GUIDPreservedSymbols =
       computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
 
+  addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
+
   // Collect for each module the list of function it defines (GUID -> Summary).
   StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
   Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
@@ -781,8 +785,20 @@
   if (ExportList.empty() && GUIDPreservedSymbols.empty())
     return;
 
+  // Resolve prevailing symbols
+  StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
+  resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols);
+
+  // Promote the exported values in the index, so that they are promoted
+  // in the module.
+  internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols, Index);
+
+  promoteModule(TheModule, Index);
+
   // Internalization
-  internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols, Index);
+  thinLTOResolvePrevailingInModule(
+      TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]);
+
   thinLTOInternalizeModule(TheModule,
                            ModuleToDefinedGVSummaries[ModuleIdentifier]);
 }
@@ -797,22 +813,16 @@
   optimizeModule(TheModule, *TMBuilder.create(), OptLevel, Freestanding);
 }
 
-/**
- * Perform ThinLTO CodeGen.
- */
-std::unique_ptr<MemoryBuffer> ThinLTOCodeGenerator::codegen(Module &TheModule) {
-  initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
-  return codegenModule(TheModule, *TMBuilder.create());
-}
-
 /// Write out the generated object file, either from CacheEntryPath or from
 /// OutputBuffer, preferring hard-link when possible.
 /// Returns the path to the generated file in SavedObjectsDirectoryPath.
-static std::string writeGeneratedObject(int count, StringRef CacheEntryPath,
-                                        StringRef SavedObjectsDirectoryPath,
-                                        const MemoryBuffer &OutputBuffer) {
+std::string
+ThinLTOCodeGenerator::writeGeneratedObject(int count, StringRef CacheEntryPath,
+                                           const MemoryBuffer &OutputBuffer) {
+  auto ArchName = TMBuilder.TheTriple.getArchName();
   SmallString<128> OutputPath(SavedObjectsDirectoryPath);
-  llvm::sys::path::append(OutputPath, Twine(count) + ".thinlto.o");
+  llvm::sys::path::append(OutputPath,
+                          Twine(count) + "." + ArchName + ".thinlto.o");
   OutputPath.c_str(); // Ensure the string is null terminated.
   if (sys::fs::exists(OutputPath))
     sys::fs::remove(OutputPath);
@@ -835,7 +845,7 @@
   }
   // No cache entry, just write out the buffer.
   std::error_code Err;
-  raw_fd_ostream OS(OutputPath, Err, sys::fs::F_None);
+  raw_fd_ostream OS(OutputPath, Err, sys::fs::OF_None);
   if (Err)
     report_fatal_error("Can't open output '" + OutputPath + "'\n");
   OS << OutputBuffer.getBuffer();
@@ -861,23 +871,22 @@
     // Perform only parallel codegen and return.
     ThreadPool Pool;
     int count = 0;
-    for (auto &ModuleBuffer : Modules) {
+    for (auto &Mod : Modules) {
       Pool.async([&](int count) {
         LLVMContext Context;
         Context.setDiscardValueNames(LTODiscardValueNames);
 
         // Parse module now
-        auto TheModule =
-            loadModuleFromBuffer(ModuleBuffer.getMemBuffer(), Context, false,
-                                 /*IsImporting*/ false);
+        auto TheModule = loadModuleFromInput(Mod.get(), Context, false,
+                                             /*IsImporting*/ false);
 
         // CodeGen
-        auto OutputBuffer = codegen(*TheModule);
+        auto OutputBuffer = codegenModule(*TheModule, *TMBuilder.create());
         if (SavedObjectsDirectoryPath.empty())
           ProducedBinaries[count] = std::move(OutputBuffer);
         else
-          ProducedBinaryFiles[count] = writeGeneratedObject(
-              count, "", SavedObjectsDirectoryPath, *OutputBuffer);
+          ProducedBinaryFiles[count] =
+              writeGeneratedObject(count, "", *OutputBuffer);
       }, count++);
     }
 
@@ -891,7 +900,7 @@
   if (!SaveTempsDir.empty()) {
     auto SaveTempPath = SaveTempsDir + "index.bc";
     std::error_code EC;
-    raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None);
+    raw_fd_ostream OS(SaveTempPath, EC, sys::fs::OF_None);
     if (EC)
       report_fatal_error(Twine("Failed to open ") + SaveTempPath +
                          " to save optimized bitcode\n");
@@ -912,9 +921,16 @@
   auto GUIDPreservedSymbols =
       computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
 
+  // Add used symbol from inputs to the preserved symbols.
+  for (const auto &M : Modules)
+    addUsedSymbolToPreservedGUID(*M, GUIDPreservedSymbols);
+
   // Compute "dead" symbols, we don't want to import/export these!
   computeDeadSymbolsInIndex(*Index, GUIDPreservedSymbols);
 
+  // Synthesize entry counts for functions in the combined index.
+  computeSyntheticCounts(*Index);
+
   // Collect the import/export lists for all modules from the call-graph in the
   // combined index.
   StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
@@ -928,20 +944,24 @@
   // on the index, and nuke this map.
   StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
 
-  // Resolve LinkOnce/Weak symbols, this has to be computed early because it
+  // Resolve prevailing symbols, this has to be computed early because it
   // impacts the caching.
-  resolveWeakForLinkerInIndex(*Index, ResolvedODR);
+  resolvePrevailingInIndex(*Index, ResolvedODR, GUIDPreservedSymbols);
 
   // Use global summary-based analysis to identify symbols that can be
   // internalized (because they aren't exported or preserved as per callback).
   // Changes are made in the index, consumed in the ThinLTO backends.
   internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols, *Index);
 
-  // Make sure that every module has an entry in the ExportLists and
-  // ResolvedODR maps to enable threaded access to these maps below.
-  for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
-    ExportLists[DefinedGVSummaries.first()];
-    ResolvedODR[DefinedGVSummaries.first()];
+  // Make sure that every module has an entry in the ExportLists, ImportList,
+  // GVSummary and ResolvedODR maps to enable threaded access to these maps
+  // below.
+  for (auto &Module : Modules) {
+    auto ModuleIdentifier = Module->getName();
+    ExportLists[ModuleIdentifier];
+    ImportLists[ModuleIdentifier];
+    ResolvedODR[ModuleIdentifier];
+    ModuleToDefinedGVSummaries[ModuleIdentifier];
   }
 
   // Compute the ordering we will process the inputs: the rough heuristic here
@@ -950,37 +970,38 @@
   std::vector<int> ModulesOrdering;
   ModulesOrdering.resize(Modules.size());
   std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
-  std::sort(ModulesOrdering.begin(), ModulesOrdering.end(),
-            [&](int LeftIndex, int RightIndex) {
-              auto LSize = Modules[LeftIndex].getBuffer().size();
-              auto RSize = Modules[RightIndex].getBuffer().size();
-              return LSize > RSize;
-            });
+  llvm::sort(ModulesOrdering, [&](int LeftIndex, int RightIndex) {
+    auto LSize =
+        Modules[LeftIndex]->getSingleBitcodeModule().getBuffer().size();
+    auto RSize =
+        Modules[RightIndex]->getSingleBitcodeModule().getBuffer().size();
+    return LSize > RSize;
+  });
 
   // Parallel optimizer + codegen
   {
     ThreadPool Pool(ThreadCount);
     for (auto IndexCount : ModulesOrdering) {
-      auto &ModuleBuffer = Modules[IndexCount];
+      auto &Mod = Modules[IndexCount];
       Pool.async([&](int count) {
-        auto ModuleIdentifier = ModuleBuffer.getBufferIdentifier();
+        auto ModuleIdentifier = Mod->getName();
         auto &ExportList = ExportLists[ModuleIdentifier];
 
-        auto &DefinedFunctions = ModuleToDefinedGVSummaries[ModuleIdentifier];
+        auto &DefinedGVSummaries = ModuleToDefinedGVSummaries[ModuleIdentifier];
 
         // The module may be cached, this helps handling it.
         ModuleCacheEntry CacheEntry(CacheOptions.Path, *Index, ModuleIdentifier,
                                     ImportLists[ModuleIdentifier], ExportList,
                                     ResolvedODR[ModuleIdentifier],
-                                    DefinedFunctions, GUIDPreservedSymbols,
-                                    OptLevel, Freestanding, TMBuilder);
+                                    DefinedGVSummaries, OptLevel, Freestanding,
+                                    TMBuilder);
         auto CacheEntryPath = CacheEntry.getEntryPath();
 
         {
           auto ErrOrBuffer = CacheEntry.tryLoadingBuffer();
-          DEBUG(dbgs() << "Cache " << (ErrOrBuffer ? "hit" : "miss") << " '"
-                       << CacheEntryPath << "' for buffer " << count << " "
-                       << ModuleIdentifier << "\n");
+          LLVM_DEBUG(dbgs() << "Cache " << (ErrOrBuffer ? "hit" : "miss")
+                            << " '" << CacheEntryPath << "' for buffer "
+                            << count << " " << ModuleIdentifier << "\n");
 
           if (ErrOrBuffer) {
             // Cache Hit!
@@ -988,8 +1009,7 @@
               ProducedBinaries[count] = std::move(ErrOrBuffer.get());
             else
               ProducedBinaryFiles[count] = writeGeneratedObject(
-                  count, CacheEntryPath, SavedObjectsDirectoryPath,
-                  *ErrOrBuffer.get());
+                  count, CacheEntryPath, *ErrOrBuffer.get());
             return;
           }
         }
@@ -998,7 +1018,8 @@
         Context.setDiscardValueNames(LTODiscardValueNames);
         Context.enableDebugTypeODRUniquing();
         auto DiagFileOrErr = lto::setupOptimizationRemarks(
-            Context, LTORemarksFilename, LTOPassRemarksWithHotness, count);
+            Context, RemarksFilename, RemarksPasses, RemarksFormat,
+            RemarksWithHotness, count);
         if (!DiagFileOrErr) {
           errs() << "Error: " << toString(DiagFileOrErr.takeError()) << "\n";
           report_fatal_error("ThinLTO: Can't get an output file for the "
@@ -1006,9 +1027,8 @@
         }
 
         // Parse module now
-        auto TheModule =
-            loadModuleFromBuffer(ModuleBuffer.getMemBuffer(), Context, false,
-                                 /*IsImporting*/ false);
+        auto TheModule = loadModuleFromInput(Mod.get(), Context, false,
+                                             /*IsImporting*/ false);
 
         // Save temps: original file.
         saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc");
@@ -1027,15 +1047,15 @@
         if (SavedObjectsDirectoryPath.empty()) {
           // We need to generated a memory buffer for the linker.
           if (!CacheEntryPath.empty()) {
-            // Cache is enabled, reload from the cache
-            // We do this to lower memory pressuree: the buffer is on the heap
-            // and releasing it frees memory that can be used for the next input
-            // file. The final binary link will read from the VFS cache
-            // (hopefully!) or from disk if the memory pressure wasn't too high.
+            // When cache is enabled, reload from the cache if possible.
+            // Releasing the buffer from the heap and reloading it from the
+            // cache file with mmap helps us to lower memory pressure.
+            // The freed memory can be used for the next input file.
+            // The final binary link will read from the VFS cache (hopefully!)
+            // or from disk (if the memory pressure was too high).
             auto ReloadedBufferOrErr = CacheEntry.tryLoadingBuffer();
             if (auto EC = ReloadedBufferOrErr.getError()) {
-              // On error, keeping the preexisting buffer and printing a
-              // diagnostic is more friendly than just crashing.
+              // On error, keep the preexisting buffer and print a diagnostic.
               errs() << "error: can't reload cached file '" << CacheEntryPath
                      << "': " << EC.message() << "\n";
             } else {
@@ -1046,7 +1066,7 @@
           return;
         }
         ProducedBinaryFiles[count] = writeGeneratedObject(
-            count, CacheEntryPath, SavedObjectsDirectoryPath, *OutputBuffer);
+            count, CacheEntryPath, *OutputBuffer);
       }, IndexCount);
     }
   }