diff lld/MachO/SyntheticSections.h @ 207:2e18cbf3894f

LLVM12
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Tue, 08 Jun 2021 06:07:14 +0900
parents 0572611fdcc8
children 5f17cb93ff66
line wrap: on
line diff
--- a/lld/MachO/SyntheticSections.h	Mon May 25 11:55:54 2020 +0900
+++ b/lld/MachO/SyntheticSections.h	Tue Jun 08 06:07:14 2021 +0900
@@ -9,32 +9,30 @@
 #ifndef LLD_MACHO_SYNTHETIC_SECTIONS_H
 #define LLD_MACHO_SYNTHETIC_SECTIONS_H
 
+#include "Config.h"
 #include "ExportTrie.h"
 #include "InputSection.h"
 #include "OutputSection.h"
+#include "OutputSegment.h"
 #include "Target.h"
 
+#include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 
+namespace llvm {
+class DWARFUnit;
+} // namespace llvm
+
 namespace lld {
 namespace macho {
 
-namespace section_names {
-
-constexpr const char *pageZero = "__pagezero";
-constexpr const char *header = "__mach_header";
-constexpr const char *binding = "__binding";
-constexpr const char *lazyBinding = "__lazy_binding";
-constexpr const char *export_ = "__export";
-constexpr const char *symbolTable = "__symbol_table";
-constexpr const char *stringTable = "__string_table";
-constexpr const char *got = "__got";
-
-} // namespace section_names
-
+class Defined;
 class DylibSymbol;
 class LoadCommand;
+class ObjFile;
+class UnwindInfoSection;
 
 class SyntheticSection : public OutputSection {
 public:
@@ -44,18 +42,52 @@
   static bool classof(const OutputSection *sec) {
     return sec->kind() == SyntheticKind;
   }
+
+  const StringRef segname;
+  // This fake InputSection makes it easier for us to write code that applies
+  // generically to both user inputs and synthetics.
+  InputSection *isec;
+};
+
+// All sections in __LINKEDIT should inherit from this.
+class LinkEditSection : public SyntheticSection {
+public:
+  LinkEditSection(const char *segname, const char *name)
+      : SyntheticSection(segname, name) {
+    align = target->wordSize;
+  }
+
+  virtual void finalizeContents() {}
+
+  // Sections in __LINKEDIT are special: their offsets are recorded in the
+  // load commands like LC_DYLD_INFO_ONLY and LC_SYMTAB, instead of in section
+  // headers.
+  bool isHidden() const override final { return true; }
+
+  virtual uint64_t getRawSize() const = 0;
+
+  // codesign (or more specifically libstuff) checks that each section in
+  // __LINKEDIT ends where the next one starts -- no gaps are permitted. We
+  // therefore align every section's start and end points to WordSize.
+  //
+  // NOTE: This assumes that the extra bytes required for alignment can be
+  // zero-valued bytes.
+  uint64_t getSize() const override final {
+    return llvm::alignTo(getRawSize(), align);
+  }
 };
 
 // The header of the Mach-O file, which must have a file offset of zero.
 class MachHeaderSection : public SyntheticSection {
 public:
   MachHeaderSection();
-  void addLoadCommand(LoadCommand *);
   bool isHidden() const override { return true; }
-  size_t getSize() const override;
+  uint64_t getSize() const override;
   void writeTo(uint8_t *buf) const override;
 
-private:
+  void addLoadCommand(LoadCommand *);
+
+protected:
   std::vector<LoadCommand *> loadCommands;
   uint32_t sizeOfCmds = 0;
 };
@@ -66,62 +98,173 @@
 public:
   PageZeroSection();
   bool isHidden() const override { return true; }
-  size_t getSize() const override { return ImageBase; }
+  uint64_t getSize() const override { return target->pageZeroSize; }
   uint64_t getFileSize() const override { return 0; }
   void writeTo(uint8_t *buf) const override {}
 };
 
-// This section will be populated by dyld with addresses to non-lazily-loaded
-// dylib symbols.
-class GotSection : public SyntheticSection {
+// This is the base class for the GOT and TLVPointer sections, which are nearly
+// functionally identical -- they will both be populated by dyld with addresses
+// to non-lazily-loaded dylib symbols. The main difference is that the
+// TLVPointerSection stores references to thread-local variables.
+class NonLazyPointerSectionBase : public SyntheticSection {
 public:
-  GotSection();
+  NonLazyPointerSectionBase(const char *segname, const char *name);
 
-  const llvm::SetVector<const DylibSymbol *> &getEntries() const {
-    return entries;
-  }
+  const llvm::SetVector<const Symbol *> &getEntries() const { return entries; }
 
   bool isNeeded() const override { return !entries.empty(); }
 
-  size_t getSize() const override { return entries.size() * WordSize; }
+  uint64_t getSize() const override {
+    return entries.size() * target->wordSize;
+  }
 
-  void writeTo(uint8_t *buf) const override {
-    // Nothing to write, GOT contains all zeros at link time; it's populated at
-    // runtime by dyld.
+  void writeTo(uint8_t *buf) const override;
+
+  void addEntry(Symbol *sym);
+
+  uint64_t getVA(uint32_t gotIndex) const {
+    return addr + gotIndex * target->wordSize;
   }
 
-  void addEntry(DylibSymbol &sym);
+private:
+  llvm::SetVector<const Symbol *> entries;
+};
+
+class GotSection : public NonLazyPointerSectionBase {
+public:
+  GotSection()
+      : NonLazyPointerSectionBase(segment_names::dataConst,
+                                  section_names::got) {
+    // TODO: section_64::reserved1 should be an index into the indirect symbol
+    // table, which we do not currently emit
+  }
+};
+
+class TlvPointerSection : public NonLazyPointerSectionBase {
+public:
+  TlvPointerSection()
+      : NonLazyPointerSectionBase(segment_names::data,
+                                  section_names::threadPtrs) {}
+};
+
+struct Location {
+  const InputSection *isec;
+  uint64_t offset;
+
+  Location(const InputSection *isec, uint64_t offset)
+      : isec(isec), offset(offset) {}
+  uint64_t getVA() const { return isec->getVA() + offset; }
+};
+
+// Stores rebase opcodes, which tell dyld where absolute addresses have been
+// encoded in the binary. If the binary is not loaded at its preferred address,
+// dyld has to rebase these addresses by adding an offset to them.
+class RebaseSection : public LinkEditSection {
+public:
+  RebaseSection();
+  void finalizeContents() override;
+  uint64_t getRawSize() const override { return contents.size(); }
+  bool isNeeded() const override { return !locations.empty(); }
+  void writeTo(uint8_t *buf) const override;
+
+  void addEntry(const InputSection *isec, uint64_t offset) {
+    if (config->isPic)
+      locations.push_back({isec, offset});
+  }
 
 private:
-  llvm::SetVector<const DylibSymbol *> entries;
+  std::vector<Location> locations;
+  SmallVector<char, 128> contents;
+};
+
+struct BindingEntry {
+  const DylibSymbol *dysym;
+  int64_t addend;
+  Location target;
+  BindingEntry(const DylibSymbol *dysym, int64_t addend, Location target)
+      : dysym(dysym), addend(addend), target(std::move(target)) {}
 };
 
 // Stores bind opcodes for telling dyld which symbols to load non-lazily.
-class BindingSection : public SyntheticSection {
+class BindingSection : public LinkEditSection {
 public:
   BindingSection();
-  void finalizeContents();
-  size_t getSize() const override { return contents.size(); }
-  // Like other sections in __LINKEDIT, the binding section is special: its
-  // offsets are recorded in the LC_DYLD_INFO_ONLY load command, instead of in
-  // section headers.
-  bool isHidden() const override { return true; }
-  bool isNeeded() const override;
+  void finalizeContents() override;
+  uint64_t getRawSize() const override { return contents.size(); }
+  bool isNeeded() const override { return !bindings.empty(); }
   void writeTo(uint8_t *buf) const override;
 
+  void addEntry(const DylibSymbol *dysym, const InputSection *isec,
+                uint64_t offset, int64_t addend = 0) {
+    bindings.emplace_back(dysym, addend, Location(isec, offset));
+  }
+
+private:
+  std::vector<BindingEntry> bindings;
+  SmallVector<char, 128> contents;
+};
+
+struct WeakBindingEntry {
+  const Symbol *symbol;
+  int64_t addend;
+  Location target;
+  WeakBindingEntry(const Symbol *symbol, int64_t addend, Location target)
+      : symbol(symbol), addend(addend), target(std::move(target)) {}
+};
+
+// Stores bind opcodes for telling dyld which weak symbols need coalescing.
+// There are two types of entries in this section:
+//
+//   1) Non-weak definitions: This is a symbol definition that weak symbols in
+//   other dylibs should coalesce to.
+//
+//   2) Weak bindings: These tell dyld that a given symbol reference should
+//   coalesce to a non-weak definition if one is found. Note that unlike the
+//   entries in the BindingSection, the bindings here only refer to these
+//   symbols by name, but do not specify which dylib to load them from.
+class WeakBindingSection : public LinkEditSection {
+public:
+  WeakBindingSection();
+  void finalizeContents() override;
+  uint64_t getRawSize() const override { return contents.size(); }
+  bool isNeeded() const override {
+    return !bindings.empty() || !definitions.empty();
+  }
+
+  void writeTo(uint8_t *buf) const override;
+
+  void addEntry(const Symbol *symbol, const InputSection *isec, uint64_t offset,
+                int64_t addend = 0) {
+    bindings.emplace_back(symbol, addend, Location(isec, offset));
+  }
+
+  bool hasEntry() const { return !bindings.empty(); }
+
+  void addNonWeakDefinition(const Defined *defined) {
+    definitions.emplace_back(defined);
+  }
+
+  bool hasNonWeakDefinition() const { return !definitions.empty(); }
+
+private:
+  std::vector<WeakBindingEntry> bindings;
+  std::vector<const Defined *> definitions;
   SmallVector<char, 128> contents;
 };
 
 // The following sections implement lazy symbol binding -- very similar to the
 // PLT mechanism in ELF.
 //
-// ELF's .plt section is broken up into two sections in Mach-O: StubsSection and
-// StubHelperSection. Calls to functions in dylibs will end up calling into
+// ELF's .plt section is broken up into two sections in Mach-O: StubsSection
+// and StubHelperSection. Calls to functions in dylibs will end up calling into
 // StubsSection, which contains indirect jumps to addresses stored in the
 // LazyPointerSection (the counterpart to ELF's .plt.got).
 //
-// Initially, the LazyPointerSection contains addresses that point into one of
-// the entry points in the middle of the StubHelperSection. The code in
+// We will first describe how non-weak symbols are handled.
+//
+// At program start, the LazyPointerSection contains addresses that point into
+// one of the entry points in the middle of the StubHelperSection. The code in
 // StubHelperSection will push on the stack an offset into the
 // LazyBindingSection. The push is followed by a jump to the beginning of the
 // StubHelperSection (similar to PLT0), which then calls into dyld_stub_binder.
@@ -129,36 +272,54 @@
 // the GOT.
 //
 // The stub binder will look up the bind opcodes in the LazyBindingSection at
-// the given offset. The bind opcodes will tell the binder to update the address
-// in the LazyPointerSection to point to the symbol, so that subsequent calls
-// don't have to redo the symbol resolution. The binder will then jump to the
-// resolved symbol.
+// the given offset. The bind opcodes will tell the binder to update the
+// address in the LazyPointerSection to point to the symbol, so that subsequent
+// calls don't have to redo the symbol resolution. The binder will then jump to
+// the resolved symbol.
+//
+// With weak symbols, the situation is slightly different. Since there is no
+// "weak lazy" lookup, function calls to weak symbols are always non-lazily
+// bound. We emit both regular non-lazy bindings as well as weak bindings, in
+// order that the weak bindings may overwrite the non-lazy bindings if an
+// appropriate symbol is found at runtime. However, the bound addresses will
+// still be written (non-lazily) into the LazyPointerSection.
 
 class StubsSection : public SyntheticSection {
 public:
   StubsSection();
-  size_t getSize() const override;
+  uint64_t getSize() const override;
   bool isNeeded() const override { return !entries.empty(); }
+  void finalize() override;
   void writeTo(uint8_t *buf) const override;
+  const llvm::SetVector<Symbol *> &getEntries() const { return entries; }
+  // Returns whether the symbol was added. Note that every stubs entry will
+  // have a corresponding entry in the LazyPointerSection.
+  bool addEntry(Symbol *);
+  uint64_t getVA(uint32_t stubsIndex) const {
+    // ConcatOutputSection::finalize() can seek the address of a
+    // stub before its address is assigned. Before __stubs is
+    // finalized, return a contrived out-of-range address.
+    return isFinal ? addr + stubsIndex * target->stubSize
+                   : TargetInfo::outOfRangeVA;
+  }
 
-  const llvm::SetVector<DylibSymbol *> &getEntries() const { return entries; }
-
-  void addEntry(DylibSymbol &sym);
+  bool isFinal = false; // is address assigned?
 
 private:
-  llvm::SetVector<DylibSymbol *> entries;
+  llvm::SetVector<Symbol *> entries;
 };
 
 class StubHelperSection : public SyntheticSection {
 public:
   StubHelperSection();
-  size_t getSize() const override;
+  uint64_t getSize() const override;
   bool isNeeded() const override;
   void writeTo(uint8_t *buf) const override;
 
   void setup();
 
   DylibSymbol *stubBinder = nullptr;
+  Defined *dyldPrivate = nullptr;
 };
 
 // This section contains space for just a single word, and will be used by dyld
@@ -168,71 +329,82 @@
 class ImageLoaderCacheSection : public InputSection {
 public:
   ImageLoaderCacheSection();
-  size_t getSize() const override { return WordSize; }
+  uint64_t getSize() const override { return target->wordSize; }
 };
 
+// Note that this section may also be targeted by non-lazy bindings. In
+// particular, this happens when branch relocations target weak symbols.
 class LazyPointerSection : public SyntheticSection {
 public:
   LazyPointerSection();
-  size_t getSize() const override;
+  uint64_t getSize() const override;
   bool isNeeded() const override;
   void writeTo(uint8_t *buf) const override;
 };
 
-class LazyBindingSection : public SyntheticSection {
+class LazyBindingSection : public LinkEditSection {
 public:
   LazyBindingSection();
-  void finalizeContents();
-  size_t getSize() const override { return contents.size(); }
-  uint32_t encode(const DylibSymbol &);
-  // Like other sections in __LINKEDIT, the lazy binding section is special: its
-  // offsets are recorded in the LC_DYLD_INFO_ONLY load command, instead of in
-  // section headers.
-  bool isHidden() const override { return true; }
-  bool isNeeded() const override;
+  void finalizeContents() override;
+  uint64_t getRawSize() const override { return contents.size(); }
+  bool isNeeded() const override { return !entries.empty(); }
   void writeTo(uint8_t *buf) const override;
+  // Note that every entry here will by referenced by a corresponding entry in
+  // the StubHelperSection.
+  void addEntry(DylibSymbol *dysym);
+  const llvm::SetVector<DylibSymbol *> &getEntries() const { return entries; }
 
 private:
+  uint32_t encode(const DylibSymbol &);
+
+  llvm::SetVector<DylibSymbol *> entries;
   SmallVector<char, 128> contents;
   llvm::raw_svector_ostream os{contents};
 };
 
 // Stores a trie that describes the set of exported symbols.
-class ExportSection : public SyntheticSection {
+class ExportSection : public LinkEditSection {
 public:
   ExportSection();
-  void finalizeContents();
-  size_t getSize() const override { return size; }
-  // Like other sections in __LINKEDIT, the export section is special: its
-  // offsets are recorded in the LC_DYLD_INFO_ONLY load command, instead of in
-  // section headers.
-  bool isHidden() const override { return true; }
+  void finalizeContents() override;
+  uint64_t getRawSize() const override { return size; }
   void writeTo(uint8_t *buf) const override;
 
+  bool hasWeakSymbol = false;
+
 private:
   TrieBuilder trieBuilder;
   size_t size = 0;
 };
 
+class FunctionStartsSection : public LinkEditSection {
+public:
+  FunctionStartsSection();
+  void finalizeContents() override;
+  uint64_t getRawSize() const override { return contents.size(); }
+  void writeTo(uint8_t *buf) const override;
+
+private:
+  SmallVector<char, 128> contents;
+};
+
 // Stores the strings referenced by the symbol table.
-class StringTableSection : public SyntheticSection {
+class StringTableSection : public LinkEditSection {
 public:
   StringTableSection();
   // Returns the start offset of the added string.
   uint32_t addString(StringRef);
-  size_t getSize() const override { return size; }
-  // Like other sections in __LINKEDIT, the string table section is special: its
-  // offsets are recorded in the LC_SYMTAB load command, instead of in section
-  // headers.
-  bool isHidden() const override { return true; }
+  uint64_t getRawSize() const override { return size; }
   void writeTo(uint8_t *buf) const override;
 
+  static constexpr size_t emptyStringIndex = 1;
+
 private:
-  // An n_strx value of 0 always indicates the empty string, so we must locate
-  // our non-empty string values at positive offsets in the string table.
-  // Therefore we insert a dummy value at position zero.
-  std::vector<StringRef> strings{"\0"};
-  size_t size = 1;
+  // ld64 emits string tables which start with a space and a zero byte. We
+  // match its behavior here since some tools depend on it.
+  // Consequently, the empty string will be at index 1, not zero.
+  std::vector<StringRef> strings{" "};
+  size_t size = 2;
 };
 
 struct SymtabEntry {
@@ -240,32 +412,129 @@
   size_t strx;
 };
 
-class SymtabSection : public SyntheticSection {
+struct StabsEntry {
+  uint8_t type = 0;
+  uint32_t strx = StringTableSection::emptyStringIndex;
+  uint8_t sect = 0;
+  uint16_t desc = 0;
+  uint64_t value = 0;
+
+  StabsEntry() = default;
+  explicit StabsEntry(uint8_t type) : type(type) {}
+};
+
+// Symbols of the same type must be laid out contiguously: we choose to emit
+// all local symbols first, then external symbols, and finally undefined
+// symbols. For each symbol type, the LC_DYSYMTAB load command will record the
+// range (start index and total number) of those symbols in the symbol table.
+class SymtabSection : public LinkEditSection {
 public:
+  void finalizeContents() override;
+  uint32_t getNumSymbols() const;
+  uint32_t getNumLocalSymbols() const {
+    return stabs.size() + localSymbols.size();
+  }
+  uint32_t getNumExternalSymbols() const { return externalSymbols.size(); }
+  uint32_t getNumUndefinedSymbols() const { return undefinedSymbols.size(); }
+
+private:
+  void emitBeginSourceStab(llvm::DWARFUnit *compileUnit);
+  void emitEndSourceStab();
+  void emitObjectFileStab(ObjFile *);
+  void emitEndFunStab(Defined *);
+  void emitStabs();
+
+protected:
   SymtabSection(StringTableSection &);
-  void finalizeContents();
-  size_t getNumSymbols() const { return symbols.size(); }
-  size_t getSize() const override;
-  // Like other sections in __LINKEDIT, the symtab section is special: its
-  // offsets are recorded in the LC_SYMTAB load command, instead of in section
-  // headers.
-  bool isHidden() const override { return true; }
+
+  StringTableSection &stringTableSection;
+  // STABS symbols are always local symbols, but we represent them with special
+  // entries because they may use fields like n_sect and n_desc differently.
+  std::vector<StabsEntry> stabs;
+  std::vector<SymtabEntry> localSymbols;
+  std::vector<SymtabEntry> externalSymbols;
+  std::vector<SymtabEntry> undefinedSymbols;
+};
+
+template <class LP> SymtabSection *makeSymtabSection(StringTableSection &);
+
+// The indirect symbol table is a list of 32-bit integers that serve as indices
+// into the (actual) symbol table. The indirect symbol table is a
+// concatenation of several sub-arrays of indices, each sub-array belonging to
+// a separate section. The starting offset of each sub-array is stored in the
+// reserved1 header field of the respective section.
+//
+// These sub-arrays provide symbol information for sections that store
+// contiguous sequences of symbol references. These references can be pointers
+// (e.g. those in the GOT and TLVP sections) or assembly sequences (e.g.
+// function stubs).
+class IndirectSymtabSection : public LinkEditSection {
+public:
+  IndirectSymtabSection();
+  void finalizeContents() override;
+  uint32_t getNumSymbols() const;
+  uint64_t getRawSize() const override {
+    return getNumSymbols() * sizeof(uint32_t);
+  }
+  bool isNeeded() const override;
+  void writeTo(uint8_t *buf) const override;
+};
+
+// The code signature comes at the very end of the linked output file.
+class CodeSignatureSection : public LinkEditSection {
+public:
+  static constexpr uint8_t blockSizeShift = 12;
+  static constexpr size_t blockSize = (1 << blockSizeShift); // 4 KiB
+  static constexpr size_t hashSize = 256 / 8;
+  static constexpr size_t blobHeadersSize = llvm::alignTo<8>(
+      sizeof(llvm::MachO::CS_SuperBlob) + sizeof(llvm::MachO::CS_BlobIndex));
+  static constexpr uint32_t fixedHeadersSize =
+      blobHeadersSize + sizeof(llvm::MachO::CS_CodeDirectory);
+
+  uint32_t fileNamePad = 0;
+  uint32_t allHeadersSize = 0;
+  StringRef fileName;
+
+  CodeSignatureSection();
+  uint64_t getRawSize() const override;
+  bool isNeeded() const override { return true; }
+  void writeTo(uint8_t *buf) const override;
+  uint32_t getBlockCount() const;
+  void writeHashes(uint8_t *buf) const;
+};
+
+class BitcodeBundleSection : public SyntheticSection {
+public:
+  BitcodeBundleSection();
+  uint64_t getSize() const override { return xarSize; }
+  void finalize() override;
   void writeTo(uint8_t *buf) const override;
 
 private:
-  StringTableSection &stringTableSection;
-  std::vector<SymtabEntry> symbols;
+  llvm::SmallString<261> xarPath;
+  uint64_t xarSize;
 };
 
 struct InStruct {
+  MachHeaderSection *header = nullptr;
+  RebaseSection *rebase = nullptr;
+  BindingSection *binding = nullptr;
+  WeakBindingSection *weakBinding = nullptr;
+  LazyBindingSection *lazyBinding = nullptr;
+  ExportSection *exports = nullptr;
   GotSection *got = nullptr;
+  TlvPointerSection *tlvPointers = nullptr;
   LazyPointerSection *lazyPointers = nullptr;
   StubsSection *stubs = nullptr;
   StubHelperSection *stubHelper = nullptr;
   ImageLoaderCacheSection *imageLoaderCache = nullptr;
+  UnwindInfoSection *unwindInfo = nullptr;
 };
 
 extern InStruct in;
+extern std::vector<SyntheticSection *> syntheticSections;
+
+void createSyntheticSymbols();
 
 } // namespace macho
 } // namespace lld