Mercurial > hg > CbC > CbC_llvm
diff lld/MachO/SyntheticSections.h @ 236:c4bab56944e8 llvm-original
LLVM 16
author | kono |
---|---|
date | Wed, 09 Nov 2022 17:45:10 +0900 |
parents | 5f17cb93ff66 |
children | 1f2b6ac9f198 |
line wrap: on
line diff
--- a/lld/MachO/SyntheticSections.h Wed Jul 21 10:27:27 2021 +0900 +++ b/lld/MachO/SyntheticSections.h Wed Nov 09 17:45:10 2022 +0900 @@ -19,8 +19,9 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SetVector.h" -#include "llvm/MC/StringTableBuilder.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -30,8 +31,7 @@ class DWARFUnit; } // namespace llvm -namespace lld { -namespace macho { +namespace lld::macho { class Defined; class DylibSymbol; @@ -62,12 +62,14 @@ align = target->wordSize; } + // Implementations of this method can assume that the regular (non-__LINKEDIT) + // sections already have their addresses assigned. virtual void finalizeContents() {} // Sections in __LINKEDIT are special: their offsets are recorded in the // load commands like LC_DYLD_INFO_ONLY and LC_SYMTAB, instead of in section // headers. - bool isHidden() const override final { return true; } + bool isHidden() const final { return true; } virtual uint64_t getRawSize() const = 0; @@ -77,9 +79,7 @@ // // NOTE: This assumes that the extra bytes required for alignment can be // zero-valued bytes. - uint64_t getSize() const override final { - return llvm::alignTo(getRawSize(), align); - } + uint64_t getSize() const final { return llvm::alignTo(getRawSize(), align); } }; // The header of the Mach-O file, which must have a file offset of zero. @@ -103,6 +103,7 @@ public: PageZeroSection(); bool isHidden() const override { return true; } + bool isNeeded() const override { return target->pageZeroSize != 0; } uint64_t getSize() const override { return target->pageZeroSize; } uint64_t getFileSize() const override { return 0; } void writeTo(uint8_t *buf) const override {} @@ -189,13 +190,13 @@ bool isNeeded() const override { return !bindingsMap.empty(); } void writeTo(uint8_t *buf) const override; - void addEntry(const DylibSymbol *dysym, const InputSection *isec, - uint64_t offset, int64_t addend = 0) { + void addEntry(const Symbol *dysym, const InputSection *isec, uint64_t offset, + int64_t addend = 0) { bindingsMap[dysym].emplace_back(addend, Location(isec, offset)); } private: - BindingsMap<const DylibSymbol *> bindingsMap; + BindingsMap<const Symbol *> bindingsMap; SmallVector<char, 128> contents; }; @@ -269,6 +270,12 @@ // order that the weak bindings may overwrite the non-lazy bindings if an // appropriate symbol is found at runtime. However, the bound addresses will // still be written (non-lazily) into the LazyPointerSection. +// +// Symbols are always bound eagerly when chained fixups are used. In that case, +// StubsSection contains indirect jumps to addresses stored in the GotSection. +// The GOT directly contains the fixup entries, which will be replaced by the +// address of the target symbols on load. LazyPointerSection and +// StubHelperSection are not used. class StubsSection final : public SyntheticSection { public: @@ -278,9 +285,9 @@ void finalize() override; void writeTo(uint8_t *buf) const override; const llvm::SetVector<Symbol *> &getEntries() const { return entries; } - // Returns whether the symbol was added. Note that every stubs entry will - // have a corresponding entry in the LazyPointerSection. - bool addEntry(Symbol *); + // Creates a stub for the symbol and the corresponding entry in the + // LazyPointerSection. + void addEntry(Symbol *); uint64_t getVA(uint32_t stubsIndex) const { assert(isFinal || target->usesThunks()); // ConcatOutputSection::finalize() can seek the address of a @@ -303,12 +310,36 @@ bool isNeeded() const override; void writeTo(uint8_t *buf) const override; - void setup(); + void setUp(); DylibSymbol *stubBinder = nullptr; Defined *dyldPrivate = nullptr; }; +// Objective-C stubs are hoisted objc_msgSend calls per selector called in the +// program. Apple Clang produces undefined symbols to each stub, such as +// '_objc_msgSend$foo', which are then synthesized by the linker. The stubs +// load the particular selector 'foo' from __objc_selrefs, setting it to the +// first argument of the objc_msgSend call, and then jumps to objc_msgSend. The +// actual stub contents are mirrored from ld64. +class ObjCStubsSection final : public SyntheticSection { +public: + ObjCStubsSection(); + void addEntry(Symbol *sym); + uint64_t getSize() const override; + bool isNeeded() const override { return !symbols.empty(); } + void finalize() override { isec->isFinal = true; } + void writeTo(uint8_t *buf) const override; + void setUp(); + + static constexpr llvm::StringLiteral symbolPrefix = "_objc_msgSend$"; + +private: + std::vector<Defined *> symbols; + std::vector<uint32_t> offsets; + int objcMsgSendGotIndex = 0; +}; + // Note that this section may also be targeted by non-lazy bindings. In // particular, this happens when branch relocations target weak symbols. class LazyPointerSection final : public SyntheticSection { @@ -317,6 +348,9 @@ uint64_t getSize() const override; bool isNeeded() const override; void writeTo(uint8_t *buf) const override; + uint64_t getVA(uint32_t index) const { + return addr + (index << target->p2WordSize); + } }; class LazyBindingSection final : public LinkEditSection { @@ -328,13 +362,13 @@ void writeTo(uint8_t *buf) const override; // Note that every entry here will by referenced by a corresponding entry in // the StubHelperSection. - void addEntry(DylibSymbol *dysym); - const llvm::SetVector<DylibSymbol *> &getEntries() const { return entries; } + void addEntry(Symbol *dysym); + const llvm::SetVector<Symbol *> &getEntries() const { return entries; } private: - uint32_t encode(const DylibSymbol &); + uint32_t encode(const Symbol &); - llvm::SetVector<DylibSymbol *> entries; + llvm::SetVector<Symbol *> entries; SmallVector<char, 128> contents; llvm::raw_svector_ostream os{contents}; }; @@ -345,6 +379,7 @@ ExportSection(); void finalizeContents() override; uint64_t getRawSize() const override { return size; } + bool isNeeded() const override { return size; } void writeTo(uint8_t *buf) const override; bool hasWeakSymbol = false; @@ -354,8 +389,9 @@ size_t size = 0; }; -// Stores 'data in code' entries that describe the locations of -// data regions inside code sections. +// Stores 'data in code' entries that describe the locations of data regions +// inside code sections. This is used by llvm-objdump to distinguish jump tables +// and stop them from being disassembled as instructions. class DataInCodeSection final : public LinkEditSection { public: DataInCodeSection(); @@ -431,7 +467,7 @@ uint32_t getNumUndefinedSymbols() const { return undefinedSymbols.size(); } private: - void emitBeginSourceStab(llvm::DWARFUnit *compileUnit); + void emitBeginSourceStab(StringRef); void emitEndSourceStab(); void emitObjectFileStab(ObjFile *); void emitEndFunStab(Defined *); @@ -476,6 +512,8 @@ // The code signature comes at the very end of the linked output file. class CodeSignatureSection final : public LinkEditSection { public: + // NOTE: These values are duplicated in llvm-objcopy's MachO/Object.h file + // and any changes here, should be repeated there. static constexpr uint8_t blockSizeShift = 12; static constexpr size_t blockSize = (1 << blockSizeShift); // 4 KiB static constexpr size_t hashSize = 256 / 8; @@ -510,7 +548,7 @@ class CStringSection : public SyntheticSection { public: - CStringSection(); + CStringSection(const char *name); void addInput(CStringInputSection *); uint64_t getSize() const override { return size; } virtual void finalizeContents(); @@ -525,13 +563,23 @@ class DeduplicatedCStringSection final : public CStringSection { public: - DeduplicatedCStringSection(); - uint64_t getSize() const override { return builder.getSize(); } + DeduplicatedCStringSection(const char *name) : CStringSection(name){}; + uint64_t getSize() const override { return size; } void finalizeContents() override; - void writeTo(uint8_t *buf) const override { builder.write(buf); } + void writeTo(uint8_t *buf) const override; + + struct StringOffset { + uint8_t trailingZeros; + uint64_t outSecOff = UINT64_MAX; + + explicit StringOffset(uint8_t zeros) : trailingZeros(zeros) {} + }; + + StringOffset getStringOffset(StringRef str) const; private: - llvm::StringTableBuilder builder; + llvm::DenseMap<llvm::CachedHashStringRef, StringOffset> stringOffsetMap; + size_t size = 0; }; /* @@ -543,7 +591,7 @@ using UInt128 = std::pair<uint64_t, uint64_t>; // I don't think the standard guarantees the size of a pair, so let's make // sure it's exact -- that way we can construct it via `mmap`. - static_assert(sizeof(UInt128) == 16, ""); + static_assert(sizeof(UInt128) == 16); WordLiteralSection(); void addInput(WordLiteralInputSection *); @@ -560,16 +608,16 @@ !literal8Map.empty(); } - uint64_t getLiteral16Offset(const uint8_t *buf) const { + uint64_t getLiteral16Offset(uintptr_t buf) const { return literal16Map.at(*reinterpret_cast<const UInt128 *>(buf)) * 16; } - uint64_t getLiteral8Offset(const uint8_t *buf) const { + uint64_t getLiteral8Offset(uintptr_t buf) const { return literal16Map.size() * 16 + literal8Map.at(*reinterpret_cast<const uint64_t *>(buf)) * 8; } - uint64_t getLiteral4Offset(const uint8_t *buf) const { + uint64_t getLiteral4Offset(uintptr_t buf) const { return literal16Map.size() * 16 + literal8Map.size() * 8 + literal4Map.at(*reinterpret_cast<const uint32_t *>(buf)) * 4; } @@ -588,9 +636,163 @@ std::unordered_map<uint32_t, uint64_t> literal4Map; }; +class ObjCImageInfoSection final : public SyntheticSection { +public: + ObjCImageInfoSection(); + bool isNeeded() const override { return !files.empty(); } + uint64_t getSize() const override { return 8; } + void addFile(const InputFile *file) { + assert(!file->objCImageInfo.empty()); + files.push_back(file); + } + void finalizeContents(); + void writeTo(uint8_t *buf) const override; + +private: + struct ImageInfo { + uint8_t swiftVersion = 0; + bool hasCategoryClassProperties = false; + } info; + static ImageInfo parseImageInfo(const InputFile *); + std::vector<const InputFile *> files; // files with image info +}; + +// This section stores 32-bit __TEXT segment offsets of initializer functions. +// +// The compiler stores pointers to initializers in __mod_init_func. These need +// to be fixed up at load time, which takes time and dirties memory. By +// synthesizing InitOffsetsSection from them, this data can live in the +// read-only __TEXT segment instead. This section is used by default when +// chained fixups are enabled. +// +// There is no similar counterpart to __mod_term_func, as that section is +// deprecated, and static destructors are instead handled by registering them +// via __cxa_atexit from an autogenerated initializer function (see D121736). +class InitOffsetsSection final : public SyntheticSection { +public: + InitOffsetsSection(); + bool isNeeded() const override { return !sections.empty(); } + uint64_t getSize() const override; + void writeTo(uint8_t *buf) const override; + void setUp(); + + void addInput(ConcatInputSection *isec) { sections.push_back(isec); } + const std::vector<ConcatInputSection *> &inputs() const { return sections; } + +private: + std::vector<ConcatInputSection *> sections; +}; + +// Chained fixups are a replacement for classic dyld opcodes. In this format, +// most of the metadata necessary for binding symbols and rebasing addresses is +// stored directly in the memory location that will have the fixup applied. +// +// The fixups form singly linked lists; each one covering a single page in +// memory. The __LINKEDIT,__chainfixups section stores the page offset of the +// first fixup of each page; the rest can be found by walking the chain using +// the offset that is embedded in each entry. +// +// This setup allows pages to be relocated lazily at page-in time and without +// being dirtied. The kernel can discard and load them again as needed. This +// technique, called page-in linking, was introduced in macOS 13. +// +// The benefits of this format are: +// - smaller __LINKEDIT segment, as most of the fixup information is stored in +// the data segment +// - faster startup, since not all relocations need to be done upfront +// - slightly lower memory usage, as fewer pages are dirtied +// +// Userspace x86_64 and arm64 binaries have two types of fixup entries: +// - Rebase entries contain an absolute address, to which the object's load +// address will be added to get the final value. This is used for loading +// the address of a symbol defined in the same binary. +// - Binding entries are mostly used for symbols imported from other dylibs, +// but for weakly bound and interposable symbols as well. They are looked up +// by a (symbol name, library) pair stored in __chainfixups. This import +// entry also encodes whether the import is weak (i.e. if the symbol is +// missing, it should be set to null instead of producing a load error). +// The fixup encodes an ordinal associated with the import, and an optional +// addend. +// +// The entries are tightly packed 64-bit bitfields. One of the bits specifies +// which kind of fixup to interpret them as. +// +// LLD generates the fixup data in 5 stages: +// 1. While scanning relocations, we make a note of each location that needs +// a fixup by calling addRebase() or addBinding(). During this, we assign +// a unique ordinal for each (symbol name, library, addend) import tuple. +// 2. After addresses have been assigned to all sections, and thus the memory +// layout of the linked image is final; finalizeContents() is called. Here, +// the page offsets of the chain start entries are calculated. +// 3. ChainedFixupsSection::writeTo() writes the page start offsets and the +// imports table to the output file. +// 4. Each section's fixup entries are encoded and written to disk in +// ConcatInputSection::writeTo(), but without writing the offsets that form +// the chain. +// 5. Finally, each page's (which might correspond to multiple sections) +// fixups are linked together in Writer::buildFixupChains(). +class ChainedFixupsSection final : public LinkEditSection { +public: + ChainedFixupsSection(); + void finalizeContents() override; + uint64_t getRawSize() const override { return size; } + bool isNeeded() const override; + void writeTo(uint8_t *buf) const override; + + void addRebase(const InputSection *isec, uint64_t offset) { + locations.emplace_back(isec, offset); + } + void addBinding(const Symbol *dysym, const InputSection *isec, + uint64_t offset, int64_t addend = 0); + + void setHasNonWeakDefinition() { hasNonWeakDef = true; } + + // Returns an (ordinal, inline addend) tuple used by dyld_chained_ptr_64_bind. + std::pair<uint32_t, uint8_t> getBinding(const Symbol *sym, + int64_t addend) const; + + const std::vector<Location> &getLocations() const { return locations; } + + bool hasWeakBinding() const { return hasWeakBind; } + bool hasNonWeakDefinition() const { return hasNonWeakDef; } + +private: + // Location::offset initially stores the offset within an InputSection, but + // contains output segment offsets after finalizeContents(). + std::vector<Location> locations; + // (target symbol, addend) => import ordinal + llvm::MapVector<std::pair<const Symbol *, int64_t>, uint32_t> bindings; + + struct SegmentInfo { + SegmentInfo(const OutputSegment *oseg) : oseg(oseg) {} + + const OutputSegment *oseg; + // (page index, fixup starts offset) + llvm::SmallVector<std::pair<uint16_t, uint16_t>> pageStarts; + + size_t getSize() const; + size_t writeTo(uint8_t *buf) const; + }; + llvm::SmallVector<SegmentInfo, 4> fixupSegments; + + size_t symtabSize = 0; + size_t size = 0; + + bool needsAddend = false; + bool needsLargeAddend = false; + bool hasWeakBind = false; + bool hasNonWeakDef = false; + llvm::MachO::ChainedImportFormat importFormat; +}; + +void writeChainedRebase(uint8_t *buf, uint64_t targetVA); +void writeChainedFixup(uint8_t *buf, const Symbol *sym, int64_t addend); + struct InStruct { + const uint8_t *bufferStart = nullptr; MachHeaderSection *header = nullptr; CStringSection *cStringSection = nullptr; + DeduplicatedCStringSection *objcMethnameSection = nullptr; WordLiteralSection *wordLiteralSection = nullptr; RebaseSection *rebase = nullptr; BindingSection *binding = nullptr; @@ -602,8 +804,13 @@ LazyPointerSection *lazyPointers = nullptr; StubsSection *stubs = nullptr; StubHelperSection *stubHelper = nullptr; + ObjCStubsSection *objcStubs = nullptr; + ConcatInputSection *objcSelrefs = nullptr; UnwindInfoSection *unwindInfo = nullptr; + ObjCImageInfoSection *objCImageInfo = nullptr; ConcatInputSection *imageLoaderCache = nullptr; + InitOffsetsSection *initOffsets = nullptr; + ChainedFixupsSection *chainedFixups = nullptr; }; extern InStruct in; @@ -611,7 +818,6 @@ void createSyntheticSymbols(); -} // namespace macho -} // namespace lld +} // namespace lld::macho #endif