Mercurial > hg > CbC > CbC_llvm
diff lld/MachO/Symbols.h @ 207:2e18cbf3894f
LLVM12
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 08 Jun 2021 06:07:14 +0900 |
parents | 0572611fdcc8 |
children | 5f17cb93ff66 |
line wrap: on
line diff
--- a/lld/MachO/Symbols.h Mon May 25 11:55:54 2020 +0900 +++ b/lld/MachO/Symbols.h Tue Jun 08 06:07:14 2021 +0900 @@ -9,17 +9,19 @@ #ifndef LLD_MACHO_SYMBOLS_H #define LLD_MACHO_SYMBOLS_H +#include "InputFiles.h" #include "InputSection.h" #include "Target.h" +#include "lld/Common/ErrorHandler.h" #include "lld/Common/Strings.h" #include "llvm/Object/Archive.h" +#include "llvm/Support/MathExtras.h" namespace lld { namespace macho { class InputSection; -class DylibFile; -class ArchiveFile; +class MachHeaderSection; struct StringRefZ { StringRefZ(const char *s) : data(s), size(-1) {} @@ -34,95 +36,291 @@ enum Kind { DefinedKind, UndefinedKind, + CommonKind, DylibKind, LazyKind, }; - Kind kind() const { return static_cast<Kind>(symbolKind); } + virtual ~Symbol() {} + + Kind kind() const { return symbolKind; } + + StringRef getName() const { + if (nameSize == (uint32_t)-1) + nameSize = strlen(nameData); + return {nameData, nameSize}; + } + + bool isLive() const; + + virtual uint64_t getVA() const { return 0; } + + virtual uint64_t getFileOffset() const { + llvm_unreachable("attempt to get an offset from a non-defined symbol"); + } + + virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); } + + // Only undefined or dylib symbols can be weak references. A weak reference + // need not be satisfied at runtime, e.g. due to the symbol not being + // available on a given target platform. + virtual bool isWeakRef() const { llvm_unreachable("cannot be a weak ref"); } + + virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); } - StringRef getName() const { return {name.data, name.size}; } + // Whether this symbol is in the GOT or TLVPointer sections. + bool isInGot() const { return gotIndex != UINT32_MAX; } + + // Whether this symbol is in the StubsSection. + bool isInStubs() const { return stubsIndex != UINT32_MAX; } - uint64_t getVA() const; + uint64_t getStubVA() const; + uint64_t getGotVA() const; + uint64_t getTlvVA() const; + uint64_t resolveBranchVA() const { + assert(isa<Defined>(this) || isa<DylibSymbol>(this)); + return isInStubs() ? getStubVA() : getVA(); + } + uint64_t resolveGotVA() const { return isInGot() ? getGotVA() : getVA(); } + uint64_t resolveTlvVA() const { return isInGot() ? getTlvVA() : getVA(); } + + // The index of this symbol in the GOT or the TLVPointer section, depending + // on whether it is a thread-local. A given symbol cannot be referenced by + // both these sections at once. + uint32_t gotIndex = UINT32_MAX; + + uint32_t stubsIndex = UINT32_MAX; + + uint32_t symtabIndex = UINT32_MAX; + + InputFile *getFile() const { return file; } protected: - Symbol(Kind k, StringRefZ name) : symbolKind(k), name(name) {} + Symbol(Kind k, StringRefZ name, InputFile *file) + : symbolKind(k), nameData(name.data), nameSize(name.size), file(file), + isUsedInRegularObj(!file || isa<ObjFile>(file)), + used(!config->deadStrip) {} Kind symbolKind; - StringRefZ name; + const char *nameData; + mutable uint32_t nameSize; + InputFile *file; + +public: + // True if this symbol was referenced by a regular (non-bitcode) object. + bool isUsedInRegularObj : 1; + + // True if an undefined or dylib symbol is used from a live section. + bool used : 1; }; class Defined : public Symbol { public: - Defined(StringRefZ name, InputSection *isec, uint32_t value) - : Symbol(DefinedKind, name), isec(isec), value(value) {} + Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value, + uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern, + bool isThumb, bool isReferencedDynamically, bool noDeadStrip) + : Symbol(DefinedKind, name, file), isec(isec), value(value), size(size), + overridesWeakDef(false), privateExtern(isPrivateExtern), + includeInSymtab(true), thumb(isThumb), + referencedDynamically(isReferencedDynamically), + noDeadStrip(noDeadStrip), weakDef(isWeakDef), external(isExternal) { + if (isec) + isec->numRefs++; + } + + bool isWeakDef() const override { return weakDef; } + bool isExternalWeakDef() const { + return isWeakDef() && isExternal() && !privateExtern; + } + bool isTlv() const override { + return !isAbsolute() && isThreadLocalVariables(isec->flags); + } + + bool isExternal() const { return external; } + bool isAbsolute() const { return isec == nullptr; } + + uint64_t getVA() const override; + uint64_t getFileOffset() const override; + + static bool classof(const Symbol *s) { return s->kind() == DefinedKind; } InputSection *isec; - uint32_t value; + // Contains the offset from the containing subsection. Note that this is + // different from nlist::n_value, which is the absolute address of the symbol. + uint64_t value; + // size is only calculated for regular (non-bitcode) symbols. + uint64_t size; - static bool classof(const Symbol *s) { return s->kind() == DefinedKind; } + bool overridesWeakDef : 1; + // Whether this symbol should appear in the output binary's export trie. + bool privateExtern : 1; + // Whether this symbol should appear in the output symbol table. + bool includeInSymtab : 1; + // Only relevant when compiling for Thumb-supporting arm32 archs. + bool thumb : 1; + // Symbols marked referencedDynamically won't be removed from the output's + // symbol table by tools like strip. In theory, this could be set on arbitrary + // symbols in input object files. In practice, it's used solely for the + // synthetic __mh_execute_header symbol. + // This is information for the static linker, and it's also written to the + // output file's symbol table for tools running later (such as `strip`). + bool referencedDynamically : 1; + // Set on symbols that should not be removed by dead code stripping. + // Set for example on `__attribute__((used))` globals, or on some Objective-C + // metadata. This is information only for the static linker and not written + // to the output. + bool noDeadStrip : 1; + +private: + const bool weakDef : 1; + const bool external : 1; }; +// This enum does double-duty: as a symbol property, it indicates whether & how +// a dylib symbol is referenced. As a DylibFile property, it indicates the kind +// of referenced symbols contained within the file. If there are both weak +// and strong references to the same file, we will count the file as +// strongly-referenced. +enum class RefState : uint8_t { Unreferenced = 0, Weak = 1, Strong = 2 }; + class Undefined : public Symbol { public: - Undefined(StringRefZ name) : Symbol(UndefinedKind, name) {} + Undefined(StringRefZ name, InputFile *file, RefState refState) + : Symbol(UndefinedKind, name, file), refState(refState) { + assert(refState != RefState::Unreferenced); + } + + bool isWeakRef() const override { return refState == RefState::Weak; } static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } + + RefState refState : 2; +}; + +// On Unix, it is traditionally allowed to write variable definitions without +// initialization expressions (such as "int foo;") to header files. These are +// called tentative definitions. +// +// Using tentative definitions is usually considered a bad practice; you should +// write only declarations (such as "extern int foo;") to header files. +// Nevertheless, the linker and the compiler have to do something to support +// bad code by allowing duplicate definitions for this particular case. +// +// The compiler creates common symbols when it sees tentative definitions. +// (You can suppress this behavior and let the compiler create a regular +// defined symbol by passing -fno-common. -fno-common is the default in clang +// as of LLVM 11.0.) When linking the final binary, if there are remaining +// common symbols after name resolution is complete, the linker converts them +// to regular defined symbols in a __common section. +class CommonSymbol : public Symbol { +public: + CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align, + bool isPrivateExtern) + : Symbol(CommonKind, name, file), size(size), + align(align != 1 ? align : llvm::PowerOf2Ceil(size)), + privateExtern(isPrivateExtern) { + // TODO: cap maximum alignment + } + + static bool classof(const Symbol *s) { return s->kind() == CommonKind; } + + const uint64_t size; + const uint32_t align; + const bool privateExtern; }; class DylibSymbol : public Symbol { public: - DylibSymbol(DylibFile *file, StringRefZ name) - : Symbol(DylibKind, name), file(file) {} + DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef, + RefState refState, bool isTlv) + : Symbol(DylibKind, name, file), refState(refState), weakDef(isWeakDef), + tlv(isTlv) { + if (file && refState > RefState::Unreferenced) + file->numReferencedSymbols++; + } + + uint64_t getVA() const override; + bool isWeakDef() const override { return weakDef; } + bool isWeakRef() const override { return refState == RefState::Weak; } + bool isReferenced() const { return refState != RefState::Unreferenced; } + bool isTlv() const override { return tlv; } + bool isDynamicLookup() const { return file == nullptr; } + bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; } + + DylibFile *getFile() const { + assert(!isDynamicLookup()); + return cast<DylibFile>(file); + } static bool classof(const Symbol *s) { return s->kind() == DylibKind; } - DylibFile *file; - uint32_t gotIndex = UINT32_MAX; - uint32_t stubsIndex = UINT32_MAX; + uint32_t stubsHelperIndex = UINT32_MAX; uint32_t lazyBindOffset = UINT32_MAX; + + RefState getRefState() const { return refState; } + + void reference(RefState newState) { + assert(newState > RefState::Unreferenced); + if (refState == RefState::Unreferenced && file) + getFile()->numReferencedSymbols++; + refState = std::max(refState, newState); + } + + void unreference() { + // dynamic_lookup symbols have no file. + if (refState > RefState::Unreferenced && file) { + assert(getFile()->numReferencedSymbols > 0); + getFile()->numReferencedSymbols--; + } + } + +private: + RefState refState : 2; + const bool weakDef : 1; + const bool tlv : 1; }; class LazySymbol : public Symbol { public: LazySymbol(ArchiveFile *file, const llvm::object::Archive::Symbol &sym) - : Symbol(LazyKind, sym.getName()), file(file), sym(sym) {} + : Symbol(LazyKind, sym.getName(), file), sym(sym) {} + + ArchiveFile *getFile() const { return cast<ArchiveFile>(file); } + void fetchArchiveMember(); static bool classof(const Symbol *s) { return s->kind() == LazyKind; } - void fetchArchiveMember(); - private: - ArchiveFile *file; const llvm::object::Archive::Symbol sym; }; -inline uint64_t Symbol::getVA() const { - if (auto *d = dyn_cast<Defined>(this)) - return d->isec->getVA() + d->value; - return 0; -} - union SymbolUnion { alignas(Defined) char a[sizeof(Defined)]; alignas(Undefined) char b[sizeof(Undefined)]; - alignas(DylibSymbol) char c[sizeof(DylibSymbol)]; - alignas(LazySymbol) char d[sizeof(LazySymbol)]; + alignas(CommonSymbol) char c[sizeof(CommonSymbol)]; + alignas(DylibSymbol) char d[sizeof(DylibSymbol)]; + alignas(LazySymbol) char e[sizeof(LazySymbol)]; }; template <typename T, typename... ArgT> -void replaceSymbol(Symbol *s, ArgT &&... arg) { +T *replaceSymbol(Symbol *s, ArgT &&...arg) { static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small"); static_assert(alignof(T) <= alignof(SymbolUnion), "SymbolUnion not aligned enough"); assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr && "Not a Symbol"); - new (s) T(std::forward<ArgT>(arg)...); + bool isUsedInRegularObj = s->isUsedInRegularObj; + T *sym = new (s) T(std::forward<ArgT>(arg)...); + sym->isUsedInRegularObj |= isUsedInRegularObj; + return sym; } } // namespace macho std::string toString(const macho::Symbol &); +std::string toMachOString(const llvm::object::Archive::Symbol &); + } // namespace lld #endif