173
|
1 //===- Symbols.h ------------------------------------------------*- C++ -*-===//
|
|
2 //
|
|
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
4 // See https://llvm.org/LICENSE.txt for license information.
|
|
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
6 //
|
|
7 //===----------------------------------------------------------------------===//
|
|
8
|
|
9 #ifndef LLD_MACHO_SYMBOLS_H
|
|
10 #define LLD_MACHO_SYMBOLS_H
|
|
11
|
207
|
12 #include "InputFiles.h"
|
173
|
13 #include "InputSection.h"
|
|
14 #include "Target.h"
|
207
|
15 #include "lld/Common/ErrorHandler.h"
|
173
|
16 #include "lld/Common/Strings.h"
|
|
17 #include "llvm/Object/Archive.h"
|
207
|
18 #include "llvm/Support/MathExtras.h"
|
173
|
19
|
|
20 namespace lld {
|
|
21 namespace macho {
|
|
22
|
|
23 class InputSection;
|
207
|
24 class MachHeaderSection;
|
173
|
25
|
|
26 struct StringRefZ {
|
|
27 StringRefZ(const char *s) : data(s), size(-1) {}
|
|
28 StringRefZ(StringRef s) : data(s.data()), size(s.size()) {}
|
|
29
|
|
30 const char *data;
|
|
31 const uint32_t size;
|
|
32 };
|
|
33
|
|
34 class Symbol {
|
|
35 public:
|
|
36 enum Kind {
|
|
37 DefinedKind,
|
|
38 UndefinedKind,
|
207
|
39 CommonKind,
|
173
|
40 DylibKind,
|
|
41 LazyKind,
|
|
42 };
|
|
43
|
207
|
44 virtual ~Symbol() {}
|
|
45
|
|
46 Kind kind() const { return symbolKind; }
|
|
47
|
|
48 StringRef getName() const {
|
|
49 if (nameSize == (uint32_t)-1)
|
|
50 nameSize = strlen(nameData);
|
|
51 return {nameData, nameSize};
|
|
52 }
|
|
53
|
|
54 bool isLive() const;
|
|
55
|
|
56 virtual uint64_t getVA() const { return 0; }
|
|
57
|
|
58 virtual uint64_t getFileOffset() const {
|
|
59 llvm_unreachable("attempt to get an offset from a non-defined symbol");
|
|
60 }
|
|
61
|
|
62 virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); }
|
|
63
|
|
64 // Only undefined or dylib symbols can be weak references. A weak reference
|
|
65 // need not be satisfied at runtime, e.g. due to the symbol not being
|
|
66 // available on a given target platform.
|
|
67 virtual bool isWeakRef() const { llvm_unreachable("cannot be a weak ref"); }
|
|
68
|
|
69 virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); }
|
173
|
70
|
207
|
71 // Whether this symbol is in the GOT or TLVPointer sections.
|
|
72 bool isInGot() const { return gotIndex != UINT32_MAX; }
|
|
73
|
|
74 // Whether this symbol is in the StubsSection.
|
|
75 bool isInStubs() const { return stubsIndex != UINT32_MAX; }
|
173
|
76
|
207
|
77 uint64_t getStubVA() const;
|
|
78 uint64_t getGotVA() const;
|
|
79 uint64_t getTlvVA() const;
|
|
80 uint64_t resolveBranchVA() const {
|
|
81 assert(isa<Defined>(this) || isa<DylibSymbol>(this));
|
|
82 return isInStubs() ? getStubVA() : getVA();
|
|
83 }
|
|
84 uint64_t resolveGotVA() const { return isInGot() ? getGotVA() : getVA(); }
|
|
85 uint64_t resolveTlvVA() const { return isInGot() ? getTlvVA() : getVA(); }
|
|
86
|
|
87 // The index of this symbol in the GOT or the TLVPointer section, depending
|
|
88 // on whether it is a thread-local. A given symbol cannot be referenced by
|
|
89 // both these sections at once.
|
|
90 uint32_t gotIndex = UINT32_MAX;
|
|
91
|
|
92 uint32_t stubsIndex = UINT32_MAX;
|
|
93
|
|
94 uint32_t symtabIndex = UINT32_MAX;
|
|
95
|
|
96 InputFile *getFile() const { return file; }
|
173
|
97
|
|
98 protected:
|
207
|
99 Symbol(Kind k, StringRefZ name, InputFile *file)
|
|
100 : symbolKind(k), nameData(name.data), nameSize(name.size), file(file),
|
|
101 isUsedInRegularObj(!file || isa<ObjFile>(file)),
|
|
102 used(!config->deadStrip) {}
|
173
|
103
|
|
104 Kind symbolKind;
|
207
|
105 const char *nameData;
|
|
106 mutable uint32_t nameSize;
|
|
107 InputFile *file;
|
|
108
|
|
109 public:
|
|
110 // True if this symbol was referenced by a regular (non-bitcode) object.
|
|
111 bool isUsedInRegularObj : 1;
|
|
112
|
|
113 // True if an undefined or dylib symbol is used from a live section.
|
|
114 bool used : 1;
|
173
|
115 };
|
|
116
|
|
117 class Defined : public Symbol {
|
|
118 public:
|
207
|
119 Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value,
|
|
120 uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern,
|
|
121 bool isThumb, bool isReferencedDynamically, bool noDeadStrip)
|
|
122 : Symbol(DefinedKind, name, file), isec(isec), value(value), size(size),
|
|
123 overridesWeakDef(false), privateExtern(isPrivateExtern),
|
|
124 includeInSymtab(true), thumb(isThumb),
|
|
125 referencedDynamically(isReferencedDynamically),
|
|
126 noDeadStrip(noDeadStrip), weakDef(isWeakDef), external(isExternal) {
|
|
127 if (isec)
|
|
128 isec->numRefs++;
|
|
129 }
|
|
130
|
|
131 bool isWeakDef() const override { return weakDef; }
|
|
132 bool isExternalWeakDef() const {
|
|
133 return isWeakDef() && isExternal() && !privateExtern;
|
|
134 }
|
|
135 bool isTlv() const override {
|
|
136 return !isAbsolute() && isThreadLocalVariables(isec->flags);
|
|
137 }
|
|
138
|
|
139 bool isExternal() const { return external; }
|
|
140 bool isAbsolute() const { return isec == nullptr; }
|
|
141
|
|
142 uint64_t getVA() const override;
|
|
143 uint64_t getFileOffset() const override;
|
|
144
|
|
145 static bool classof(const Symbol *s) { return s->kind() == DefinedKind; }
|
173
|
146
|
|
147 InputSection *isec;
|
207
|
148 // Contains the offset from the containing subsection. Note that this is
|
|
149 // different from nlist::n_value, which is the absolute address of the symbol.
|
|
150 uint64_t value;
|
|
151 // size is only calculated for regular (non-bitcode) symbols.
|
|
152 uint64_t size;
|
173
|
153
|
207
|
154 bool overridesWeakDef : 1;
|
|
155 // Whether this symbol should appear in the output binary's export trie.
|
|
156 bool privateExtern : 1;
|
|
157 // Whether this symbol should appear in the output symbol table.
|
|
158 bool includeInSymtab : 1;
|
|
159 // Only relevant when compiling for Thumb-supporting arm32 archs.
|
|
160 bool thumb : 1;
|
|
161 // Symbols marked referencedDynamically won't be removed from the output's
|
|
162 // symbol table by tools like strip. In theory, this could be set on arbitrary
|
|
163 // symbols in input object files. In practice, it's used solely for the
|
|
164 // synthetic __mh_execute_header symbol.
|
|
165 // This is information for the static linker, and it's also written to the
|
|
166 // output file's symbol table for tools running later (such as `strip`).
|
|
167 bool referencedDynamically : 1;
|
|
168 // Set on symbols that should not be removed by dead code stripping.
|
|
169 // Set for example on `__attribute__((used))` globals, or on some Objective-C
|
|
170 // metadata. This is information only for the static linker and not written
|
|
171 // to the output.
|
|
172 bool noDeadStrip : 1;
|
|
173
|
|
174 private:
|
|
175 const bool weakDef : 1;
|
|
176 const bool external : 1;
|
173
|
177 };
|
|
178
|
207
|
// RefState serves two purposes. On a symbol, it records whether and how a
// dylib symbol is referenced. On a DylibFile, it records the kind of
// referenced symbols the file contains. When a file has both weak and strong
// references, it counts as strongly referenced.
//
// The enumerators are ordered by strength (Unreferenced < Weak < Strong), so
// they can be compared with relational operators.
enum class RefState : uint8_t {
  Unreferenced = 0,
  Weak = 1,
  Strong = 2,
};
|
|
185
|
173
|
186 class Undefined : public Symbol {
|
|
187 public:
|
207
|
188 Undefined(StringRefZ name, InputFile *file, RefState refState)
|
|
189 : Symbol(UndefinedKind, name, file), refState(refState) {
|
|
190 assert(refState != RefState::Unreferenced);
|
|
191 }
|
|
192
|
|
193 bool isWeakRef() const override { return refState == RefState::Weak; }
|
173
|
194
|
|
195 static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
|
207
|
196
|
|
197 RefState refState : 2;
|
|
198 };
|
|
199
|
|
200 // On Unix, it is traditionally allowed to write variable definitions without
|
|
201 // initialization expressions (such as "int foo;") to header files. These are
|
|
202 // called tentative definitions.
|
|
203 //
|
|
204 // Using tentative definitions is usually considered a bad practice; you should
|
|
205 // write only declarations (such as "extern int foo;") to header files.
|
|
206 // Nevertheless, the linker and the compiler have to do something to support
|
|
207 // bad code by allowing duplicate definitions for this particular case.
|
|
208 //
|
|
209 // The compiler creates common symbols when it sees tentative definitions.
|
|
210 // (You can suppress this behavior and let the compiler create a regular
|
|
211 // defined symbol by passing -fno-common. -fno-common is the default in clang
|
|
212 // as of LLVM 11.0.) When linking the final binary, if there are remaining
|
|
213 // common symbols after name resolution is complete, the linker converts them
|
|
214 // to regular defined symbols in a __common section.
|
|
215 class CommonSymbol : public Symbol {
|
|
216 public:
|
|
217 CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align,
|
|
218 bool isPrivateExtern)
|
|
219 : Symbol(CommonKind, name, file), size(size),
|
|
220 align(align != 1 ? align : llvm::PowerOf2Ceil(size)),
|
|
221 privateExtern(isPrivateExtern) {
|
|
222 // TODO: cap maximum alignment
|
|
223 }
|
|
224
|
|
225 static bool classof(const Symbol *s) { return s->kind() == CommonKind; }
|
|
226
|
|
227 const uint64_t size;
|
|
228 const uint32_t align;
|
|
229 const bool privateExtern;
|
173
|
230 };
|
|
231
|
|
232 class DylibSymbol : public Symbol {
|
|
233 public:
|
207
|
234 DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef,
|
|
235 RefState refState, bool isTlv)
|
|
236 : Symbol(DylibKind, name, file), refState(refState), weakDef(isWeakDef),
|
|
237 tlv(isTlv) {
|
|
238 if (file && refState > RefState::Unreferenced)
|
|
239 file->numReferencedSymbols++;
|
|
240 }
|
|
241
|
|
242 uint64_t getVA() const override;
|
|
243 bool isWeakDef() const override { return weakDef; }
|
|
244 bool isWeakRef() const override { return refState == RefState::Weak; }
|
|
245 bool isReferenced() const { return refState != RefState::Unreferenced; }
|
|
246 bool isTlv() const override { return tlv; }
|
|
247 bool isDynamicLookup() const { return file == nullptr; }
|
|
248 bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; }
|
|
249
|
|
250 DylibFile *getFile() const {
|
|
251 assert(!isDynamicLookup());
|
|
252 return cast<DylibFile>(file);
|
|
253 }
|
173
|
254
|
|
255 static bool classof(const Symbol *s) { return s->kind() == DylibKind; }
|
|
256
|
207
|
257 uint32_t stubsHelperIndex = UINT32_MAX;
|
173
|
258 uint32_t lazyBindOffset = UINT32_MAX;
|
207
|
259
|
|
260 RefState getRefState() const { return refState; }
|
|
261
|
|
262 void reference(RefState newState) {
|
|
263 assert(newState > RefState::Unreferenced);
|
|
264 if (refState == RefState::Unreferenced && file)
|
|
265 getFile()->numReferencedSymbols++;
|
|
266 refState = std::max(refState, newState);
|
|
267 }
|
|
268
|
|
269 void unreference() {
|
|
270 // dynamic_lookup symbols have no file.
|
|
271 if (refState > RefState::Unreferenced && file) {
|
|
272 assert(getFile()->numReferencedSymbols > 0);
|
|
273 getFile()->numReferencedSymbols--;
|
|
274 }
|
|
275 }
|
|
276
|
|
277 private:
|
|
278 RefState refState : 2;
|
|
279 const bool weakDef : 1;
|
|
280 const bool tlv : 1;
|
173
|
281 };
|
|
282
|
|
283 class LazySymbol : public Symbol {
|
|
284 public:
|
|
285 LazySymbol(ArchiveFile *file, const llvm::object::Archive::Symbol &sym)
|
207
|
286 : Symbol(LazyKind, sym.getName(), file), sym(sym) {}
|
|
287
|
|
288 ArchiveFile *getFile() const { return cast<ArchiveFile>(file); }
|
|
289 void fetchArchiveMember();
|
173
|
290
|
|
291 static bool classof(const Symbol *s) { return s->kind() == LazyKind; }
|
|
292
|
|
293 private:
|
|
294 const llvm::object::Archive::Symbol sym;
|
|
295 };
|
|
296
|
|
297 union SymbolUnion {
|
|
298 alignas(Defined) char a[sizeof(Defined)];
|
|
299 alignas(Undefined) char b[sizeof(Undefined)];
|
207
|
300 alignas(CommonSymbol) char c[sizeof(CommonSymbol)];
|
|
301 alignas(DylibSymbol) char d[sizeof(DylibSymbol)];
|
|
302 alignas(LazySymbol) char e[sizeof(LazySymbol)];
|
173
|
303 };
|
|
304
|
|
305 template <typename T, typename... ArgT>
|
207
|
306 T *replaceSymbol(Symbol *s, ArgT &&...arg) {
|
173
|
307 static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
|
|
308 static_assert(alignof(T) <= alignof(SymbolUnion),
|
|
309 "SymbolUnion not aligned enough");
|
|
310 assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
|
|
311 "Not a Symbol");
|
|
312
|
207
|
313 bool isUsedInRegularObj = s->isUsedInRegularObj;
|
|
314 T *sym = new (s) T(std::forward<ArgT>(arg)...);
|
|
315 sym->isUsedInRegularObj |= isUsedInRegularObj;
|
|
316 return sym;
|
173
|
317 }
|
|
318
|
|
319 } // namespace macho
|
|
320
|
|
321 std::string toString(const macho::Symbol &);
|
207
|
322 std::string toMachOString(const llvm::object::Archive::Symbol &);
|
|
323
|
173
|
324 } // namespace lld
|
|
325
|
|
326 #endif
|