173
|
1 //===- ExportTrie.cpp -----------------------------------------------------===//
|
|
2 //
|
|
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
4 // See https://llvm.org/LICENSE.txt for license information.
|
|
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
6 //
|
|
7 //===----------------------------------------------------------------------===//
|
|
8 //
|
|
9 // This is a partial implementation of the Mach-O export trie format. It's
|
|
10 // essentially a symbol table encoded as a compressed prefix trie, meaning that
|
|
11 // the common prefixes of each symbol name are shared for a more compact
|
|
12 // representation. The prefixes are stored on the edges of the trie, and one
|
|
13 // edge can represent multiple characters. For example, given two exported
|
|
14 // symbols _bar and _baz, we will have a trie like this (terminal nodes are
|
|
15 // marked with an asterisk):
|
|
16 //
|
|
//              +-+-+
//              |   | // root node
//              +-+-+
//                |
//                | _ba
//                |
//              +-+-+
//              |   |
//              +-+-+
//           r /     \ z
//            /       \
//        +-+-+       +-+-+
//        | * |       | * |
//        +-+-+       +-+-+
|
|
31 //
|
|
32 // More documentation of the format can be found in
|
|
33 // llvm/tools/obj2yaml/macho2yaml.cpp.
|
|
34 //
|
|
35 //===----------------------------------------------------------------------===//
|
|
36
|
|
37 #include "ExportTrie.h"
|
|
38 #include "Symbols.h"
|
|
39
|
|
40 #include "lld/Common/ErrorHandler.h"
|
|
41 #include "lld/Common/Memory.h"
|
|
42 #include "llvm/ADT/Optional.h"
|
|
43 #include "llvm/BinaryFormat/MachO.h"
|
|
44 #include "llvm/Support/LEB128.h"
|
|
45
|
|
46 using namespace llvm;
|
|
47 using namespace lld;
|
|
48 using namespace lld::macho;
|
|
49
|
|
50 namespace {
|
|
51
|
|
52 struct Edge {
|
|
53 Edge(StringRef s, TrieNode *node) : substring(s), child(node) {}
|
|
54
|
|
55 StringRef substring;
|
|
56 struct TrieNode *child;
|
|
57 };
|
|
58
|
|
59 struct ExportInfo {
|
|
60 uint64_t address;
|
207
|
61 uint8_t flags = 0;
|
|
62 ExportInfo(const Symbol &sym, uint64_t imageBase)
|
|
63 : address(sym.getVA() - imageBase) {
|
|
64 using namespace llvm::MachO;
|
|
65 // Set the symbol type.
|
|
66 if (sym.isWeakDef())
|
|
67 flags |= EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION;
|
|
68 // TODO: Add proper support for re-exports & stub-and-resolver flags.
|
|
69
|
|
70 // Set the symbol kind.
|
|
71 if (sym.isTlv()) {
|
|
72 flags |= EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL;
|
|
73 } else if (auto *defined = dyn_cast<Defined>(&sym)) {
|
|
74 if (defined->isAbsolute())
|
|
75 flags |= EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE;
|
|
76 }
|
|
77 }
|
173
|
78 };
|
|
79
|
|
80 } // namespace
|
|
81
|
207
|
82 struct macho::TrieNode {
|
173
|
83 std::vector<Edge> edges;
|
|
84 Optional<ExportInfo> info;
|
|
85 // Estimated offset from the start of the serialized trie to the current node.
|
|
86 // This will converge to the true offset when updateOffset() is run to a
|
|
87 // fixpoint.
|
|
88 size_t offset = 0;
|
|
89
|
|
90 // Returns whether the new estimated offset differs from the old one.
|
|
91 bool updateOffset(size_t &nextOffset);
|
|
92 void writeTo(uint8_t *buf) const;
|
|
93 };
|
|
94
|
|
95 bool TrieNode::updateOffset(size_t &nextOffset) {
|
|
96 // Size of the whole node (including the terminalSize and the outgoing edges.)
|
|
97 // In contrast, terminalSize only records the size of the other data in the
|
|
98 // node.
|
|
99 size_t nodeSize;
|
|
100 if (info) {
|
|
101 uint32_t terminalSize =
|
207
|
102 getULEB128Size(info->flags) + getULEB128Size(info->address);
|
173
|
103 // Overall node size so far is the uleb128 size of the length of the symbol
|
|
104 // info + the symbol info itself.
|
|
105 nodeSize = terminalSize + getULEB128Size(terminalSize);
|
|
106 } else {
|
|
107 nodeSize = 1; // Size of terminalSize (which has a value of 0)
|
|
108 }
|
|
109 // Compute size of all child edges.
|
|
110 ++nodeSize; // Byte for number of children.
|
207
|
111 for (const Edge &edge : edges) {
|
173
|
112 nodeSize += edge.substring.size() + 1 // String length.
|
|
113 + getULEB128Size(edge.child->offset); // Offset len.
|
|
114 }
|
|
115 // On input, 'nextOffset' is the new preferred location for this node.
|
|
116 bool result = (offset != nextOffset);
|
|
117 // Store new location in node object for use by parents.
|
|
118 offset = nextOffset;
|
|
119 nextOffset += nodeSize;
|
|
120 return result;
|
|
121 }
|
|
122
|
|
123 void TrieNode::writeTo(uint8_t *buf) const {
|
|
124 buf += offset;
|
|
125 if (info) {
|
|
126 // TrieNodes with Symbol info: size, flags address
|
|
127 uint32_t terminalSize =
|
207
|
128 getULEB128Size(info->flags) + getULEB128Size(info->address);
|
173
|
129 buf += encodeULEB128(terminalSize, buf);
|
207
|
130 buf += encodeULEB128(info->flags, buf);
|
173
|
131 buf += encodeULEB128(info->address, buf);
|
|
132 } else {
|
|
133 // TrieNode with no Symbol info.
|
|
134 *buf++ = 0; // terminalSize
|
|
135 }
|
|
136 // Add number of children. TODO: Handle case where we have more than 256.
|
|
137 assert(edges.size() < 256);
|
|
138 *buf++ = edges.size();
|
|
139 // Append each child edge substring and node offset.
|
|
140 for (const Edge &edge : edges) {
|
|
141 memcpy(buf, edge.substring.data(), edge.substring.size());
|
|
142 buf += edge.substring.size();
|
|
143 *buf++ = '\0';
|
|
144 buf += encodeULEB128(edge.child->offset, buf);
|
|
145 }
|
|
146 }
|
|
147
|
|
148 TrieNode *TrieBuilder::makeNode() {
|
|
149 auto *node = make<TrieNode>();
|
|
150 nodes.emplace_back(node);
|
|
151 return node;
|
|
152 }
|
|
153
|
|
154 static int charAt(const Symbol *sym, size_t pos) {
|
|
155 StringRef str = sym->getName();
|
|
156 if (pos >= str.size())
|
|
157 return -1;
|
|
158 return str[pos];
|
|
159 }
|
|
160
|
|
161 // Build the trie by performing a three-way radix quicksort: We start by sorting
|
|
162 // the strings by their first characters, then sort the strings with the same
|
|
163 // first characters by their second characters, and so on recursively. Each
|
|
164 // time the prefixes diverge, we add a node to the trie.
|
|
165 //
|
|
166 // node: The most recently created node along this path in the trie (i.e.
|
|
167 // the furthest from the root.)
|
|
168 // lastPos: The prefix length of the most recently created node, i.e. the number
|
|
169 // of characters along its path from the root.
|
|
170 // pos: The string index we are currently sorting on. Note that each symbol
|
|
171 // S contained in vec has the same prefix S[0...pos).
|
|
172 void TrieBuilder::sortAndBuild(MutableArrayRef<const Symbol *> vec,
|
|
173 TrieNode *node, size_t lastPos, size_t pos) {
|
|
174 tailcall:
|
|
175 if (vec.empty())
|
|
176 return;
|
|
177
|
|
178 // Partition items so that items in [0, i) are less than the pivot,
|
|
179 // [i, j) are the same as the pivot, and [j, vec.size()) are greater than
|
|
180 // the pivot.
|
|
181 const Symbol *pivotSymbol = vec[vec.size() / 2];
|
|
182 int pivot = charAt(pivotSymbol, pos);
|
|
183 size_t i = 0;
|
|
184 size_t j = vec.size();
|
|
185 for (size_t k = 0; k < j;) {
|
|
186 int c = charAt(vec[k], pos);
|
|
187 if (c < pivot)
|
|
188 std::swap(vec[i++], vec[k++]);
|
|
189 else if (c > pivot)
|
|
190 std::swap(vec[--j], vec[k]);
|
|
191 else
|
|
192 k++;
|
|
193 }
|
|
194
|
|
195 bool isTerminal = pivot == -1;
|
|
196 bool prefixesDiverge = i != 0 || j != vec.size();
|
|
197 if (lastPos != pos && (isTerminal || prefixesDiverge)) {
|
|
198 TrieNode *newNode = makeNode();
|
|
199 node->edges.emplace_back(pivotSymbol->getName().slice(lastPos, pos),
|
|
200 newNode);
|
|
201 node = newNode;
|
|
202 lastPos = pos;
|
|
203 }
|
|
204
|
|
205 sortAndBuild(vec.slice(0, i), node, lastPos, pos);
|
|
206 sortAndBuild(vec.slice(j), node, lastPos, pos);
|
|
207
|
|
208 if (isTerminal) {
|
|
209 assert(j - i == 1); // no duplicate symbols
|
207
|
210 node->info = ExportInfo(*pivotSymbol, imageBase);
|
173
|
211 } else {
|
|
212 // This is the tail-call-optimized version of the following:
|
|
213 // sortAndBuild(vec.slice(i, j - i), node, lastPos, pos + 1);
|
|
214 vec = vec.slice(i, j - i);
|
|
215 ++pos;
|
|
216 goto tailcall;
|
|
217 }
|
|
218 }
|
|
219
|
|
220 size_t TrieBuilder::build() {
|
|
221 if (exported.empty())
|
|
222 return 0;
|
|
223
|
|
224 TrieNode *root = makeNode();
|
|
225 sortAndBuild(exported, root, 0, 0);
|
|
226
|
|
227 // Assign each node in the vector an offset in the trie stream, iterating
|
|
228 // until all uleb128 sizes have stabilized.
|
|
229 size_t offset;
|
|
230 bool more;
|
|
231 do {
|
|
232 offset = 0;
|
|
233 more = false;
|
|
234 for (TrieNode *node : nodes)
|
|
235 more |= node->updateOffset(offset);
|
|
236 } while (more);
|
|
237
|
|
238 return offset;
|
|
239 }
|
|
240
|
|
241 void TrieBuilder::writeTo(uint8_t *buf) const {
|
|
242 for (TrieNode *node : nodes)
|
|
243 node->writeTo(buf);
|
|
244 }
|
|
245
|
|
246 namespace {
|
|
247
|
|
248 // Parse a serialized trie and invoke a callback for each entry.
|
|
249 class TrieParser {
|
|
250 public:
|
|
251 TrieParser(const uint8_t *buf, size_t size, const TrieEntryCallback &callback)
|
|
252 : start(buf), end(start + size), callback(callback) {}
|
|
253
|
|
254 void parse(const uint8_t *buf, const Twine &cumulativeString);
|
|
255
|
|
256 void parse() { parse(start, ""); }
|
|
257
|
|
258 const uint8_t *start;
|
|
259 const uint8_t *end;
|
|
260 const TrieEntryCallback &callback;
|
|
261 };
|
|
262
|
|
263 } // namespace
|
|
264
|
|
265 void TrieParser::parse(const uint8_t *buf, const Twine &cumulativeString) {
|
|
266 if (buf >= end)
|
|
267 fatal("Node offset points outside export section");
|
|
268
|
|
269 unsigned ulebSize;
|
|
270 uint64_t terminalSize = decodeULEB128(buf, &ulebSize);
|
|
271 buf += ulebSize;
|
|
272 uint64_t flags = 0;
|
|
273 size_t offset;
|
|
274 if (terminalSize != 0) {
|
|
275 flags = decodeULEB128(buf, &ulebSize);
|
|
276 callback(cumulativeString, flags);
|
|
277 }
|
|
278 buf += terminalSize;
|
|
279 uint8_t numEdges = *buf++;
|
|
280 for (uint8_t i = 0; i < numEdges; ++i) {
|
|
281 const char *cbuf = reinterpret_cast<const char *>(buf);
|
|
282 StringRef substring = StringRef(cbuf, strnlen(cbuf, end - buf));
|
|
283 buf += substring.size() + 1;
|
|
284 offset = decodeULEB128(buf, &ulebSize);
|
|
285 buf += ulebSize;
|
|
286 parse(start + offset, cumulativeString + substring);
|
|
287 }
|
|
288 }
|
|
289
|
207
|
290 void macho::parseTrie(const uint8_t *buf, size_t size,
|
|
291 const TrieEntryCallback &callback) {
|
173
|
292 if (size == 0)
|
|
293 return;
|
|
294
|
|
295 TrieParser(buf, size, callback).parse();
|
|
296 }
|