annotate lld/MachO/ExportTrie.cpp @ 207:2e18cbf3894f

LLVM12
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Tue, 08 Jun 2021 06:07:14 +0900
parents 0572611fdcc8
children c4bab56944e8
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1 //===- ExportTrie.cpp -----------------------------------------------------===//
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2 //
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
4 // See https://llvm.org/LICENSE.txt for license information.
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
6 //
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
7 //===----------------------------------------------------------------------===//
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
8 //
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
9 // This is a partial implementation of the Mach-O export trie format. It's
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
10 // essentially a symbol table encoded as a compressed prefix trie, meaning that
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
11 // the common prefixes of each symbol name are shared for a more compact
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
12 // representation. The prefixes are stored on the edges of the trie, and one
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
13 // edge can represent multiple characters. For example, given two exported
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
14 // symbols _bar and _baz, we will have a trie like this (terminal nodes are
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
15 // marked with an asterisk):
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
16 //
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
17 // +-+-+
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
18 // | | // root node
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
19 // +-+-+
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
20 // |
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
21 // | _ba
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
22 // |
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
23 // +-+-+
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
24 // | |
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
25 // +-+-+
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
26 // r / \ z
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
27 // / \
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
28 // +-+-+ +-+-+
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
29 // | * | | * |
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
30 // +-+-+ +-+-+
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
31 //
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
32 // More documentation of the format can be found in
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
33 // llvm/tools/obj2yaml/macho2yaml.cpp.
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
34 //
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
35 //===----------------------------------------------------------------------===//
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
36
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
37 #include "ExportTrie.h"
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
38 #include "Symbols.h"
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
39
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
40 #include "lld/Common/ErrorHandler.h"
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
41 #include "lld/Common/Memory.h"
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
42 #include "llvm/ADT/Optional.h"
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
43 #include "llvm/BinaryFormat/MachO.h"
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
44 #include "llvm/Support/LEB128.h"
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
45
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
46 using namespace llvm;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
47 using namespace lld;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
48 using namespace lld::macho;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
49
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
50 namespace {
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
51
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
52 struct Edge {
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
53 Edge(StringRef s, TrieNode *node) : substring(s), child(node) {}
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
54
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
55 StringRef substring;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
56 struct TrieNode *child;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
57 };
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
58
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
59 struct ExportInfo {
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
60 uint64_t address;
207
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
61 uint8_t flags = 0;
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
62 ExportInfo(const Symbol &sym, uint64_t imageBase)
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
63 : address(sym.getVA() - imageBase) {
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
64 using namespace llvm::MachO;
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
65 // Set the symbol type.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
66 if (sym.isWeakDef())
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
67 flags |= EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION;
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
68 // TODO: Add proper support for re-exports & stub-and-resolver flags.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
69
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
70 // Set the symbol kind.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
71 if (sym.isTlv()) {
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
72 flags |= EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL;
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
73 } else if (auto *defined = dyn_cast<Defined>(&sym)) {
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
74 if (defined->isAbsolute())
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
75 flags |= EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE;
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
76 }
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
77 }
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
78 };
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
79
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
80 } // namespace
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
81
207
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
82 struct macho::TrieNode {
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
83 std::vector<Edge> edges;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
84 Optional<ExportInfo> info;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
85 // Estimated offset from the start of the serialized trie to the current node.
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
86 // This will converge to the true offset when updateOffset() is run to a
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
87 // fixpoint.
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
88 size_t offset = 0;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
89
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
90 // Returns whether the new estimated offset differs from the old one.
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
91 bool updateOffset(size_t &nextOffset);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
92 void writeTo(uint8_t *buf) const;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
93 };
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
94
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
95 bool TrieNode::updateOffset(size_t &nextOffset) {
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
96 // Size of the whole node (including the terminalSize and the outgoing edges.)
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
97 // In contrast, terminalSize only records the size of the other data in the
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
98 // node.
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
99 size_t nodeSize;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
100 if (info) {
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
101 uint32_t terminalSize =
207
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
102 getULEB128Size(info->flags) + getULEB128Size(info->address);
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
103 // Overall node size so far is the uleb128 size of the length of the symbol
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
104 // info + the symbol info itself.
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
105 nodeSize = terminalSize + getULEB128Size(terminalSize);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
106 } else {
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
107 nodeSize = 1; // Size of terminalSize (which has a value of 0)
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
108 }
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
109 // Compute size of all child edges.
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
110 ++nodeSize; // Byte for number of children.
207
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
111 for (const Edge &edge : edges) {
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
112 nodeSize += edge.substring.size() + 1 // String length.
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
113 + getULEB128Size(edge.child->offset); // Offset len.
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
114 }
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
115 // On input, 'nextOffset' is the new preferred location for this node.
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
116 bool result = (offset != nextOffset);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
117 // Store new location in node object for use by parents.
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
118 offset = nextOffset;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
119 nextOffset += nodeSize;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
120 return result;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
121 }
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
122
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
123 void TrieNode::writeTo(uint8_t *buf) const {
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
124 buf += offset;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
125 if (info) {
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
126 // TrieNodes with Symbol info: size, flags address
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
127 uint32_t terminalSize =
207
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
128 getULEB128Size(info->flags) + getULEB128Size(info->address);
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
129 buf += encodeULEB128(terminalSize, buf);
207
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
130 buf += encodeULEB128(info->flags, buf);
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
131 buf += encodeULEB128(info->address, buf);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
132 } else {
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
133 // TrieNode with no Symbol info.
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
134 *buf++ = 0; // terminalSize
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
135 }
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
136 // Add number of children. TODO: Handle case where we have more than 256.
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
137 assert(edges.size() < 256);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
138 *buf++ = edges.size();
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
139 // Append each child edge substring and node offset.
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
140 for (const Edge &edge : edges) {
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
141 memcpy(buf, edge.substring.data(), edge.substring.size());
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
142 buf += edge.substring.size();
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
143 *buf++ = '\0';
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
144 buf += encodeULEB128(edge.child->offset, buf);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
145 }
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
146 }
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
147
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
148 TrieNode *TrieBuilder::makeNode() {
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
149 auto *node = make<TrieNode>();
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
150 nodes.emplace_back(node);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
151 return node;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
152 }
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
153
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
154 static int charAt(const Symbol *sym, size_t pos) {
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
155 StringRef str = sym->getName();
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
156 if (pos >= str.size())
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
157 return -1;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
158 return str[pos];
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
159 }
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
160
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
161 // Build the trie by performing a three-way radix quicksort: We start by sorting
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
162 // the strings by their first characters, then sort the strings with the same
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
163 // first characters by their second characters, and so on recursively. Each
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
164 // time the prefixes diverge, we add a node to the trie.
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
165 //
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
166 // node: The most recently created node along this path in the trie (i.e.
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
167 // the furthest from the root.)
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
168 // lastPos: The prefix length of the most recently created node, i.e. the number
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
169 // of characters along its path from the root.
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
170 // pos: The string index we are currently sorting on. Note that each symbol
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
171 // S contained in vec has the same prefix S[0...pos).
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
172 void TrieBuilder::sortAndBuild(MutableArrayRef<const Symbol *> vec,
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
173 TrieNode *node, size_t lastPos, size_t pos) {
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
174 tailcall:
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
175 if (vec.empty())
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
176 return;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
177
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
178 // Partition items so that items in [0, i) are less than the pivot,
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
179 // [i, j) are the same as the pivot, and [j, vec.size()) are greater than
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
180 // the pivot.
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
181 const Symbol *pivotSymbol = vec[vec.size() / 2];
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
182 int pivot = charAt(pivotSymbol, pos);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
183 size_t i = 0;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
184 size_t j = vec.size();
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
185 for (size_t k = 0; k < j;) {
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
186 int c = charAt(vec[k], pos);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
187 if (c < pivot)
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
188 std::swap(vec[i++], vec[k++]);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
189 else if (c > pivot)
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
190 std::swap(vec[--j], vec[k]);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
191 else
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
192 k++;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
193 }
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
194
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
195 bool isTerminal = pivot == -1;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
196 bool prefixesDiverge = i != 0 || j != vec.size();
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
197 if (lastPos != pos && (isTerminal || prefixesDiverge)) {
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
198 TrieNode *newNode = makeNode();
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
199 node->edges.emplace_back(pivotSymbol->getName().slice(lastPos, pos),
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
200 newNode);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
201 node = newNode;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
202 lastPos = pos;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
203 }
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
204
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
205 sortAndBuild(vec.slice(0, i), node, lastPos, pos);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
206 sortAndBuild(vec.slice(j), node, lastPos, pos);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
207
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
208 if (isTerminal) {
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
209 assert(j - i == 1); // no duplicate symbols
207
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
210 node->info = ExportInfo(*pivotSymbol, imageBase);
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
211 } else {
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
212 // This is the tail-call-optimized version of the following:
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
213 // sortAndBuild(vec.slice(i, j - i), node, lastPos, pos + 1);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
214 vec = vec.slice(i, j - i);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
215 ++pos;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
216 goto tailcall;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
217 }
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
218 }
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
219
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
220 size_t TrieBuilder::build() {
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
221 if (exported.empty())
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
222 return 0;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
223
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
224 TrieNode *root = makeNode();
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
225 sortAndBuild(exported, root, 0, 0);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
226
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
227 // Assign each node in the vector an offset in the trie stream, iterating
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
228 // until all uleb128 sizes have stabilized.
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
229 size_t offset;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
230 bool more;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
231 do {
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
232 offset = 0;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
233 more = false;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
234 for (TrieNode *node : nodes)
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
235 more |= node->updateOffset(offset);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
236 } while (more);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
237
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
238 return offset;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
239 }
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
240
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
241 void TrieBuilder::writeTo(uint8_t *buf) const {
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
242 for (TrieNode *node : nodes)
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
243 node->writeTo(buf);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
244 }
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
245
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
246 namespace {
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
247
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
248 // Parse a serialized trie and invoke a callback for each entry.
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
249 class TrieParser {
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
250 public:
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
251 TrieParser(const uint8_t *buf, size_t size, const TrieEntryCallback &callback)
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
252 : start(buf), end(start + size), callback(callback) {}
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
253
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
254 void parse(const uint8_t *buf, const Twine &cumulativeString);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
255
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
256 void parse() { parse(start, ""); }
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
257
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
258 const uint8_t *start;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
259 const uint8_t *end;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
260 const TrieEntryCallback &callback;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
261 };
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
262
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
263 } // namespace
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
264
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
265 void TrieParser::parse(const uint8_t *buf, const Twine &cumulativeString) {
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
266 if (buf >= end)
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
267 fatal("Node offset points outside export section");
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
268
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
269 unsigned ulebSize;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
270 uint64_t terminalSize = decodeULEB128(buf, &ulebSize);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
271 buf += ulebSize;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
272 uint64_t flags = 0;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
273 size_t offset;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
274 if (terminalSize != 0) {
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
275 flags = decodeULEB128(buf, &ulebSize);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
276 callback(cumulativeString, flags);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
277 }
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
278 buf += terminalSize;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
279 uint8_t numEdges = *buf++;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
280 for (uint8_t i = 0; i < numEdges; ++i) {
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
281 const char *cbuf = reinterpret_cast<const char *>(buf);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
282 StringRef substring = StringRef(cbuf, strnlen(cbuf, end - buf));
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
283 buf += substring.size() + 1;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
284 offset = decodeULEB128(buf, &ulebSize);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
285 buf += ulebSize;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
286 parse(start + offset, cumulativeString + substring);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
287 }
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
288 }
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
289
207
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
290 void macho::parseTrie(const uint8_t *buf, size_t size,
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
291 const TrieEntryCallback &callback) {
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
292 if (size == 0)
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
293 return;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
294
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
295 TrieParser(buf, size, callback).parse();
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
296 }