annotate llvm/utils/extract_symbols.py @ 150:1d019706d866

LLVM10
author anatofuz
date Thu, 13 Feb 2020 15:10:13 +0900
parents
children 0572611fdcc8
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
150
anatofuz
parents:
diff changeset
1 #!/usr/bin/env python
anatofuz
parents:
diff changeset
2
anatofuz
parents:
diff changeset
3 """A tool for extracting a list of symbols to export
anatofuz
parents:
diff changeset
4
anatofuz
parents:
diff changeset
5 When exporting symbols from a dll or exe we either need to mark the symbols in
anatofuz
parents:
diff changeset
6 the source code as __declspec(dllexport) or supply a list of symbols to the
anatofuz
parents:
diff changeset
7 linker. This program automates the latter by inspecting the symbol tables of a
anatofuz
parents:
diff changeset
8 list of link inputs and deciding which of those symbols need to be exported.
anatofuz
parents:
diff changeset
9
anatofuz
parents:
diff changeset
10 We can't just export all the defined symbols, as there's a limit of 65535
anatofuz
parents:
diff changeset
11 exported symbols and in clang we go way over that, particularly in a debug
anatofuz
parents:
diff changeset
12 build. Therefore a large part of the work is pruning symbols either which can't
anatofuz
parents:
diff changeset
13 be imported, or which we think are things that have definitions in public header
anatofuz
parents:
diff changeset
14 files (i.e. template instantiations) and we would get defined in the thing
anatofuz
parents:
diff changeset
15 importing these symbols anyway.
anatofuz
parents:
diff changeset
16 """
anatofuz
parents:
diff changeset
17
anatofuz
parents:
diff changeset
18 from __future__ import print_function
anatofuz
parents:
diff changeset
19 import sys
anatofuz
parents:
diff changeset
20 import re
anatofuz
parents:
diff changeset
21 import os
anatofuz
parents:
diff changeset
22 import subprocess
anatofuz
parents:
diff changeset
23 import multiprocessing
anatofuz
parents:
diff changeset
24 import argparse
anatofuz
parents:
diff changeset
25
anatofuz
parents:
diff changeset
26 # Define functions which extract a list of symbols from a library using several
anatofuz
parents:
diff changeset
27 # different tools. We use subprocess.Popen and yield a symbol at a time instead
anatofuz
parents:
diff changeset
28 # of using subprocess.check_output and returning a list as, especially on
anatofuz
parents:
diff changeset
29 # Windows, waiting for the entire output to be ready can take a significant
anatofuz
parents:
diff changeset
30 # amount of time.
anatofuz
parents:
diff changeset
31
anatofuz
parents:
diff changeset
def dumpbin_get_symbols(lib):
    """Yield defined external symbol names reported by ``dumpbin /symbols``.

    The tool's output is consumed line by line and each matching symbol is
    yielded as soon as it is seen, instead of waiting for the whole output.
    """
    process = subprocess.Popen(['dumpbin','/symbols',lib], bufsize=1,
                               stdout=subprocess.PIPE, stdin=subprocess.PIPE,
                               universal_newlines=True)
    process.stdin.close()
    try:
        for line in process.stdout:
            # Look for external symbols that are defined in some section
            match = re.match(r"^.+SECT.+External\s+\|\s+(\S+).*$", line)
            if match:
                yield match.group(1)
    finally:
        # Runs both on exhaustion and when the generator is closed early, so
        # the pipe is released and the child process is always reaped.
        process.stdout.close()
        process.wait()
anatofuz
parents:
diff changeset
43
anatofuz
parents:
diff changeset
def nm_get_symbols(lib):
    """Yield defined external symbol names reported by ``nm -P``.

    Only lines of the form ``<name> <type> <value> <size>`` whose type letter
    is one of ``BDGRSTVW`` are accepted; each name is yielded as it is read.
    """
    process = subprocess.Popen(['nm','-P',lib], bufsize=1,
                               stdout=subprocess.PIPE, stdin=subprocess.PIPE,
                               universal_newlines=True)
    process.stdin.close()
    try:
        for line in process.stdout:
            # Look for external symbols that are defined in some section
            match = re.match(r"^(\S+)\s+[BDGRSTVW]\s+\S+\s+\S+$", line)
            if match:
                yield match.group(1)
    finally:
        # Runs both on exhaustion and when the generator is closed early, so
        # the pipe is released and the child process is always reaped.
        process.stdout.close()
        process.wait()
anatofuz
parents:
diff changeset
55
anatofuz
parents:
diff changeset
def readobj_get_symbols(lib):
    """Yield defined external symbol names reported by ``llvm-readobj``.

    A symbol is yielded when its ``StorageClass`` line says ``External`` and
    the most recently seen ``Section`` is neither ``IMAGE_SYM_ABSOLUTE`` nor
    ``IMAGE_SYM_UNDEFINED``.
    """
    process = subprocess.Popen(['llvm-readobj','-symbols',lib], bufsize=1,
                               stdout=subprocess.PIPE, stdin=subprocess.PIPE,
                               universal_newlines=True)
    process.stdin.close()
    # Start with nothing recorded so that a StorageClass line arriving before
    # any Name/Section line is skipped instead of raising UnboundLocalError.
    name = None
    section = None
    try:
        for line in process.stdout:
            # When looking through the output of llvm-readobj we expect to see
            # Name, Section, then StorageClass, so record Name and Section when
            # we see them and decide if this is a defined external symbol when
            # we see StorageClass.
            match = re.search(r'Name: (\S+)', line)
            if match:
                name = match.group(1)
            match = re.search(r'Section: (\S+)', line)
            if match:
                section = match.group(1)
            match = re.search(r'StorageClass: (\S+)', line)
            if match:
                storageclass = match.group(1)
                if name is not None and section is not None and \
                   section != 'IMAGE_SYM_ABSOLUTE' and \
                   section != 'IMAGE_SYM_UNDEFINED' and \
                   storageclass == 'External':
                    yield name
    finally:
        # Runs both on exhaustion and when the generator is closed early, so
        # the pipe is released and the child process is always reaped.
        process.stdout.close()
        process.wait()
anatofuz
parents:
diff changeset
80
anatofuz
parents:
diff changeset
81 # Define functions which determine if the target is 32-bit Windows (as that's
anatofuz
parents:
diff changeset
82 # where calling convention name decoration happens).
anatofuz
parents:
diff changeset
83
anatofuz
parents:
diff changeset
def dumpbin_is_32bit_windows(lib):
    """Return True if ``dumpbin /headers`` reports the machine as ``x86``.

    Returns False when the first 'machine' line names anything else, or when
    no such line is found.
    """
    # dumpbin /headers can output a huge amount of data (>100MB in a debug
    # build) so we read only up to the 'machine' line then close the output.
    process = subprocess.Popen(['dumpbin','/headers',lib], bufsize=1,
                               stdout=subprocess.PIPE, stdin=subprocess.PIPE,
                               universal_newlines=True)
    process.stdin.close()
    retval = False
    try:
        for line in process.stdout:
            match = re.match(r'.+machine \((\S+)\)', line)
            if match:
                retval = (match.group(1) == 'x86')
                break
    finally:
        # Always release the pipe and reap the child, even if reading fails.
        process.stdout.close()
        process.wait()
    return retval
anatofuz
parents:
diff changeset
100
anatofuz
parents:
diff changeset
def objdump_is_32bit_windows(lib):
    """Return True if ``objdump -f`` reports the file format as ``pe-i386``.

    Returns False when the first 'file format' line names anything else, or
    when no such line is found.
    """
    output = subprocess.check_output(['objdump','-f',lib],
                                     universal_newlines=True)
    # check_output returns one string; it must be split into lines, because
    # iterating the string directly yields single characters that can never
    # match the pattern.
    for line in output.splitlines():
        match = re.match(r'.+file format (\S+)', line)
        if match:
            return (match.group(1) == 'pe-i386')
    return False
anatofuz
parents:
diff changeset
109
anatofuz
parents:
diff changeset
def readobj_is_32bit_windows(lib):
    """Return True if ``llvm-readobj`` reports the format as ``COFF-i386``.

    Returns False when the first 'Format:' line names anything else, or when
    no such line is found.
    """
    output = subprocess.check_output(['llvm-readobj','-file-headers',lib],
                                     universal_newlines=True)
    # check_output returns one string; it must be split into lines, because
    # iterating the string directly yields single characters that can never
    # match the pattern.
    for line in output.splitlines():
        match = re.match(r'Format: (\S+)', line)
        if match:
            return (match.group(1) == 'COFF-i386')
    return False
anatofuz
parents:
diff changeset
118
anatofuz
parents:
diff changeset
119 # MSVC mangles names to ?<identifier_mangling>@<type_mangling>. By examining the
anatofuz
parents:
diff changeset
120 # identifier/type mangling we can decide which symbols could possibly be
anatofuz
parents:
diff changeset
121 # required and which we can discard.
anatofuz
parents:
diff changeset
def should_keep_microsoft_symbol(symbol, calling_convention_decoration):
    """Decide whether an MSVC-mangled symbol should be exported.

    Returns the name to export (the symbol itself, or the symbol with its
    calling convention decoration removed for unmangled names), or None if
    the symbol should be discarded.
    """
    # Keep unmangled (i.e. extern "C") names
    if '?' not in symbol:
        if calling_convention_decoration:
            # Remove calling convention decoration from names
            match = re.match(r'[_@]([^@]+)', symbol)
            if match:
                return match.group(1)
        return symbol
    # Function template instantiations start with ?$; keep the instantiations
    # of clang::Type::getAs, as some of them are explicit specializations that
    # are defined in clang's lib/AST/Type.cpp; discard the rest as it's assumed
    # that the definition is public
    if re.match(r'\?\?\$getAs@.+@Type@clang@@', symbol):
        return symbol
    if symbol.startswith('??$'):
        return None
    # Deleting destructors start with ?_G or ?_E and can be discarded because
    # link.exe gives you a warning telling you they can't be exported if you
    # don't
    if symbol.startswith(('??_G', '??_E')):
        return None
    # Constructors (?0) and destructors (?1) of templates (?$) are assumed to
    # be defined in headers and not required to be kept
    if symbol.startswith(('??0?$', '??1?$')):
        return None
    # An anonymous namespace is mangled as ?A(maybe hex number)@. Any symbol
    # that mentions an anonymous namespace can be discarded, as the anonymous
    # namespace doesn't exist outside of that translation unit.
    if re.search(r'\?A(0x\w+)?@', symbol):
        return None
    # Keep mangled llvm:: and clang:: function symbols. How we detect these is
    # a bit of a mess and imprecise, but that avoids having to completely
    # demangle the symbol name. The outermost namespace is at the end of the
    # identifier mangling, and the identifier mangling is followed by the type
    # mangling, so we look for (llvm|clang)@@ followed by something that looks
    # like a function type mangling. To spot a function type we use (this is
    # derived from clang/lib/AST/MicrosoftMangle.cpp):
    # <function-type> ::= <function-class> <this-cvr-qualifiers>
    #                     <calling-convention> <return-type>
    #                     <argument-list> <throw-spec>
    # <function-class> ::= [A-Z]
    # <this-cvr-qualifiers> ::= [A-Z0-9_]*
    # <calling-convention> ::= [A-JQ]
    # <return-type> ::= .+
    # <argument-list> ::= X   (void)
    #                 ::= .+@ (list of types)
    #                 ::= .*Z (list of types, varargs)
    # <throw-spec> ::= exceptions are not allowed
    if re.search(r'(llvm|clang)@@[A-Z][A-Z0-9_]*[A-JQ].+(X|.+@|.*Z)$', symbol):
        return symbol
    return None
anatofuz
parents:
diff changeset
174
anatofuz
parents:
diff changeset
175 # Itanium manglings are of the form _Z<identifier_mangling><type_mangling>. We
anatofuz
parents:
diff changeset
176 # demangle the identifier mangling to identify symbols that can be safely
anatofuz
parents:
diff changeset
177 # discarded.
anatofuz
parents:
diff changeset
def should_keep_itanium_symbol(symbol, calling_convention_decoration):
    """Decide whether an Itanium-mangled symbol should be exported.

    Returns the name to export (with any leading calling convention
    underscore removed), or None if the symbol should be discarded.
    """
    # Start by removing any calling convention decoration (which we expect to
    # see on all symbols, even mangled C++ symbols)
    if calling_convention_decoration and symbol.startswith('_'):
        symbol = symbol[1:]
    # Keep unmangled names
    if not symbol.startswith('_') and not symbol.startswith('.'):
        return symbol
    # Discard manglings that aren't nested names
    match = re.match(r'_Z(T[VTIS])?(N.+)', symbol)
    if not match:
        return None
    # Demangle the name. If the name is too complex then we don't need to keep
    # it, but if the demangling fails then keep the symbol just in case.
    try:
        names, _ = parse_itanium_nested_name(match.group(2))
    except TooComplexName:
        return None
    if not names:
        return symbol
    # Constructors and destructors of template classes are assumed to be
    # defined in headers and not required to be kept. The length check guards
    # the names[-2] lookup against a nested name with a single component.
    if re.match('[CD][123]', names[-1][0]) and len(names) > 1 and names[-2][1]:
        return None
    # Keep the instantiations of clang::Type::getAs, as some of them are
    # explicit specializations that are defined in clang's lib/AST/Type.cpp;
    # discard any other function template instantiations as it's assumed that
    # the definition is public
    elif symbol.startswith('_ZNK5clang4Type5getAs'):
        return symbol
    elif names[-1][1]:
        return None
    # Keep llvm:: and clang:: names
    elif names[0][0] == '4llvm' or names[0][0] == '5clang':
        return symbol
    # Discard everything else
    else:
        return None
anatofuz
parents:
diff changeset
216
anatofuz
parents:
diff changeset
217 # Certain kinds of complex manglings we assume cannot be part of a public
anatofuz
parents:
diff changeset
218 # interface, and we handle them by raising an exception.
anatofuz
parents:
diff changeset
class TooComplexName(Exception):
    """Raised for a mangling assumed too complex to be part of a public interface."""
anatofuz
parents:
diff changeset
221
anatofuz
parents:
diff changeset
222 # Parse an itanium mangled name from the start of a string and return a
anatofuz
parents:
diff changeset
223 # (name, rest of string) pair.
anatofuz
parents:
diff changeset
def parse_itanium_name(arg):
    """Parse one Itanium-mangled name from the start of *arg*.

    Returns a ``(name, rest)`` pair. A length-prefixed name keeps its length
    digits (e.g. ``'4llvm'``). When nothing can be parsed, ``name`` is None
    and ``rest`` is *arg* unchanged.
    """
    # Check for a normal name
    match = re.match(r'(\d+)(.+)', arg)
    if match:
        n = int(match.group(1))
        name = match.group(1)+match.group(2)[:n]
        rest = match.group(2)[n:]
        return name, rest
    # Check for constructor/destructor names
    match = re.match('([CD][123])(.+)', arg)
    if match:
        return match.group(1), match.group(2)
    # Assume that a sequence of characters that doesn't end a nesting is an
    # operator (this is very imprecise, but appears to be good enough)
    match = re.match('([^E]+)(.+)', arg)
    if match:
        return match.group(1), match.group(2)
    # Anything else: we can't handle it
    return None, arg
anatofuz
parents:
diff changeset
243
anatofuz
parents:
diff changeset
244 # Parse an itanium mangled template argument list from the start of a string
anatofuz
parents:
diff changeset
245 # and throw it away, returning the rest of the string.
anatofuz
parents:
diff changeset
def skip_itanium_template(arg):
    """Skip an Itanium-mangled template argument list at the start of *arg*.

    Returns the text following the closing ``E`` of the list, or None if the
    string runs out before the list is closed. Raises TooComplexName when an
    argument starts with ``L`` or ``X`` (treated as an expression).
    """
    # A template argument list starts with I
    assert arg.startswith('I'), arg
    tmp = arg[1:]
    while tmp:
        # Check for names
        match = re.match(r'(\d+)(.+)', tmp)
        if match:
            n = int(match.group(1))
            tmp = match.group(2)[n:]
            continue
        # Check for substitutions
        match = re.match('S[A-Z0-9]*_(.+)', tmp)
        if match:
            tmp = match.group(1)
        # Start of a template
        elif tmp.startswith('I'):
            tmp = skip_itanium_template(tmp)
        # Start of a nested name
        elif tmp.startswith('N'):
            _, tmp = parse_itanium_nested_name(tmp)
        # Start of an expression: assume that it's too complicated
        elif tmp.startswith('L') or tmp.startswith('X'):
            raise TooComplexName
        # End of the template
        elif tmp.startswith('E'):
            return tmp[1:]
        # Something else: probably a type, skip it
        else:
            tmp = tmp[1:]
    # Ran out of input without seeing the closing E
    return None
anatofuz
parents:
diff changeset
277
anatofuz
parents:
diff changeset
278 # Parse an itanium mangled nested name and transform it into a list of pairs of
anatofuz
parents:
diff changeset
279 # (name, is_template), returning (list, rest of string).
anatofuz
parents:
diff changeset
def parse_itanium_nested_name(arg):
    """Parse an Itanium-mangled nested name at the start of *arg*.

    Returns ``(names, rest)`` where ``names`` is a list of
    ``(name, is_template)`` pairs and ``rest`` is the text after the closing
    ``E``. Returns ``(None, None)`` if the name cannot be parsed.
    """
    # A nested name starts with N
    assert arg.startswith('N'), arg
    ret = []

    # Skip past the N, and possibly a substitution
    match = re.match('NS[A-Z0-9]*_(.+)', arg)
    if match:
        tmp = match.group(1)
    else:
        tmp = arg[1:]

    # Skip past CV-qualifiers and ref qualifiers
    match = re.match('[rVKRO]*(.+)', tmp)
    if match:
        tmp = match.group(1)

    # Repeatedly parse names from the string until we reach the end of the
    # nested name
    while tmp:
        # An E ends the nested name
        if tmp.startswith('E'):
            return ret, tmp[1:]
        # Parse a name
        name_part, tmp = parse_itanium_name(tmp)
        if not name_part:
            # If we failed then we don't know how to demangle this
            return None, None
        is_template = False
        # If this name is a template record that, then skip the template
        # arguments
        if tmp.startswith('I'):
            tmp = skip_itanium_template(tmp)
            is_template = True
        # Add the name to the list
        ret.append((name_part, is_template))

    # If we get here then something went wrong
    return None, None
anatofuz
parents:
diff changeset
319
anatofuz
parents:
diff changeset
def extract_symbols(arg):
    """Collect the symbols to keep from one library, with occurrence counts.

    *arg* is a single tuple ``(get_symbols, should_keep_symbol,
    calling_convention_decoration, lib)`` so that the function can be used
    with a one-argument mapping call. Returns a dict mapping each kept symbol
    name to the number of times it was seen.
    """
    get_symbols, should_keep_symbol, calling_convention_decoration, lib = arg
    symbols = dict()
    for symbol in get_symbols(lib):
        # should_keep_symbol returns the (possibly rewritten) name to keep, or
        # a false value when the symbol is to be discarded.
        symbol = should_keep_symbol(symbol, calling_convention_decoration)
        if symbol:
            symbols[symbol] = symbols.get(symbol, 0) + 1
    return symbols
anatofuz
parents:
diff changeset
328
anatofuz
parents:
diff changeset
329 if __name__ == '__main__':
anatofuz
parents:
diff changeset
330 tool_exes = ['dumpbin','nm','objdump','llvm-readobj']
anatofuz
parents:
diff changeset
331 parser = argparse.ArgumentParser(
anatofuz
parents:
diff changeset
332 description='Extract symbols to export from libraries')
anatofuz
parents:
diff changeset
333 parser.add_argument('--mangling', choices=['itanium','microsoft'],
anatofuz
parents:
diff changeset
334 required=True, help='expected symbol mangling scheme')
anatofuz
parents:
diff changeset
335 parser.add_argument('--tools', choices=tool_exes, nargs='*',
anatofuz
parents:
diff changeset
336 help='tools to use to extract symbols and determine the'
anatofuz
parents:
diff changeset
337 ' target')
anatofuz
parents:
diff changeset
338 parser.add_argument('libs', metavar='lib', type=str, nargs='+',
anatofuz
parents:
diff changeset
339 help='libraries to extract symbols from')
anatofuz
parents:
diff changeset
340 parser.add_argument('-o', metavar='file', type=str, help='output to file')
anatofuz
parents:
diff changeset
341 args = parser.parse_args()
anatofuz
parents:
diff changeset
342
anatofuz
parents:
diff changeset
343 # Determine the function to use to get the list of symbols from the inputs,
anatofuz
parents:
diff changeset
344 # and the function to use to determine if the target is 32-bit windows.
anatofuz
parents:
diff changeset
345 tools = { 'dumpbin' : (dumpbin_get_symbols, dumpbin_is_32bit_windows),
anatofuz
parents:
diff changeset
346 'nm' : (nm_get_symbols, None),
anatofuz
parents:
diff changeset
347 'objdump' : (None, objdump_is_32bit_windows),
anatofuz
parents:
diff changeset
348 'llvm-readobj' : (readobj_get_symbols, readobj_is_32bit_windows) }
anatofuz
parents:
diff changeset
349 get_symbols = None
anatofuz
parents:
diff changeset
350 is_32bit_windows = None
anatofuz
parents:
diff changeset
351 # If we have a tools argument then use that for the list of tools to check
anatofuz
parents:
diff changeset
352 if args.tools:
anatofuz
parents:
diff changeset
353 tool_exes = args.tools
anatofuz
parents:
diff changeset
354 # Find a tool to use by trying each in turn until we find one that exists
anatofuz
parents:
diff changeset
355 # (subprocess.call will throw OSError when the program does not exist)
anatofuz
parents:
diff changeset
356 get_symbols = None
anatofuz
parents:
diff changeset
357 for exe in tool_exes:
anatofuz
parents:
diff changeset
358 try:
anatofuz
parents:
diff changeset
359 # Close std streams as we don't want any output and we don't
anatofuz
parents:
diff changeset
360 # want the process to wait for something on stdin.
anatofuz
parents:
diff changeset
361 p = subprocess.Popen([exe], stdout=subprocess.PIPE,
anatofuz
parents:
diff changeset
362 stderr=subprocess.PIPE,
anatofuz
parents:
diff changeset
363 stdin=subprocess.PIPE,
anatofuz
parents:
diff changeset
364 universal_newlines=True)
anatofuz
parents:
diff changeset
365 p.stdout.close()
anatofuz
parents:
diff changeset
366 p.stderr.close()
anatofuz
parents:
diff changeset
367 p.stdin.close()
anatofuz
parents:
diff changeset
368 p.wait()
anatofuz
parents:
diff changeset
369 # Keep going until we have a tool to use for both get_symbols and
anatofuz
parents:
diff changeset
370 # is_32bit_windows
anatofuz
parents:
diff changeset
371 if not get_symbols:
anatofuz
parents:
diff changeset
372 get_symbols = tools[exe][0]
anatofuz
parents:
diff changeset
373 if not is_32bit_windows:
anatofuz
parents:
diff changeset
374 is_32bit_windows = tools[exe][1]
anatofuz
parents:
diff changeset
375 if get_symbols and is_32bit_windows:
anatofuz
parents:
diff changeset
376 break
anatofuz
parents:
diff changeset
377 except OSError:
anatofuz
parents:
diff changeset
378 continue
anatofuz
parents:
diff changeset
379 if not get_symbols:
anatofuz
parents:
diff changeset
380 print("Couldn't find a program to read symbols with", file=sys.stderr)
anatofuz
parents:
diff changeset
381 exit(1)
anatofuz
parents:
diff changeset
382 if not is_32bit_windows:
anatofuz
parents:
diff changeset
383 print("Couldn't find a program to determining the target", file=sys.stderr)
anatofuz
parents:
diff changeset
384 exit(1)
anatofuz
parents:
diff changeset
385
anatofuz
parents:
diff changeset
386 # How we determine which symbols to keep and which to discard depends on
anatofuz
parents:
diff changeset
387 # the mangling scheme
anatofuz
parents:
diff changeset
388 if args.mangling == 'microsoft':
anatofuz
parents:
diff changeset
389 should_keep_symbol = should_keep_microsoft_symbol
anatofuz
parents:
diff changeset
390 else:
anatofuz
parents:
diff changeset
391 should_keep_symbol = should_keep_itanium_symbol
anatofuz
parents:
diff changeset
392
anatofuz
parents:
diff changeset
393 # Get the list of libraries to extract symbols from
anatofuz
parents:
diff changeset
394 libs = list()
anatofuz
parents:
diff changeset
395 for lib in args.libs:
anatofuz
parents:
diff changeset
396 # When invoked by cmake the arguments are the cmake target names of the
anatofuz
parents:
diff changeset
397 # libraries, so we need to add .lib/.a to the end and maybe lib to the
anatofuz
parents:
diff changeset
398 # start to get the filename. Also allow objects.
anatofuz
parents:
diff changeset
399 suffixes = ['.lib','.a','.obj','.o']
anatofuz
parents:
diff changeset
400 if not any([lib.endswith(s) for s in suffixes]):
anatofuz
parents:
diff changeset
401 for s in suffixes:
anatofuz
parents:
diff changeset
402 if os.path.exists(lib+s):
anatofuz
parents:
diff changeset
403 lib = lib+s
anatofuz
parents:
diff changeset
404 break
anatofuz
parents:
diff changeset
405 if os.path.exists('lib'+lib+s):
anatofuz
parents:
diff changeset
406 lib = 'lib'+lib+s
anatofuz
parents:
diff changeset
407 break
anatofuz
parents:
diff changeset
408 if not any([lib.endswith(s) for s in suffixes]):
anatofuz
parents:
diff changeset
409 print("Don't know what to do with argument "+lib, file=sys.stderr)
anatofuz
parents:
diff changeset
410 exit(1)
anatofuz
parents:
diff changeset
411 libs.append(lib)
anatofuz
parents:
diff changeset
412
anatofuz
parents:
diff changeset
413 # Check if calling convention decoration is used by inspecting the first
anatofuz
parents:
diff changeset
414 # library in the list
anatofuz
parents:
diff changeset
415 calling_convention_decoration = is_32bit_windows(libs[0])
anatofuz
parents:
diff changeset
416
anatofuz
parents:
diff changeset
417 # Extract symbols from libraries in parallel. This is a huge time saver when
anatofuz
parents:
diff changeset
418 # doing a debug build, as there are hundreds of thousands of symbols in each
anatofuz
parents:
diff changeset
419 # library.
anatofuz
parents:
diff changeset
420 pool = multiprocessing.Pool()
anatofuz
parents:
diff changeset
421 try:
anatofuz
parents:
diff changeset
422 # Only one argument can be passed to the mapping function, and we can't
anatofuz
parents:
diff changeset
423 # use a lambda or local function definition as that doesn't work on
anatofuz
parents:
diff changeset
424 # windows, so create a list of tuples which duplicates the arguments
anatofuz
parents:
diff changeset
425 # that are the same in all calls.
anatofuz
parents:
diff changeset
426 vals = [(get_symbols, should_keep_symbol, calling_convention_decoration, x) for x in libs]
anatofuz
parents:
diff changeset
427 # Do an async map then wait for the result to make sure that
anatofuz
parents:
diff changeset
428 # KeyboardInterrupt gets caught correctly (see
anatofuz
parents:
diff changeset
429 # http://bugs.python.org/issue8296)
anatofuz
parents:
diff changeset
430 result = pool.map_async(extract_symbols, vals)
anatofuz
parents:
diff changeset
431 pool.close()
anatofuz
parents:
diff changeset
432 libs_symbols = result.get(3600)
anatofuz
parents:
diff changeset
433 except KeyboardInterrupt:
anatofuz
parents:
diff changeset
434 # On Ctrl-C terminate everything and exit
anatofuz
parents:
diff changeset
435 pool.terminate()
anatofuz
parents:
diff changeset
436 pool.join()
anatofuz
parents:
diff changeset
437 exit(1)
anatofuz
parents:
diff changeset
438
anatofuz
parents:
diff changeset
439 # Merge everything into a single dict
anatofuz
parents:
diff changeset
440 symbols = dict()
anatofuz
parents:
diff changeset
441 for this_lib_symbols in libs_symbols:
anatofuz
parents:
diff changeset
442 for k,v in list(this_lib_symbols.items()):
anatofuz
parents:
diff changeset
443 symbols[k] = v + symbols.setdefault(k,0)
anatofuz
parents:
diff changeset
444
anatofuz
parents:
diff changeset
# Count instances of member functions of template classes, and map the
# symbol name to the function+class. We do this under the assumption that if
# a member function of a template class is instantiated many times it's
# probably declared in a public header file.
#
# Symbols that are not recognised as template member functions are mapped to
# the empty key "", whose count is seeded with 0 and never incremented.
template_function_count = {"": 0}
template_function_mapping = dict()
# Compile the patterns once, outside the per-symbol loop. They are raw strings
# so that the backslashes reach the regex engine without relying on Python's
# deprecated handling of invalid string escapes such as "\?" and "\w".
microsoft_template_member = re.compile(r"^\?(\??\w+\@\?\$\w+)\@")
itanium_nested_symbol = re.compile(r'_Z(T[VTIS])?(N.+)')
for k in symbols:
    name = None
    if args.mangling == 'microsoft':
        # Member functions of templates start with
        # ?<fn_name>@?$<class_name>@, so we map to <fn_name>@?$<class_name>.
        # As manglings go from the innermost scope to the outermost scope
        # this means:
        #  * When we have a function member of a subclass of a template
        #    class then <fn_name> will actually contain the mangling of
        #    both the subclass and the function member. This is fine.
        #  * When we have a function member of a template subclass of a
        #    (possibly template) class then it's the innermost template
        #    subclass that becomes <class_name>. This should be OK so long
        #    as we don't have multiple classes with a template subclass of
        #    the same name.
        match = microsoft_template_member.search(k)
        if match:
            name = match.group(1)
    else:
        # Find member functions of templates by demangling the name and
        # checking if the second-to-last name in the list is a template.
        match = itanium_nested_symbol.match(k)
        if match:
            try:
                names, _ = parse_itanium_nested_name(match.group(2))
                # A nested name with fewer than two components has no
                # enclosing class to inspect; skip it instead of letting
                # names[-2] raise IndexError.
                if names and len(names) >= 2 and names[-2][1]:
                    name = ''.join([x for x, _ in names])
            except TooComplexName:
                # Manglings that are too complex should already have been
                # filtered out, but if we happen to somehow see one here
                # just leave it as-is.
                pass
    if name:
        template_function_count[name] = template_function_count.get(name, 0) + 1
        template_function_mapping[k] = name
    else:
        template_function_mapping[k] = ""
anatofuz
parents:
diff changeset
490
anatofuz
parents:
diff changeset
# Print symbols which both:
#  * Appear in exactly one input, as symbols defined in multiple
#    objects/libraries are assumed to have public definitions.
#  * Aren't instances of member functions of templates which have been
#    instantiated 100 times or more, which are assumed to have public
#    definitions. (100 is an arbitrary guess here.)
if args.o:
    outfile = open(args.o, 'w')
else:
    outfile = sys.stdout
try:
    for k, v in symbols.items():
        template_count = template_function_count[template_function_mapping[k]]
        if v == 1 and template_count < 100:
            print(k, file=outfile)
finally:
    # Close (and thereby flush) a file we opened ourselves, even if writing
    # fails part-way; never close the process's stdout.
    if outfile is not sys.stdout:
        outfile.close()