comparison utils/demangle_tree.py @ 171:66f3bfe93da9

git version 2c4ca6832fa6b306ee6a7010bfb80a3f2596f824
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Mon, 25 May 2020 11:07:02 +0900
parents c2174574ed3a
children
# Given a path to llvm-objdump and a directory tree, spider the directory tree
# dumping every object file encountered with the correct options needed to
# demangle symbols in the object file, and collect statistics about failed /
# crashed demanglings.  Useful for stress testing the demangler against a
# large corpus of inputs.

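# Example invocation (illustrative paths; the flags match the argparse options
# defined at the bottom of this file):
#
#   python demangle_tree.py /path/to/build/lib \
#       --objdump=/path/to/llvm-objdump --extensions=o,obj
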
from __future__ import print_function

import argparse
import functools
import os
import re
import sys
import subprocess
import traceback
from multiprocessing import Pool
import multiprocessing

args = None

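# parse_line() extracts a Microsoft-mangled symbol (it starts with '?') and the
# demangled text that `llvm-objdump -t -demangle` prints after it in
# parentheses.  A minimal illustrative call (the symbol and surrounding text
# are made up, not verbatim objdump output):
#
#   parse_line("... ?func@@YAXXZ (void __cdecl func(void))")
#     -> ('?func@@YAXXZ', 'void __cdecl func(void)')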
def parse_line(line):
    question = line.find('?')
    if question == -1:
        return None, None

    open_paren = line.find('(', question)
    if open_paren == -1:
        return None, None
    close_paren = line.rfind(')', open_paren)
    if close_paren == -1:
        return None, None
    mangled = line[question : open_paren]
    demangled = line[open_paren+1 : close_paren]
    return mangled.strip(), demangled.strip()

class Result(object):
    def __init__(self):
        self.crashed = []
        self.file = None
        self.nsymbols = 0
        self.errors = set()
        self.nfiles = 0

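# MapContext holds the bookkeeping used below to feed the worker pool in
# fixed-size chunks that may straddle directory boundaries:
#   rincomplete  - Result for a directory whose object files were only partly
#                  dispatched in the previous chunk
#   rcumulative  - running totals across all completed directories
#   pending_objs - list of (directory, [object file paths]) tuples waiting to
#                  be dispatched
#   npending     - total number of object files currently queued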
class MapContext(object):
    def __init__(self):
        self.rincomplete = None
        self.rcumulative = Result()
        self.pending_objs = []
        self.npending = 0

def process_file(path, objdump):
    r = Result()
    r.file = path

    popen_args = [objdump, '-t', '-demangle', path]
    p = subprocess.Popen(popen_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        r.crashed = [r.file]
        return r

    output = stdout.decode('utf-8')

    for line in output.splitlines():
        mangled, demangled = parse_line(line)
        if mangled is None:
            continue
        r.nsymbols += 1
        if "invalid mangled name" in demangled:
            r.errors.add(mangled)
    return r

def add_results(r1, r2):
    r1.crashed.extend(r2.crashed)
    r1.errors.update(r2.errors)
    r1.nsymbols += r2.nsymbols
    r1.nfiles += r2.nfiles

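# Each completed directory gets one status line.  With illustrative (made-up)
# numbers it looks like:
#
#   [12 files, 0 crashes, 3 errors, 481 symbols]: 'build/lib/Support'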
def print_result_row(directory, result):
    print("[{0} files, {1} crashes, {2} errors, {3} symbols]: '{4}'".format(
        result.nfiles, len(result.crashed), len(result.errors), result.nsymbols, directory))

def process_one_chunk(pool, chunk_size, objdump, context):
    objs = []

    incomplete = False
    dir_results = {}
    ordered_dirs = []
    while context.npending > 0 and len(objs) < chunk_size:
        this_dir = context.pending_objs[0][0]
        ordered_dirs.append(this_dir)
        re = Result()
        if context.rincomplete is not None:
            re = context.rincomplete
            context.rincomplete = None

        dir_results[this_dir] = re
        re.file = this_dir

        nneeded = chunk_size - len(objs)
        objs_this_dir = context.pending_objs[0][1]
        navail = len(objs_this_dir)
        ntaken = min(nneeded, navail)
        objs.extend(objs_this_dir[0:ntaken])
        remaining_objs_this_dir = objs_this_dir[ntaken:]
        context.pending_objs[0] = (context.pending_objs[0][0], remaining_objs_this_dir)
        context.npending -= ntaken
        if ntaken == navail:
            context.pending_objs.pop(0)
        else:
            incomplete = True

        re.nfiles += ntaken

    assert(len(objs) == chunk_size or context.npending == 0)

    copier = functools.partial(process_file, objdump=objdump)
    mapped_results = list(pool.map(copier, objs))

    for mr in mapped_results:
        result_dir = os.path.dirname(mr.file)
        result_entry = dir_results[result_dir]
        add_results(result_entry, mr)

    # It's only possible that a single item is incomplete, and it has to be the
    # last item.
    if incomplete:
        context.rincomplete = dir_results[ordered_dirs[-1]]
        ordered_dirs.pop()

    # Now ordered_dirs contains a list of all directories which *did* complete.
    for c in ordered_dirs:
        re = dir_results[c]
        add_results(context.rcumulative, re)
        print_result_row(c, re)

def process_pending_files(pool, chunk_size, objdump, context):
    while context.npending >= chunk_size:
        process_one_chunk(pool, chunk_size, objdump, context)

def go():
    global args

    obj_dir = args.dir
    extensions = args.extensions.split(',')
    extensions = [x if x[0] == '.' else '.' + x for x in extensions]

    pool_size = 48
    pool = Pool(processes=pool_size)

    try:
        nfiles = 0
        context = MapContext()

        for root, dirs, files in os.walk(obj_dir):
            root = os.path.normpath(root)
            pending = []
            for f in files:
                file, ext = os.path.splitext(f)
                if ext not in extensions:
                    continue

                nfiles += 1
                full_path = os.path.join(root, f)
                full_path = os.path.normpath(full_path)
                pending.append(full_path)

            # If this directory had no object files, just print a default
            # status line and continue with the next dir.
            if len(pending) == 0:
                print_result_row(root, Result())
                continue

            context.npending += len(pending)
            context.pending_objs.append((root, pending))
            # Drain the tasks, `pool_size` at a time, until we have less than
            # `pool_size` tasks remaining.
            process_pending_files(pool, pool_size, args.objdump, context)

        assert(context.npending < pool_size)
        process_one_chunk(pool, pool_size, args.objdump, context)

        total = context.rcumulative
        nfailed = len(total.errors)
        nsuccess = total.nsymbols - nfailed
        ncrashed = len(total.crashed)

        if nfailed > 0:
            print("Failures:")
            for m in sorted(total.errors):
                print(" " + m)
        if ncrashed > 0:
            print("Crashes:")
            for f in sorted(total.crashed):
                print(" " + f)
        print("Summary:")
        spct = float(nsuccess) / float(total.nsymbols)
        fpct = float(nfailed) / float(total.nsymbols)
        cpct = float(ncrashed) / float(nfiles)
        print("Processed {0} object files.".format(nfiles))
        print("{0}/{1} symbols successfully demangled ({2:.4%})".format(nsuccess, total.nsymbols, spct))
        print("{0} symbols could not be demangled ({1:.4%})".format(nfailed, fpct))
        print("{0} files crashed while demangling ({1:.4%})".format(ncrashed, cpct))

    except:
        traceback.print_exc()

    pool.close()
    pool.join()

if __name__ == "__main__":
    def_obj = 'obj' if sys.platform == 'win32' else 'o'

    parser = argparse.ArgumentParser(description='Demangle all symbols in a tree of object files, looking for failures.')
    parser.add_argument('dir', type=str, help='the root directory at which to start crawling')
    parser.add_argument('--objdump', type=str, default='llvm-objdump',
                        help='path to llvm-objdump.  If not specified ' +
                             'the tool is located as if by `which llvm-objdump`.')
    parser.add_argument('--extensions', type=str, default=def_obj,
                        help='comma separated list of extensions to demangle (e.g. `o,obj`).  ' +
                             'By default this will be `obj` on Windows and `o` otherwise.')

    args = parser.parse_args()

    multiprocessing.freeze_support()
    go()