comparison: utils/demangle_tree.py @ 171:66f3bfe93da9
git version: 2c4ca6832fa6b306ee6a7010bfb80a3f2596f824
author:   Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date:     Mon, 25 May 2020 11:07:02 +0900
parents:  c2174574ed3a
children:
compared revisions: 150:1d019706d866 and 171:66f3bfe93da9
# Given a path to llvm-objdump and a directory tree, spider the directory tree
# dumping every object file encountered with correct options needed to demangle
# symbols in the object file, and collect statistics about failed / crashed
# demanglings.  Useful for stress testing the demangler against a large corpus
# of inputs.
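#
# Example invocation (illustrative paths, not defaults baked into this script):
#   python demangle_tree.py --objdump=/path/to/llvm-objdump --extensions=o /path/to/build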

from __future__ import print_function

import argparse
import functools
import os
import re
import sys
import subprocess
import traceback
from multiprocessing import Pool
import multiprocessing

args = None

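# Extract the (mangled, demangled) pair from one line of `llvm-objdump -t
# -demangle` output.  Judging from the slicing below, a matching line is
# assumed to carry a Microsoft-mangled name starting at the first '?', with
# the demangled form following in parentheses, e.g. (illustrative):
#   ... ?func@@YAXXZ (void __cdecl func(void))
# Lines that do not fit this shape yield (None, None).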
def parse_line(line):
    question = line.find('?')
    if question == -1:
        return None, None

    open_paren = line.find('(', question)
    if open_paren == -1:
        return None, None
    close_paren = line.rfind(')', open_paren)
    if close_paren == -1:
        return None, None
    mangled = line[question : open_paren]
    demangled = line[open_paren+1 : close_paren]
    return mangled.strip(), demangled.strip()

class Result(object):
    def __init__(self):
        self.crashed = []
        self.file = None
        self.nsymbols = 0
        self.errors = set()
        self.nfiles = 0

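# Bookkeeping carried between chunks: `pending_objs` holds (directory,
# [object files]) pairs not yet dispatched, `npending` counts the queued
# files, `rcumulative` accumulates grand totals, and `rincomplete` holds the
# partial Result of a directory whose files were split across two chunks.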
class MapContext(object):
    def __init__(self):
        self.rincomplete = None
        self.rcumulative = Result()
        self.pending_objs = []
        self.npending = 0

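# Run `objdump -t -demangle` on a single object file.  A non-zero exit is
# recorded as a crash; otherwise every symbol line is parsed and any
# "invalid mangled name" occurrences are collected as demangling errors.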
def process_file(path, objdump):
    r = Result()
    r.file = path

    popen_args = [objdump, '-t', '-demangle', path]
    p = subprocess.Popen(popen_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        r.crashed = [r.file]
        return r

    output = stdout.decode('utf-8')

    for line in output.splitlines():
        mangled, demangled = parse_line(line)
        if mangled is None:
            continue
        r.nsymbols += 1
        if "invalid mangled name" in demangled:
            r.errors.add(mangled)
    return r

def add_results(r1, r2):
    r1.crashed.extend(r2.crashed)
    r1.errors.update(r2.errors)
    r1.nsymbols += r2.nsymbols
    r1.nfiles += r2.nfiles

def print_result_row(directory, result):
    print("[{0} files, {1} crashes, {2} errors, {3} symbols]: '{4}'".format(
        result.nfiles, len(result.crashed), len(result.errors), result.nsymbols, directory))

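# Dispatch up to `chunk_size` object files from the front of the pending
# queue to the worker pool.  A directory whose files do not all fit in this
# chunk stays at the head of the queue and its partial Result is stashed in
# `context.rincomplete` so the next chunk can finish it; a result row is
# printed for every directory that this chunk completes.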
def process_one_chunk(pool, chunk_size, objdump, context):
    objs = []

    incomplete = False
    dir_results = {}
    ordered_dirs = []
    while context.npending > 0 and len(objs) < chunk_size:
        this_dir = context.pending_objs[0][0]
        ordered_dirs.append(this_dir)
        re = Result()
        if context.rincomplete is not None:
            re = context.rincomplete
            context.rincomplete = None

        dir_results[this_dir] = re
        re.file = this_dir

        nneeded = chunk_size - len(objs)
        objs_this_dir = context.pending_objs[0][1]
        navail = len(objs_this_dir)
        ntaken = min(nneeded, navail)
        objs.extend(objs_this_dir[0:ntaken])
        remaining_objs_this_dir = objs_this_dir[ntaken:]
        context.pending_objs[0] = (context.pending_objs[0][0], remaining_objs_this_dir)
        context.npending -= ntaken
        if ntaken == navail:
            context.pending_objs.pop(0)
        else:
            incomplete = True

        re.nfiles += ntaken

    assert(len(objs) == chunk_size or context.npending == 0)

    copier = functools.partial(process_file, objdump=objdump)
    mapped_results = list(pool.map(copier, objs))

    for mr in mapped_results:
        result_dir = os.path.dirname(mr.file)
        result_entry = dir_results[result_dir]
        add_results(result_entry, mr)

    # It's only possible that a single item is incomplete, and it has to be the
    # last item.
    if incomplete:
        context.rincomplete = dir_results[ordered_dirs[-1]]
        ordered_dirs.pop()

    # Now ordered_dirs contains a list of all directories which *did* complete.
    for c in ordered_dirs:
        re = dir_results[c]
        add_results(context.rcumulative, re)
        print_result_row(c, re)

def process_pending_files(pool, chunk_size, objdump, context):
    while context.npending >= chunk_size:
        process_one_chunk(pool, chunk_size, objdump, context)

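# Main driver: walk the tree, queue each directory's object files, drain the
# queue in pool-sized chunks as it grows, flush the remainder, and print the
# failure/crash summary.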
def go():
    global args

    obj_dir = args.dir
    extensions = args.extensions.split(',')
    extensions = [x if x[0] == '.' else '.' + x for x in extensions]

    pool_size = 48
    pool = Pool(processes=pool_size)

    try:
        nfiles = 0
        context = MapContext()

        for root, dirs, files in os.walk(obj_dir):
            root = os.path.normpath(root)
            pending = []
            for f in files:
                file, ext = os.path.splitext(f)
                if not ext in extensions:
                    continue

                nfiles += 1
                full_path = os.path.join(root, f)
                full_path = os.path.normpath(full_path)
                pending.append(full_path)

            # If this directory had no object files, just print a default
            # status line and continue with the next dir
            if len(pending) == 0:
                print_result_row(root, Result())
                continue

            context.npending += len(pending)
            context.pending_objs.append((root, pending))
            # Drain the tasks, `pool_size` at a time, until we have less than
            # `pool_size` tasks remaining.
            process_pending_files(pool, pool_size, args.objdump, context)

        assert(context.npending < pool_size)
        process_one_chunk(pool, pool_size, args.objdump, context)

        total = context.rcumulative
        nfailed = len(total.errors)
        nsuccess = total.nsymbols - nfailed
        ncrashed = len(total.crashed)

        if (nfailed > 0):
            print("Failures:")
            for m in sorted(total.errors):
                print("  " + m)
        if (ncrashed > 0):
            print("Crashes:")
            for f in sorted(total.crashed):
                print("  " + f)
        print("Summary:")
        spct = float(nsuccess)/float(total.nsymbols)
        fpct = float(nfailed)/float(total.nsymbols)
        cpct = float(ncrashed)/float(nfiles)
        print("Processed {0} object files.".format(nfiles))
        print("{0}/{1} symbols successfully demangled ({2:.4%})".format(nsuccess, total.nsymbols, spct))
        print("{0} symbols could not be demangled ({1:.4%})".format(nfailed, fpct))
        print("{0} files crashed while demangling ({1:.4%})".format(ncrashed, cpct))

    except:
        traceback.print_exc()

    pool.close()
    pool.join()

if __name__ == "__main__":
    def_obj = 'obj' if sys.platform == 'win32' else 'o'

    parser = argparse.ArgumentParser(description='Demangle all symbols in a tree of object files, looking for failures.')
    parser.add_argument('dir', type=str, help='the root directory at which to start crawling')
    parser.add_argument('--objdump', type=str, help='path to llvm-objdump.  If not specified ' +
                        'the tool is located as if by `which llvm-objdump`.')
    parser.add_argument('--extensions', type=str, default=def_obj,
                        help='comma separated list of extensions to demangle (e.g. `o,obj`).  ' +
                        'By default this will be `obj` on Windows and `o` otherwise.')

    args = parser.parse_args()

    multiprocessing.freeze_support()
    go()