252
|
1 #!/usr/bin/env python3
|
150
|
2 """Calls C-Reduce to create a minimal reproducer for clang crashes.
|
|
3
|
|
4 Output files:
|
|
5 *.reduced.sh -- crash reproducer with minimal arguments
|
|
6 *.reduced.cpp -- the reduced file
|
|
7 *.test.sh -- interestingness test for C-Reduce
|
|
8 """
|
|
9
|
|
10 from __future__ import print_function
|
|
11 from argparse import ArgumentParser, RawTextHelpFormatter
|
|
12 import os
|
|
13 import re
|
252
|
14 import shutil
|
150
|
15 import stat
|
|
16 import sys
|
|
17 import subprocess
|
|
18 import pipes
|
|
19 import shlex
|
|
20 import tempfile
|
|
21 import shutil
|
221
|
22 import multiprocessing
|
150
|
23
|
|
# Process-wide settings. main() rebinds all three from the command line
# before any Reduce object is created.
verbose = False  # gates the output of verbose_print()
creduce_cmd = clang_cmd = None  # executable paths resolved by check_cmd()
|
|
27
|
252
|
28
|
150
|
def verbose_print(*args, **kwargs):
    """Forward all arguments to print(), but only when `verbose` is set."""
    if not verbose:
        return
    print(*args, **kwargs)
|
|
32
|
150
|
33
|
|
def check_file(fname):
    """Return the normalized form of `fname`.

    Exits the program with an error message when the normalized path does
    not name an existing regular file.
    """
    normalized = os.path.normpath(fname)
    if os.path.isfile(normalized):
        return normalized
    sys.exit("ERROR: %s does not exist" % (normalized))
|
|
39
|
150
|
40
|
|
def check_cmd(cmd_name, cmd_dir, cmd_path=None):
    """
    Locate an executable and return the path reported by shutil.which().

    When cmd_path is given it alone is considered (after being made
    absolute); otherwise cmd_name is searched for in cmd_dir, or in the
    default search path when cmd_dir is empty. Exits the program with an
    error message if nothing is found.
    """
    if not cmd_path:
        found = shutil.which(cmd_name, path=cmd_dir)
        if found:
            return found
        searched = cmd_dir if cmd_dir else "$PATH"
        sys.exit("ERROR: `%s` not found in %s" % (cmd_name, searched))

    # An absolute path keeps the creduce test runnable from any directory.
    absolute = os.path.abspath(cmd_path)
    found = shutil.which(absolute)
    if found:
        return found
    sys.exit("ERROR: executable `%s` not found" % (absolute))
|
150
|
61
|
|
62
|
|
def quote_cmd(cmd):
    """Return the argument list `cmd` as a single shell-escaped string.

    Each argument is quoted individually and the results are joined with
    single spaces, so the string can be pasted into a shell script.
    """
    # shlex.quote replaces pipes.quote: the pipes module is deprecated and
    # no longer exists in Python 3.13.
    return " ".join(shlex.quote(arg) for arg in cmd)
|
|
65
|
150
|
66
|
|
def write_to_script(text, filename):
    """Write `text` to `filename` and add the owner-execute permission bit."""
    with open(filename, "w") as script:
        script.write(text)
    current_mode = os.stat(filename).st_mode
    os.chmod(filename, current_mode | stat.S_IEXEC)
|
|
71
|
150
|
72
|
|
class Reduce(object):
    """Reduce one clang crash: its source file and its command line.

    The constructor copies the source file, reads the clang arguments from
    the crash script and runs clang once to learn what output identifies the
    crash. The remaining methods are the individual reduction stages that
    main() calls in order.
    """

    def __init__(self, crash_script, file_to_reduce, core_number):
        """Set up output file names and capture the crash signature.

        crash_script   -- script whose first command line is the clang call
        file_to_reduce -- source file passed to clang in that script
        core_number    -- value handed to creduce via its --n flag
        """
        crash_script_name, crash_script_ext = os.path.splitext(crash_script)
        file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce)

        self.testfile = file_reduce_name + ".test.sh"
        self.crash_script = crash_script_name + ".reduced" + crash_script_ext
        self.file_to_reduce = file_reduce_name + ".reduced" + file_reduce_ext
        # All later stages operate on this copy, not on the input file.
        shutil.copy(file_to_reduce, self.file_to_reduce)

        self.clang = clang_cmd
        self.clang_args = []
        self.expected_output = []
        self.needs_stack_trace = False
        # Both flags belong in one list. They used to be assigned in two
        # consecutive statements, so the second silently discarded --tidy.
        self.creduce_flags = ["--tidy", "--n", str(core_number)]

        self.read_clang_args(crash_script, file_to_reduce)
        self.read_expected_output()

    def get_crash_cmd(self, cmd=None, args=None, filename=None):
        """Return the crash command as a list: [cmd] + args + [filename].

        Omitted pieces default to self.clang, self.clang_args and
        self.file_to_reduce respectively.
        """
        if not cmd:
            cmd = self.clang
        # Only None selects the default. An explicitly empty list means
        # "run with no arguments" and must not be replaced by the full
        # argument list, or removing the last argument could never be
        # checked honestly.
        if args is None:
            args = self.clang_args
        if not filename:
            filename = self.file_to_reduce

        return [cmd] + args + [filename]

    def read_clang_args(self, crash_script, filename):
        """Extract the clang arguments from `crash_script` into self.clang_args.

        The command is taken from the first line that is neither blank nor a
        comment. Its first word (the compiler) and the last occurrence of
        `filename` are dropped. Exits if no command line is found.
        """
        print("\nReading arguments from crash script...")
        cmd = []
        with open(crash_script) as f:
            # Assume clang call is the first non comment line.
            for line in f:
                stripped = line.strip()
                if not stripped or stripped.startswith("#"):
                    continue
                cmd = shlex.split(line)
                break
        if not cmd:
            sys.exit("Could not find command in the crash script.")

        # Remove clang and filename from the command
        # Assume the last occurrence of the filename is the clang input file
        del cmd[0]
        for i in range(len(cmd) - 1, -1, -1):
            if cmd[i] == filename:
                del cmd[i]
                break
        self.clang_args = cmd
        verbose_print("Clang arguments:", quote_cmd(self.clang_args))

    def read_expected_output(self):
        """Run the crash command and record what identifies the crash.

        Stores in self.expected_output either the first matching well-known
        error message or, failing that, up to five function names from the
        stack trace (setting self.needs_stack_trace). Exits with status 1 if
        neither can be found.
        """
        print("\nGetting expected crash output...")
        p = subprocess.Popen(
            self.get_crash_cmd(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT
        )
        crash_output, _ = p.communicate()
        result = []

        # Remove color codes
        ansi_escape = r"\x1b\[[0-?]*m"
        crash_output = re.sub(ansi_escape, "", crash_output.decode("utf-8"))

        # Look for specific error messages
        regexes = [
            r"Assertion .+ failed",  # Linux assert()
            r"Assertion failed: .+,",  # FreeBSD/Mac assert()
            r"fatal error: error in backend: .+",
            r"LLVM ERROR: .+",
            r"UNREACHABLE executed at .+?!",
            r"LLVM IR generation of declaration '.+'",
            r"Generating code for declaration '.+'",
            r"\*\*\* Bad machine code: .+ \*\*\*",
            r"ERROR: .*Sanitizer: [^ ]+ ",
        ]
        for msg_re in regexes:
            match = re.search(msg_re, crash_output)
            if match:
                msg = match.group(0)
                result = [msg]
                print("Found message:", msg)
                break

        # If no message was found, use the top five stack trace functions,
        # ignoring some common functions
        # Five is a somewhat arbitrary number; the goal is to get a small number
        # of identifying functions with some leeway for common functions
        if not result:
            self.needs_stack_trace = True
            # Frame number, hex address, then the function name up to "(".
            stacktrace_re = r"[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\("
            filters = [
                "PrintStackTrace",
                "RunSignalHandlers",
                "CleanupOnSignal",
                "HandleCrash",
                "SignalHandler",
                "__restore_rt",
                "gsignal",
                "abort",
            ]

            def skip_function(func_name):
                return any(name in func_name for name in filters)

            matches = re.findall(stacktrace_re, crash_output)
            result = [x for x in matches if x and not skip_function(x)][:5]
            for msg in result:
                print("Found stack trace function:", msg)

        if not result:
            print("ERROR: no crash was found")
            print("The crash output was:\n========\n%s========" % crash_output)
            sys.exit(1)

        self.expected_output = result

    def check_expected_output(self, args=None, filename=None):
        """Return True if running clang still prints every expected message.

        `args` and `filename` default to the current reduced arguments and
        file; an empty `args` list is honoured as "no arguments".
        """
        p = subprocess.Popen(
            self.get_crash_cmd(args=args, filename=filename),
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
        crash_output, _ = p.communicate()
        # Decode once rather than once per expected message.
        decoded_output = crash_output.decode("utf-8")
        return all(msg in decoded_output for msg in self.expected_output)

    def write_interestingness_test(self):
        """Write the C-Reduce test script to self.testfile and sanity-check it.

        The script fails (exit 1) when the crash command succeeds or when any
        expected message is missing from its output.
        """
        print("\nCreating the interestingness test...")

        # Disable symbolization if it's not required to avoid slow symbolization.
        disable_symbolization = ""
        if not self.needs_stack_trace:
            disable_symbolization = "export LLVM_DISABLE_SYMBOLIZATION=1"

        output = """#!/bin/bash
%s
if %s >& t.log ; then
  exit 1
fi
""" % (
            disable_symbolization,
            quote_cmd(self.get_crash_cmd()),
        )

        for msg in self.expected_output:
            # shlex.quote replaces the deprecated pipes.quote.
            output += "grep -F %s t.log || exit 1\n" % shlex.quote(msg)

        write_to_script(output, self.testfile)
        self.check_interestingness()

    def check_interestingness(self):
        """Exit unless the test passes on the real file and fails on an empty one."""
        testfile = os.path.abspath(self.testfile)

        # Check that the test considers the original file interesting
        with open(os.devnull, "w") as devnull:
            returncode = subprocess.call(testfile, stdout=devnull)
        if returncode:
            sys.exit("The interestingness test does not pass for the original file.")

        # Check that an empty file is not interesting
        # Instead of modifying the filename in the test file, just run the command
        with tempfile.NamedTemporaryFile() as empty_file:
            is_interesting = self.check_expected_output(filename=empty_file.name)
        if is_interesting:
            sys.exit("The interestingness test passes for an empty file.")

    def clang_preprocess(self):
        """Replace the file to reduce with its preprocessed form if it still crashes.

        Tries preprocessing without line markers (-P) first, then with them.
        If neither result reproduces the crash, or preprocessing fails, the
        current file is left untouched.
        """
        print("\nTrying to preprocess the source file...")
        with tempfile.NamedTemporaryFile() as tmpfile:
            cmd_preprocess = self.get_crash_cmd() + ["-E", "-o", tmpfile.name]
            cmd_preprocess_no_lines = cmd_preprocess + ["-P"]
            try:
                subprocess.check_call(cmd_preprocess_no_lines)
                if self.check_expected_output(filename=tmpfile.name):
                    print("Successfully preprocessed with line markers removed")
                    shutil.copy(tmpfile.name, self.file_to_reduce)
                else:
                    subprocess.check_call(cmd_preprocess)
                    if self.check_expected_output(filename=tmpfile.name):
                        print("Successfully preprocessed without removing line markers")
                        shutil.copy(tmpfile.name, self.file_to_reduce)
                    else:
                        print(
                            "No longer crashes after preprocessing -- "
                            "using original source"
                        )
            except subprocess.CalledProcessError:
                print("Preprocessing failed")

    @staticmethod
    def filter_args(
        args, opts_equal=(), opts_startswith=(), opts_one_arg_startswith=()
    ):
        """Return a new list holding the members of `args` that survive filtering.

        opts_equal              -- drop arguments equal to any of these
        opts_startswith         -- drop arguments starting with any of these
        opts_one_arg_startswith -- drop arguments starting with any of these
                                   together with the argument that follows
        """
        # The defaults are immutable tuples so no state is shared between calls.
        result = []
        skip_next = False
        for arg in args:
            if skip_next:
                skip_next = False
                continue
            if any(arg == a for a in opts_equal):
                continue
            if any(arg.startswith(a) for a in opts_startswith):
                continue
            if any(arg.startswith(a) for a in opts_one_arg_startswith):
                skip_next = True
                continue
            result.append(arg)
        return result

    def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs):
        """Filter `args` (see filter_args) and keep the result if it still crashes.

        When `extra_arg` is given it is moved to, or added at, the end of the
        filtered list. Returns the new list if it differs from `args` and
        still reproduces the crash; otherwise returns `args` unchanged.
        """
        new_args = self.filter_args(args, **kwargs)

        if extra_arg:
            if extra_arg in new_args:
                new_args.remove(extra_arg)
            new_args.append(extra_arg)

        if new_args != args and self.check_expected_output(args=new_args):
            if msg:
                verbose_print(msg)
            return new_args
        return args

    def try_remove_arg_by_index(self, args, index):
        """Try dropping args[index]; return (arguments, next index to try).

        On success the shortened list is returned with the same index, since
        the following argument has moved into that slot. On failure the
        original list is returned with index + 1.
        """
        new_args = args[:index] + args[index + 1 :]
        removed_arg = args[index]

        # Heuristic for grouping arguments:
        # remove next argument if it doesn't start with "-"
        if index < len(new_args) and not new_args[index].startswith("-"):
            del new_args[index]
            removed_arg += " " + args[index + 1]

        if self.check_expected_output(args=new_args):
            verbose_print("Removed", removed_arg)
            return new_args, index
        return args, index + 1

    def simplify_clang_args(self):
        """Simplify clang arguments before running C-Reduce to reduce the time the
        interestingness test takes to run.
        """
        print("\nSimplifying the clang command...")

        # Remove some clang arguments to speed up the interestingness test
        new_args = self.clang_args
        new_args = self.try_remove_args(
            new_args,
            msg="Removed debug info options",
            opts_startswith=["-gcodeview", "-debug-info-kind=", "-debugger-tuning="],
        )

        new_args = self.try_remove_args(
            new_args, msg="Removed --show-includes", opts_startswith=["--show-includes"]
        )
        # Not suppressing warnings (-w) sometimes prevents the crash from occurring
        # after preprocessing
        new_args = self.try_remove_args(
            new_args,
            msg="Replaced -W options with -w",
            extra_arg="-w",
            opts_startswith=["-W"],
        )
        new_args = self.try_remove_args(
            new_args,
            msg="Replaced optimization level with -O0",
            extra_arg="-O0",
            opts_startswith=["-O"],
        )

        # Try to remove compilation steps
        new_args = self.try_remove_args(
            new_args, msg="Added -emit-llvm", extra_arg="-emit-llvm"
        )
        new_args = self.try_remove_args(
            new_args, msg="Added -fsyntax-only", extra_arg="-fsyntax-only"
        )

        # Try to make implicit int an error for more sensible test output
        new_args = self.try_remove_args(
            new_args,
            msg="Added -Werror=implicit-int",
            opts_equal=["-w"],
            extra_arg="-Werror=implicit-int",
        )

        self.clang_args = new_args
        verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd()))

    def reduce_clang_args(self):
        """Minimize the clang arguments after running C-Reduce, to get the smallest
        command that reproduces the crash on the reduced file.
        """
        print("\nReducing the clang crash command...")

        new_args = self.clang_args

        # Remove some often occurring args
        new_args = self.try_remove_args(
            new_args, msg="Removed -D options", opts_startswith=["-D"]
        )
        new_args = self.try_remove_args(
            new_args, msg="Removed -D options", opts_one_arg_startswith=["-D"]
        )
        new_args = self.try_remove_args(
            new_args, msg="Removed -I options", opts_startswith=["-I"]
        )
        new_args = self.try_remove_args(
            new_args, msg="Removed -I options", opts_one_arg_startswith=["-I"]
        )
        new_args = self.try_remove_args(
            new_args, msg="Removed -W options", opts_startswith=["-W"]
        )

        # Remove other cases that aren't covered by the heuristic
        new_args = self.try_remove_args(
            new_args, msg="Removed -mllvm", opts_one_arg_startswith=["-mllvm"]
        )

        # Finally try every remaining argument one at a time.
        i = 0
        while i < len(new_args):
            new_args, i = self.try_remove_arg_by_index(new_args, i)

        self.clang_args = new_args

        reduced_cmd = quote_cmd(self.get_crash_cmd())
        write_to_script(reduced_cmd, self.crash_script)
        print("Reduced command:", reduced_cmd)

    def run_creduce(self):
        """Run C-Reduce on the file to reduce using the interestingness test."""
        print("\nRunning C-Reduce...")
        # Start the process outside the try block so that `p` is always bound
        # when the KeyboardInterrupt handler runs.
        p = subprocess.Popen(
            [creduce_cmd] + self.creduce_flags + [self.testfile, self.file_to_reduce]
        )
        try:
            p.communicate()
        except KeyboardInterrupt:
            # Hack to kill C-Reduce because it jumps into its own pgid
            print("\n\nctrl-c detected, killed creduce")
            p.kill()
|
150
|
420
|
|
421
|
|
def main():
    """Parse the command line, locate the tools and run every reduction stage."""
    global verbose
    global creduce_cmd
    global clang_cmd

    arg_parser = ArgumentParser(
        description=__doc__, formatter_class=RawTextHelpFormatter
    )
    arg_parser.add_argument(
        "crash_script",
        type=str,
        nargs=1,
        help="Name of the script that generates the crash.",
    )
    arg_parser.add_argument(
        "file_to_reduce",
        type=str,
        nargs=1,
        help="Name of the file to be reduced.",
    )
    arg_parser.add_argument(
        "--llvm-bin",
        dest="llvm_bin",
        type=str,
        help="Path to the LLVM bin directory.",
    )
    arg_parser.add_argument(
        "--clang",
        dest="clang",
        type=str,
        help="The path to the `clang` executable. "
        "By default uses the llvm-bin directory.",
    )
    arg_parser.add_argument(
        "--creduce",
        dest="creduce",
        type=str,
        help="The path to the `creduce` executable. "
        "Required if `creduce` is not in PATH environment.",
    )
    arg_parser.add_argument(
        "--n",
        dest="core_number",
        type=int,
        default=max(4, multiprocessing.cpu_count() // 2),
        help="Number of cores to use.",
    )
    arg_parser.add_argument("-v", "--verbose", action="store_true")
    opts = arg_parser.parse_args()

    verbose = opts.verbose
    llvm_bin = None
    if opts.llvm_bin:
        llvm_bin = os.path.abspath(opts.llvm_bin)
    creduce_cmd = check_cmd("creduce", None, opts.creduce)
    clang_cmd = check_cmd("clang", llvm_bin, opts.clang)

    # nargs=1 makes each positional a one-element list.
    crash_script = check_file(opts.crash_script[0])
    file_to_reduce = check_file(opts.file_to_reduce[0])

    reducer = Reduce(crash_script, file_to_reduce, opts.core_number)

    # The stages run strictly in this order.
    for stage in (
        reducer.simplify_clang_args,
        reducer.write_interestingness_test,
        reducer.clang_preprocess,
        reducer.run_creduce,
        reducer.reduce_clang_args,
    ):
        stage()
|
150
|
480
|
252
|
481
|
|
# Run the reducer only when this file is executed as a script.
if __name__ == "__main__":
    main()
|