150
|
1 #!/usr/bin/env python
|
|
2 """Calls C-Reduce to create a minimal reproducer for clang crashes.
|
|
3
|
|
4 Output files:
|
|
5 *.reduced.sh -- crash reproducer with minimal arguments
|
|
6 *.reduced.cpp -- the reduced file
|
|
7 *.test.sh -- interestingness test for C-Reduce
|
|
8 """
|
|
9
|
|
10 from __future__ import print_function
|
|
11 from argparse import ArgumentParser, RawTextHelpFormatter
|
|
12 import os
|
|
13 import re
|
|
14 import stat
|
|
15 import sys
|
|
16 import subprocess
|
|
17 import pipes
|
|
18 import shlex
|
|
19 import tempfile
|
|
20 import shutil
|
|
21 from distutils.spawn import find_executable
|
|
22
|
|
# Module-level settings. main() rebinds all four (via `global`) from the
# parsed command line before any Reduce object is created.
verbose = False     # when true, verbose_print() forwards its arguments to print()
creduce_cmd = None  # result of check_cmd() for `creduce`; run by Reduce.run_creduce()
clang_cmd = None    # result of check_cmd() for `clang`; copied into Reduce.clang
not_cmd = None      # result of check_cmd() for `not`; written into the interestingness test
|
|
27
|
|
def verbose_print(*args, **kwargs):
    """Forward all arguments to print(), but only when `verbose` is set."""
    if not verbose:
        return
    print(*args, **kwargs)
|
|
31
|
|
def check_file(fname):
    """Return the normalized path of `fname`.

    Exits the program with an error message when the normalized path does
    not name an existing regular file.
    """
    normalized = os.path.normpath(fname)
    if os.path.isfile(normalized):
        return normalized
    sys.exit("ERROR: %s does not exist" % (normalized))
|
|
37
|
|
def check_cmd(cmd_name, cmd_dir, cmd_path=None):
    """
    Returns absolute path to cmd_path if it is given,
    or absolute path to cmd_dir/cmd_name.

    When cmd_dir is empty or None, cmd_name is looked up in $PATH instead.
    Exits the program with an error message if no executable is found.
    """
    # distutils (and with it find_executable) was removed in Python 3.12.
    # shutil.which is the standard-library replacement; find_executable is
    # only consulted on interpreters that lack it (Python 2).
    which = getattr(shutil, 'which', None)
    if which is None:
        which = find_executable

    if cmd_path:
        # Make the path absolute so the creduce test can be run from any directory.
        cmd_path = os.path.abspath(cmd_path)
        cmd = which(cmd_path)
        if cmd:
            return cmd
        sys.exit("ERROR: executable `%s` not found" % (cmd_path))

    cmd = which(cmd_name, path=cmd_dir)
    if cmd:
        return cmd

    # Only used to make the error message readable.
    if not cmd_dir:
        cmd_dir = "$PATH"
    sys.exit("ERROR: `%s` not found in %s" % (cmd_name, cmd_dir))
|
|
58
|
|
def quote_cmd(cmd):
    """Return the argument list `cmd` as one shell-safe, space-joined string."""
    # The pipes module was removed in Python 3.13; shlex.quote is the same
    # function under its supported name. pipes.quote is only consulted on
    # interpreters whose shlex lacks quote() (Python 2).
    quote = getattr(shlex, 'quote', None)
    if quote is None:
        quote = pipes.quote
    return ' '.join(quote(arg) for arg in cmd)
|
|
61
|
|
def write_to_script(text, filename):
    """Write `text` to `filename` and add the owner-execute bit to its mode."""
    with open(filename, 'w') as script:
        script.write(text)
    current_mode = os.stat(filename).st_mode
    os.chmod(filename, current_mode | stat.S_IEXEC)
|
|
66
|
|
class Reduce(object):
    """Drives the reduction of one clang crash.

    Constructing an instance copies the input file to `<name>.reduced<ext>`,
    reads the clang arguments from the crash script and runs clang once to
    learn which output identifies the crash.
    """

    def __init__(self, crash_script, file_to_reduce):
        """Set up the output paths and capture the baseline crash.

        crash_script   -- script whose first command line is the clang call
        file_to_reduce -- source file passed to clang in that command
        """
        crash_script_name, crash_script_ext = os.path.splitext(crash_script)
        file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce)

        self.testfile = file_reduce_name + '.test.sh'
        self.crash_script = crash_script_name + '.reduced' + crash_script_ext
        self.file_to_reduce = file_reduce_name + '.reduced' + file_reduce_ext
        # All later steps operate on the copy; the original is left alone.
        shutil.copy(file_to_reduce, self.file_to_reduce)

        self.clang = clang_cmd
        self.clang_args = []
        self.expected_output = []
        self.is_crash = True
        self.creduce_flags = ["--tidy"]

        self.read_clang_args(crash_script, file_to_reduce)
        self.read_expected_output()

    def get_crash_cmd(self, cmd=None, args=None, filename=None):
        """Return the clang invocation as a list: [cmd] + args + [filename].

        Omitted parts default to self.clang, self.clang_args and
        self.file_to_reduce respectively.
        """
        if not cmd:
            cmd = self.clang
        # Compare against None: an explicitly empty argument list means
        # "run with no arguments" and must not fall back to self.clang_args.
        if args is None:
            args = self.clang_args
        if not filename:
            filename = self.file_to_reduce

        return [cmd] + args + [filename]

    def read_clang_args(self, crash_script, filename):
        """Store in self.clang_args the arguments of the clang call found in
        `crash_script`, without the executable and without `filename`.

        Exits the program if the script contains no command.
        """
        print("\nReading arguments from crash script...")
        cmd = []
        with open(crash_script) as f:
            # Assume clang call is the first line that is neither blank nor
            # a comment.
            for line in f:
                stripped = line.strip()
                if not stripped or stripped.startswith('#'):
                    continue
                cmd = shlex.split(line)
                break
        if not cmd:
            sys.exit("Could not find command in the crash script.")

        # Remove clang and filename from the command
        # Assume the last occurrence of the filename is the clang input file
        del cmd[0]
        for i in range(len(cmd) - 1, -1, -1):
            if cmd[i] == filename:
                del cmd[i]
                break
        self.clang_args = cmd
        verbose_print("Clang arguments:", quote_cmd(self.clang_args))

    def read_expected_output(self):
        """Run the crash command and record in self.expected_output the
        strings that identify the crash.

        A known error message is preferred; otherwise up to five stack trace
        function names are used. Exits the program if neither is found.
        """
        print("\nGetting expected crash output...")
        p = subprocess.Popen(self.get_crash_cmd(),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        crash_output, _ = p.communicate()
        result = []

        # Remove color codes. Undecodable bytes are replaced rather than
        # aborting the whole run with a UnicodeDecodeError.
        ansi_escape = r'\x1b\[[0-?]*m'
        crash_output = re.sub(ansi_escape, '',
                              crash_output.decode('utf-8', 'replace'))

        # Look for specific error messages
        regexes = [r"Assertion `(.+)' failed",  # Linux assert()
                   r"Assertion failed: (.+),",  # FreeBSD/Mac assert()
                   r"fatal error: error in backend: (.+)",
                   r"LLVM ERROR: (.+)",
                   r"UNREACHABLE executed (at .+)?!",
                   r"LLVM IR generation of declaration '(.+)'",
                   r"Generating code for declaration '(.+)'",
                   r"\*\*\* Bad machine code: (.+) \*\*\*"]
        for msg_re in regexes:
            match = re.search(msg_re, crash_output)
            if match:
                # The group is optional in one pattern; fall back to the
                # whole match so the message is never None.
                msg = match.group(1) or match.group(0)
                result = [msg]
                print("Found message:", msg)

                # This message is matched against a pattern for a reported
                # fatal error, so the test must not pass --crash to `not`.
                if "fatal error:" in msg_re:
                    self.is_crash = False
                break

        # If no message was found, use the top five stack trace functions,
        # ignoring some common functions
        # Five is a somewhat arbitrary number; the goal is to get a small
        # number of identifying functions with some leeway for common
        # functions
        if not result:
            stacktrace_re = r'[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\('
            filters = ["PrintStackTraceSignalHandler",
                       "llvm::sys::RunSignalHandlers",
                       "SignalHandler", "__restore_rt", "gsignal", "abort"]
            matches = re.findall(stacktrace_re, crash_output)
            result = [x for x in matches
                      if x and x.strip() not in filters][:5]
            for msg in result:
                print("Found stack trace function:", msg)

        if not result:
            print("ERROR: no crash was found")
            print("The crash output was:\n========\n%s========" % crash_output)
            sys.exit(1)

        self.expected_output = result

    def check_expected_output(self, args=None, filename=None):
        """Run clang with `args` on `filename` and return True when every
        string in self.expected_output occurs in the combined output.

        Omitted parameters default to self.clang_args / self.file_to_reduce.
        """
        # None check, not truthiness: an empty list is a real candidate.
        if args is None:
            args = self.clang_args
        if not filename:
            filename = self.file_to_reduce

        p = subprocess.Popen(self.get_crash_cmd(args=args, filename=filename),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        crash_output, _ = p.communicate()
        output_text = crash_output.decode('utf-8', 'replace')
        return all(msg in output_text for msg in self.expected_output)

    def write_interestingness_test(self):
        """Write the shell script C-Reduce uses as its interestingness test
        to self.testfile, then sanity-check it."""
        print("\nCreating the interestingness test...")

        crash_flag = "--crash" if self.is_crash else ""

        # quote_cmd on a one-element list quotes a single string, so all
        # shell quoting in this file goes through the same helper.
        output = "#!/bin/bash\n%s %s %s >& t.log || exit 1\n" % \
            (quote_cmd([not_cmd]), crash_flag,
             quote_cmd(self.get_crash_cmd()))

        for msg in self.expected_output:
            output += 'grep -F %s t.log || exit 1\n' % quote_cmd([msg])

        write_to_script(output, self.testfile)
        self.check_interestingness()

    def check_interestingness(self):
        """Exit the program unless the test accepts the current file and the
        expected output is absent for an empty file."""
        testfile = os.path.abspath(self.testfile)

        # Check that the test considers the original file interesting
        with open(os.devnull, 'w') as devnull:
            returncode = subprocess.call(testfile, stdout=devnull)
        if returncode:
            sys.exit("The interestingness test does not pass for the original file.")

        # Check that an empty file is not interesting
        # Instead of modifying the filename in the test file, just run the command
        with tempfile.NamedTemporaryFile() as empty_file:
            is_interesting = self.check_expected_output(filename=empty_file.name)
        if is_interesting:
            sys.exit("The interestingness test passes for an empty file.")

    def clang_preprocess(self):
        """Try to replace self.file_to_reduce with its preprocessed form.

        Preprocessing without line markers (-P) is tried first, then with
        them. The file is only replaced when the expected output is still
        produced for the preprocessed text.
        """
        print("\nTrying to preprocess the source file...")
        with tempfile.NamedTemporaryFile() as tmpfile:
            cmd_preprocess = self.get_crash_cmd() + ['-E', '-o', tmpfile.name]
            cmd_preprocess_no_lines = cmd_preprocess + ['-P']
            try:
                subprocess.check_call(cmd_preprocess_no_lines)
                if self.check_expected_output(filename=tmpfile.name):
                    print("Successfully preprocessed with line markers removed")
                    shutil.copy(tmpfile.name, self.file_to_reduce)
                else:
                    subprocess.check_call(cmd_preprocess)
                    if self.check_expected_output(filename=tmpfile.name):
                        print("Successfully preprocessed without removing line markers")
                        shutil.copy(tmpfile.name, self.file_to_reduce)
                    else:
                        print("No longer crashes after preprocessing -- "
                              "using original source")
            except subprocess.CalledProcessError:
                print("Preprocessing failed")

    @staticmethod
    def filter_args(args, opts_equal=(), opts_startswith=(),
                    opts_one_arg_startswith=()):
        """Return a copy of `args` with the selected options dropped.

        opts_equal              -- drop arguments equal to one of these
        opts_startswith         -- drop arguments starting with one of these
        opts_one_arg_startswith -- drop arguments starting with one of these
                                   together with the argument that follows
        """
        result = []
        skip_next = False
        for arg in args:
            if skip_next:
                skip_next = False
                continue
            if any(arg == a for a in opts_equal):
                continue
            if any(arg.startswith(a) for a in opts_startswith):
                continue
            if any(arg.startswith(a) for a in opts_one_arg_startswith):
                skip_next = True
                continue
            result.append(arg)
        return result

    def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs):
        """Filter `args` (see filter_args for **kwargs), optionally move or
        add `extra_arg` at the end, and return the new list if it differs
        from `args` and still produces the expected output; otherwise return
        `args` unchanged."""
        new_args = self.filter_args(args, **kwargs)

        if extra_arg:
            # Ensure exactly one more copy of extra_arg, placed last.
            if extra_arg in new_args:
                new_args.remove(extra_arg)
            new_args.append(extra_arg)

        if (new_args != args and
                self.check_expected_output(args=new_args)):
            if msg:
                verbose_print(msg)
            return new_args
        return args

    def try_remove_arg_by_index(self, args, index):
        """Try dropping args[index] (and a following value argument).

        Returns (new_args, index) when the expected output is still produced
        without it, so the caller re-examines the same position; otherwise
        returns (args, index + 1).
        """
        new_args = args[:index] + args[index+1:]
        removed_arg = args[index]

        # Heuristic for grouping arguments:
        # remove next argument if it doesn't start with "-"
        if index < len(new_args) and not new_args[index].startswith('-'):
            del new_args[index]
            removed_arg += ' ' + args[index+1]

        if self.check_expected_output(args=new_args):
            verbose_print("Removed", removed_arg)
            return new_args, index
        return args, index+1

    def simplify_clang_args(self):
        """Simplify clang arguments before running C-Reduce to reduce the time the
        interestingness test takes to run.
        """
        print("\nSimplifying the clang command...")

        # Remove some clang arguments to speed up the interestingness test
        new_args = self.clang_args
        new_args = self.try_remove_args(new_args,
                                        msg="Removed debug info options",
                                        opts_startswith=["-gcodeview",
                                                         "-debug-info-kind=",
                                                         "-debugger-tuning="])

        new_args = self.try_remove_args(new_args,
                                        msg="Removed --show-includes",
                                        opts_startswith=["--show-includes"])
        # Not suppressing warnings (-w) sometimes prevents the crash from occurring
        # after preprocessing
        new_args = self.try_remove_args(new_args,
                                        msg="Replaced -W options with -w",
                                        extra_arg='-w',
                                        opts_startswith=["-W"])
        new_args = self.try_remove_args(new_args,
                                        msg="Replaced optimization level with -O0",
                                        extra_arg="-O0",
                                        opts_startswith=["-O"])

        # Try to remove compilation steps
        new_args = self.try_remove_args(new_args, msg="Added -emit-llvm",
                                        extra_arg="-emit-llvm")
        new_args = self.try_remove_args(new_args, msg="Added -fsyntax-only",
                                        extra_arg="-fsyntax-only")

        # Try to make implicit int an error for more sensible test output
        new_args = self.try_remove_args(new_args, msg="Added -Werror=implicit-int",
                                        opts_equal=["-w"],
                                        extra_arg="-Werror=implicit-int")

        self.clang_args = new_args
        verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd()))

    def reduce_clang_args(self):
        """Minimize the clang arguments after running C-Reduce, to get the smallest
        command that reproduces the crash on the reduced file.

        The resulting command is written to self.crash_script.
        """
        print("\nReducing the clang crash command...")

        new_args = self.clang_args

        # Remove some often occurring args
        new_args = self.try_remove_args(new_args, msg="Removed -D options",
                                        opts_startswith=["-D"])
        new_args = self.try_remove_args(new_args, msg="Removed -D options",
                                        opts_one_arg_startswith=["-D"])
        new_args = self.try_remove_args(new_args, msg="Removed -I options",
                                        opts_startswith=["-I"])
        new_args = self.try_remove_args(new_args, msg="Removed -I options",
                                        opts_one_arg_startswith=["-I"])
        new_args = self.try_remove_args(new_args, msg="Removed -W options",
                                        opts_startswith=["-W"])

        # Remove other cases that aren't covered by the heuristic
        new_args = self.try_remove_args(new_args, msg="Removed -mllvm",
                                        opts_one_arg_startswith=["-mllvm"])

        # Then try every remaining argument one position at a time.
        i = 0
        while i < len(new_args):
            new_args, i = self.try_remove_arg_by_index(new_args, i)

        self.clang_args = new_args

        reduced_cmd = quote_cmd(self.get_crash_cmd())
        write_to_script(reduced_cmd, self.crash_script)
        print("Reduced command:", reduced_cmd)

    def run_creduce(self):
        """Run C-Reduce with self.testfile on self.file_to_reduce and wait
        for it; Ctrl-C stops the reduction instead of aborting the script."""
        print("\nRunning C-Reduce...")
        # Bound before the try so the handler never sees an undefined name
        # when the interrupt arrives before Popen returns.
        p = None
        try:
            p = subprocess.Popen([creduce_cmd] + self.creduce_flags +
                                 [self.testfile, self.file_to_reduce])
            p.communicate()
        except KeyboardInterrupt:
            # Hack to kill C-Reduce because it jumps into its own pgid
            print('\n\nctrl-c detected, killed creduce')
            if p is not None:
                p.kill()
                # Reap the process so it is gone before later steps run.
                p.wait()
|
|
370
|
|
def main():
    """Parse the command line, locate the tools and run every reduction step."""
    global verbose
    global creduce_cmd
    global clang_cmd
    global not_cmd

    arg_parser = ArgumentParser(description=__doc__,
                                formatter_class=RawTextHelpFormatter)
    arg_parser.add_argument(
        'crash_script', type=str, nargs=1,
        help="Name of the script that generates the crash.")
    arg_parser.add_argument(
        'file_to_reduce', type=str, nargs=1,
        help="Name of the file to be reduced.")
    arg_parser.add_argument(
        '--llvm-bin', dest='llvm_bin', type=str,
        help="Path to the LLVM bin directory.")
    arg_parser.add_argument(
        '--llvm-not', dest='llvm_not', type=str,
        help="The path to the `not` executable. "
             "By default uses the llvm-bin directory.")
    arg_parser.add_argument(
        '--clang', dest='clang', type=str,
        help="The path to the `clang` executable. "
             "By default uses the llvm-bin directory.")
    arg_parser.add_argument(
        '--creduce', dest='creduce', type=str,
        help="The path to the `creduce` executable. "
             "Required if `creduce` is not in PATH environment.")
    arg_parser.add_argument('-v', '--verbose', action='store_true')
    options = arg_parser.parse_args()

    verbose = options.verbose

    # Tools are resolved before the input files are checked, so a missing
    # tool is reported first.
    llvm_bin = None
    if options.llvm_bin:
        llvm_bin = os.path.abspath(options.llvm_bin)
    creduce_cmd = check_cmd('creduce', None, options.creduce)
    clang_cmd = check_cmd('clang', llvm_bin, options.clang)
    not_cmd = check_cmd('not', llvm_bin, options.llvm_not)

    # nargs=1 makes each positional a one-element list.
    crash_script = check_file(options.crash_script[0])
    file_to_reduce = check_file(options.file_to_reduce[0])

    reducer = Reduce(crash_script, file_to_reduce)

    pipeline = (
        reducer.simplify_clang_args,
        reducer.write_interestingness_test,
        reducer.clang_preprocess,
        reducer.run_creduce,
        reducer.reduce_clang_args,
    )
    for step in pipeline:
        step()
|
|
413
|
|
# Run the reduction only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
|