comparison utils/update_mca_test_checks.py @ 171:66f3bfe93da9

git version 2c4ca6832fa6b306ee6a7010bfb80a3f2596f824

author:   Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date:     Mon, 25 May 2020 11:07:02 +0900
parents:  c2174574ed3a
children: (none)
comparing 150:1d019706d866 with 171:66f3bfe93da9

#!/usr/bin/env python

"""A test case update script.

This script is a utility to update LLVM 'llvm-mca' based test cases with new
FileCheck patterns.
"""

import argparse
from collections import defaultdict
import glob
import os
import sys
import warnings

from UpdateTestChecks import common


COMMENT_CHAR = '#'
ADVERT_PREFIX = '{} NOTE: Assertions have been autogenerated by '.format(
    COMMENT_CHAR)
ADVERT = '{}utils/{}'.format(ADVERT_PREFIX, os.path.basename(__file__))
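# With COMMENT_CHAR == '#', ADVERT expands to the header line written at the
# top of each updated test, i.e.:
#
#   # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py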


class Error(Exception):
  """ Generic Error that can be raised without printing a traceback.
  """
  pass


def _warn(msg):
  """ Log a user warning to stderr.
  """
  warnings.warn(msg, Warning, stacklevel=2)


def _configure_warnings(args):
  warnings.resetwarnings()
  if args.w:
    warnings.simplefilter('ignore')
  if args.Werror:
    warnings.simplefilter('error')
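  # Note: when both -w and -Werror are given, -Werror wins. Each
  # simplefilter() call inserts its filter at the front of the filter list,
  # and the first matching filter applies, so the 'error' filter added last
  # is consulted first.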


def _showwarning(message, category, filename, lineno, file=None, line=None):
  """ Version of warnings.showwarning that won't attempt to print out the
      line at the location of the warning if the line text is not explicitly
      specified.
  """
  if file is None:
    file = sys.stderr
  if line is None:
    line = ''
  file.write(warnings.formatwarning(message, category, filename, lineno, line))
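  # Installed as warnings.showwarning in the __main__ block at the bottom of
  # this file.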


def _parse_args():
  parser = argparse.ArgumentParser(description=__doc__)
  parser.add_argument('-v', '--verbose',
                      action='store_true',
                      help='show verbose output')
  parser.add_argument('-w',
                      action='store_true',
                      help='suppress warnings')
  parser.add_argument('-Werror',
                      action='store_true',
                      help='promote warnings to errors')
  parser.add_argument('--llvm-mca-binary',
                      metavar='<path>',
                      default='llvm-mca',
                      help='the binary to use to generate the test case '
                           '(default: llvm-mca)')
  parser.add_argument('tests',
                      metavar='<test-path>',
                      nargs='+')
  args = parser.parse_args()

  _configure_warnings(args)

  if not args.llvm_mca_binary:
    raise Error('--llvm-mca-binary value cannot be empty string')

  if 'llvm-mca' not in os.path.basename(args.llvm_mca_binary):
    _warn('unexpected binary name: {}'.format(args.llvm_mca_binary))

  return args


def _find_run_lines(input_lines, args):
  raw_lines = [m.group(1)
               for m in [common.RUN_LINE_RE.match(l) for l in input_lines]
               if m]
  run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
  for l in raw_lines[1:]:
    if run_lines[-1].endswith(r'\\'):
      run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + l
    else:
      run_lines.append(l)

  if args.verbose:
    sys.stderr.write('Found {} RUN line{}:\n'.format(
        len(run_lines), '' if len(run_lines) == 1 else 's'))
    for line in run_lines:
      sys.stderr.write('  RUN: {}\n'.format(line))

  return run_lines


def _get_run_infos(run_lines, args):
  run_infos = []
  for run_line in run_lines:
    try:
      (tool_cmd, filecheck_cmd) = tuple([cmd.strip()
                                         for cmd in run_line.split('|', 1)])
    except ValueError:
      _warn('could not split tool and filecheck commands: {}'.format(run_line))
      continue

    common.verify_filecheck_prefixes(filecheck_cmd)
    tool_basename = os.path.splitext(os.path.basename(args.llvm_mca_binary))[0]

    if not tool_cmd.startswith(tool_basename + ' '):
      _warn('skipping non-{} RUN line: {}'.format(tool_basename, run_line))
      continue

    if not filecheck_cmd.startswith('FileCheck '):
      _warn('skipping non-FileCheck RUN line: {}'.format(run_line))
      continue

    tool_cmd_args = tool_cmd[len(tool_basename):].strip()
    tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()

    check_prefixes = [item
                      for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
                      for item in m.group(1).split(',')]
    if not check_prefixes:
      check_prefixes = ['CHECK']

    run_infos.append((check_prefixes, tool_cmd_args))

  return run_infos
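
# A minimal sketch of what _get_run_infos() extracts. A hypothetical RUN line
# of
#
#   llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 < %s \
#     | FileCheck %s --check-prefixes=ALL,BTVER2
#
# yields the run_info tuple
#
#   (['ALL', 'BTVER2'], '-mtriple=x86_64-unknown-unknown -mcpu=btver2')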


def _break_down_block(block_info, common_prefix):
  """ Given a block_info, see if we can analyze it further to let us break it
      down by prefix per-line rather than per-block.
  """
  texts = block_info.keys()
  prefixes = list(block_info.values())
  # Split the lines from each of the incoming block_texts and zip them so that
  # each element contains the corresponding lines from each text.  E.g.
  #
  # block_text_1: A   # line 1
  #               B   # line 2
  #
  # block_text_2: A   # line 1
  #               C   # line 2
  #
  # would become:
  #
  # [(A, A),   # line 1
  #  (B, C)]   # line 2
  #
  line_tuples = list(zip(*list((text.splitlines() for text in texts))))

  # To simplify output, we'll only proceed if the very first line of the block
  # texts is common to each of them.
  if len(set(line_tuples[0])) != 1:
    return []

  result = []
  lresult = defaultdict(list)
  for i, line in enumerate(line_tuples):
    if len(set(line)) == 1:
      # We're about to output a line with the common prefix.  This is a sync
      # point so flush any batched-up lines one prefix at a time to the output
      # first.
      for prefix in sorted(lresult):
        result.extend(lresult[prefix])
      lresult = defaultdict(list)

      # The line is common to each block so output with the common prefix.
      result.append((common_prefix, line[0]))
    else:
      # The line is not common to each block, or we don't have a common prefix.
      # If there are no prefixes available, warn and bail out.
      if not prefixes[0]:
        _warn('multiple lines not disambiguated by prefixes:\n{}\n'
              'Some blocks may be skipped entirely as a result.'.format(
                  '\n'.join('  - {}'.format(l) for l in line)))
        return []

      # Iterate through the line from each of the blocks and add the line with
      # the corresponding prefix to the current batch of results so that we can
      # later output them per-prefix.
      for i, l in enumerate(line):
        for prefix in prefixes[i]:
          lresult[prefix].append((prefix, l))

  # Flush any remaining batched-up lines one prefix at a time to the output.
  for prefix in sorted(lresult):
    result.extend(lresult[prefix])
  return result
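
# A minimal sketch of a successful breakdown (hypothetical block texts and
# prefixes).  Given
#
#   block_info == {'A\nB': ['FOO'], 'A\nC': ['BAR']}
#
# and common_prefix 'ALL', the first line is shared by both texts, so the
# function returns the per-line list
#
#   [('ALL', 'A'), ('BAR', 'C'), ('FOO', 'B')]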


def _get_useful_prefix_info(run_infos):
  """ Given the run_infos, calculate any prefixes that are common to every one,
      and the length of the longest prefix string.
  """
  try:
    all_sets = [set(s) for s in list(zip(*run_infos))[0]]
    common_to_all = set.intersection(*all_sets)
    longest_prefix_len = max(len(p) for p in set.union(*all_sets))
  except IndexError:
    common_to_all = []
    longest_prefix_len = 0
  else:
    if len(common_to_all) > 1:
      _warn('Multiple prefixes common to all RUN lines: {}'.format(
          common_to_all))
    if common_to_all:
      common_to_all = sorted(common_to_all)[0]
  return common_to_all, longest_prefix_len
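
# For example (hypothetical run_infos), given
#
#   [(['ALL', 'BTVER2'], '-mcpu=btver2'), (['ALL', 'ZNVER1'], '-mcpu=znver1')]
#
# the common prefix is 'ALL' and the longest prefix length is 6.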


def _align_matching_blocks(all_blocks, farthest_indexes):
  """ Some sub-sequences of blocks may be common to multiple lists of blocks,
      but at different indexes in each one.

      For example, in the following case, A,B,E,F, and H are common to both
      sets, but only A and B would be identified as such due to the indexes
      matching:

      index | 0 1 2 3 4 5 6
      ------+--------------
      setA  | A B C D E F H
      setB  | A B E F G H

      This function attempts to align the indexes of matching blocks by
      inserting empty blocks into the block list.  With this approach, A, B,
      E, F, and H would now be able to be identified as matching blocks:

      index | 0 1 2 3 4 5 6 7
      ------+----------------
      setA  | A B C D E F   H
      setB  | A B     E F G H
  """

  # "Farthest block analysis": essentially, iterate over all blocks and find
  # the highest index into a block list for the first instance of each block.
  # This is relatively expensive, but we're dealing with small numbers of
  # blocks so it doesn't make a perceivable difference to user time.
  for blocks in all_blocks.values():
    for block in blocks:
      if not block:
        continue

      index = blocks.index(block)

      if index > farthest_indexes[block]:
        farthest_indexes[block] = index

  # Use the results of the above analysis to identify any blocks that can be
  # shunted along to match the farthest index value.
  for blocks in all_blocks.values():
    for index, block in enumerate(blocks):
      if not block:
        continue

      changed = False
      # If the block has not already been subject to alignment (i.e. if the
      # previous block is not empty) then insert empty blocks until the index
      # matches the farthest index identified for that block.
      if (index > 0) and blocks[index - 1]:
        while index < farthest_indexes[block]:
          blocks.insert(index, '')
          index += 1
          changed = True

      if changed:
        # Bail out.  We'll need to re-do the farthest block analysis now that
        # we've inserted some blocks.
        return True

  return False
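
# One pass over the docstring's example: the farthest-index analysis sees E
# first at index 4 in setA and index 2 in setB, so farthest_indexes['E'] ends
# up as 4; the shunting pass then inserts two empty blocks before E in setB
# and returns True so that the caller re-runs the analysis.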


def _get_block_infos(run_infos, test_path, args, common_prefix):  # noqa
  """ For each run line, run the tool with the specified args and collect the
      output.  We use the concept of 'blocks' for uniquing, where a block is
      a series of lines of text with no more than one newline character between
      each one.  For example:

      This
      is
      one
      block

      This is
      another block

      This is yet another block

      We then build up a 'block_infos' structure containing a dict where the
      text of each block is the key and a list of the sets of prefixes that may
      generate that particular block.  This then goes through a series of
      transformations to minimise the amount of CHECK lines that need to be
      written by taking advantage of common prefixes.
  """

  def _block_key(tool_args, prefixes):
    """ Get a hashable key based on the current tool_args and prefixes.
    """
    return ' '.join([tool_args] + prefixes)
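
  # For example (hypothetical arguments):
  #
  #   _block_key('-mcpu=btver2', ['ALL', 'BTVER2'])
  #
  # returns '-mcpu=btver2 ALL BTVER2'.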

  all_blocks = {}
  max_block_len = 0

  # A cache of the furthest-back position in any block list of the first
  # instance of each block, indexed by the block itself.
  farthest_indexes = defaultdict(int)

  # Run the tool for each run line to generate all of the blocks.
  for prefixes, tool_args in run_infos:
    key = _block_key(tool_args, prefixes)
    raw_tool_output = common.invoke_tool(args.llvm_mca_binary,
                                         tool_args,
                                         test_path)

    # Replace any lines consisting of purely whitespace with empty lines.
    raw_tool_output = '\n'.join(line if line.strip() else ''
                                for line in raw_tool_output.splitlines())

    # Split blocks, stripping all trailing whitespace, but keeping preceding
    # whitespace except for newlines so that columns will line up visually.
    all_blocks[key] = [b.lstrip('\n').rstrip()
                       for b in raw_tool_output.split('\n\n')]
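
    # E.g. a hypothetical raw output of 'A\nB\n\n\nC\n' splits into the
    # blocks ['A\nB', 'C'].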
    max_block_len = max(max_block_len, len(all_blocks[key]))

  # Attempt to align matching blocks until no more changes can be made.
  made_changes = True
  while made_changes:
    made_changes = _align_matching_blocks(all_blocks, farthest_indexes)

  # If necessary, pad the lists of blocks with empty blocks so that they are
  # all the same length.
  for key in all_blocks:
    len_to_pad = max_block_len - len(all_blocks[key])
    all_blocks[key] += [''] * len_to_pad

  # Create the block_infos structure where it is a nested dict in the form of:
  #   block number -> block text -> list of prefix sets
  block_infos = defaultdict(lambda: defaultdict(list))
  for prefixes, tool_args in run_infos:
    key = _block_key(tool_args, prefixes)
    for block_num, block_text in enumerate(all_blocks[key]):
      block_infos[block_num][block_text].append(set(prefixes))

  # Now go through the block_infos structure and attempt to smartly prune the
  # number of prefixes per block to the minimal set possible to output.
  for block_num in range(len(block_infos)):
    # When there are multiple block texts for a block num, remove any
    # prefixes that are common to more than one of them.
    # E.g. [ [{ALL,FOO}] , [{ALL,BAR}] ] -> [ [{FOO}] , [{BAR}] ]
    all_sets = [s for s in block_infos[block_num].values()]
    pruned_sets = []

    for i, setlist in enumerate(all_sets):
      other_set_values = set([elem for j, setlist2 in enumerate(all_sets)
                              for set_ in setlist2 for elem in set_
                              if i != j])
      pruned_sets.append([s - other_set_values for s in setlist])

    for i, block_text in enumerate(block_infos[block_num]):

      # When a block text matches multiple sets of prefixes, try removing any
      # prefixes that aren't common to all of them.
      # E.g. [ {ALL,FOO} , {ALL,BAR} ] -> [{ALL}]
      common_values = set.intersection(*pruned_sets[i])
      if common_values:
        pruned_sets[i] = [common_values]

      # Everything should be uniqued as much as possible by now.  Apply the
      # newly pruned sets to the block_infos structure.
      # If there are any blocks of text that still match multiple prefixes,
      # output a warning.
      current_set = set()
      for s in pruned_sets[i]:
        s = sorted(list(s))
        if s:
          current_set.add(s[0])
          if len(s) > 1:
            _warn('Multiple prefixes generating same output: {} '
                  '(discarding {})'.format(','.join(s), ','.join(s[1:])))

      if block_text and not current_set:
        raise Error(
            'block not captured by existing prefixes:\n\n{}'.format(block_text))
      block_infos[block_num][block_text] = sorted(list(current_set))

    # If we have multiple block_texts, try to break them down further to avoid
    # the case where we have very similar block_texts repeated after each
    # other.
    if common_prefix and len(block_infos[block_num]) > 1:
      # We'll only attempt this if each of the block_texts has the same number
      # of lines as the others.
      same_num_lines = (len(set(len(k.splitlines())
                                for k in block_infos[block_num].keys())) == 1)
      if same_num_lines:
        breakdown = _break_down_block(block_infos[block_num], common_prefix)
        if breakdown:
          block_infos[block_num] = breakdown

  return block_infos
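
# The returned structure maps block number -> block text -> list of prefixes,
# e.g. (hypothetical block texts and prefixes):
#
#   {0: {'Iterations:        100': ['ALL']},
#    1: {'<btver2 resource pressure>': ['BTVER2'],
#        '<znver1 resource pressure>': ['ZNVER1']}}
#
# except for entries that _break_down_block() rewrote into per-line
# (prefix, line) lists.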


def _write_block(output, block, not_prefix_set, common_prefix, prefix_pad):
  """ Write an individual block, with correct padding on the prefixes.
      Returns a set of all of the prefixes that it has written.
  """
  end_prefix = ':     '
  previous_prefix = None
  num_lines_of_prefix = 0
  written_prefixes = set()

  for prefix, line in block:
    if prefix in not_prefix_set:
      _warn('not writing for prefix {0} due to presence of "{0}-NOT:" '
            'in input file.'.format(prefix))
      continue

    # If the previous line isn't already blank and we're writing more than one
    # line for the current prefix, output a blank line first, unless either
    # the current or previous prefix is common to all.
    num_lines_of_prefix += 1
    if prefix != previous_prefix:
      if output and output[-1]:
        if num_lines_of_prefix > 1 or any(p == common_prefix
                                          for p in (prefix, previous_prefix)):
          output.append('')
      num_lines_of_prefix = 0
      previous_prefix = prefix

    written_prefixes.add(prefix)
    output.append(
        '{} {}{}{} {}'.format(COMMENT_CHAR,
                              prefix,
                              end_prefix,
                              ' ' * (prefix_pad - len(prefix)),
                              line).rstrip())
    end_prefix = '-NEXT:'

  output.append('')
  return written_prefixes
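
# A minimal sketch (hypothetical block, prefix_pad == 6): writing the block
#
#   [('ALL', 'Iterations:        100'), ('ALL', 'Total Cycles:      103')]
#
# appends roughly
#
#   # ALL:         Iterations:        100
#   # ALL-NEXT:    Total Cycles:      103
#
# followed by a trailing blank line.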


def _write_output(test_path, input_lines, prefix_list, block_infos,  # noqa
                  args, common_prefix, prefix_pad):
  prefix_set = set([prefix for prefixes, _ in prefix_list
                    for prefix in prefixes])
  not_prefix_set = set()

  output_lines = []
  for input_line in input_lines:
    if input_line.startswith(ADVERT_PREFIX):
      continue

    if input_line.startswith(COMMENT_CHAR):
      m = common.CHECK_RE.match(input_line)
      try:
        prefix = m.group(1)
      except AttributeError:
        prefix = None

      if '{}-NOT:'.format(prefix) in input_line:
        not_prefix_set.add(prefix)

      if prefix not in prefix_set or prefix in not_prefix_set:
        output_lines.append(input_line)
        continue

    if common.should_add_line_to_output(input_line, prefix_set):
      # This input line of the function body will go as-is into the output.
      # Except make leading whitespace uniform: 2 spaces.
      input_line = common.SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)

      # Skip empty lines if the previous output line is also empty.
      if input_line or output_lines[-1]:
        output_lines.append(input_line)
    else:
      continue

  # Add a blank line before the new checks if required.
  if len(output_lines) > 0 and output_lines[-1]:
    output_lines.append('')

  output_check_lines = []
  used_prefixes = set()
  for block_num in range(len(block_infos)):
    if type(block_infos[block_num]) is list:
      # The block is of the type output from _break_down_block().
      used_prefixes |= _write_block(output_check_lines,
                                    block_infos[block_num],
                                    not_prefix_set,
                                    common_prefix,
                                    prefix_pad)
    else:
      # _break_down_block() was unable to do anything so output the block
      # as-is.

      # Rather than writing out each block as soon as we encounter it, save it
      # indexed by prefix so that we can write all of the blocks out sorted by
      # prefix at the end.
      output_blocks = defaultdict(list)

      for block_text in sorted(block_infos[block_num]):

        if not block_text:
          continue

        lines = block_text.split('\n')
        for prefix in block_infos[block_num][block_text]:
          assert prefix not in output_blocks
          used_prefixes |= _write_block(output_blocks[prefix],
                                        [(prefix, line) for line in lines],
                                        not_prefix_set,
                                        common_prefix,
                                        prefix_pad)

      for prefix in sorted(output_blocks):
        output_check_lines.extend(output_blocks[prefix])

  unused_prefixes = (prefix_set - not_prefix_set) - used_prefixes
  if unused_prefixes:
    raise Error('unused prefixes: {}'.format(sorted(unused_prefixes)))

  if output_check_lines:
    output_lines.insert(0, ADVERT)
    output_lines.extend(output_check_lines)

  # The file should not end with two newlines. It creates unnecessary churn.
  while len(output_lines) > 0 and output_lines[-1] == '':
    output_lines.pop()

  if input_lines == output_lines:
    sys.stderr.write('  [unchanged]\n')
    return
  sys.stderr.write('  [{} lines total]\n'.format(len(output_lines)))

  if args.verbose:
    sys.stderr.write(
        'Writing {} lines to {}...\n\n'.format(len(output_lines), test_path))

  with open(test_path, 'wb') as f:
    f.writelines(['{}\n'.format(l).encode('utf-8') for l in output_lines])


def main():
  args = _parse_args()
  test_paths = [test for pattern in args.tests for test in glob.glob(pattern)]
  for test_path in test_paths:
    sys.stderr.write('Test: {}\n'.format(test_path))

    # Call this per test. By default each warning will only be written once
    # per source location. Reset the warning filter so that now each warning
    # will be written once per source location per test.
    _configure_warnings(args)

    if args.verbose:
      sys.stderr.write(
          'Scanning for RUN lines in test file: {}\n'.format(test_path))

    if not os.path.isfile(test_path):
      raise Error('could not find test file: {}'.format(test_path))

    with open(test_path) as f:
      input_lines = [l.rstrip() for l in f]

    run_lines = _find_run_lines(input_lines, args)
    run_infos = _get_run_infos(run_lines, args)
    common_prefix, prefix_pad = _get_useful_prefix_info(run_infos)
    block_infos = _get_block_infos(run_infos, test_path, args, common_prefix)
    _write_output(test_path,
                  input_lines,
                  run_infos,
                  block_infos,
                  args,
                  common_prefix,
                  prefix_pad)

  return 0


if __name__ == '__main__':
  try:
    warnings.showwarning = _showwarning
    sys.exit(main())
  except Error as e:
    sys.stderr.write('error: {}\n'.format(e))
    sys.exit(1)