Mercurial > hg > CbC > CbC_llvm
view bolt/test/link_fdata.py @ 266:00f31e85ec16 default tip
Added tag current for changeset 31d058e83c98
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Sat, 14 Oct 2023 10:13:55 +0900 |
parents | 1f2b6ac9f198 |
children |
line wrap: on
line source
#!/usr/bin/env python3

"""
This script reads the input file, extracts all lines starting with
"# FDATA: " (or a given prefix instead of "FDATA"), parses the directives,
replaces symbol names ("#name#") with either symbol values or with offsets from
respective anchor symbols, and writes the resulting profile to the output file.

Usage: link_fdata.py [--nmtool NM] [--no-lbr] input objfile output [prefix]
"""

import argparse
import subprocess
import sys
import re

# Regexes to parse FDATA, no-LBR and pre-aggregated profile data.

# FDATA records:
# <is symbol?> <closest elf symbol or DSO name> <relative FROM address>
# <is symbol?> <closest elf symbol or DSO name> <relative TO address>
# <number of mispredictions> <number of branches>
fdata_pat = re.compile(r"([01].*) (?P<mispred>\d+) (?P<exec>\d+)")

# Pre-aggregated profile:
# {B|F|f} [<start_id>:]<start_offset> [<end_id>:]<end_offset> <count>
# [<mispred_count>]
preagg_pat = re.compile(r"(?P<type>[BFf]) (?P<offsets_count>.*)")

# No-LBR profile:
# <is symbol?> <closest elf symbol or DSO name> <relative address> <count>
nolbr_pat = re.compile(r"([01].*) (?P<count>\d+)")

# Replacement symbol: #symname#
replace_pat = re.compile(r"#(?P<symname>[^#]+)#")

# Field separator: a run of spaces not preceded by a backslash (negative
# lookbehind), so that names containing escaped spaces stay in one field.
unescaped_space_pat = re.compile(r"(?<!\\) +")

# Symbol map (symbol name -> hexadecimal value string as printed by nm).
# Filled in by main() and consulted by evaluate_symbol()/replace_symbol().
symbols = {}


def parse_profile_line(profile_line, line=None):
    """
    Classify one profile directive and split it into its fields.

    profile_line is the text following the "# <prefix>: " marker; line is the
    complete input line, used only in error messages (defaults to profile_line).

    Returns an (etype, fields) tuple where etype is one of:
      "FDATA":  fields = (issym1, anchor1, offsym1, issym2, anchor2, offsym2,
                          mispred_count, exec_count)
      "NOLBR":  fields = (issym, anchor, offsym, count)
      "PREAGG": fields = (type, offsets_count)

    Terminates the script via sys.exit() on malformed input.
    """
    if line is None:
        line = profile_line
    looks_like_record = False

    fdata_match = fdata_pat.match(profile_line)
    if fdata_match:
        looks_like_record = True
        src_dst, mispred, execnt = fdata_match.groups()
        chunks = unescaped_space_pat.split(src_dst)
        # Only an exact field count is an FDATA record. A no-LBR record whose
        # offset happens to be all decimal digits also matches fdata_pat, so
        # fall through to the no-LBR check instead of rejecting it here.
        if len(chunks) == 6:
            return ("FDATA", (*chunks, mispred, execnt))

    nolbr_match = nolbr_pat.match(profile_line)
    if nolbr_match:
        looks_like_record = True
        loc, count = nolbr_match.groups()
        chunks = unescaped_space_pat.split(loc)
        if len(chunks) == 3:
            return ("NOLBR", (*chunks, count))

    if looks_like_record:
        # The line has the shape of an FDATA/no-LBR record but the number of
        # fields separated by non-escaped whitespace does not fit the format.
        sys.exit(f"ERROR: wrong format/whitespaces must be escaped:\n{line}")

    preagg_match = preagg_pat.match(profile_line)
    if preagg_match:
        return ("PREAGG", preagg_match.groups())

    sys.exit("ERROR: unexpected input:\n%s" % line)


def parse_nm_output(nm_output):
    """
    Build a {symbol name: symbol value} map from nm output.

    Each useful line has the form "<symbol value> <symbol type> <symbol name>".
    Lines with fewer than three fields (e.g. blank lines) are skipped. If a
    name occurs more than once, the last occurrence wins.
    """
    result = {}
    for symline in nm_output.splitlines():
        fields = symline.split(maxsplit=2)
        if len(fields) != 3:
            continue
        symval, _, symname = fields
        result[symname] = symval
    return result


def lookup_symbol(symname):
    """Return the value of symname from the symbol map or exit with an error."""
    if symname not in symbols:
        sys.exit(f"ERROR: symbol {symname} is not defined in binary")
    return symbols[symname]


def evaluate_symbol(issym, anchor, offsym):
    """
    Render one "<is symbol?> <anchor> <offset>" location.

    If offsym does not start with a "#name#" reference, the location is
    returned unchanged. Otherwise the reference is replaced by the symbol's
    value as printed by nm when issym is "0", or by the hexadecimal offset of
    the symbol from the anchor symbol for any other issym.

    Exits with an error if a referenced symbol (or the anchor, when it is
    needed) is not present in the symbol map.
    """
    sym_match = replace_pat.match(offsym)
    if not sym_match:
        # No need to evaluate symbol value, return as is
        return f"{issym} {anchor} {offsym}"
    symval = lookup_symbol(sym_match.group("symname"))
    # Evaluate to an absolute offset if issym is false
    if issym == "0":
        return f"{issym} {anchor} {symval}"
    # Evaluate symbol against its anchor if issym is true
    sym_offset = int(symval, 16) - int(lookup_symbol(anchor), 16)
    return f"{issym} {anchor} {sym_offset:x}"


def replace_symbol(matchobj):
    """
    Substitution callback for replace_pat: expects matchobj to carry the
    "symname" group and returns that symbol's value from the symbol map.
    """
    return lookup_symbol(matchobj.group("symname"))


def main():
    """Parse the command line, read the directives, and write the profile."""
    parser = argparse.ArgumentParser()
    parser.add_argument("input")
    parser.add_argument("objfile", help="Object file to extract symbol values from")
    parser.add_argument("output")
    parser.add_argument(
        "prefix", nargs="?", default="FDATA", help="Custom FDATA prefix"
    )
    parser.add_argument("--nmtool", default="nm", help="Path to nm tool")
    parser.add_argument("--no-lbr", action="store_true")
    args = parser.parse_args()

    # Regex to extract profile lines from the input.
    # NOTE: the prefix is interpolated verbatim, so regex metacharacters in it
    # are interpreted as such; kept that way for backward compatibility.
    prefix_pat = re.compile(f"^# {args.prefix}: (.*)")

    # Read input and construct the list of (etype, fields) expressions.
    exprs = []
    with open(args.input, "r") as f:
        for line in f:
            prefix_match = prefix_pat.match(line)
            if not prefix_match:
                continue
            exprs.append(parse_profile_line(prefix_match.group(1), line))

    # Read nm output: <symbol value> <symbol type> <symbol name>
    nm_result = subprocess.run(
        [args.nmtool, "--defined-only", args.objfile], text=True, capture_output=True
    )
    if nm_result.returncode != 0:
        # Keep going (the profile may not reference any symbol), but surface
        # the reason a later "symbol is not defined" error might occur.
        print(
            f"WARNING: {args.nmtool} exited with status {nm_result.returncode}: "
            f"{nm_result.stderr.strip()}",
            file=sys.stderr,
        )
    # Populate symbol map
    symbols.update(parse_nm_output(nm_result.stdout))

    with open(args.output, "w", newline="\n") as f:
        if args.no_lbr:
            print("no_lbr", file=f)
        for etype, expr in exprs:
            if etype == "FDATA":
                issym1, anchor1, offsym1, issym2, anchor2, offsym2, mispred, execnt = expr
                print(
                    evaluate_symbol(issym1, anchor1, offsym1),
                    evaluate_symbol(issym2, anchor2, offsym2),
                    mispred,
                    execnt,
                    file=f,
                )
            elif etype == "NOLBR":
                issym, anchor, offsym, count = expr
                print(evaluate_symbol(issym, anchor, offsym), count, file=f)
            elif etype == "PREAGG":
                # Replace all symbols enclosed in ##
                print(expr[0], replace_pat.sub(replace_symbol, expr[1]), file=f)
            else:
                sys.exit("ERROR: unhandled expression type:\n%s" % etype)


if __name__ == "__main__":
    main()