Mercurial > hg > CbC > CbC_llvm
view clang/utils/analyzer/exploded-graph-rewriter.py @ 266:00f31e85ec16 default tip
Added tag current for changeset 31d058e83c98
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Sat, 14 Oct 2023 10:13:55 +0900 |
parents | 1f2b6ac9f198 |
children |
line wrap: on
line source
#!/usr/bin/env python # # ===- exploded-graph-rewriter.py - ExplodedGraph dump tool -----*- python -*--# # # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # # ===-----------------------------------------------------------------------===# from __future__ import print_function import argparse import collections import difflib import json import logging import os import re # ===-----------------------------------------------------------------------===# # These data structures represent a deserialized ExplodedGraph. # ===-----------------------------------------------------------------------===# # A helper function for finding the difference between two dictionaries. def diff_dicts(curr, prev): removed = [k for k in prev if k not in curr or curr[k] != prev[k]] added = [k for k in curr if k not in prev or curr[k] != prev[k]] return (removed, added) # Represents any program state trait that is a dictionary of key-value pairs. class GenericMap: def __init__(self, items): self.generic_map = collections.OrderedDict(items) def diff(self, prev): return diff_dicts(self.generic_map, prev.generic_map) def is_different(self, prev): removed, added = self.diff(prev) return len(removed) != 0 or len(added) != 0 # A deserialized source location. class SourceLocation: def __init__(self, json_loc): logging.debug("json: %s" % json_loc) self.line = json_loc["line"] self.col = json_loc["column"] self.filename = ( os.path.basename(json_loc["file"]) if "file" in json_loc else "(main file)" ) self.spelling = ( SourceLocation(json_loc["spelling"]) if "spelling" in json_loc else None ) def is_macro(self): return self.spelling is not None # A deserialized program point. class ProgramPoint: def __init__(self, json_pp): self.kind = json_pp["kind"] self.tag = json_pp["tag"] self.node_id = json_pp["node_id"] self.is_sink = bool(json_pp["is_sink"]) self.has_report = bool(json_pp["has_report"]) if self.kind == "Edge": self.src_id = json_pp["src_id"] self.dst_id = json_pp["dst_id"] elif self.kind == "Statement": logging.debug(json_pp) self.stmt_kind = json_pp["stmt_kind"] self.cast_kind = json_pp["cast_kind"] if "cast_kind" in json_pp else None self.stmt_point_kind = json_pp["stmt_point_kind"] self.stmt_id = json_pp["stmt_id"] self.pointer = json_pp["pointer"] self.pretty = json_pp["pretty"] self.loc = ( SourceLocation(json_pp["location"]) if json_pp["location"] is not None else None ) elif self.kind == "BlockEntrance": self.block_id = json_pp["block_id"] # A single expression acting as a key in a deserialized Environment. class EnvironmentBindingKey: def __init__(self, json_ek): # CXXCtorInitializer is not a Stmt! self.stmt_id = ( json_ek["stmt_id"] if "stmt_id" in json_ek else json_ek["init_id"] ) self.pretty = json_ek["pretty"] self.kind = json_ek["kind"] if "kind" in json_ek else None def _key(self): return self.stmt_id def __eq__(self, other): return self._key() == other._key() def __hash__(self): return hash(self._key()) # Deserialized description of a location context. class LocationContext: def __init__(self, json_frame): self.lctx_id = json_frame["lctx_id"] self.caption = json_frame["location_context"] self.decl = json_frame["calling"] self.loc = ( SourceLocation(json_frame["location"]) if json_frame["location"] is not None else None ) def _key(self): return self.lctx_id def __eq__(self, other): return self._key() == other._key() def __hash__(self): return hash(self._key()) # A group of deserialized Environment bindings that correspond to a specific # location context. class EnvironmentFrame: def __init__(self, json_frame): self.location_context = LocationContext(json_frame) self.bindings = collections.OrderedDict( [(EnvironmentBindingKey(b), b["value"]) for b in json_frame["items"]] if json_frame["items"] is not None else [] ) def diff_bindings(self, prev): return diff_dicts(self.bindings, prev.bindings) def is_different(self, prev): removed, added = self.diff_bindings(prev) return len(removed) != 0 or len(added) != 0 # A deserialized Environment. This class can also hold other entities that # are similar to Environment, such as Objects Under Construction or # Indices Of Elements Under Construction. class GenericEnvironment: def __init__(self, json_e): self.frames = [EnvironmentFrame(f) for f in json_e] def diff_frames(self, prev): # TODO: It's difficult to display a good diff when frame numbers shift. if len(self.frames) != len(prev.frames): return None updated = [] for i in range(len(self.frames)): f = self.frames[i] prev_f = prev.frames[i] if f.location_context == prev_f.location_context: if f.is_different(prev_f): updated.append(i) else: # We have the whole frame replaced with another frame. # TODO: Produce a nice diff. return None # TODO: Add support for added/removed. return updated def is_different(self, prev): updated = self.diff_frames(prev) return updated is None or len(updated) > 0 # A single binding key in a deserialized RegionStore cluster. class StoreBindingKey: def __init__(self, json_sk): self.kind = json_sk["kind"] self.offset = json_sk["offset"] def _key(self): return (self.kind, self.offset) def __eq__(self, other): return self._key() == other._key() def __hash__(self): return hash(self._key()) # A single cluster of the deserialized RegionStore. class StoreCluster: def __init__(self, json_sc): self.base_region = json_sc["cluster"] self.bindings = collections.OrderedDict( [(StoreBindingKey(b), b["value"]) for b in json_sc["items"]] ) def diff_bindings(self, prev): return diff_dicts(self.bindings, prev.bindings) def is_different(self, prev): removed, added = self.diff_bindings(prev) return len(removed) != 0 or len(added) != 0 # A deserialized RegionStore. class Store: def __init__(self, json_s): self.ptr = json_s["pointer"] self.clusters = collections.OrderedDict( [(c["pointer"], StoreCluster(c)) for c in json_s["items"]] ) def diff_clusters(self, prev): removed = [k for k in prev.clusters if k not in self.clusters] added = [k for k in self.clusters if k not in prev.clusters] updated = [ k for k in prev.clusters if k in self.clusters and prev.clusters[k].is_different(self.clusters[k]) ] return (removed, added, updated) def is_different(self, prev): removed, added, updated = self.diff_clusters(prev) return len(removed) != 0 or len(added) != 0 or len(updated) != 0 # Deserialized messages from a single checker in a single program state. # Basically a list of raw strings. class CheckerLines: def __init__(self, json_lines): self.lines = json_lines def diff_lines(self, prev): lines = difflib.ndiff(prev.lines, self.lines) return [l.strip() for l in lines if l.startswith("+") or l.startswith("-")] def is_different(self, prev): return len(self.diff_lines(prev)) > 0 # Deserialized messages of all checkers, separated by checker. class CheckerMessages: def __init__(self, json_m): self.items = collections.OrderedDict( [(m["checker"], CheckerLines(m["messages"])) for m in json_m] ) def diff_messages(self, prev): removed = [k for k in prev.items if k not in self.items] added = [k for k in self.items if k not in prev.items] updated = [ k for k in prev.items if k in self.items and prev.items[k].is_different(self.items[k]) ] return (removed, added, updated) def is_different(self, prev): removed, added, updated = self.diff_messages(prev) return len(removed) != 0 or len(added) != 0 or len(updated) != 0 # A deserialized program state. class ProgramState: def __init__(self, state_id, json_ps): logging.debug("Adding ProgramState " + str(state_id)) store_key = "store" env_key = "environment" constraints_key = "constraints" dyn_ty_key = "dynamic_types" ctor_key = "constructing_objects" ind_key = "index_of_element" init_loop_key = "pending_init_loops" dtor_key = "pending_destructors" msg_key = "checker_messages" if json_ps is None: json_ps = { store_key: None, env_key: None, constraints_key: None, dyn_ty_key: None, ctor_key: None, ind_key: None, init_loop_key: None, dtor_key: None, msg_key: None, } self.state_id = state_id self.store = ( Store(json_ps[store_key]) if json_ps[store_key] is not None else None ) self.environment = ( GenericEnvironment(json_ps[env_key]["items"]) if json_ps[env_key] is not None else None ) self.constraints = ( GenericMap([(c["symbol"], c["range"]) for c in json_ps[constraints_key]]) if json_ps[constraints_key] is not None else None ) self.dynamic_types = ( GenericMap( [ ( t["region"], "%s%s" % ( t["dyn_type"], " (or a sub-class)" if t["sub_classable"] else "", ), ) for t in json_ps[dyn_ty_key] ] ) if json_ps[dyn_ty_key] is not None else None ) self.checker_messages = ( CheckerMessages(json_ps[msg_key]) if json_ps[msg_key] is not None else None ) # State traits # # For traits we always check if a key exists because if a trait # has no imformation, nothing will be printed in the .dot file # we parse. self.constructing_objects = ( GenericEnvironment(json_ps[ctor_key]) if ctor_key in json_ps and json_ps[ctor_key] is not None else None ) self.index_of_element = ( GenericEnvironment(json_ps[ind_key]) if ind_key in json_ps and json_ps[ind_key] is not None else None ) self.pending_init_loops = ( GenericEnvironment(json_ps[init_loop_key]) if init_loop_key in json_ps and json_ps[init_loop_key] is not None else None ) self.pending_destructors = ( GenericEnvironment(json_ps[dtor_key]) if dtor_key in json_ps and json_ps[dtor_key] is not None else None ) # A deserialized exploded graph node. Has a default constructor because it # may be referenced as part of an edge before its contents are deserialized, # and in this moment we already need a room for predecessors and successors. class ExplodedNode: def __init__(self): self.predecessors = [] self.successors = [] def construct(self, node_id, json_node): logging.debug("Adding " + node_id) self.ptr = node_id[4:] self.points = [ProgramPoint(p) for p in json_node["program_points"]] self.node_id = self.points[-1].node_id self.state = ProgramState( json_node["state_id"], json_node["program_state"] if json_node["program_state"] is not None else None, ) assert self.node_name() == node_id def node_name(self): return "Node" + self.ptr # A deserialized ExplodedGraph. Constructed by consuming a .dot file # line-by-line. class ExplodedGraph: # Parse .dot files with regular expressions. node_re = re.compile( '^(Node0x[0-9a-f]*) \\[shape=record,.*label="{(.*)\\\\l}"\\];$' ) edge_re = re.compile("^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$") def __init__(self): self.nodes = collections.defaultdict(ExplodedNode) self.root_id = None self.incomplete_line = "" def add_raw_line(self, raw_line): if raw_line.startswith("//"): return # Allow line breaks by waiting for ';'. This is not valid in # a .dot file, but it is useful for writing tests. if len(raw_line) > 0 and raw_line[-1] != ";": self.incomplete_line += raw_line return raw_line = self.incomplete_line + raw_line self.incomplete_line = "" # Apply regexps one by one to see if it's a node or an edge # and extract contents if necessary. logging.debug("Line: " + raw_line) result = self.edge_re.match(raw_line) if result is not None: logging.debug("Classified as edge line.") pred = result.group(1) succ = result.group(2) self.nodes[pred].successors.append(succ) self.nodes[succ].predecessors.append(pred) return result = self.node_re.match(raw_line) if result is not None: logging.debug("Classified as node line.") node_id = result.group(1) if len(self.nodes) == 0: self.root_id = node_id # Note: when writing tests you don't need to escape everything, # even though in a valid dot file everything is escaped. node_label = ( result.group(2) .replace(" ", "") .replace('\\"', '"') .replace("\\{", "{") .replace("\\}", "}") .replace("\\\\", "\\") .replace("\\|", "|") .replace("\\<", "\\\\<") .replace("\\>", "\\\\>") .rstrip(",") ) # Handle `\l` separately because a string literal can be in code # like "string\\literal" with the `\l` inside. # Also on Windows macros __FILE__ produces specific delimiters `\` # and a directory or file may starts with the letter `l`. # Find all `\l` (like `,\l`, `}\l`, `[\l`) except `\\l`, # because the literal as a rule contains multiple `\` before `\l`. node_label = re.sub(r"(?<!\\)\\l", "", node_label) logging.debug(node_label) json_node = json.loads(node_label) self.nodes[node_id].construct(node_id, json_node) return logging.debug("Skipping.") # ===-----------------------------------------------------------------------===# # Visitors traverse a deserialized ExplodedGraph and do different things # with every node and edge. # ===-----------------------------------------------------------------------===# # A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based # syntax highlighing. class DotDumpVisitor: def __init__(self, do_diffs, dark_mode, gray_mode, topo_mode, dump_dot_only): self._do_diffs = do_diffs self._dark_mode = dark_mode self._gray_mode = gray_mode self._topo_mode = topo_mode self._dump_dot_only = dump_dot_only self._output = [] def _dump_raw(self, s): if self._dump_dot_only: print(s, end="") else: self._output.append(s) def output(self): assert not self._dump_dot_only return "".join(self._output) def _dump(self, s): s = ( s.replace("&", "&") .replace("{", "\\{") .replace("}", "\\}") .replace("\\<", "<") .replace("\\>", ">") .replace("|", "\\|") ) s = re.sub(r"(?<!\\)\\l", "<br />", s) if self._gray_mode: s = re.sub(r'<font color="[a-z0-9]*">', "", s) s = re.sub(r"</font>", "", s) self._dump_raw(s) @staticmethod def _diff_plus_minus(is_added): if is_added is None: return "" if is_added: return '<font color="forestgreen">+</font>' return '<font color="red">-</font>' @staticmethod def _short_pretty(s): if s is None: return None if len(s) < 20: return s left = s.find("{") right = s.rfind("}") if left == -1 or right == -1 or left >= right: return s candidate = s[0 : left + 1] + " ... " + s[right:] if len(candidate) >= len(s): return s return candidate @staticmethod def _make_sloc(loc): if loc is None: return "<i>Invalid Source Location</i>" def make_plain_loc(loc): return "%s:<b>%s</b>:<b>%s</b>" % (loc.filename, loc.line, loc.col) if loc.is_macro(): return '%s <font color="royalblue1">' "(<i>spelling at </i> %s)</font>" % ( make_plain_loc(loc), make_plain_loc(loc.spelling), ) return make_plain_loc(loc) def visit_begin_graph(self, graph): self._graph = graph self._dump_raw('digraph "ExplodedGraph" {\n') if self._dark_mode: self._dump_raw('bgcolor="gray10";\n') self._dump_raw('label="";\n') def visit_program_point(self, p): if p.kind in ["Edge", "BlockEntrance", "BlockExit"]: color = "gold3" elif p.kind in ["PreStmtPurgeDeadSymbols", "PostStmtPurgeDeadSymbols"]: color = "red" elif p.kind in ["CallEnter", "CallExitBegin", "CallExitEnd"]: color = "dodgerblue" if self._dark_mode else "blue" elif p.kind in ["Statement"]: color = "cyan4" else: color = "forestgreen" self._dump('<tr><td align="left">%s.</td>' % p.node_id) if p.kind == "Statement": # This avoids pretty-printing huge statements such as CompoundStmt. # Such statements show up only at [Pre|Post]StmtPurgeDeadSymbols skip_pretty = "PurgeDeadSymbols" in p.stmt_point_kind stmt_color = "cyan3" self._dump( '<td align="left" width="0">%s:</td>' '<td align="left" width="0"><font color="%s">' "%s</font> </td>" '<td align="left"><i>S%s</i></td>' '<td align="left"><font color="%s">%s</font></td>' '<td align="left">%s</td></tr>' % ( self._make_sloc(p.loc), color, "%s (%s)" % (p.stmt_kind, p.cast_kind) if p.cast_kind is not None else p.stmt_kind, p.stmt_id, stmt_color, p.stmt_point_kind, self._short_pretty(p.pretty) if not skip_pretty else "", ) ) elif p.kind == "Edge": self._dump( '<td width="0"></td>' '<td align="left" width="0">' '<font color="%s">%s</font></td><td align="left">' "[B%d] -\\> [B%d]</td></tr>" % (color, "BlockEdge", p.src_id, p.dst_id) ) elif p.kind == "BlockEntrance": self._dump( '<td width="0"></td>' '<td align="left" width="0">' '<font color="%s">%s</font></td>' '<td align="left">[B%d]</td></tr>' % (color, p.kind, p.block_id) ) else: # TODO: Print more stuff for other kinds of points. self._dump( '<td width="0"></td>' '<td align="left" width="0" colspan="2">' '<font color="%s">%s</font></td></tr>' % (color, p.kind) ) if p.tag is not None: self._dump( '<tr><td width="0"></td><td width="0"></td>' '<td colspan="3" align="left">' '<b>Tag: </b> <font color="crimson">' "%s</font></td></tr>" % p.tag ) if p.has_report: self._dump( '<tr><td width="0"></td><td width="0"></td>' '<td colspan="3" align="left">' '<font color="red"><b>Bug Report Attached' "</b></font></td></tr>" ) if p.is_sink: self._dump( '<tr><td width="0"></td><td width="0"></td>' '<td colspan="3" align="left">' '<font color="cornflowerblue"><b>Sink Node' "</b></font></td></tr>" ) def visit_environment(self, e, prev_e=None): self._dump('<table border="0">') def dump_location_context(lc, is_added=None): self._dump( "<tr><td>%s</td>" '<td align="left"><b>%s</b></td>' '<td align="left" colspan="2">' '<font color="gray60">%s </font>' "%s</td></tr>" % ( self._diff_plus_minus(is_added), lc.caption, lc.decl, ("(%s)" % self._make_sloc(lc.loc)) if lc.loc is not None else "", ) ) def dump_binding(f, b, is_added=None): self._dump( "<tr><td>%s</td>" '<td align="left"><i>S%s</i></td>' "%s" '<td align="left">%s</td>' '<td align="left">%s</td></tr>' % ( self._diff_plus_minus(is_added), b.stmt_id, '<td align="left"><font color="%s"><i>' "%s</i></font></td>" % ( "lavender" if self._dark_mode else "darkgreen", ("(%s)" % b.kind) if b.kind is not None else " ", ), self._short_pretty(b.pretty), f.bindings[b], ) ) frames_updated = e.diff_frames(prev_e) if prev_e is not None else None if frames_updated: for i in frames_updated: f = e.frames[i] prev_f = prev_e.frames[i] dump_location_context(f.location_context) bindings_removed, bindings_added = f.diff_bindings(prev_f) for b in bindings_removed: dump_binding(prev_f, b, False) for b in bindings_added: dump_binding(f, b, True) else: for f in e.frames: dump_location_context(f.location_context) for b in f.bindings: dump_binding(f, b) self._dump("</table>") def visit_environment_in_state(self, selector, title, s, prev_s=None): e = getattr(s, selector) prev_e = getattr(prev_s, selector) if prev_s is not None else None if e is None and prev_e is None: return self._dump('<hr /><tr><td align="left"><b>%s: </b>' % title) if e is None: self._dump("<i> Nothing!</i>") else: if prev_e is not None: if e.is_different(prev_e): self._dump('</td></tr><tr><td align="left">') self.visit_environment(e, prev_e) else: self._dump("<i> No changes!</i>") else: self._dump('</td></tr><tr><td align="left">') self.visit_environment(e) self._dump("</td></tr>") def visit_store(self, s, prev_s=None): self._dump('<table border="0">') def dump_binding(s, c, b, is_added=None): self._dump( "<tr><td>%s</td>" '<td align="left">%s</td>' '<td align="left">%s</td>' '<td align="left">%s</td>' '<td align="left">%s</td></tr>' % ( self._diff_plus_minus(is_added), s.clusters[c].base_region, b.offset, "(<i>Default</i>)" if b.kind == "Default" else "", s.clusters[c].bindings[b], ) ) if prev_s is not None: clusters_removed, clusters_added, clusters_updated = s.diff_clusters(prev_s) for c in clusters_removed: for b in prev_s.clusters[c].bindings: dump_binding(prev_s, c, b, False) for c in clusters_updated: bindings_removed, bindings_added = s.clusters[c].diff_bindings( prev_s.clusters[c] ) for b in bindings_removed: dump_binding(prev_s, c, b, False) for b in bindings_added: dump_binding(s, c, b, True) for c in clusters_added: for b in s.clusters[c].bindings: dump_binding(s, c, b, True) else: for c in s.clusters: for b in s.clusters[c].bindings: dump_binding(s, c, b) self._dump("</table>") def visit_store_in_state(self, s, prev_s=None): st = s.store prev_st = prev_s.store if prev_s is not None else None if st is None and prev_st is None: return self._dump('<hr /><tr><td align="left"><b>Store: </b>') if st is None: self._dump("<i> Nothing!</i>") else: if self._dark_mode: self._dump(' <font color="gray30">(%s)</font>' % st.ptr) else: self._dump(' <font color="gray">(%s)</font>' % st.ptr) if prev_st is not None: if s.store.is_different(prev_st): self._dump('</td></tr><tr><td align="left">') self.visit_store(st, prev_st) else: self._dump("<i> No changes!</i>") else: self._dump('</td></tr><tr><td align="left">') self.visit_store(st) self._dump("</td></tr>") def visit_generic_map(self, m, prev_m=None): self._dump('<table border="0">') def dump_pair(m, k, is_added=None): self._dump( "<tr><td>%s</td>" '<td align="left">%s</td>' '<td align="left">%s</td></tr>' % (self._diff_plus_minus(is_added), k, m.generic_map[k]) ) if prev_m is not None: removed, added = m.diff(prev_m) for k in removed: dump_pair(prev_m, k, False) for k in added: dump_pair(m, k, True) else: for k in m.generic_map: dump_pair(m, k, None) self._dump("</table>") def visit_generic_map_in_state(self, selector, title, s, prev_s=None): m = getattr(s, selector) prev_m = getattr(prev_s, selector) if prev_s is not None else None if m is None and prev_m is None: return self._dump("<hr />") self._dump('<tr><td align="left">' "<b>%s: </b>" % title) if m is None: self._dump("<i> Nothing!</i>") else: if prev_m is not None: if m.is_different(prev_m): self._dump('</td></tr><tr><td align="left">') self.visit_generic_map(m, prev_m) else: self._dump("<i> No changes!</i>") else: self._dump('</td></tr><tr><td align="left">') self.visit_generic_map(m) self._dump("</td></tr>") def visit_checker_messages(self, m, prev_m=None): self._dump('<table border="0">') def dump_line(l, is_added=None): self._dump( "<tr><td>%s</td>" '<td align="left">%s</td></tr>' % (self._diff_plus_minus(is_added), l) ) def dump_chk(chk, is_added=None): dump_line("<i>%s</i>:" % chk, is_added) if prev_m is not None: removed, added, updated = m.diff_messages(prev_m) for chk in removed: dump_chk(chk, False) for l in prev_m.items[chk].lines: dump_line(l, False) for chk in updated: dump_chk(chk) for l in m.items[chk].diff_lines(prev_m.items[chk]): dump_line(l[1:], l.startswith("+")) for chk in added: dump_chk(chk, True) for l in m.items[chk].lines: dump_line(l, True) else: for chk in m.items: dump_chk(chk) for l in m.items[chk].lines: dump_line(l) self._dump("</table>") def visit_checker_messages_in_state(self, s, prev_s=None): m = s.checker_messages prev_m = prev_s.checker_messages if prev_s is not None else None if m is None and prev_m is None: return self._dump("<hr />") self._dump('<tr><td align="left">' "<b>Checker State: </b>") if m is None: self._dump("<i> Nothing!</i>") else: if prev_m is not None: if m.is_different(prev_m): self._dump('</td></tr><tr><td align="left">') self.visit_checker_messages(m, prev_m) else: self._dump("<i> No changes!</i>") else: self._dump('</td></tr><tr><td align="left">') self.visit_checker_messages(m) self._dump("</td></tr>") def visit_state(self, s, prev_s): self.visit_store_in_state(s, prev_s) self.visit_environment_in_state("environment", "Expressions", s, prev_s) self.visit_generic_map_in_state("constraints", "Ranges", s, prev_s) self.visit_generic_map_in_state("dynamic_types", "Dynamic Types", s, prev_s) self.visit_environment_in_state( "constructing_objects", "Objects Under Construction", s, prev_s ) self.visit_environment_in_state( "index_of_element", "Indices Of Elements Under Construction", s, prev_s ) self.visit_environment_in_state( "pending_init_loops", "Pending Array Init Loop Expressions", s, prev_s ) self.visit_environment_in_state( "pending_destructors", "Indices of Elements Under Destruction", s, prev_s ) self.visit_checker_messages_in_state(s, prev_s) def visit_node(self, node): self._dump("%s [shape=record," % (node.node_name())) if self._dark_mode: self._dump('color="white",fontcolor="gray80",') self._dump('label=<<table border="0">') self._dump( '<tr><td bgcolor="%s"><b>State %s</b></td></tr>' % ( "gray20" if self._dark_mode else "gray70", node.state.state_id if node.state is not None else "Unspecified", ) ) if not self._topo_mode: self._dump('<tr><td align="left" width="0">') if len(node.points) > 1: self._dump("<b>Program points:</b></td></tr>") else: self._dump("<b>Program point:</b></td></tr>") self._dump( '<tr><td align="left" width="0">' '<table border="0" align="left" width="0">' ) for p in node.points: self.visit_program_point(p) self._dump("</table></td></tr>") if node.state is not None and not self._topo_mode: prev_s = None # Do diffs only when we have a unique predecessor. # Don't do diffs on the leaf nodes because they're # the important ones. if ( self._do_diffs and len(node.predecessors) == 1 and len(node.successors) > 0 ): prev_s = self._graph.nodes[node.predecessors[0]].state self.visit_state(node.state, prev_s) self._dump_raw("</table>>];\n") def visit_edge(self, pred, succ): self._dump_raw( "%s -> %s%s;\n" % ( pred.node_name(), succ.node_name(), ' [color="white"]' if self._dark_mode else "", ) ) def visit_end_of_graph(self): self._dump_raw("}\n") if not self._dump_dot_only: import sys import tempfile def write_temp_file(suffix, prefix, data): fd, filename = tempfile.mkstemp(suffix, prefix, ".", True) print('Writing "%s"...' % filename) with os.fdopen(fd, "w") as fp: fp.write(data) print("Done! Please remember to remove the file.") return filename try: import graphviz except ImportError: # The fallback behavior if graphviz is not installed! print("Python graphviz not found. Please invoke") print(" $ pip install graphviz") print("in order to enable automatic conversion to HTML.") print() print("You may also convert DOT to SVG manually via") print(" $ dot -Tsvg input.dot -o output.svg") print() write_temp_file(".dot", "egraph-", self.output()) return svg = graphviz.pipe("dot", "svg", self.output().encode()).decode() filename = write_temp_file( ".html", "egraph-", '<html><body bgcolor="%s">%s</body></html>' % ("#1a1a1a" if self._dark_mode else "white", svg), ) if sys.platform == "win32": os.startfile(filename) elif sys.platform == "darwin": os.system('open "%s"' % filename) else: os.system('xdg-open "%s"' % filename) # ===-----------------------------------------------------------------------===# # Explorers know how to traverse the ExplodedGraph in a certain order. # They would invoke a Visitor on every node or edge they encounter. # ===-----------------------------------------------------------------------===# # BasicExplorer explores the whole graph in no particular order. class BasicExplorer: def explore(self, graph, visitor): visitor.visit_begin_graph(graph) for node in sorted(graph.nodes): logging.debug("Visiting " + node) visitor.visit_node(graph.nodes[node]) for succ in sorted(graph.nodes[node].successors): logging.debug("Visiting edge: %s -> %s " % (node, succ)) visitor.visit_edge(graph.nodes[node], graph.nodes[succ]) visitor.visit_end_of_graph() # ===-----------------------------------------------------------------------===# # Trimmers cut out parts of the ExplodedGraph so that to focus on other parts. # Trimmers can be combined together by applying them sequentially. # ===-----------------------------------------------------------------------===# # SinglePathTrimmer keeps only a single path - the leftmost path from the root. # Useful when the trimmed graph is still too large. class SinglePathTrimmer: def trim(self, graph): visited_nodes = set() node_id = graph.root_id while True: visited_nodes.add(node_id) node = graph.nodes[node_id] if len(node.successors) > 0: succ_id = node.successors[0] succ = graph.nodes[succ_id] node.successors = [succ_id] succ.predecessors = [node_id] if succ_id in visited_nodes: break node_id = succ_id else: break graph.nodes = {node_id: graph.nodes[node_id] for node_id in visited_nodes} # TargetedTrimmer keeps paths that lead to specific nodes and discards all # other paths. Useful when you cannot use -trim-egraph (e.g. when debugging # a crash). class TargetedTrimmer: def __init__(self, target_nodes): self._target_nodes = target_nodes @staticmethod def parse_target_node(node, graph): if node.startswith("0x"): ret = "Node" + node assert ret in graph.nodes return ret else: for other_id in graph.nodes: other = graph.nodes[other_id] if other.node_id == int(node): return other_id @staticmethod def parse_target_nodes(target_nodes, graph): return [ TargetedTrimmer.parse_target_node(node, graph) for node in target_nodes.split(",") ] def trim(self, graph): queue = self._target_nodes visited_nodes = set() while len(queue) > 0: node_id = queue.pop() visited_nodes.add(node_id) node = graph.nodes[node_id] for pred_id in node.predecessors: if pred_id not in visited_nodes: queue.append(pred_id) graph.nodes = {node_id: graph.nodes[node_id] for node_id in visited_nodes} for node_id in graph.nodes: node = graph.nodes[node_id] node.successors = [ succ_id for succ_id in node.successors if succ_id in visited_nodes ] node.predecessors = [ succ_id for succ_id in node.predecessors if succ_id in visited_nodes ] # ===-----------------------------------------------------------------------===# # The entry point to the script. # ===-----------------------------------------------------------------------===# def main(): parser = argparse.ArgumentParser( description="Display and manipulate Exploded Graph dumps." ) parser.add_argument( "filename", type=str, help="the .dot file produced by the Static Analyzer" ) parser.add_argument( "-v", "--verbose", action="store_const", dest="loglevel", const=logging.DEBUG, default=logging.WARNING, help="enable info prints", ) parser.add_argument( "-d", "--diff", action="store_const", dest="diff", const=True, default=False, help="display differences between states", ) parser.add_argument( "-t", "--topology", action="store_const", dest="topology", const=True, default=False, help="only display program points, omit states", ) parser.add_argument( "-s", "--single-path", action="store_const", dest="single_path", const=True, default=False, help="only display the leftmost path in the graph " "(useful for trimmed graphs that still " "branch too much)", ) parser.add_argument( "--to", type=str, default=None, help="only display execution paths from the root " "to the given comma-separated list of nodes " "identified by a pointer or a stable ID; " "compatible with --single-path", ) parser.add_argument( "--dark", action="store_const", dest="dark", const=True, default=False, help="dark mode", ) parser.add_argument( "--gray", action="store_const", dest="gray", const=True, default=False, help="black-and-white mode", ) parser.add_argument( "--dump-dot-only", action="store_const", dest="dump_dot_only", const=True, default=False, help="instead of writing an HTML file and immediately " "displaying it, dump the rewritten dot file " "to stdout", ) args = parser.parse_args() logging.basicConfig(level=args.loglevel) graph = ExplodedGraph() with open(args.filename) as fd: for raw_line in fd: raw_line = raw_line.strip() graph.add_raw_line(raw_line) trimmers = [] if args.to is not None: trimmers.append( TargetedTrimmer(TargetedTrimmer.parse_target_nodes(args.to, graph)) ) if args.single_path: trimmers.append(SinglePathTrimmer()) explorer = BasicExplorer() visitor = DotDumpVisitor( args.diff, args.dark, args.gray, args.topology, args.dump_dot_only ) for trimmer in trimmers: trimmer.trim(graph) explorer.explore(graph, visitor) if __name__ == "__main__": main()