llvm-project/clang/utils/analyzer/exploded-graph-rewriter.py

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1245 lines
42 KiB
Python
Raw Normal View History

#!/usr/bin/env python
#
# ===- exploded-graph-rewriter.py - ExplodedGraph dump tool -----*- python -*--#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===-----------------------------------------------------------------------===#
from __future__ import print_function
import argparse
import collections
import difflib
import json
import logging
import os
import re
# ===-----------------------------------------------------------------------===#
# These data structures represent a deserialized ExplodedGraph.
# ===-----------------------------------------------------------------------===#
# A helper function for finding the difference between two dictionaries.
def diff_dicts(curr, prev):
    """Compare two dictionaries key by key.

    Returns a ``(removed, added)`` tuple of key lists.  A key of ``prev`` is
    reported as removed when it is absent from ``curr`` or maps to a
    different value there; a key of ``curr`` is reported as added when it is
    absent from ``prev`` or maps to a different value there.  A key whose
    value changed therefore appears in both lists.
    """
    removed = []
    for key in prev:
        if key not in curr or curr[key] != prev[key]:
            removed.append(key)

    added = []
    for key in curr:
        if key not in prev or curr[key] != prev[key]:
            added.append(key)

    return (removed, added)
# Represents any program state trait that is a dictionary of key-value pairs.
class GenericMap:
    """A program state trait stored as an ordered key-value mapping."""

    def __init__(self, items):
        # `items` is anything OrderedDict accepts, e.g. a list of pairs.
        self.generic_map = collections.OrderedDict(items)

    def diff(self, prev):
        """Return the (removed, added) key lists relative to `prev`."""
        return diff_dicts(self.generic_map, prev.generic_map)

    def is_different(self, prev):
        """Tell whether any key was removed, added or changed."""
        removed, added = self.diff(prev)
        if len(removed) != 0:
            return True
        return len(added) != 0
# A deserialized source location.
class SourceLocation:
    """A source location read from its JSON description.

    Reads the "line" and "column" entries, an optional "file" entry (only
    its base name is kept) and an optional nested "spelling" location.
    """

    def __init__(self, json_loc):
        logging.debug("json: %s" % json_loc)
        self.line = json_loc["line"]
        self.col = json_loc["column"]

        if "file" in json_loc:
            self.filename = os.path.basename(json_loc["file"])
        else:
            self.filename = "(main file)"

        # A spelling location is itself a full location, so recurse.
        if "spelling" in json_loc:
            self.spelling = SourceLocation(json_loc["spelling"])
        else:
            self.spelling = None

    def is_macro(self):
        """A location counts as a macro location when it has a spelling."""
        return self.spelling is not None
# A deserialized program point.
class ProgramPoint:
    """A program point read from its JSON description.

    Every point carries kind, tag, node_id, is_sink and has_report.  "Edge",
    "Statement" and "BlockEntrance" points carry extra kind-specific
    attributes; other kinds get none.
    """

    def __init__(self, json_pp):
        kind = json_pp["kind"]
        self.kind = kind
        self.tag = json_pp["tag"]
        self.node_id = json_pp["node_id"]
        self.is_sink = bool(json_pp["is_sink"])
        self.has_report = bool(json_pp["has_report"])

        if kind == "Edge":
            self.src_id = json_pp["src_id"]
            self.dst_id = json_pp["dst_id"]
            return

        if kind == "BlockEntrance":
            self.block_id = json_pp["block_id"]
            return

        if kind != "Statement":
            return

        logging.debug(json_pp)
        self.stmt_kind = json_pp["stmt_kind"]
        # "cast_kind" is optional in the input.
        self.cast_kind = json_pp.get("cast_kind")
        self.stmt_point_kind = json_pp["stmt_point_kind"]
        self.stmt_id = json_pp["stmt_id"]
        self.pointer = json_pp["pointer"]
        self.pretty = json_pp["pretty"]
        location = json_pp["location"]
        self.loc = None if location is None else SourceLocation(location)
# A single expression acting as a key in a deserialized Environment.
class EnvironmentBindingKey:
    """A single expression acting as a key in a deserialized Environment.

    Two keys are equal, and hash alike, when their statement ids match;
    `pretty` and `kind` are display-only and do not take part in equality.
    """

    def __init__(self, json_ek):
        # CXXCtorInitializer is not a Stmt, so it is identified by "init_id".
        self.stmt_id = (
            json_ek["stmt_id"] if "stmt_id" in json_ek else json_ek["init_id"]
        )
        self.pretty = json_ek["pretty"]
        self.kind = json_ek["kind"] if "kind" in json_ek else None

    def _key(self):
        return self.stmt_id

    def __eq__(self, other):
        # Comparing with an unrelated object must not crash on a missing
        # `_key`; let Python fall back to its default comparison instead.
        if not isinstance(other, EnvironmentBindingKey):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        # Python 2 does not derive `!=` from `__eq__`, so define it.
        result = self.__eq__(other)
        return result if result is NotImplemented else not result

    def __hash__(self):
        return hash(self._key())
# Deserialized description of a location context.
class LocationContext:
    """Deserialized description of a location context.

    Reads "lctx_id", "location_context" (the caption), "calling" (the
    declaration) and "location" from the frame.  Two contexts are equal,
    and hash alike, when their `lctx_id` values match.
    """

    def __init__(self, json_frame):
        self.lctx_id = json_frame["lctx_id"]
        self.caption = json_frame["location_context"]
        self.decl = json_frame["calling"]
        self.loc = (
            SourceLocation(json_frame["location"])
            if json_frame["location"] is not None
            else None
        )

    def _key(self):
        return self.lctx_id

    def __eq__(self, other):
        # Comparing with an unrelated object must not crash on a missing
        # `_key`; let Python fall back to its default comparison instead.
        if not isinstance(other, LocationContext):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        # Python 2 does not derive `!=` from `__eq__`, so define it.
        result = self.__eq__(other)
        return result if result is NotImplemented else not result

    def __hash__(self):
        return hash(self._key())
# A group of deserialized Environment bindings that correspond to a specific
# location context.
class EnvironmentFrame:
    """The Environment bindings that belong to one location context."""

    def __init__(self, json_frame):
        self.location_context = LocationContext(json_frame)
        # "items" may be null in the input; treat that as no bindings.
        items = json_frame["items"]
        if items is None:
            items = []
        pairs = [(EnvironmentBindingKey(item), item["value"]) for item in items]
        self.bindings = collections.OrderedDict(pairs)

    def diff_bindings(self, prev):
        """Return the (removed, added) binding keys relative to `prev`."""
        return diff_dicts(self.bindings, prev.bindings)

    def is_different(self, prev):
        """Tell whether any binding was removed, added or changed."""
        removed, added = self.diff_bindings(prev)
        if len(removed) != 0:
            return True
        return len(added) != 0
# A deserialized Environment. This class can also hold other entities that
# are similar to Environment, such as Objects Under Construction or
# Indices Of Elements Under Construction.
class GenericEnvironment:
    """An ordered list of EnvironmentFrame objects.

    Used for the Environment itself and for other state entities laid out
    the same way.
    """

    def __init__(self, json_e):
        self.frames = [EnvironmentFrame(json_frame) for json_frame in json_e]

    def diff_frames(self, prev):
        """Return the indices of frames whose bindings differ from `prev`.

        Returns None when no per-frame diff can be produced: the frame
        counts differ, or a frame at some position belongs to a different
        location context.
        """
        # TODO: It's difficult to display a good diff when frame numbers shift.
        if len(self.frames) != len(prev.frames):
            return None

        updated = []
        for index, (frame, prev_frame) in enumerate(zip(self.frames, prev.frames)):
            if not (frame.location_context == prev_frame.location_context):
                # We have the whole frame replaced with another frame.
                # TODO: Produce a nice diff.
                return None
            if frame.is_different(prev_frame):
                updated.append(index)

        # TODO: Add support for added/removed.
        return updated

    def is_different(self, prev):
        """Tell whether anything changed; an undiffable state counts too."""
        updated = self.diff_frames(prev)
        if updated is None:
            return True
        return len(updated) > 0
# A single binding key in a deserialized RegionStore cluster.
class StoreBindingKey:
    """A single binding key in a deserialized RegionStore cluster.

    Two keys are equal, and hash alike, when both `kind` and `offset` match.
    """

    def __init__(self, json_sk):
        self.kind = json_sk["kind"]
        self.offset = json_sk["offset"]

    def _key(self):
        return (self.kind, self.offset)

    def __eq__(self, other):
        # Comparing with an unrelated object must not crash on a missing
        # `_key`; let Python fall back to its default comparison instead.
        if not isinstance(other, StoreBindingKey):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        # Python 2 does not derive `!=` from `__eq__`, so define it.
        result = self.__eq__(other)
        return result if result is NotImplemented else not result

    def __hash__(self):
        return hash(self._key())
# A single cluster of the deserialized RegionStore.
class StoreCluster:
    """A single cluster of the deserialized RegionStore.

    Holds the cluster's base region and an ordered mapping from
    StoreBindingKey to the bound value.
    """

    def __init__(self, json_sc):
        self.base_region = json_sc["cluster"]
        pairs = [(StoreBindingKey(item), item["value"]) for item in json_sc["items"]]
        self.bindings = collections.OrderedDict(pairs)

    def diff_bindings(self, prev):
        """Return the (removed, added) binding keys relative to `prev`."""
        return diff_dicts(self.bindings, prev.bindings)

    def is_different(self, prev):
        """Tell whether any binding was removed, added or changed."""
        removed, added = self.diff_bindings(prev)
        if len(removed) != 0:
            return True
        return len(added) != 0
# A deserialized RegionStore.
class Store:
    """A deserialized RegionStore: clusters keyed by their "pointer"."""

    def __init__(self, json_s):
        self.ptr = json_s["pointer"]
        pairs = [(item["pointer"], StoreCluster(item)) for item in json_s["items"]]
        self.clusters = collections.OrderedDict(pairs)

    def diff_clusters(self, prev):
        """Return (removed, added, updated) cluster keys relative to `prev`.

        `updated` lists, in `prev` order, the clusters present on both
        sides for which the previous cluster reports a difference.
        """
        mine = self.clusters
        theirs = prev.clusters

        removed = []
        updated = []
        for key in theirs:
            if key not in mine:
                removed.append(key)
            elif theirs[key].is_different(mine[key]):
                updated.append(key)

        added = [key for key in mine if key not in theirs]
        return (removed, added, updated)

    def is_different(self, prev):
        """Tell whether any cluster was removed, added or updated."""
        removed, added, updated = self.diff_clusters(prev)
        return not (len(removed) == 0 and len(added) == 0 and len(updated) == 0)
# Deserialized messages from a single checker in a single program state.
# Basically a list of raw strings.
class CheckerLines:
    """The raw message lines one checker printed for one program state."""

    def __init__(self, json_lines):
        self.lines = json_lines

    def diff_lines(self, prev):
        """Return the stripped ndiff lines that start with "+" or "-"."""
        changed = []
        for entry in difflib.ndiff(prev.lines, self.lines):
            if entry.startswith(("+", "-")):
                changed.append(entry.strip())
        return changed

    def is_different(self, prev):
        """Tell whether any line was added or removed."""
        return len(self.diff_lines(prev)) > 0
# Deserialized messages of all checkers, separated by checker.
class CheckerMessages:
    """Messages of all checkers, keyed by checker name."""

    def __init__(self, json_m):
        pairs = [(entry["checker"], CheckerLines(entry["messages"])) for entry in json_m]
        self.items = collections.OrderedDict(pairs)

    def diff_messages(self, prev):
        """Return (removed, added, updated) checker names relative to `prev`.

        `updated` lists, in `prev` order, the checkers present on both
        sides for which the previous lines report a difference.
        """
        mine = self.items
        theirs = prev.items

        removed = []
        updated = []
        for name in theirs:
            if name not in mine:
                removed.append(name)
            elif theirs[name].is_different(mine[name]):
                updated.append(name)

        added = [name for name in mine if name not in theirs]
        return (removed, added, updated)

    def is_different(self, prev):
        """Tell whether any checker was removed, added or updated."""
        removed, added, updated = self.diff_messages(prev)
        return not (len(removed) == 0 and len(added) == 0 and len(updated) == 0)
# A deserialized program state.
class ProgramState:
    """A deserialized program state.

    `json_ps` may be None, in which case every component is None.
    Otherwise the store, environment, constraints, dynamic types and
    checker messages entries must be present (possibly null), while the
    state-trait entries are optional.
    """

    def __init__(self, state_id, json_ps):
        logging.debug("Adding ProgramState " + str(state_id))

        store_key = "store"
        env_key = "environment"
        constraints_key = "constraints"
        dyn_ty_key = "dynamic_types"
        ctor_key = "constructing_objects"
        ind_key = "index_of_element"
        init_loop_key = "pending_init_loops"
        dtor_key = "pending_destructors"
        msg_key = "checker_messages"

        if json_ps is None:
            # Stand in for a missing state with one where everything is null.
            json_ps = dict.fromkeys(
                (
                    store_key,
                    env_key,
                    constraints_key,
                    dyn_ty_key,
                    ctor_key,
                    ind_key,
                    init_loop_key,
                    dtor_key,
                    msg_key,
                )
            )

        self.state_id = state_id

        json_store = json_ps[store_key]
        self.store = None if json_store is None else Store(json_store)

        json_env = json_ps[env_key]
        self.environment = (
            None if json_env is None else GenericEnvironment(json_env["items"])
        )

        json_constraints = json_ps[constraints_key]
        if json_constraints is None:
            self.constraints = None
        else:
            self.constraints = GenericMap(
                [(c["symbol"], c["range"]) for c in json_constraints]
            )

        json_dyn_types = json_ps[dyn_ty_key]
        if json_dyn_types is None:
            self.dynamic_types = None
        else:
            pairs = []
            for t in json_dyn_types:
                suffix = " (or a sub-class)" if t["sub_classable"] else ""
                pairs.append((t["region"], "%s%s" % (t["dyn_type"], suffix)))
            self.dynamic_types = GenericMap(pairs)

        json_messages = json_ps[msg_key]
        self.checker_messages = (
            None if json_messages is None else CheckerMessages(json_messages)
        )

        # State traits
        #
        # For traits we always check if a key exists because if a trait
        # has no information, nothing will be printed in the .dot file
        # we parse.
        def optional_environment(key):
            json_trait = json_ps.get(key)
            if json_trait is None:
                return None
            return GenericEnvironment(json_trait)

        self.constructing_objects = optional_environment(ctor_key)
        self.index_of_element = optional_environment(ind_key)
        self.pending_init_loops = optional_environment(init_loop_key)
        self.pending_destructors = optional_environment(dtor_key)
# A deserialized exploded graph node. Has a default constructor because it
# may be referenced as part of an edge before its contents are deserialized,
# and in this moment we already need a room for predecessors and successors.
class ExplodedNode:
    """A deserialized exploded graph node.

    The constructor only creates the edge lists, so a node can be referred
    to by an edge before `construct` fills in its contents.
    """

    def __init__(self):
        self.predecessors = []
        self.successors = []

    def construct(self, node_id, json_node):
        """Fill in the node from its id ("Node0x...") and its JSON label."""
        logging.debug("Adding " + node_id)
        # Drop the leading "Node" so that `ptr` is just the address part.
        self.ptr = node_id[4:]
        points = []
        for json_point in json_node["program_points"]:
            points.append(ProgramPoint(json_point))
        self.points = points
        # The node takes the id recorded on its last program point.
        self.node_id = points[-1].node_id
        self.state = ProgramState(json_node["state_id"], json_node["program_state"])
        assert self.node_name() == node_id

    def node_name(self):
        """Return the DOT name of the node: "Node" followed by `ptr`."""
        return "Node" + self.ptr
# A deserialized ExplodedGraph. Constructed by consuming a .dot file
# line-by-line.
class ExplodedGraph:
    """A deserialized ExplodedGraph, built by feeding it a .dot file
    line by line through `add_raw_line`."""

    # Parse .dot files with regular expressions.
    node_re = re.compile(
        '^(Node0x[0-9a-f]*) \\[shape=record,.*label="{(.*)\\\\l}"\\];$'
    )
    edge_re = re.compile("^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$")

    # Replacements applied, in this order, to a node label before it is
    # parsed as JSON.
    # NOTE(review): the first entry strips every space from the label,
    # including spaces inside JSON string values; upstream may intend an
    # HTML entity such as `&nbsp;` here -- confirm against the original file.
    _label_unescapes = (
        (" ", ""),
        ('\\"', '"'),
        ("\\{", "{"),
        ("\\}", "}"),
        ("\\\\", "\\"),
        ("\\|", "|"),
        ("\\<", "\\\\<"),
        ("\\>", "\\\\>"),
    )

    def __init__(self):
        self.nodes = collections.defaultdict(ExplodedNode)
        self.root_id = None
        self.incomplete_line = ""

    def add_raw_line(self, raw_line):
        """Consume one line of the .dot file.

        Lines starting with "//" are ignored.  A non-empty line that does
        not end in ";" is buffered and joined with the following lines.  A
        complete line is then classified as an edge, a node, or skipped.
        """
        if raw_line.startswith("//"):
            return

        # Allow line breaks by waiting for ';'. This is not valid in
        # a .dot file, but it is useful for writing tests.
        if raw_line and not raw_line.endswith(";"):
            self.incomplete_line += raw_line
            return

        raw_line = self.incomplete_line + raw_line
        self.incomplete_line = ""

        # Apply regexps one by one to see if it's a node or an edge
        # and extract contents if necessary.
        logging.debug("Line: " + raw_line)

        edge_match = self.edge_re.match(raw_line)
        if edge_match is not None:
            logging.debug("Classified as edge line.")
            pred, succ = edge_match.group(1), edge_match.group(2)
            self.nodes[pred].successors.append(succ)
            self.nodes[succ].predecessors.append(pred)
            return

        node_match = self.node_re.match(raw_line)
        if node_match is None:
            logging.debug("Skipping.")
            return

        logging.debug("Classified as node line.")
        node_id = node_match.group(1)
        # The first node seen while the graph is still empty is the root.
        if len(self.nodes) == 0:
            self.root_id = node_id

        # Note: when writing tests you don't need to escape everything,
        # even though in a valid dot file everything is escaped.
        node_label = node_match.group(2)
        for escaped, plain in self._label_unescapes:
            node_label = node_label.replace(escaped, plain)
        node_label = node_label.rstrip(",")

        # Handle `\l` separately because a string literal can be in code
        # like "string\\literal" with the `\l` inside.
        # Also on Windows the __FILE__ macro produces `\` delimiters and a
        # directory or file name may start with the letter `l`.
        # Find all `\l` (like `,\l`, `}\l`, `[\l`) except `\\l`,
        # because the literal as a rule contains multiple `\` before `\l`.
        node_label = re.sub(r"(?<!\\)\\l", "", node_label)
        logging.debug(node_label)

        json_node = json.loads(node_label)
        self.nodes[node_id].construct(node_id, json_node)
# ===-----------------------------------------------------------------------===#
# Visitors traverse a deserialized ExplodedGraph and do different things
# with every node and edge.
# ===-----------------------------------------------------------------------===#
# A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based
# syntax highlighting.
class DotDumpVisitor:
def __init__(
self, do_diffs, dark_mode, gray_mode, topo_mode, dump_html_only, dump_dot_only
):
assert not (dump_html_only and dump_dot_only), (
"Option dump_html_only and dump_dot_only are conflict, "
"they cannot be true at the same time."
)
self._do_diffs = do_diffs
self._dark_mode = dark_mode
self._gray_mode = gray_mode
self._topo_mode = topo_mode
self._dump_html_only = dump_html_only
self._dump_dot_only = dump_dot_only
self._output = []
def _dump_raw(self, s):
if self._dump_dot_only:
print(s, end="")
else:
self._output.append(s)
def output(self):
    """Return everything buffered so far as a single string.

    Must not be called in dot-only mode, where text is printed
    immediately instead of being buffered.
    """
    assert not self._dump_dot_only
    return "".join(self._output)
def _dump(self, s):
s = (
s.replace("&", "&amp;")
.replace("{", "\\{")
.replace("}", "\\}")
.replace("\\<", "&lt;")
.replace("\\>", "&gt;")
.replace("|", "\\|")
)
s = re.sub(r"(?<!\\)\\l", "<br />", s)
if self._gray_mode:
s = re.sub(r'<font color="[a-z0-9]*">', "", s)
s = re.sub(r"</font>", "", s)
self._dump_raw(s)
@staticmethod
def _diff_plus_minus(is_added):
if is_added is None:
return ""
if is_added:
return '<font color="forestgreen">+</font>'
return '<font color="red">-</font>'
@staticmethod
def _short_pretty(s):
if s is None:
return None
if len(s) < 20:
return s
left = s.find("{")
right = s.rfind("}")
if left == -1 or right == -1 or left >= right:
return s
candidate = s[0 : left + 1] + " ... " + s[right:]
if len(candidate) >= len(s):
return s
return candidate
@staticmethod
def _make_sloc(loc):
if loc is None:
return "<i>Invalid Source Location</i>"
def make_plain_loc(loc):
return "%s:<b>%s</b>:<b>%s</b>" % (loc.filename, loc.line, loc.col)
if loc.is_macro():
return '%s <font color="royalblue1">' "(<i>spelling at </i> %s)</font>" % (
make_plain_loc(loc),
make_plain_loc(loc.spelling),
)
return make_plain_loc(loc)
def visit_begin_graph(self, graph):
    """Remember the graph and emit the opening lines of the DOT digraph."""
    self._graph = graph
    header = ['digraph "ExplodedGraph" {\n']
    if self._dark_mode:
        header.append('bgcolor="gray10";\n')
    header.append('label="";\n')
    for line in header:
        self._dump_raw(line)
def visit_program_point(self, p):
    """Emit the table rows describing one program point.

    The first row shows the node id and kind-specific details; further
    rows follow for a tag, an attached bug report and a sink node.
    """
    if p.kind in ("CallEnter", "CallExitBegin", "CallExitEnd"):
        color = "dodgerblue" if self._dark_mode else "blue"
    else:
        fixed_colors = {
            "Edge": "gold3",
            "BlockEntrance": "gold3",
            "BlockExit": "gold3",
            "PreStmtPurgeDeadSymbols": "red",
            "PostStmtPurgeDeadSymbols": "red",
            "Statement": "cyan4",
        }
        color = fixed_colors.get(p.kind, "forestgreen")

    self._dump('<tr><td align="left">%s.</td>' % p.node_id)

    if p.kind == "Statement":
        # This avoids pretty-printing huge statements such as CompoundStmt.
        # Such statements show up only at [Pre|Post]StmtPurgeDeadSymbols
        skip_pretty = "PurgeDeadSymbols" in p.stmt_point_kind
        stmt_color = "cyan3"
        location = self._make_sloc(p.loc)
        if p.cast_kind is None:
            stmt_label = p.stmt_kind
        else:
            stmt_label = "%s (%s)" % (p.stmt_kind, p.cast_kind)
        pretty = "" if skip_pretty else self._short_pretty(p.pretty)
        row = (
            '<td align="left" width="0">%s:</td>'
            '<td align="left" width="0"><font color="%s">'
            "%s</font> </td>"
            '<td align="left"><i>S%s</i></td>'
            '<td align="left"><font color="%s">%s</font></td>'
            '<td align="left">%s</td></tr>'
        )
        self._dump(
            row
            % (
                location,
                color,
                stmt_label,
                p.stmt_id,
                stmt_color,
                p.stmt_point_kind,
                pretty,
            )
        )
    elif p.kind == "Edge":
        row = (
            '<td width="0"></td>'
            '<td align="left" width="0">'
            '<font color="%s">%s</font></td><td align="left">'
            "[B%d] -\\> [B%d]</td></tr>"
        )
        self._dump(row % (color, "BlockEdge", p.src_id, p.dst_id))
    elif p.kind == "BlockEntrance":
        row = (
            '<td width="0"></td>'
            '<td align="left" width="0">'
            '<font color="%s">%s</font></td>'
            '<td align="left">[B%d]</td></tr>'
        )
        self._dump(row % (color, p.kind, p.block_id))
    else:
        # TODO: Print more stuff for other kinds of points.
        row = (
            '<td width="0"></td>'
            '<td align="left" width="0" colspan="2">'
            '<font color="%s">%s</font></td></tr>'
        )
        self._dump(row % (color, p.kind))

    # The extra rows share the same two empty leading cells.
    extra_row_start = (
        '<tr><td width="0"></td><td width="0"></td>'
        '<td colspan="3" align="left">'
    )

    if p.tag is not None:
        self._dump(
            extra_row_start
            + '<b>Tag: </b> <font color="crimson">'
            "%s</font></td></tr>" % p.tag
        )

    if p.has_report:
        self._dump(
            extra_row_start
            + '<font color="red"><b>Bug Report Attached'
            "</b></font></td></tr>"
        )

    if p.is_sink:
        self._dump(
            extra_row_start
            + '<font color="cornflowerblue"><b>Sink Node'
            "</b></font></td></tr>"
        )
def visit_environment(self, e, prev_e=None):
    """Emit a table with the bindings of environment `e`.

    When `prev_e` is given and a per-frame diff yields updated frames,
    only those frames are shown, with removed and added bindings marked.
    Otherwise every frame is dumped in full.
    """

    def emit_context(lc, is_added=None):
        if lc.loc is not None:
            location = "(%s)" % self._make_sloc(lc.loc)
        else:
            location = ""
        row = (
            "<tr><td>%s</td>"
            '<td align="left"><b>%s</b></td>'
            '<td align="left" colspan="2">'
            '<font color="gray60">%s </font>'
            "%s</td></tr>"
        )
        self._dump(
            row % (self._diff_plus_minus(is_added), lc.caption, lc.decl, location)
        )

    def emit_binding(frame, key, is_added=None):
        marker = self._diff_plus_minus(is_added)
        kind_cell = (
            '<td align="left"><font color="%s"><i>'
            "%s</i></font></td>"
            % (
                "lavender" if self._dark_mode else "darkgreen",
                ("(%s)" % key.kind) if key.kind is not None else " ",
            )
        )
        row = (
            "<tr><td>%s</td>"
            '<td align="left"><i>S%s</i></td>'
            "%s"
            '<td align="left">%s</td>'
            '<td align="left">%s</td></tr>'
        )
        self._dump(
            row
            % (
                marker,
                key.stmt_id,
                kind_cell,
                self._short_pretty(key.pretty),
                frame.bindings[key],
            )
        )

    self._dump('<table border="0">')

    frames_updated = None
    if prev_e is not None:
        frames_updated = e.diff_frames(prev_e)

    if not frames_updated:
        # No usable diff (or nothing to compare with): show everything.
        for frame in e.frames:
            emit_context(frame.location_context)
            for key in frame.bindings:
                emit_binding(frame, key)
    else:
        for index in frames_updated:
            frame = e.frames[index]
            prev_frame = prev_e.frames[index]
            emit_context(frame.location_context)
            removed, added = frame.diff_bindings(prev_frame)
            for key in removed:
                emit_binding(prev_frame, key, False)
            for key in added:
                emit_binding(frame, key, True)

    self._dump("</table>")
def visit_environment_in_state(self, selector, title, s, prev_s=None):
    """Emit the section for the environment-like attribute `selector`.

    Nothing is emitted when the attribute is None in both `s` and
    `prev_s`.  Otherwise the section shows a "Nothing!" note, a
    "No changes!" note, or the environment table.
    """
    e = getattr(s, selector)
    prev_e = None if prev_s is None else getattr(prev_s, selector)
    if e is None and prev_e is None:
        return

    table_row_start = '</td></tr><tr><td align="left">'

    self._dump('<hr /><tr><td align="left"><b>%s: </b>' % title)
    if e is None:
        self._dump("<i> Nothing!</i>")
    elif prev_e is None:
        self._dump(table_row_start)
        self.visit_environment(e)
    elif e.is_different(prev_e):
        self._dump(table_row_start)
        self.visit_environment(e, prev_e)
    else:
        self._dump("<i> No changes!</i>")
    self._dump("</td></tr>")
def visit_store(self, s, prev_s=None):
    """Emit a table with the bindings of store `s`.

    Without `prev_s` every binding is listed.  With `prev_s` only the
    difference is listed: removed clusters, then changes inside updated
    clusters, then added clusters.
    """

    def emit_binding(store, cluster_key, binding_key, is_added=None):
        cluster = store.clusters[cluster_key]
        row = (
            "<tr><td>%s</td>"
            '<td align="left">%s</td>'
            '<td align="left">%s</td>'
            '<td align="left">%s</td>'
            '<td align="left">%s</td></tr>'
        )
        self._dump(
            row
            % (
                self._diff_plus_minus(is_added),
                cluster.base_region,
                binding_key.offset,
                "(<i>Default</i>)" if binding_key.kind == "Default" else "",
                cluster.bindings[binding_key],
            )
        )

    self._dump('<table border="0">')

    if prev_s is None:
        for cluster_key in s.clusters:
            for binding_key in s.clusters[cluster_key].bindings:
                emit_binding(s, cluster_key, binding_key)
    else:
        removed, added, updated = s.diff_clusters(prev_s)
        for cluster_key in removed:
            for binding_key in prev_s.clusters[cluster_key].bindings:
                emit_binding(prev_s, cluster_key, binding_key, False)
        for cluster_key in updated:
            current = s.clusters[cluster_key]
            previous = prev_s.clusters[cluster_key]
            bindings_removed, bindings_added = current.diff_bindings(previous)
            for binding_key in bindings_removed:
                emit_binding(prev_s, cluster_key, binding_key, False)
            for binding_key in bindings_added:
                emit_binding(s, cluster_key, binding_key, True)
        for cluster_key in added:
            for binding_key in s.clusters[cluster_key].bindings:
                emit_binding(s, cluster_key, binding_key, True)

    self._dump("</table>")
def visit_store_in_state(self, s, prev_s=None):
    """Emit the "Store" section of a program state.

    Nothing is emitted when the store is None in both `s` and `prev_s`.
    Otherwise the section shows a "Nothing!" note, or the store pointer
    followed by a "No changes!" note or the store table.
    """
    st = s.store
    prev_st = None if prev_s is None else prev_s.store
    if st is None and prev_st is None:
        return

    table_row_start = '</td></tr><tr><td align="left">'

    self._dump('<hr /><tr><td align="left"><b>Store: </b>')
    if st is None:
        self._dump("<i> Nothing!</i>")
    else:
        pointer_color = "gray30" if self._dark_mode else "gray"
        self._dump(' <font color="%s">(%s)</font>' % (pointer_color, st.ptr))
        if prev_st is None:
            self._dump(table_row_start)
            self.visit_store(st)
        elif st.is_different(prev_st):
            self._dump(table_row_start)
            self.visit_store(st, prev_st)
        else:
            self._dump("<i> No changes!</i>")
    self._dump("</td></tr>")
def visit_generic_map(self, m, prev_m=None):
    """Emit a table with the key-value pairs of map `m`.

    With `prev_m` only removed pairs (values from `prev_m`) and added
    pairs (values from `m`) are listed; without it, every pair.
    """

    def emit_pair(source, key, is_added=None):
        row = (
            "<tr><td>%s</td>"
            '<td align="left">%s</td>'
            '<td align="left">%s</td></tr>'
        )
        self._dump(
            row % (self._diff_plus_minus(is_added), key, source.generic_map[key])
        )

    self._dump('<table border="0">')
    if prev_m is None:
        for key in m.generic_map:
            emit_pair(m, key, None)
    else:
        removed, added = m.diff(prev_m)
        for key in removed:
            emit_pair(prev_m, key, False)
        for key in added:
            emit_pair(m, key, True)
    self._dump("</table>")
def visit_generic_map_in_state(self, selector, title, s, prev_s=None):
    """Emit the section for the map-like attribute `selector`.

    Nothing is emitted when the attribute is None in both `s` and
    `prev_s`.  Otherwise the section shows a "Nothing!" note, a
    "No changes!" note, or the map table.
    """
    m = getattr(s, selector)
    prev_m = None if prev_s is None else getattr(prev_s, selector)
    if m is None and prev_m is None:
        return

    table_row_start = '</td></tr><tr><td align="left">'

    self._dump("<hr />")
    self._dump('<tr><td align="left">' "<b>%s: </b>" % title)
    if m is None:
        self._dump("<i> Nothing!</i>")
    elif prev_m is None:
        self._dump(table_row_start)
        self.visit_generic_map(m)
    elif m.is_different(prev_m):
        self._dump(table_row_start)
        self.visit_generic_map(m, prev_m)
    else:
        self._dump("<i> No changes!</i>")
    self._dump("</td></tr>")
def visit_checker_messages(self, m, prev_m=None):
    """Emit a table with the checker messages in `m`.

    Without `prev_m` every checker and all of its lines are listed.  With
    `prev_m` only the difference is listed: removed checkers, then the
    changed lines of updated checkers, then added checkers.
    """

    def emit_line(text, is_added=None):
        row = "<tr><td>%s</td>" '<td align="left">%s</td></tr>'
        self._dump(row % (self._diff_plus_minus(is_added), text))

    def emit_checker(name, is_added=None):
        emit_line("<i>%s</i>:" % name, is_added)

    self._dump('<table border="0">')

    if prev_m is None:
        for name in m.items:
            emit_checker(name)
            for text in m.items[name].lines:
                emit_line(text)
    else:
        removed, added, updated = m.diff_messages(prev_m)
        for name in removed:
            emit_checker(name, False)
            for text in prev_m.items[name].lines:
                emit_line(text, False)
        for name in updated:
            emit_checker(name)
            for entry in m.items[name].diff_lines(prev_m.items[name]):
                # The first character of a diff entry is its "+"/"-" marker.
                emit_line(entry[1:], entry.startswith("+"))
        for name in added:
            emit_checker(name, True)
            for text in m.items[name].lines:
                emit_line(text, True)

    self._dump("</table>")
def visit_checker_messages_in_state(self, s, prev_s=None):
    """Emit the "Checker State" section of a program state.

    Nothing is emitted when the messages are None in both `s` and
    `prev_s`.  Otherwise the section shows a "Nothing!" note, a
    "No changes!" note, or the messages table.
    """
    m = s.checker_messages
    prev_m = None if prev_s is None else prev_s.checker_messages
    if m is None and prev_m is None:
        return

    table_row_start = '</td></tr><tr><td align="left">'

    self._dump("<hr />")
    self._dump('<tr><td align="left">' "<b>Checker State: </b>")
    if m is None:
        self._dump("<i> Nothing!</i>")
    elif prev_m is None:
        self._dump(table_row_start)
        self.visit_checker_messages(m)
    elif m.is_different(prev_m):
        self._dump(table_row_start)
        self.visit_checker_messages(m, prev_m)
    else:
        self._dump("<i> No changes!</i>")
    self._dump("</td></tr>")
def visit_state(self, s, prev_s):
    """Emit every section of program state `s`, diffed against `prev_s`.

    Sections come in a fixed order: store, expressions, ranges, dynamic
    types, the four construction/destruction traits, checker messages.
    """
    self.visit_store_in_state(s, prev_s)
    self.visit_environment_in_state("environment", "Expressions", s, prev_s)

    for selector, title in (
        ("constraints", "Ranges"),
        ("dynamic_types", "Dynamic Types"),
    ):
        self.visit_generic_map_in_state(selector, title, s, prev_s)

    for selector, title in (
        ("constructing_objects", "Objects Under Construction"),
        ("index_of_element", "Indices Of Elements Under Construction"),
        ("pending_init_loops", "Pending Array Init Loop Expressions"),
        ("pending_destructors", "Indices of Elements Under Destruction"),
    ):
        self.visit_environment_in_state(selector, title, s, prev_s)

    self.visit_checker_messages_in_state(s, prev_s)
def visit_node(self, node):
    """Emit the DOT statement for one node with an HTML-like table label.

    In topology mode only the state id header is shown.  Otherwise the
    program points follow and, when the node has a state, its sections.
    """
    self._dump("%s [shape=record," % (node.node_name()))
    if self._dark_mode:
        self._dump('color="white",fontcolor="gray80",')
    self._dump('label=<<table border="0">')

    header_color = "gray20" if self._dark_mode else "gray70"
    if node.state is not None:
        state_label = node.state.state_id
    else:
        state_label = "Unspecified"
    self._dump(
        '<tr><td bgcolor="%s"><b>State %s</b></td></tr>'
        % (header_color, state_label)
    )

    if not self._topo_mode:
        self._dump('<tr><td align="left" width="0">')
        if len(node.points) > 1:
            self._dump("<b>Program points:</b></td></tr>")
        else:
            self._dump("<b>Program point:</b></td></tr>")
        self._dump(
            '<tr><td align="left" width="0">'
            '<table border="0" align="left" width="0">'
        )
        for point in node.points:
            self.visit_program_point(point)
        self._dump("</table></td></tr>")

        if node.state is not None:
            # Do diffs only when we have a unique predecessor.
            # Don't do diffs on the leaf nodes because they're
            # the important ones.
            can_diff = (
                self._do_diffs
                and len(node.predecessors) == 1
                and len(node.successors) > 0
            )
            prev_s = None
            if can_diff:
                prev_s = self._graph.nodes[node.predecessors[0]].state
            self.visit_state(node.state, prev_s)

    self._dump_raw("</table>>];\n")
def visit_edge(self, pred, succ):
    """Emit the DOT edge from `pred` to `succ` (drawn white in dark mode)."""
    attributes = ' [color="white"]' if self._dark_mode else ""
    source = pred.node_name()
    target = succ.node_name()
    self._dump_raw("%s -> %s%s;\n" % (source, target, attributes))
def visit_end_of_graph(self):
self._dump_raw("}\n")
if not self._dump_dot_only:
import sys
import tempfile
[analyzer] exploded-graph-rewriter: Fix python3 string encoding issues This encapsulates 3 changes: - `DotDumpVisitor` now aggregates strings instead of *bytes* for both `python2` and `python3`. This difference caused crashes when it tried to write out the content as *strings*, similarly described at D71746. - `graphviz.pipe()` expects the input in *bytes* instead of unicode strings. And it results in *bytes*. Due to string concatenations and similar operations, I'm using unicode string as the default, and converting to *bytes* on demand. - `write_temp_file()` now appends the `egraph-` prefix and more importantly, it will create the temp file in the **current working directory** instead of in the *temp*. This change makes `Firefox` be able to open the file even if the `security.sandbox.content.level` is set to the (default) most restricting `4`. See https://support.mozilla.org/si/questions/1259285 An artifact of the bad byte handling was previously in the `HTML` produced by the script that it displayed the `b'` string at the top left corner. Now it won't anymore :) I've tested that the following command works on `Ubuntu 22.04`: ``` exploded-graph-rewriter my-egraph.dot ``` Both `python2` and `python3` works as expected. PS: I'm not adding tests, as the current test infra does not support testing HTML outputs for this script. Check the `clang/test/Analysis/exploded-graph-rewriter/lit.local.cfg`. We always pass the `--dump-dot-only` flag to the script. Along with that, the default invocation will not only create this HTML report but also try to open it. In addition to this, I'm not sure if the buildbots have `graphviz` installed and also if this package is installed on `pip`. Unless we change some of these, we cannot test this change. Given that D71746 had no tests, I'm not too worried about this either. Reviewed By: NoQ Differential Revision: https://reviews.llvm.org/D131553
2022-08-11 16:07:52 +02:00
def write_temp_file(suffix, prefix, data):
fd, filename = tempfile.mkstemp(suffix, prefix, ".", True)
print('Writing "%s"...' % filename)
with os.fdopen(fd, "w") as fp:
fp.write(data)
print("Done! Please remember to remove the file.")
return filename
try:
import graphviz
except ImportError:
# The fallback behavior if graphviz is not installed!
print("Python graphviz not found. Please invoke")
print(" $ pip install graphviz")
print("in order to enable automatic conversion to HTML.")
print()
print("You may also convert DOT to SVG manually via")
print(" $ dot -Tsvg input.dot -o output.svg")
print()
[analyzer] exploded-graph-rewriter: Fix python3 string encoding issues This encapsulates 3 changes: - `DotDumpVisitor` now aggregates strings instead of *bytes* for both `python2` and `python3`. This difference caused crashes when it tried to write out the content as *strings*, similarly described at D71746. - `graphviz.pipe()` expects the input in *bytes* instead of unicode strings. And it results in *bytes*. Due to string concatenations and similar operations, I'm using unicode string as the default, and converting to *bytes* on demand. - `write_temp_file()` now appends the `egraph-` prefix and more importantly, it will create the temp file in the **current working directory** instead of in the *temp*. This change makes `Firefox` be able to open the file even if the `security.sandbox.content.level` is set to the (default) most restricting `4`. See https://support.mozilla.org/si/questions/1259285 An artifact of the bad byte handling was previously in the `HTML` produced by the script that it displayed the `b'` string at the top left corner. Now it won't anymore :) I've tested that the following command works on `Ubuntu 22.04`: ``` exploded-graph-rewriter my-egraph.dot ``` Both `python2` and `python3` works as expected. PS: I'm not adding tests, as the current test infra does not support testing HTML outputs for this script. Check the `clang/test/Analysis/exploded-graph-rewriter/lit.local.cfg`. We always pass the `--dump-dot-only` flag to the script. Along with that, the default invocation will not only create this HTML report but also try to open it. In addition to this, I'm not sure if the buildbots have `graphviz` installed and also if this package is installed on `pip`. Unless we change some of these, we cannot test this change. Given that D71746 had no tests, I'm not too worried about this either. Reviewed By: NoQ Differential Revision: https://reviews.llvm.org/D131553
2022-08-11 16:07:52 +02:00
write_temp_file(".dot", "egraph-", self.output())
return
[analyzer] exploded-graph-rewriter: Fix python3 string encoding issues This encapsulates 3 changes: - `DotDumpVisitor` now aggregates strings instead of *bytes* for both `python2` and `python3`. This difference caused crashes when it tried to write out the content as *strings*, similarly described at D71746. - `graphviz.pipe()` expects the input in *bytes* instead of unicode strings. And it results in *bytes*. Due to string concatenations and similar operations, I'm using unicode string as the default, and converting to *bytes* on demand. - `write_temp_file()` now appends the `egraph-` prefix and more importantly, it will create the temp file in the **current working directory** instead of in the *temp*. This change makes `Firefox` be able to open the file even if the `security.sandbox.content.level` is set to the (default) most restricting `4`. See https://support.mozilla.org/si/questions/1259285 An artifact of the bad byte handling was previously in the `HTML` produced by the script that it displayed the `b'` string at the top left corner. Now it won't anymore :) I've tested that the following command works on `Ubuntu 22.04`: ``` exploded-graph-rewriter my-egraph.dot ``` Both `python2` and `python3` works as expected. PS: I'm not adding tests, as the current test infra does not support testing HTML outputs for this script. Check the `clang/test/Analysis/exploded-graph-rewriter/lit.local.cfg`. We always pass the `--dump-dot-only` flag to the script. Along with that, the default invocation will not only create this HTML report but also try to open it. In addition to this, I'm not sure if the buildbots have `graphviz` installed and also if this package is installed on `pip`. Unless we change some of these, we cannot test this change. Given that D71746 had no tests, I'm not too worried about this either. Reviewed By: NoQ Differential Revision: https://reviews.llvm.org/D131553
2022-08-11 16:07:52 +02:00
svg = graphviz.pipe("dot", "svg", self.output().encode()).decode()
filename = write_temp_file(
[analyzer] exploded-graph-rewriter: Fix python3 string encoding issues This encapsulates 3 changes: - `DotDumpVisitor` now aggregates strings instead of *bytes* for both `python2` and `python3`. This difference caused crashes when it tried to write out the content as *strings*, similarly described at D71746. - `graphviz.pipe()` expects the input in *bytes* instead of unicode strings. And it results in *bytes*. Due to string concatenations and similar operations, I'm using unicode string as the default, and converting to *bytes* on demand. - `write_temp_file()` now appends the `egraph-` prefix and more importantly, it will create the temp file in the **current working directory** instead of in the *temp*. This change makes `Firefox` be able to open the file even if the `security.sandbox.content.level` is set to the (default) most restricting `4`. See https://support.mozilla.org/si/questions/1259285 An artifact of the bad byte handling was previously in the `HTML` produced by the script that it displayed the `b'` string at the top left corner. Now it won't anymore :) I've tested that the following command works on `Ubuntu 22.04`: ``` exploded-graph-rewriter my-egraph.dot ``` Both `python2` and `python3` works as expected. PS: I'm not adding tests, as the current test infra does not support testing HTML outputs for this script. Check the `clang/test/Analysis/exploded-graph-rewriter/lit.local.cfg`. We always pass the `--dump-dot-only` flag to the script. Along with that, the default invocation will not only create this HTML report but also try to open it. In addition to this, I'm not sure if the buildbots have `graphviz` installed and also if this package is installed on `pip`. Unless we change some of these, we cannot test this change. Given that D71746 had no tests, I'm not too worried about this either. Reviewed By: NoQ Differential Revision: https://reviews.llvm.org/D131553
2022-08-11 16:07:52 +02:00
".html",
"egraph-",
'<html><body bgcolor="%s">%s</body></html>'
% ("#1a1a1a" if self._dark_mode else "white", svg),
)
if self._dump_html_only:
return
if sys.platform == "win32":
os.startfile(filename)
elif sys.platform == "darwin":
os.system('open "%s"' % filename)
else:
os.system('xdg-open "%s"' % filename)
# ===-----------------------------------------------------------------------===#
# Explorers know how to traverse the ExplodedGraph in a certain order.
# They would invoke a Visitor on every node or edge they encounter.
# ===-----------------------------------------------------------------------===#
# BasicExplorer walks the entire graph. Nodes are visited in sorted order of
# their IDs; right after a node is visited, each of its outgoing edges is
# visited, again in sorted order of the successor IDs.
class BasicExplorer:
    def explore(self, graph, visitor):
        visitor.visit_begin_graph(graph)
        for node_id in sorted(graph.nodes):
            logging.debug("Visiting " + node_id)
            current = graph.nodes[node_id]
            visitor.visit_node(current)
            for succ_id in sorted(current.successors):
                logging.debug("Visiting edge: %s -> %s " % (node_id, succ_id))
                visitor.visit_edge(current, graph.nodes[succ_id])
        visitor.visit_end_of_graph()
# ===-----------------------------------------------------------------------===#
# Trimmers cut out parts of the ExplodedGraph in order to focus on other parts.
# Trimmers can be combined together by applying them sequentially.
# ===-----------------------------------------------------------------------===#
# SinglePathTrimmer reduces the graph to one path: starting at the root it
# always follows the first successor. Handy when an already trimmed graph
# is still too big to read.
class SinglePathTrimmer:
    def trim(self, graph):
        kept = set()
        current_id = graph.root_id
        while True:
            kept.add(current_id)
            current = graph.nodes[current_id]
            if len(current.successors) == 0:
                # Reached a node with no successors: the path ends here.
                break
            next_id = current.successors[0]
            follower = graph.nodes[next_id]
            # Cut every other edge around the chosen step.
            current.successors = [next_id]
            follower.predecessors = [current_id]
            if next_id in kept:
                # The path loops back onto itself; stop instead of spinning.
                break
            current_id = next_id
        graph.nodes = {key: graph.nodes[key] for key in kept}
# TargetedTrimmer keeps paths that lead to specific nodes and discards all
# other paths. Useful when you cannot use -trim-egraph (e.g. when debugging
# a crash).
class TargetedTrimmer:
    # target_nodes: list of keys into graph.nodes that the kept paths must
    # lead to (as produced by parse_target_nodes()).
    def __init__(self, target_nodes):
        self._target_nodes = target_nodes

    # Translate one user-supplied node designator into a key of graph.nodes.
    #
    # A designator starting with "0x" is treated as a pointer and maps to the
    # key "Node" + designator; anything else is parsed as an integer and
    # matched against the node_id attribute of every node.
    #
    # Raises ValueError if the designator is not an integer or if no node in
    # the graph matches it.
    @staticmethod
    def parse_target_node(node, graph):
        if node.startswith("0x"):
            ret = "Node" + node
            # Raise explicitly rather than assert: asserts vanish under -O.
            if ret not in graph.nodes:
                raise ValueError("node %s not found in the graph" % node)
            return ret

        # Convert once instead of on every loop iteration.
        stable_id = int(node)
        for other_id in graph.nodes:
            if graph.nodes[other_id].node_id == stable_id:
                return other_id
        # Falling off the end used to return None, which only blew up later
        # as an opaque "KeyError: None" inside trim().
        raise ValueError("node %s not found in the graph" % node)

    # Translate a comma-separated list of node designators into a list of
    # graph.nodes keys; see parse_target_node() for the accepted forms.
    @staticmethod
    def parse_target_nodes(target_nodes, graph):
        return [
            TargetedTrimmer.parse_target_node(node, graph)
            for node in target_nodes.split(",")
        ]

    # Drop from the graph every node that is neither a target node nor one of
    # its transitive predecessors, then prune the edges of the surviving nodes
    # so that they only refer to surviving nodes.
    def trim(self, graph):
        # Work on a copy: popping from the caller's list would empty it and
        # make this trimmer silently useless on a second call.
        queue = list(self._target_nodes)
        visited_nodes = set()
        while queue:
            node_id = queue.pop()
            visited_nodes.add(node_id)
            node = graph.nodes[node_id]
            for pred_id in node.predecessors:
                if pred_id not in visited_nodes:
                    queue.append(pred_id)

        graph.nodes = {node_id: graph.nodes[node_id] for node_id in visited_nodes}
        for node_id in graph.nodes:
            node = graph.nodes[node_id]
            node.successors = [
                succ_id for succ_id in node.successors if succ_id in visited_nodes
            ]
            node.predecessors = [
                pred_id for pred_id in node.predecessors if pred_id in visited_nodes
            ]
# ===-----------------------------------------------------------------------===#
# The entry point to the script.
# ===-----------------------------------------------------------------------===#
def main():
    # Parse the command line, load the .dot dump into an ExplodedGraph, apply
    # the requested trimmers and finally feed the graph to a DotDumpVisitor.
    parser = argparse.ArgumentParser(
        description="Display and manipulate Exploded Graph dumps."
    )
    parser.add_argument(
        "filename", type=str, help="the .dot file produced by the Static Analyzer"
    )
    # -v lowers the logging threshold from WARNING to DEBUG.
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_const",
        dest="loglevel",
        const=logging.DEBUG,
        default=logging.WARNING,
        help="enable info prints",
    )
    parser.add_argument(
        "-d",
        "--diff",
        action="store_true",
        dest="diff",
        help="display differences between states",
    )
    parser.add_argument(
        "-t",
        "--topology",
        action="store_true",
        dest="topology",
        help="only display program points, omit states",
    )
    parser.add_argument(
        "-s",
        "--single-path",
        action="store_true",
        dest="single_path",
        help="only display the leftmost path in the graph "
        "(useful for trimmed graphs that still "
        "branch too much)",
    )
    parser.add_argument(
        "--to",
        type=str,
        default=None,
        help="only display execution paths from the root "
        "to the given comma-separated list of nodes "
        "identified by a pointer or a stable ID; "
        "compatible with --single-path",
    )
    parser.add_argument("--dark", action="store_true", dest="dark", help="dark mode")
    parser.add_argument(
        "--gray", action="store_true", dest="gray", help="black-and-white mode"
    )

    # The two dump modes cannot be combined.
    dump_conflict = parser.add_mutually_exclusive_group()
    dump_conflict.add_argument(
        "--dump-html-only",
        action="store_true",
        dest="dump_html_only",
        help="dump the rewritten egraph to a temporary HTML file, "
        "but do not open it immediately as by default",
    )
    dump_conflict.add_argument(
        "--dump-dot-only",
        action="store_true",
        dest="dump_dot_only",
        help="instead of writing an HTML file and immediately "
        "displaying it, dump the rewritten dot file "
        "to stdout",
    )

    args = parser.parse_args()
    logging.basicConfig(level=args.loglevel)

    # Feed the dump to the graph one whitespace-stripped line at a time.
    graph = ExplodedGraph()
    with open(args.filename) as dot_file:
        for line in dot_file:
            graph.add_raw_line(line.strip())

    # Targeted trimming is queued before single-path trimming, so --to and
    # --single-path can be combined.
    trimmers = []
    if args.to is not None:
        targets = TargetedTrimmer.parse_target_nodes(args.to, graph)
        trimmers.append(TargetedTrimmer(targets))
    if args.single_path:
        trimmers.append(SinglePathTrimmer())

    explorer = BasicExplorer()
    visitor = DotDumpVisitor(
        args.diff,
        args.dark,
        args.gray,
        args.topology,
        args.dump_html_only,
        args.dump_dot_only,
    )

    for trimmer in trimmers:
        trimmer.trim(graph)
    explorer.explore(graph, visitor)


# Run main() only when executed as a script, not when imported.
if __name__ == "__main__":
    main()