#!/usr/bin/env python # # Debugify summary for the original debug info testing. # from __future__ import print_function import argparse import os import sys from json import loads from collections import defaultdict from collections import OrderedDict class DILocBug: def __init__(self, action, bb_name, fn_name, instr): self.action = action self.bb_name = bb_name self.fn_name = fn_name self.instr = instr def __str__(self): return self.action + self.bb_name + self.fn_name + self.instr class DISPBug: def __init__(self, action, fn_name): self.action = action self.fn_name = fn_name def __str__(self): return self.action + self.fn_name class DIVarBug: def __init__(self, action, name, fn_name): self.action = action self.name = name self.fn_name = fn_name def __str__(self): return self.action + self.name + self.fn_name # Report the bugs in form of html. def generate_html_report( di_location_bugs, di_subprogram_bugs, di_var_bugs, di_location_bugs_summary, di_sp_bugs_summary, di_var_bugs_summary, html_file, ): fileout = open(html_file, "w") html_header = """ """ # Create the table for Location bugs. table_title_di_loc = "Location Bugs found by the Debugify" table_di_loc = """ """.format( table_title_di_loc ) header_di_loc = [ "File", "LLVM Pass Name", "LLVM IR Instruction", "Function Name", "Basic Block Name", "Action", ] for column in header_di_loc: table_di_loc += " \n".format(column.strip()) table_di_loc += " \n" at_least_one_bug_found = False # Handle loction bugs. for file, per_file_bugs in di_location_bugs.items(): for llvm_pass, per_pass_bugs in per_file_bugs.items(): # No location bugs for the pass. if len(per_pass_bugs) == 0: continue at_least_one_bug_found = True row = [] table_di_loc += " \n" # Get the bugs info. for x in per_pass_bugs: row.append(" \n") row.append(file) row.append(llvm_pass) row.append(x.instr) row.append(x.fn_name) row.append(x.bb_name) row.append(x.action) row.append(" \n") # Dump the bugs info into the table. for column in row: # The same file-pass pair can have multiple bugs. if column == " \n" or column == " \n": table_di_loc += column continue table_di_loc += " \n".format(column.strip()) table_di_loc += " \n" if not at_least_one_bug_found: table_di_loc += """ """ table_di_loc += "
{}
{0}
{0}
No bugs found
\n" # Create the summary table for the loc bugs. table_title_di_loc_sum = "Summary of Location Bugs" table_di_loc_sum = """ """.format( table_title_di_loc_sum ) header_di_loc_sum = ["LLVM Pass Name", "Number of bugs"] for column in header_di_loc_sum: table_di_loc_sum += " \n".format(column.strip()) table_di_loc_sum += " \n" # Print the summary. row = [] for llvm_pass, num in sorted(di_location_bugs_summary.items()): row.append(" \n") row.append(llvm_pass) row.append(str(num)) row.append(" \n") for column in row: if column == " \n" or column == " \n": table_di_loc_sum += column continue table_di_loc_sum += " \n".format(column.strip()) table_di_loc_sum += " \n" if not at_least_one_bug_found: table_di_loc_sum += """ """ table_di_loc_sum += "
{}
{0}
{0}
No bugs found
\n" # Create the table for SP bugs. table_title_di_sp = "SP Bugs found by the Debugify" table_di_sp = """ """.format( table_title_di_sp ) header_di_sp = ["File", "LLVM Pass Name", "Function Name", "Action"] for column in header_di_sp: table_di_sp += " \n".format(column.strip()) table_di_sp += " \n" at_least_one_bug_found = False # Handle fn bugs. for file, per_file_bugs in di_subprogram_bugs.items(): for llvm_pass, per_pass_bugs in per_file_bugs.items(): # No SP bugs for the pass. if len(per_pass_bugs) == 0: continue at_least_one_bug_found = True row = [] table_di_sp += " \n" # Get the bugs info. for x in per_pass_bugs: row.append(" \n") row.append(file) row.append(llvm_pass) row.append(x.fn_name) row.append(x.action) row.append(" \n") # Dump the bugs info into the table. for column in row: # The same file-pass pair can have multiple bugs. if column == " \n" or column == " \n": table_di_sp += column continue table_di_sp += " \n".format(column.strip()) table_di_sp += " \n" if not at_least_one_bug_found: table_di_sp += """ """ table_di_sp += "
{}
{0}
{0}
No bugs found
\n" # Create the summary table for the sp bugs. table_title_di_sp_sum = "Summary of SP Bugs" table_di_sp_sum = """ """.format( table_title_di_sp_sum ) header_di_sp_sum = ["LLVM Pass Name", "Number of bugs"] for column in header_di_sp_sum: table_di_sp_sum += " \n".format(column.strip()) table_di_sp_sum += " \n" # Print the summary. row = [] for llvm_pass, num in sorted(di_sp_bugs_summary.items()): row.append(" \n") row.append(llvm_pass) row.append(str(num)) row.append(" \n") for column in row: if column == " \n" or column == " \n": table_di_sp_sum += column continue table_di_sp_sum += " \n".format(column.strip()) table_di_sp_sum += " \n" if not at_least_one_bug_found: table_di_sp_sum += """ """ table_di_sp_sum += "
{}
{0}
{0}
No bugs found
\n" # Create the table for Variable bugs. table_title_di_var = "Variable Location Bugs found by the Debugify" table_di_var = """ """.format( table_title_di_var ) header_di_var = ["File", "LLVM Pass Name", "Variable", "Function", "Action"] for column in header_di_var: table_di_var += " \n".format(column.strip()) table_di_var += " \n" at_least_one_bug_found = False # Handle var bugs. for file, per_file_bugs in di_var_bugs.items(): for llvm_pass, per_pass_bugs in per_file_bugs.items(): # No SP bugs for the pass. if len(per_pass_bugs) == 0: continue at_least_one_bug_found = True row = [] table_di_var += " \n" # Get the bugs info. for x in per_pass_bugs: row.append(" \n") row.append(file) row.append(llvm_pass) row.append(x.name) row.append(x.fn_name) row.append(x.action) row.append(" \n") # Dump the bugs info into the table. for column in row: # The same file-pass pair can have multiple bugs. if column == " \n" or column == " \n": table_di_var += column continue table_di_var += " \n".format(column.strip()) table_di_var += " \n" if not at_least_one_bug_found: table_di_var += """ """ table_di_var += "
{}
{0}
{0}
No bugs found
\n" # Create the summary table for the sp bugs. table_title_di_var_sum = "Summary of Variable Location Bugs" table_di_var_sum = """ """.format( table_title_di_var_sum ) header_di_var_sum = ["LLVM Pass Name", "Number of bugs"] for column in header_di_var_sum: table_di_var_sum += " \n".format(column.strip()) table_di_var_sum += " \n" # Print the summary. row = [] for llvm_pass, num in sorted(di_var_bugs_summary.items()): row.append(" \n") row.append(llvm_pass) row.append(str(num)) row.append(" \n") for column in row: if column == " \n" or column == " \n": table_di_var_sum += column continue table_di_var_sum += " \n".format(column.strip()) table_di_var_sum += " \n" if not at_least_one_bug_found: table_di_var_sum += """ """ table_di_var_sum += "
{}
{0}
{0}
No bugs found
\n" # Finish the html page. html_footer = """ """ new_line = "
\n" fileout.writelines(html_header) fileout.writelines(table_di_loc) fileout.writelines(new_line) fileout.writelines(table_di_loc_sum) fileout.writelines(new_line) fileout.writelines(new_line) fileout.writelines(table_di_sp) fileout.writelines(new_line) fileout.writelines(table_di_sp_sum) fileout.writelines(new_line) fileout.writelines(new_line) fileout.writelines(table_di_var) fileout.writelines(new_line) fileout.writelines(table_di_var_sum) fileout.writelines(html_footer) fileout.close() print("The " + html_file + " generated.") # Read the JSON file in chunks. def get_json_chunk(file, start, size): json_parsed = None di_checker_data = [] skipped_lines = 0 line = 0 # The file contains json object per line. # An example of the line (formatted json): # { # "file": "simple.c", # "pass": "Deduce function attributes in RPO", # "bugs": [ # [ # { # "action": "drop", # "metadata": "DISubprogram", # "name": "fn2" # }, # { # "action": "drop", # "metadata": "DISubprogram", # "name": "fn1" # } # ] # ] # } with open(file) as json_objects_file: for json_object_line in json_objects_file: line += 1 if line < start: continue if line >= start + size: break try: json_object = loads(json_object_line) except: skipped_lines += 1 else: di_checker_data.append(json_object) return (di_checker_data, skipped_lines, line) # Parse the program arguments. def parse_program_args(parser): parser.add_argument("file_name", type=str, help="json file to process") parser.add_argument("html_file", type=str, help="html file to output data") parser.add_argument( "-compress", action="store_true", help="create reduced html report" ) return parser.parse_args() def Main(): parser = argparse.ArgumentParser() opts = parse_program_args(parser) if not opts.html_file.endswith(".html"): print("error: The output file must be '.html'.") sys.exit(1) # Use the defaultdict in order to make multidim dicts. di_location_bugs = defaultdict(lambda: defaultdict(dict)) di_subprogram_bugs = defaultdict(lambda: defaultdict(dict)) di_variable_bugs = defaultdict(lambda: defaultdict(dict)) # Use the ordered dict to make a summary. di_location_bugs_summary = OrderedDict() di_sp_bugs_summary = OrderedDict() di_var_bugs_summary = OrderedDict() # Compress similar bugs. # DILocBugs with same pass & instruction name. di_loc_pass_instr_set = set() # DISPBugs with same pass & function name. di_sp_pass_fn_set = set() # DIVarBugs with same pass & variable name. di_var_pass_var_set = set() start_line = 0 chunk_size = 1000000 end_line = chunk_size - 1 skipped_lines = 0 skipped_bugs = 0 # Process each chunk of 1 million JSON lines. while True: if start_line > end_line: break (debug_info_bugs, skipped, end_line) = get_json_chunk( opts.file_name, start_line, chunk_size ) start_line += chunk_size skipped_lines += skipped # Map the bugs into the file-pass pairs. for bugs_per_pass in debug_info_bugs: try: bugs_file = bugs_per_pass["file"] bugs_pass = bugs_per_pass["pass"] bugs = bugs_per_pass["bugs"][0] except: skipped_lines += 1 continue di_loc_bugs = [] di_sp_bugs = [] di_var_bugs = [] # Omit duplicated bugs. di_loc_set = set() di_sp_set = set() di_var_set = set() for bug in bugs: try: bugs_metadata = bug["metadata"] except: skipped_bugs += 1 continue if bugs_metadata == "DILocation": try: action = bug["action"] bb_name = bug["bb-name"] fn_name = bug["fn-name"] instr = bug["instr"] except: skipped_bugs += 1 continue di_loc_bug = DILocBug(action, bb_name, fn_name, instr) if not str(di_loc_bug) in di_loc_set: di_loc_set.add(str(di_loc_bug)) if opts.compress: pass_instr = bugs_pass + instr if not pass_instr in di_loc_pass_instr_set: di_loc_pass_instr_set.add(pass_instr) di_loc_bugs.append(di_loc_bug) else: di_loc_bugs.append(di_loc_bug) # Fill the summary dict. if bugs_pass in di_location_bugs_summary: di_location_bugs_summary[bugs_pass] += 1 else: di_location_bugs_summary[bugs_pass] = 1 elif bugs_metadata == "DISubprogram": try: action = bug["action"] name = bug["name"] except: skipped_bugs += 1 continue di_sp_bug = DISPBug(action, name) if not str(di_sp_bug) in di_sp_set: di_sp_set.add(str(di_sp_bug)) if opts.compress: pass_fn = bugs_pass + name if not pass_fn in di_sp_pass_fn_set: di_sp_pass_fn_set.add(pass_fn) di_sp_bugs.append(di_sp_bug) else: di_sp_bugs.append(di_sp_bug) # Fill the summary dict. if bugs_pass in di_sp_bugs_summary: di_sp_bugs_summary[bugs_pass] += 1 else: di_sp_bugs_summary[bugs_pass] = 1 elif bugs_metadata == "dbg-var-intrinsic": try: action = bug["action"] fn_name = bug["fn-name"] name = bug["name"] except: skipped_bugs += 1 continue di_var_bug = DIVarBug(action, name, fn_name) if not str(di_var_bug) in di_var_set: di_var_set.add(str(di_var_bug)) if opts.compress: pass_var = bugs_pass + name if not pass_var in di_var_pass_var_set: di_var_pass_var_set.add(pass_var) di_var_bugs.append(di_var_bug) else: di_var_bugs.append(di_var_bug) # Fill the summary dict. if bugs_pass in di_var_bugs_summary: di_var_bugs_summary[bugs_pass] += 1 else: di_var_bugs_summary[bugs_pass] = 1 else: # Unsupported metadata. skipped_bugs += 1 continue di_location_bugs[bugs_file][bugs_pass] = di_loc_bugs di_subprogram_bugs[bugs_file][bugs_pass] = di_sp_bugs di_variable_bugs[bugs_file][bugs_pass] = di_var_bugs generate_html_report( di_location_bugs, di_subprogram_bugs, di_variable_bugs, di_location_bugs_summary, di_sp_bugs_summary, di_var_bugs_summary, opts.html_file, ) if skipped_lines > 0: print("Skipped lines: " + str(skipped_lines)) if skipped_bugs > 0: print("Skipped bugs: " + str(skipped_bugs)) if __name__ == "__main__": Main() sys.exit(0)