#!/usr/bin/env python3
#
# This is a tool that works like debug location coverage calculator.
# It parses the llvm-dwarfdump --statistics output by reporting it
# in a more human readable way.
#

from __future__ import print_function
import argparse
import os
import sys
from json import loads
from math import ceil
from collections import OrderedDict
from subprocess import Popen, PIPE

# This special value has been used to mark statistics that overflowed.
TAINT_VALUE = "tainted"


def _percentage(part, whole):
    """Return `part` as an integer percentage of `whole`.

    Returns TAINT_VALUE when either operand is TAINT_VALUE, and 0 when
    `whole` is zero (so callers never hit a ZeroDivisionError).
    """
    if part == TAINT_VALUE or whole == TAINT_VALUE:
        return TAINT_VALUE
    if not whole:
        return 0
    # NOTE: ceil() only covers the numerator here, so the quotient is
    # effectively truncated by int(). This is kept on purpose so that the
    # reported percentages do not change.
    return int(ceil(part * 100.0) / whole)


def init_plot(plt):
    """Set the title, axis labels and tick style on the pyplot module `plt`."""
    plt.title("Debug Location Statistics", fontweight="bold")
    plt.xlabel("location buckets")
    plt.ylabel("number of variables in the location buckets")
    plt.xticks(rotation=45, fontsize="x-small")
    plt.yticks()


def finish_plot(plt):
    """Add the legend and grid, then save the figure as "locstats.png"."""
    plt.legend()
    plt.grid(color="grey", which="major", axis="y", linestyle="-", linewidth=0.3)

    plt.savefig("locstats.png")
    print('The plot was saved within "locstats.png".')


class LocationStats:
    """Holds the debug location statistics of a single file.

    Any of the numeric fields may hold TAINT_VALUE instead of a number when
    the corresponding statistic overflowed.
    """

    def __init__(
        self,
        file_name,
        variables_total,
        variables_total_locstats,
        variables_with_loc,
        variables_scope_bytes_covered,
        variables_scope_bytes,
        variables_coverage_map,
    ):
        self.file_name = file_name
        self.variables_total = variables_total
        self.variables_total_locstats = variables_total_locstats
        self.variables_with_loc = variables_with_loc
        self.scope_bytes_covered = variables_scope_bytes_covered
        self.scope_bytes = variables_scope_bytes
        # Maps each coverage bucket label to the number of variables in it.
        self.variables_coverage_map = variables_coverage_map

    def get_pc_coverage(self):
        """Return the PC ranges coverage as an integer percentage.

        Returns TAINT_VALUE if either byte counter is tainted, and 0 when
        there are no scope bytes at all.
        """
        return _percentage(self.scope_bytes_covered, self.scope_bytes)

    def pretty_print(self):
        """Print the location buckets as a table on the standard output.

        Returns -1 (after printing a note) when there are no scope bytes,
        and 0 otherwise.
        """
        if self.scope_bytes == 0:
            print("No scope bytes found.")
            return -1

        pc_ranges_covered = self.get_pc_coverage()
        variables_coverage_per_map = {
            cov_bucket: _percentage(
                self.variables_coverage_map[cov_bucket],
                self.variables_total_locstats,
            )
            for cov_bucket in coverage_buckets()
        }

        print(" =================================================")
        print(" Debug Location Statistics ")
        print(" =================================================")
        print(" cov% samples percentage(~) ")
        print(" -------------------------------------------------")
        for cov_bucket in coverage_buckets():
            # Tainted entries are strings, so they cannot go through the
            # integer ("d") format specifiers.
            if (
                self.variables_coverage_map[cov_bucket] == TAINT_VALUE
                or self.variables_total_locstats == TAINT_VALUE
            ):
                row_format = " {0:10} {1:8} {2:3}%"
            else:
                row_format = " {0:10} {1:8d} {2:3d}%"
            print(
                row_format.format(
                    cov_bucket,
                    self.variables_coverage_map[cov_bucket],
                    variables_coverage_per_map[cov_bucket],
                )
            )
        print(" =================================================")
        print(
            " -the number of debug variables processed: "
            + str(self.variables_total_locstats)
        )
        print(" -PC ranges covered: " + str(pc_ranges_covered) + "%")

        # Only if we are processing all the variables output the total
        # availability.
        if self.variables_total and self.variables_with_loc:
            total_availability = _percentage(
                self.variables_with_loc, self.variables_total
            )
            print(" -------------------------------------------------")
            print(" -total availability: " + str(total_availability) + "%")
        print(" =================================================")

        return 0

    def draw_plot(self):
        """Draw a bar plot of the location buckets and save it to disk."""
        from matplotlib import pyplot as plt

        buckets = list(range(len(self.variables_coverage_map)))
        plt.figure(figsize=(12, 8))
        init_plot(plt)
        # The dict views are turned into lists so that plotting does not
        # depend on matplotlib accepting view objects.
        plt.bar(
            buckets,
            list(self.variables_coverage_map.values()),
            align="center",
            tick_label=list(self.variables_coverage_map.keys()),
            label="variables of {}".format(self.file_name),
        )

        # Place the text box with the coverage info.
        pc_ranges_covered = self.get_pc_coverage()
        props = dict(boxstyle="round", facecolor="wheat", alpha=0.5)
        plt.text(
            0.02,
            0.90,
            "PC ranges covered: {}%".format(pc_ranges_covered),
            transform=plt.gca().transAxes,
            fontsize=12,
            verticalalignment="top",
            bbox=props,
        )

        finish_plot(plt)

    def draw_location_diff(self, locstats_to_compare):
        """Draw a plot comparing this object with `locstats_to_compare`.

        Both sets of buckets are drawn side by side in one figure, together
        with a text box per file reporting its PC ranges coverage.
        """
        from matplotlib import pyplot as plt

        pc_ranges_covered = self.get_pc_coverage()
        pc_ranges_covered_to_compare = locstats_to_compare.get_pc_coverage()

        buckets = list(range(len(self.variables_coverage_map)))
        buckets_to_compare = list(
            range(len(locstats_to_compare.variables_coverage_map))
        )

        fig = plt.figure(figsize=(12, 8))
        ax = fig.add_subplot(111)
        init_plot(plt)

        comparison_keys = list(coverage_buckets())
        # A positive and a negative width with align="edge" put the two bars
        # of a bucket on either side of the same tick.
        ax.bar(
            buckets,
            list(self.variables_coverage_map.values()),
            align="edge",
            width=0.4,
            label="variables of {}".format(self.file_name),
        )
        ax.bar(
            buckets_to_compare,
            list(locstats_to_compare.variables_coverage_map.values()),
            color="r",
            align="edge",
            width=-0.4,
            label="variables of {}".format(locstats_to_compare.file_name),
        )
        ax.set_xticks(range(len(comparison_keys)))
        ax.set_xticklabels(comparison_keys)

        props = dict(boxstyle="round", facecolor="wheat", alpha=0.5)
        plt.text(
            0.02,
            0.88,
            "{} PC ranges covered: {}%".format(self.file_name, pc_ranges_covered),
            transform=plt.gca().transAxes,
            fontsize=12,
            verticalalignment="top",
            bbox=props,
        )
        plt.text(
            0.02,
            0.83,
            "{} PC ranges covered: {}%".format(
                locstats_to_compare.file_name, pc_ranges_covered_to_compare
            ),
            transform=plt.gca().transAxes,
            fontsize=12,
            verticalalignment="top",
            bbox=props,
        )

        finish_plot(plt)


def coverage_buckets():
    """Yield the labels of the location buckets, in ascending order."""
    yield "0%"
    yield "(0%,10%)"
    for start in range(10, 91, 10):
        yield "[{0}%,{1}%)".format(start, start + 10)
    yield "100%"


def parse_locstats(opts, binary):
    """Run llvm-dwarfdump --statistics on `binary` and build a LocationStats.

    `opts` is the parsed command line; its only_variables,
    only_formal_parameters and ignore_debug_entry_values flags select which
    statistics are read from the JSON output.

    Prints an error and exits with status 1 if llvm-dwarfdump cannot be
    started, if its output is not valid JSON, or if an expected field is
    missing from it.
    """
    # These will be different due to different options enabled.
    variables_total = None
    variables_with_loc = None
    variables_coverage_map = OrderedDict()

    # Get the directory of the LLVM tools.
    llvm_dwarfdump_cmd = os.path.join(os.path.dirname(__file__), "llvm-dwarfdump")
    # The statistics llvm-dwarfdump option.
    llvm_dwarfdump_stats_opt = "--statistics"

    # Generate the stats with the llvm-dwarfdump.
    try:
        subproc = Popen(
            [llvm_dwarfdump_cmd, llvm_dwarfdump_stats_opt, binary],
            stdin=PIPE,
            stdout=PIPE,
            stderr=PIPE,
            universal_newlines=True,
        )
    except OSError as err:
        print("error: Unable to run " + llvm_dwarfdump_cmd + ": " + str(err))
        sys.exit(1)
    cmd_stdout, _cmd_stderr = subproc.communicate()
    # TODO: Handle errors that are coming from llvm-dwarfdump.

    # Get the JSON and parse it.
    try:
        json_parsed = loads(cmd_stdout)
    except ValueError:
        # json.JSONDecodeError is a subclass of ValueError.
        print("error: No valid llvm-dwarfdump statistics found.")
        sys.exit(1)

    # TODO: Parse the statistics Version from JSON.

    def init_field(name):
        # Read one statistic, mapping an overflowed value to TAINT_VALUE.
        try:
            value = json_parsed[name]
        except KeyError:
            print('error: "' + name + '" field not found in the statistics.')
            sys.exit(1)
        if value == "overflowed":
            print('warning: "' + name + '" field overflowed.')
            return TAINT_VALUE
        return value

    # The field names differ only in the entity they describe: `count_name`
    # is used in the "#..." counters, `sum_name` in the "sum_all_...(" sums.
    if opts.only_variables:
        # Read the JSON only for local variables.
        count_name, sum_name = "local vars", "local_vars"
    elif opts.only_formal_parameters:
        # Read the JSON only for formal parameters.
        count_name, sum_name = "params", "params"
    else:
        # Read the JSON for both local variables and formal parameters.
        count_name, sum_name = "variables", "variables"
        variables_total = init_field("#source variables")
        variables_with_loc = init_field("#source variables with location")

    variables_total_locstats = init_field(
        "#{} processed by location statistics".format(count_name)
    )
    variables_scope_bytes_covered = init_field(
        "sum_all_{}(#bytes in parent scope covered "
        "by DW_AT_location)".format(sum_name)
    )
    variables_scope_bytes = init_field(
        "sum_all_{}(#bytes in parent scope)".format(sum_name)
    )

    if not opts.ignore_debug_entry_values:
        bucket_field = "#{} with {} of parent scope covered by DW_AT_location"
    else:
        variables_scope_bytes_entry_values = init_field(
            "sum_all_{}(#bytes in parent scope covered "
            "by DW_OP_entry_value)".format(sum_name)
        )
        # Leave entry-value bytes out of the covered total, unless either
        # counter overflowed and cannot be used for arithmetic.
        if (
            variables_scope_bytes_covered != TAINT_VALUE
            and variables_scope_bytes_entry_values != TAINT_VALUE
        ):
            variables_scope_bytes_covered = (
                variables_scope_bytes_covered - variables_scope_bytes_entry_values
            )
        bucket_field = (
            "#{} - entry values with {} of parent scope covered "
            "by DW_AT_location"
        )

    for cov_bucket in coverage_buckets():
        variables_coverage_map[cov_bucket] = init_field(
            bucket_field.format(count_name, cov_bucket)
        )

    return LocationStats(
        binary,
        variables_total,
        variables_total_locstats,
        variables_with_loc,
        variables_scope_bytes_covered,
        variables_scope_bytes,
        variables_coverage_map,
    )


def parse_program_args(parser):
    """Register the command line options on `parser` and parse sys.argv."""
    parser.add_argument(
        "--only-variables",
        action="store_true",
        default=False,
        help="calculate the location statistics only for local variables",
    )
    parser.add_argument(
        "--only-formal-parameters",
        action="store_true",
        default=False,
        help="calculate the location statistics only for formal parameters",
    )
    parser.add_argument(
        "--ignore-debug-entry-values",
        action="store_true",
        default=False,
        help="ignore the location statistics on locations with " "entry values",
    )
    parser.add_argument(
        "--draw-plot",
        action="store_true",
        default=False,
        help="show histogram of location buckets generated (requires " "matplotlib)",
    )
    parser.add_argument(
        "--compare",
        action="store_true",
        default=False,
        help="compare the debug location coverage on two files provided, "
        "and draw a plot showing the difference (requires "
        "matplotlib)",
    )
    parser.add_argument("file_names", nargs="+", type=str, help="file to process")

    return parser.parse_args()


def verify_program_inputs(opts):
    """Return True if the parsed options `opts` form a valid invocation.

    On failure an error message is printed and False is returned.
    """
    if len(sys.argv) < 2:
        print("error: Too few arguments.")
        return False

    if opts.only_variables and opts.only_formal_parameters:
        print("error: Please use just one --only* option.")
        return False

    if not opts.compare and len(opts.file_names) != 1:
        print("error: Please specify only one file to process.")
        return False

    if opts.compare and len(opts.file_names) != 2:
        print("error: Please specify two files to process.")
        return False

    if opts.draw_plot or opts.compare:
        # Only probe for matplotlib when a plot was actually requested.
        try:
            import matplotlib  # noqa: F401
        except ImportError:
            print("error: matplotlib not found.")
            return False

    return True


def Main():
    """Parse the command line and print, plot or compare the statistics."""
    parser = argparse.ArgumentParser()
    opts = parse_program_args(parser)

    if not verify_program_inputs(opts):
        parser.print_help()
        sys.exit(1)

    binary_file = opts.file_names[0]
    locstats = parse_locstats(opts, binary_file)

    if not opts.compare:
        if opts.draw_plot:
            # Draw a histogram representing the location buckets.
            locstats.draw_plot()
        else:
            # Pretty print collected info on the standard output.
            if locstats.pretty_print() == -1:
                sys.exit(0)
    else:
        binary_file_to_compare = opts.file_names[1]
        locstats_to_compare = parse_locstats(opts, binary_file_to_compare)
        # Draw a plot showing the difference in debug location coverage between
        # two files.
        locstats.draw_location_diff(locstats_to_compare)


if __name__ == "__main__":
    Main()
    sys.exit(0)