#!/usr/bin/env python3 """ A test update script. This script is a utility to update LLVM 'llvm-mc' based test cases with new FileCheck patterns. """ from __future__ import print_function import argparse import functools import os # Used to advertise this file's name ("autogenerated_note"). from UpdateTestChecks import common import subprocess import re mc_LIKE_TOOLS = [ "llvm-mc", ] ERROR_RE = re.compile(r":\d+: (warning|error): .*") ERROR_CHECK_RE = re.compile(r"# COM: .*") OUTPUT_SKIPPED_RE = re.compile(r"(.text)") COMMENT = {"asm": "//", "dasm": "#"} def invoke_tool(exe, check_rc, cmd_args, testline, verbose=False): if isinstance(cmd_args, list): args = [applySubstitutions(a, substitutions) for a in cmd_args] else: args = cmd_args cmd = 'echo "' + testline + '" | ' + exe + " " + args if verbose: print("Command: ", cmd) out = subprocess.run( cmd, shell=True, check=check_rc, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, ).stdout # Fix line endings to unix CR style. return out.decode().replace("\r\n", "\n") # create tests line-by-line, here we just filter out the check lines and comments # and treat all others as tests def isTestLine(input_line, mc_mode): line = input_line.strip() # Skip empty and comment lines if not line or line.startswith(COMMENT[mc_mode]): return False # skip any CHECK lines. elif common.CHECK_RE.match(input_line): return False return True def isRunLine(l): return common.RUN_LINE_RE.match(l) def hasErr(err): return err and ERROR_RE.search(err) is not None def getErrString(err): if not err: return "" # take the first match for line in err.splitlines(): s = ERROR_RE.search(line) if s: return s.group(0) return "" def getOutputString(out): if not out: return "" output = "" for line in out.splitlines(): if OUTPUT_SKIPPED_RE.search(line): continue if line.strip("\t ") == "": continue output += line.lstrip("\t ") return output def should_add_line_to_output(input_line, prefix_set, mc_mode): # special check line if mc_mode == "dasm" and ERROR_CHECK_RE.search(input_line): return False else: return common.should_add_line_to_output( input_line, prefix_set, comment_marker=COMMENT[mc_mode] ) def getStdCheckLine(prefix, output, mc_mode): o = "" for line in output.splitlines(): o += COMMENT[mc_mode] + " " + prefix + ": " + line + "\n" return o def getErrCheckLine(prefix, output, mc_mode, line_offset=1): return ( COMMENT[mc_mode] + " " + prefix + ": " + ":[[@LINE-{}]]".format(line_offset) + output + "\n" ) def main(): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--llvm-mc-binary", default=None, help='The "mc" binary to use to generate the test case', ) parser.add_argument( "--tool", default=None, help="Treat the given tool name as an mc-like tool for which check lines should be generated", ) parser.add_argument( "--default-march", default=None, help="Set a default -march for when neither triple nor arch are found in a RUN line", ) parser.add_argument( "--unique", action="store_true", default=False, help="remove duplicated test line if found", ) parser.add_argument( "--sort", action="store_true", default=False, help="sort testline in alphabetic order (keep run-lines on top), this option could be dangerous as it" "could change the order of lines that are not expected", ) parser.add_argument("tests", nargs="+") initial_args = common.parse_commandline_args(parser) script_name = os.path.basename(__file__) for ti in common.itertests( initial_args.tests, parser, script_name="utils/" + script_name ): if ti.path.endswith(".s"): mc_mode = "asm" elif ti.path.endswith(".txt"): mc_mode = "dasm" if ti.args.sort: print("sorting with dasm(.txt) file is not supported!") return -1 else: common.warn("Expected .s and .txt, Skipping file : ", ti.path) continue triple_in_ir = None for l in ti.input_lines: m = common.TRIPLE_IR_RE.match(l) if m: triple_in_ir = m.groups()[0] break run_list = [] for l in ti.run_lines: if "|" not in l: common.warn("Skipping unparsable RUN line: " + l) continue commands = [cmd.strip() for cmd in l.split("|")] assert len(commands) >= 2 mc_cmd = " | ".join(commands[:-1]) filecheck_cmd = commands[-1] # special handling for negating exit status # if not is used in runline, disable rc check, since # the command might or might not # return non-zero code on a single line run check_rc = True mc_cmd_args = mc_cmd.strip().split() if mc_cmd_args[0] == "not": check_rc = False mc_tool = mc_cmd_args[1] mc_cmd = mc_cmd[len(mc_cmd_args[0]) :].strip() else: mc_tool = mc_cmd_args[0] triple_in_cmd = None m = common.TRIPLE_ARG_RE.search(mc_cmd) if m: triple_in_cmd = m.groups()[0] march_in_cmd = ti.args.default_march m = common.MARCH_ARG_RE.search(mc_cmd) if m: march_in_cmd = m.groups()[0] common.verify_filecheck_prefixes(filecheck_cmd) mc_like_tools = mc_LIKE_TOOLS[:] if ti.args.tool: mc_like_tools.append(ti.args.tool) if mc_tool not in mc_like_tools: common.warn("Skipping non-mc RUN line: " + l) continue if not filecheck_cmd.startswith("FileCheck "): common.warn("Skipping non-FileChecked RUN line: " + l) continue mc_cmd_args = mc_cmd[len(mc_tool) :].strip() mc_cmd_args = mc_cmd_args.replace("< %s", "").replace("%s", "").strip() check_prefixes = common.get_check_prefixes(filecheck_cmd) run_list.append( ( check_prefixes, mc_tool, check_rc, mc_cmd_args, triple_in_cmd, march_in_cmd, ) ) # find all test line from input testlines = [l for l in ti.input_lines if isTestLine(l, mc_mode)] # remove duplicated lines to save running time testlines = list(dict.fromkeys(testlines)) common.debug("Valid test line found: ", len(testlines)) run_list_size = len(run_list) testnum = len(testlines) raw_output = [] raw_prefixes = [] for ( prefixes, mc_tool, check_rc, mc_args, triple_in_cmd, march_in_cmd, ) in run_list: common.debug("Extracted mc cmd:", mc_tool, mc_args) common.debug("Extracted FileCheck prefixes:", str(prefixes)) common.debug("Extracted triple :", str(triple_in_cmd)) common.debug("Extracted march:", str(march_in_cmd)) triple = triple_in_cmd or triple_in_ir if not triple: triple = common.get_triple_from_march(march_in_cmd) raw_output.append([]) for line in testlines: # get output for each testline out = invoke_tool( ti.args.llvm_mc_binary or mc_tool, check_rc, mc_args, line, verbose=ti.args.verbose, ) raw_output[-1].append(out) common.debug("Collect raw tool lines:", str(len(raw_output[-1]))) raw_prefixes.append(prefixes) output_lines = [] generated_prefixes = {} used_prefixes = set() prefix_set = set([prefix for p in run_list for prefix in p[0]]) common.debug("Rewriting FileCheck prefixes:", str(prefix_set)) for test_id in range(testnum): input_line = testlines[test_id] # a {prefix : output, [runid] } dict # insert output to a prefix-key dict, and do a max sorting # to select the most-used prefix which share the same output string p_dict = {} for run_id in range(run_list_size): out = raw_output[run_id][test_id] if hasErr(out): o = getErrString(out) else: o = getOutputString(out) prefixes = raw_prefixes[run_id] for p in prefixes: if p not in p_dict: p_dict[p] = o, [run_id] else: if p_dict[p] == (None, []): continue prev_o, run_ids = p_dict[p] if o == prev_o: run_ids.append(run_id) p_dict[p] = o, run_ids else: # conflict, discard p_dict[p] = None, [] p_dict_sorted = dict( sorted(p_dict.items(), key=lambda item: -len(item[1][1])) ) # prefix is selected and generated with most shared output lines # each run_id can only be used once gen_prefix = "" used_runid = set() # line number diff between generated prefix and testline line_offset = 1 for prefix, tup in p_dict_sorted.items(): o, run_ids = tup if len(run_ids) == 0: continue skip = False for i in run_ids: if i in used_runid: skip = True else: used_runid.add(i) if not skip: used_prefixes.add(prefix) if hasErr(o): newline = getErrCheckLine(prefix, o, mc_mode, line_offset) else: newline = getStdCheckLine(prefix, o, mc_mode) if newline: gen_prefix += newline line_offset += 1 generated_prefixes[input_line] = gen_prefix.rstrip("\n") # write output for input_info in ti.iterlines(output_lines): input_line = input_info.line if input_line in testlines: output_lines.append(input_line) output_lines.append(generated_prefixes[input_line]) elif should_add_line_to_output(input_line, prefix_set, mc_mode): output_lines.append(input_line) if ti.args.unique or ti.args.sort: # split with double newlines test_units = "\n".join(output_lines).split("\n\n") # select the key line for each test unit test_dic = {} for unit in test_units: lines = unit.split("\n") for l in lines: # if contains multiple lines, use # the first testline or runline as key if isTestLine(l, mc_mode): test_dic[unit] = l break if isRunLine(l): test_dic[unit] = l break # unique if ti.args.unique: new_test_units = [] written_lines = set() for unit in test_units: # if not testline/runline, we just add it if unit not in test_dic: new_test_units.append(unit) else: if test_dic[unit] in written_lines: common.debug("Duplicated test skipped: ", unit) continue written_lines.add(test_dic[unit]) new_test_units.append(unit) test_units = new_test_units # sort if ti.args.sort: def getkey(l): # find key of test unit, otherwise use first line if l in test_dic: line = test_dic[l] else: line = l.split("\n")[0] # runline placed on the top return (not isRunLine(line), line) test_units = sorted(test_units, key=getkey) # join back to be output string output_lines = "\n\n".join(test_units).split("\n") # output if ti.args.gen_unused_prefix_body: output_lines.extend( ti.get_checks_for_unused_prefixes(run_list, used_prefixes) ) common.debug("Writing %d lines to %s..." % (len(output_lines), ti.path)) with open(ti.path, "wb") as f: f.writelines(["{}\n".format(l).encode("utf-8") for l in output_lines]) if __name__ == "__main__": main()