mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-25 17:46:04 +00:00

We currently display a list of all minimal cycles, but it's useful to be able to see the big picture impact of these cycles by merging them all together into groups of interconnected components. Because the cycle discovery algorithm only considers "minimal" cycles, it discards all information for dependencies which are not considered part of the minimal cycle. So all we know is that the components of each island definitely all depend on each other but it's still possible that there are hidden dependencies due to transitive includes. The cycle list should still be the authoritative reference for deciding where the easiest places to break cycles are, though. llvm-svn: 298530
195 lines
6.7 KiB
Python
195 lines
6.7 KiB
Python
import argparse
|
|
import itertools
|
|
import os
|
|
import re
|
|
import sys
|
|
|
|
from use_lldb_suite import lldb_root
|
|
|
|
parser = argparse.ArgumentParser(
|
|
description='Analyze LLDB project #include dependencies.')
|
|
parser.add_argument('--show-counts', default=False, action='store_true',
|
|
help='When true, show the number of dependencies from each subproject')
|
|
parser.add_argument('--discover-cycles', default=False, action='store_true',
|
|
help='When true, find and display all project dependency cycles. Note,'
|
|
'this option is very slow')
|
|
|
|
args = parser.parse_args()
|
|
|
|
src_dir = os.path.join(lldb_root, "source")
|
|
inc_dir = os.path.join(lldb_root, "include")
|
|
|
|
src_map = {}
|
|
|
|
include_regex = re.compile('#include \"((lldb|Plugins|clang)(.*/)+).*\"')
|
|
|
|
def is_sublist(small, big):
|
|
it = iter(big)
|
|
return all(c in it for c in small)
|
|
|
|
def normalize_host(str):
|
|
if str.startswith("lldb/Host"):
|
|
return "lldb/Host"
|
|
if str.startswith("Plugins"):
|
|
return "lldb/" + str
|
|
if str.startswith("lldb/../../source"):
|
|
return str.replace("lldb/../../source", "lldb")
|
|
return str
|
|
|
|
def scan_deps(this_dir, file):
|
|
global src_map
|
|
deps = {}
|
|
this_dir = normalize_host(this_dir)
|
|
if this_dir in src_map:
|
|
deps = src_map[this_dir]
|
|
|
|
with open(file) as f:
|
|
for line in list(f):
|
|
m = include_regex.match(line)
|
|
if m is None:
|
|
continue
|
|
relative = m.groups()[0].rstrip("/")
|
|
if relative == this_dir:
|
|
continue
|
|
relative = normalize_host(relative)
|
|
if relative in deps:
|
|
deps[relative] += 1
|
|
elif relative != this_dir:
|
|
deps[relative] = 1
|
|
if this_dir not in src_map and len(deps) > 0:
|
|
src_map[this_dir] = deps
|
|
|
|
for (base, dirs, files) in os.walk(inc_dir):
|
|
dir = os.path.basename(base)
|
|
relative = os.path.relpath(base, inc_dir)
|
|
inc_files = filter(lambda x : os.path.splitext(x)[1] in [".h"], files)
|
|
relative = relative.replace("\\", "/")
|
|
for inc in inc_files:
|
|
inc_path = os.path.join(base, inc)
|
|
scan_deps(relative, inc_path)
|
|
|
|
for (base, dirs, files) in os.walk(src_dir):
|
|
dir = os.path.basename(base)
|
|
relative = os.path.relpath(base, src_dir)
|
|
src_files = filter(lambda x : os.path.splitext(x)[1] in [".cpp", ".h", ".mm"], files)
|
|
norm_base_path = os.path.normpath(os.path.join("lldb", relative))
|
|
norm_base_path = norm_base_path.replace("\\", "/")
|
|
for src in src_files:
|
|
src_path = os.path.join(base, src)
|
|
scan_deps(norm_base_path, src_path)
|
|
pass
|
|
|
|
def is_existing_cycle(path, cycles):
|
|
# If we have a cycle like # A -> B -> C (with an implicit -> A at the end)
|
|
# then we don't just want to check for an occurrence of A -> B -> C in the
|
|
# list of known cycles, but every possible rotation of A -> B -> C. For
|
|
# example, if we previously encountered B -> C -> A (with an implicit -> B
|
|
# at the end), then A -> B -> C is also a cycle. This is an important
|
|
# optimization which reduces the search space by multiple orders of
|
|
# magnitude.
|
|
for i in xrange(0,len(path)):
|
|
if any(is_sublist(x, path) for x in cycles):
|
|
return True
|
|
path = [path[-1]] + path[0:-1]
|
|
return False
|
|
|
|
def expand(path_queue, path_lengths, cycles, src_map):
|
|
# We do a breadth first search, to make sure we visit all paths in order
|
|
# of ascending length. This is an important optimization to make sure that
|
|
# short cycles are discovered first, which will allow us to discard longer
|
|
# cycles which grow the search space exponentially the longer they get.
|
|
while len(path_queue) > 0:
|
|
cur_path = path_queue.pop(0)
|
|
if is_existing_cycle(cur_path, cycles):
|
|
continue
|
|
|
|
next_len = path_lengths.pop(0) + 1
|
|
last_component = cur_path[-1]
|
|
|
|
for item in src_map[last_component]:
|
|
if item.startswith("clang"):
|
|
continue
|
|
|
|
if item in cur_path:
|
|
# This is a cycle. Minimize it and then check if the result is
|
|
# already in the list of cycles. Insert it (or not) and then
|
|
# exit.
|
|
new_index = cur_path.index(item)
|
|
cycle = cur_path[new_index:]
|
|
if not is_existing_cycle(cycle, cycles):
|
|
cycles.append(cycle)
|
|
continue
|
|
|
|
path_lengths.append(next_len)
|
|
path_queue.append(cur_path + [item])
|
|
pass
|
|
|
|
cycles = []
|
|
|
|
path_queue = [[x] for x in src_map.iterkeys()]
|
|
path_lens = [1] * len(path_queue)
|
|
|
|
items = list(src_map.iteritems())
|
|
items.sort(lambda A, B : cmp(A[0], B[0]))
|
|
|
|
for (path, deps) in items:
|
|
print path + ":"
|
|
sorted_deps = list(deps.iteritems())
|
|
if args.show_counts:
|
|
sorted_deps.sort(lambda A, B: cmp(A[1], B[1]))
|
|
for dep in sorted_deps:
|
|
print "\t{} [{}]".format(dep[0], dep[1])
|
|
else:
|
|
sorted_deps.sort(lambda A, B: cmp(A[0], B[0]))
|
|
for dep in sorted_deps:
|
|
print "\t{}".format(dep[0])
|
|
|
|
def iter_cycles(cycles):
|
|
global src_map
|
|
for cycle in cycles:
|
|
cycle.append(cycle[0])
|
|
zipper = list(zip(cycle[0:-1], cycle[1:]))
|
|
result = [(x, src_map[x][y], y) for (x,y) in zipper]
|
|
total = 0
|
|
smallest = result[0][1]
|
|
for (first, value, last) in result:
|
|
total += value
|
|
smallest = min(smallest, value)
|
|
yield (total, smallest, result)
|
|
|
|
if args.discover_cycles:
|
|
print "Analyzing cycles..."
|
|
|
|
expand(path_queue, path_lens, cycles, src_map)
|
|
|
|
average = sum([len(x)+1 for x in cycles]) / len(cycles)
|
|
|
|
print "Found {} cycles. Average cycle length = {}.".format(len(cycles), average)
|
|
if args.show_counts:
|
|
counted = list(iter_cycles(cycles))
|
|
counted.sort(lambda A, B: cmp(A[0], B[0]))
|
|
for (total, smallest, cycle) in counted:
|
|
sys.stdout.write("{} deps to break: ".format(total))
|
|
sys.stdout.write(cycle[0][0])
|
|
for (first, count, last) in cycle:
|
|
sys.stdout.write(" [{}->] {}".format(count, last))
|
|
sys.stdout.write("\n")
|
|
else:
|
|
for cycle in cycles:
|
|
cycle.append(cycle[0])
|
|
print " -> ".join(cycle)
|
|
|
|
print "Analyzing islands..."
|
|
islands = []
|
|
for cycle in cycles:
|
|
this_cycle = set(cycle)
|
|
disjoints = [x for x in islands if this_cycle.isdisjoint(x)]
|
|
overlaps = [x for x in islands if not this_cycle.isdisjoint(x)]
|
|
islands = disjoints + [set.union(this_cycle, *overlaps)]
|
|
print "Found {} disjoint cycle islands...".format(len(islands))
|
|
for island in islands:
|
|
print "Island ({} elements)".format(len(island))
|
|
for node in island:
|
|
print " {0}".format(node)
|
|
sys.stdout.flush()
|
|
pass |