mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-17 18:26:37 +00:00
[utils][filecheck-lint]: speedup filecheck_lint (#94191)
For example: clang\test\OpenMP\task_codegen.cpp: 0m29.570s -> 0m0.159s; clang\test\Driver: 4m55.917s -> 1m48.053s. Most of the win comes from big files. --------- Co-authored-by: klensy <nightouser@gmail.com>
This commit is contained in:
parent
7eaae4e6af
commit
42ebf3eaaf
@ -81,29 +81,40 @@ class FileRange:
|
||||
"""Stores the coordinates of a span on a single line within a file.
|
||||
|
||||
Attributes:
|
||||
line: the line number
|
||||
start_column: the (inclusive) column where the span starts
|
||||
end_column: the (inclusive) column where the span ends
|
||||
content: line str
|
||||
start_byte: the (inclusive) byte offset the span starts
|
||||
end_byte: the (inclusive) byte offset the span ends
|
||||
"""
|
||||
|
||||
line: int
|
||||
start_column: int
|
||||
end_column: int
|
||||
content: str
|
||||
start_byte: int
|
||||
end_byte: int
|
||||
|
||||
def __init__(
|
||||
self, content: str, start_byte: int, end_byte: int
|
||||
): # pylint: disable=g-doc-args
|
||||
"""Derives a span's coordinates based on a string and start/end bytes.
|
||||
"""
|
||||
Stores the coordinates of a span based on a string and start/end bytes.
|
||||
|
||||
`start_byte` and `end_byte` are assumed to be on the same line.
|
||||
"""
|
||||
content_before_span = content[:start_byte]
|
||||
self.line = content_before_span.count("\n") + 1
|
||||
self.start_column = start_byte - content_before_span.rfind("\n")
|
||||
self.end_column = self.start_column + (end_byte - start_byte - 1)
|
||||
self.content = content
|
||||
self.start_byte = start_byte
|
||||
self.end_byte = end_byte
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f"{self.line}:{self.start_column}-{self.end_column}"
|
||||
def as_str(self):
|
||||
"""
|
||||
Derives span from line and coordinates.
|
||||
|
||||
start_column: the (inclusive) column where the span starts
|
||||
end_column: the (inclusive) column where the span ends
|
||||
"""
|
||||
content_before_span = self.content[: self.start_byte]
|
||||
line = content_before_span.count("\n") + 1
|
||||
start_column = self.start_byte - content_before_span.rfind("\n")
|
||||
end_column = start_column + (self.end_byte - self.start_byte - 1)
|
||||
|
||||
return f"{line}:{start_column}-{end_column}"
|
||||
|
||||
|
||||
class Diagnostic:
|
||||
@ -134,7 +145,7 @@ class Diagnostic:
|
||||
self.fix = fix
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f"{self.filepath}:" + str(self.filerange) + f": {self.summary()}"
|
||||
return f"{self.filepath}:" + self.filerange.as_str() + f": {self.summary()}"
|
||||
|
||||
def summary(self) -> str:
|
||||
return (
|
||||
@ -228,7 +239,8 @@ def find_directive_typos(
|
||||
)
|
||||
|
||||
potential_directives = find_potential_directives(content)
|
||||
|
||||
# Cache score and best_match to skip recalculating.
|
||||
score_and_best_match_for_potential_directive = dict()
|
||||
for filerange, potential_directive in potential_directives:
|
||||
# TODO(bchetioui): match count directives more finely. We skip directives
|
||||
# starting with 'CHECK-COUNT-' for the moment as they require more complex
|
||||
@ -244,7 +256,16 @@ def find_directive_typos(
|
||||
if len(potential_directive) > max(map(len, all_directives)) + threshold:
|
||||
continue
|
||||
|
||||
score, best_match = find_best_match(potential_directive)
|
||||
if potential_directive not in score_and_best_match_for_potential_directive:
|
||||
score, best_match = find_best_match(potential_directive)
|
||||
score_and_best_match_for_potential_directive[potential_directive] = (
|
||||
score,
|
||||
best_match,
|
||||
)
|
||||
else:
|
||||
score, best_match = score_and_best_match_for_potential_directive[
|
||||
potential_directive
|
||||
]
|
||||
if score == 0: # This is an actual directive, ignore.
|
||||
continue
|
||||
elif score <= threshold and best_match not in _ignore:
|
||||
|
@ -49,27 +49,15 @@ class TestTypoDetection(unittest.TestCase):
|
||||
results = list(fcl.find_potential_directives(content))
|
||||
assert len(results) == 3
|
||||
pos, match = results[0]
|
||||
assert (
|
||||
pos.line == 1
|
||||
and pos.start_column == len("junk; ") + 1
|
||||
and pos.end_column == len(lines[0]) - 1
|
||||
)
|
||||
assert pos.as_str() == "1:7-11"
|
||||
assert match == "CHCK1"
|
||||
|
||||
pos, match = results[1]
|
||||
assert (
|
||||
pos.line == 2
|
||||
and pos.start_column == len("junk// ") + 1
|
||||
and pos.end_column == len(lines[1]) - 1
|
||||
)
|
||||
assert pos.as_str() == "2:8-12"
|
||||
assert match == "CHCK2"
|
||||
|
||||
pos, match = results[2]
|
||||
assert (
|
||||
pos.line == 3
|
||||
and pos.start_column == 1
|
||||
and pos.end_column == len(lines[2]) - 1
|
||||
)
|
||||
assert pos.as_str() == "3:1-10"
|
||||
assert match == "SOME CHCK3"
|
||||
|
||||
def test_levenshtein(self):
|
||||
|
Loading…
x
Reference in New Issue
Block a user