mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-29 17:06:07 +00:00

This fixes a build error introduced by https://reviews.llvm.org/D153587 when using an old version of GCC. See https://reviews.llvm.org/D153587#4644735 for details.
251 lines
7.9 KiB
C++
251 lines
7.9 KiB
C++
//===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a glob pattern matcher.
//
//===----------------------------------------------------------------------===//
|
|
#include "llvm/Support/GlobPattern.h"
|
|
#include "llvm/ADT/StringRef.h"
|
|
#include "llvm/Support/Errc.h"
|
|
|
|
using namespace llvm;
|
|
|
|
// Expands character ranges and returns a bitmap.
|
|
// For example, "a-cf-hz" is expanded to "abcfghz".
|
|
static Expected<BitVector> expand(StringRef S, StringRef Original) {
|
|
BitVector BV(256, false);
|
|
|
|
// Expand X-Y.
|
|
for (;;) {
|
|
if (S.size() < 3)
|
|
break;
|
|
|
|
uint8_t Start = S[0];
|
|
uint8_t End = S[2];
|
|
|
|
// If it doesn't start with something like X-Y,
|
|
// consume the first character and proceed.
|
|
if (S[1] != '-') {
|
|
BV[Start] = true;
|
|
S = S.substr(1);
|
|
continue;
|
|
}
|
|
|
|
// It must be in the form of X-Y.
|
|
// Validate it and then interpret the range.
|
|
if (Start > End)
|
|
return make_error<StringError>("invalid glob pattern: " + Original,
|
|
errc::invalid_argument);
|
|
|
|
for (int C = Start; C <= End; ++C)
|
|
BV[(uint8_t)C] = true;
|
|
S = S.substr(3);
|
|
}
|
|
|
|
for (char C : S)
|
|
BV[(uint8_t)C] = true;
|
|
return BV;
|
|
}
|
|
|
|
// Identify brace expansions in S and return the list of patterns they expand
|
|
// into.
|
|
static Expected<SmallVector<std::string, 1>>
|
|
parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) {
|
|
SmallVector<std::string> SubPatterns = {S.str()};
|
|
if (!MaxSubPatterns || !S.contains('{'))
|
|
return std::move(SubPatterns);
|
|
|
|
struct BraceExpansion {
|
|
size_t Start;
|
|
size_t Length;
|
|
SmallVector<StringRef, 2> Terms;
|
|
};
|
|
SmallVector<BraceExpansion, 0> BraceExpansions;
|
|
|
|
BraceExpansion *CurrentBE = nullptr;
|
|
size_t TermBegin;
|
|
for (size_t I = 0, E = S.size(); I != E; ++I) {
|
|
if (S[I] == '[') {
|
|
I = S.find(']', I + 2);
|
|
if (I == std::string::npos)
|
|
return make_error<StringError>("invalid glob pattern, unmatched '['",
|
|
errc::invalid_argument);
|
|
} else if (S[I] == '{') {
|
|
if (CurrentBE)
|
|
return make_error<StringError>(
|
|
"nested brace expansions are not supported",
|
|
errc::invalid_argument);
|
|
CurrentBE = &BraceExpansions.emplace_back();
|
|
CurrentBE->Start = I;
|
|
TermBegin = I + 1;
|
|
} else if (S[I] == ',') {
|
|
if (!CurrentBE)
|
|
continue;
|
|
CurrentBE->Terms.push_back(S.substr(TermBegin, I - TermBegin));
|
|
TermBegin = I + 1;
|
|
} else if (S[I] == '}') {
|
|
if (!CurrentBE)
|
|
continue;
|
|
if (CurrentBE->Terms.empty())
|
|
return make_error<StringError>(
|
|
"empty or singleton brace expansions are not supported",
|
|
errc::invalid_argument);
|
|
CurrentBE->Terms.push_back(S.substr(TermBegin, I - TermBegin));
|
|
CurrentBE->Length = I - CurrentBE->Start + 1;
|
|
CurrentBE = nullptr;
|
|
} else if (S[I] == '\\') {
|
|
if (++I == E)
|
|
return make_error<StringError>("invalid glob pattern, stray '\\'",
|
|
errc::invalid_argument);
|
|
}
|
|
}
|
|
if (CurrentBE)
|
|
return make_error<StringError>("incomplete brace expansion",
|
|
errc::invalid_argument);
|
|
|
|
size_t NumSubPatterns = 1;
|
|
for (auto &BE : BraceExpansions) {
|
|
if (NumSubPatterns > std::numeric_limits<size_t>::max() / BE.Terms.size()) {
|
|
NumSubPatterns = std::numeric_limits<size_t>::max();
|
|
break;
|
|
}
|
|
NumSubPatterns *= BE.Terms.size();
|
|
}
|
|
if (NumSubPatterns > *MaxSubPatterns)
|
|
return make_error<StringError>("too many brace expansions",
|
|
errc::invalid_argument);
|
|
// Replace brace expansions in reverse order so that we don't invalidate
|
|
// earlier start indices
|
|
for (auto &BE : reverse(BraceExpansions)) {
|
|
SmallVector<std::string> OrigSubPatterns;
|
|
std::swap(SubPatterns, OrigSubPatterns);
|
|
for (StringRef Term : BE.Terms)
|
|
for (StringRef Orig : OrigSubPatterns)
|
|
SubPatterns.emplace_back(Orig).replace(BE.Start, BE.Length, Term);
|
|
}
|
|
return std::move(SubPatterns);
|
|
}
|
|
|
|
// Compiles the glob pattern S.
//
// The leading run of S that contains no metacharacter ('?', '*', '[', '{',
// '\') is stored as the literal prefix. The remainder is brace-expanded by
// parseBraceExpansions (MaxSubPatterns is passed through to it) and each
// resulting sub-pattern is compiled into a SubGlobPattern.
//
// Returns the compiled pattern, or the error reported by brace expansion or
// by sub-pattern compilation.
Expected<GlobPattern>
GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
  GlobPattern Pat;

  // Store the prefix that does not contain any metacharacter.
  size_t PrefixSize = S.find_first_of("?*[{\\");
  Pat.Prefix = S.substr(0, PrefixSize);
  // No metacharacter at all: the pattern is a pure literal and needs no
  // sub-globs.
  if (PrefixSize == std::string::npos)
    return Pat;
  S = S.substr(PrefixSize);

  SmallVector<std::string, 1> SubPats;
  if (auto Err = parseBraceExpansions(S, MaxSubPatterns).moveInto(SubPats))
    return std::move(Err);
  for (StringRef SubPat : SubPats) {
    auto SubGlobOrErr = SubGlobPattern::create(SubPat);
    if (!SubGlobOrErr)
      return SubGlobOrErr.takeError();
    // SubGlobOrErr is a local that dies at the end of this iteration, so hand
    // its payload over by move instead of deep-copying it.
    Pat.SubGlobs.push_back(std::move(*SubGlobOrErr));
  }

  return Pat;
}
|
|
|
|
// Compiles a single brace-free sub-pattern S.
//
// The pattern text is copied verbatim, and every character class "[...]" is
// pre-expanded into a bitmap recorded together with the offset just past its
// closing ']'. Returns an invalid_argument error for an unmatched '[', a
// trailing '\', or a class that expand() rejects.
Expected<GlobPattern::SubGlobPattern>
GlobPattern::SubGlobPattern::create(StringRef S) {
  SubGlobPattern Result;
  Result.Pat.assign(S.begin(), S.end());

  const size_t Size = S.size();
  for (size_t Pos = 0; Pos != Size; ++Pos) {
    const char Ch = S[Pos];

    if (Ch == '\\') {
      // An escape consumes the following character, which must exist.
      if (++Pos == Size)
        return make_error<StringError>("invalid glob pattern, stray '\\'",
                                       errc::invalid_argument);
      continue;
    }
    if (Ch != '[')
      continue;

    // ']' is allowed as the first character of a character class and '[]' is
    // invalid, so the search for the terminator starts one past the first
    // class character.
    const size_t ClassBegin = Pos + 1;
    const size_t ClassEnd = S.find(']', ClassBegin + 1);
    if (ClassEnd == StringRef::npos)
      return make_error<StringError>("invalid glob pattern, unmatched '['",
                                     errc::invalid_argument);

    // A leading '^' or '!' negates the class and is not itself a member.
    StringRef Members = S.substr(ClassBegin, ClassEnd - ClassBegin);
    const bool Negated = S[ClassBegin] == '^' || S[ClassBegin] == '!';
    if (Negated)
      Members = Members.substr(1);

    Expected<BitVector> Bits = expand(Members, S);
    if (!Bits)
      return Bits.takeError();
    if (Negated)
      Bits->flip();
    Result.Brackets.push_back(Bracket{ClassEnd + 1, std::move(*Bits)});

    // Resume after the ']' (the loop increment steps past it).
    Pos = ClassEnd;
  }
  return Result;
}
|
|
|
|
// Returns true if S matches this pattern: S must begin with the literal
// prefix, and the text after the prefix must either be empty when there are
// no sub-globs, or be accepted by at least one sub-glob.
bool GlobPattern::match(StringRef S) const {
  // consume_front strips the prefix from S on success.
  const bool HasPrefix = S.consume_front(Prefix);
  if (!HasPrefix)
    return false;

  // Without sub-globs the pattern is just the prefix, so nothing may follow.
  if (SubGlobs.empty())
    return S.empty();

  for (const auto &Candidate : SubGlobs) {
    if (Candidate.match(S))
      return true;
  }
  return false;
}
|
|
|
|
// Factor the pattern into segments split by '*'. The segment is matched
|
|
// sequentianlly by finding the first occurrence past the end of the previous
|
|
// match.
|
|
bool GlobPattern::SubGlobPattern::match(StringRef Str) const {
|
|
const char *P = Pat.data(), *SegmentBegin = nullptr, *S = Str.data(),
|
|
*SavedS = S;
|
|
const char *const PEnd = P + Pat.size(), *const End = S + Str.size();
|
|
size_t B = 0, SavedB = 0;
|
|
while (S != End) {
|
|
if (P == PEnd)
|
|
;
|
|
else if (*P == '*') {
|
|
// The non-* substring on the left of '*' matches the tail of S. Save the
|
|
// positions to be used by backtracking if we see a mismatch later.
|
|
SegmentBegin = ++P;
|
|
SavedS = S;
|
|
SavedB = B;
|
|
continue;
|
|
} else if (*P == '[') {
|
|
if (Brackets[B].Bytes[uint8_t(*S)]) {
|
|
P = Pat.data() + Brackets[B++].NextOffset;
|
|
++S;
|
|
continue;
|
|
}
|
|
} else if (*P == '\\') {
|
|
if (*++P == *S) {
|
|
++P;
|
|
++S;
|
|
continue;
|
|
}
|
|
} else if (*P == *S || *P == '?') {
|
|
++P;
|
|
++S;
|
|
continue;
|
|
}
|
|
if (!SegmentBegin)
|
|
return false;
|
|
// We have seen a '*'. Backtrack to the saved positions. Shift the S
|
|
// position to probe the next starting position in the segment.
|
|
P = SegmentBegin;
|
|
S = ++SavedS;
|
|
B = SavedB;
|
|
}
|
|
// All bytes in Str have been matched. Return true if the rest part of Pat is
|
|
// empty or contains only '*'.
|
|
return getPat().find_first_not_of('*', P - Pat.data()) == std::string::npos;
|
|
}
|