mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-17 06:56:44 +00:00

Previously we were calling glrRecover() ad hoc at the end of input. There are two main problems with this:
- Calling glrRecover() on two separate code paths is inelegant.
- We may have to recover several times in succession (e.g. to exit from nested scopes), so we need a loop at end-of-file.

Having an actual shift action for an EOF terminal allows us to handle both concerns in the main shift/recover/reduce loop.

This revealed a recovery design bug where recovery could enter a loop by repeatedly choosing the same parent to identically recover from. Addressed this by allowing each node to be used as a recovery base only once.

Differential Revision: https://reviews.llvm.org/D130550
181 lines
6.5 KiB
C++
181 lines
6.5 KiB
C++
//===--- ForestTest.cpp - Test Forest dump ----------------------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "clang-pseudo/Forest.h"
|
|
#include "clang-pseudo/Token.h"
|
|
#include "clang/Basic/LangOptions.h"
|
|
#include "llvm/ADT/StringRef.h"
|
|
#include "gmock/gmock.h"
|
|
#include "gtest/gtest.h"
|
|
#include <vector>
|
|
|
|
namespace clang {
|
|
namespace pseudo {
|
|
namespace {
|
|
|
|
// FIXME: extract to a TestGrammar class to allow code sharing among tests.
|
|
class ForestTest : public ::testing::Test {
|
|
public:
|
|
void build(llvm::StringRef BNF) {
|
|
Diags.clear();
|
|
G = Grammar::parseBNF(BNF, Diags);
|
|
}
|
|
|
|
SymbolID symbol(llvm::StringRef Name) const {
|
|
for (unsigned I = 0; I < NumTerminals; ++I)
|
|
if (G.table().Terminals[I] == Name)
|
|
return tokenSymbol(static_cast<tok::TokenKind>(I));
|
|
for (SymbolID ID = 0; ID < G.table().Nonterminals.size(); ++ID)
|
|
if (G.table().Nonterminals[ID].Name == Name)
|
|
return ID;
|
|
ADD_FAILURE() << "No such symbol found: " << Name;
|
|
return 0;
|
|
}
|
|
|
|
RuleID ruleFor(llvm::StringRef NonterminalName) const {
|
|
auto RuleRange = G.table().Nonterminals[symbol(NonterminalName)].RuleRange;
|
|
if (RuleRange.End - RuleRange.Start == 1)
|
|
return G.table().Nonterminals[symbol(NonterminalName)].RuleRange.Start;
|
|
ADD_FAILURE() << "Expected a single rule for " << NonterminalName
|
|
<< ", but it has " << RuleRange.End - RuleRange.Start
|
|
<< " rule!\n";
|
|
return 0;
|
|
}
|
|
|
|
protected:
|
|
Grammar G;
|
|
std::vector<std::string> Diags;
|
|
};
|
|
|
|
// Builds an unambiguous add-expression tree by hand over the tokens of
// "a + b" and checks both the abbreviated and the full recursive dump.
TEST_F(ForestTest, DumpBasic) {
  build(R"cpp(
_ := add-expression EOF
add-expression := id-expression + id-expression
id-expression := IDENTIFIER
)cpp");
  ASSERT_TRUE(Diags.empty());

  ForestArena Arena;
  clang::LangOptions Opts;
  const auto &Tokens = cook(lex("a + b", Opts), Opts);

  auto Terminals = Arena.createTerminals(Tokens);
  ASSERT_EQ(Terminals.size(), 4u);

  // Both operands are produced by the same id-expression rule.
  const SymbolID IdExpr = symbol("id-expression");
  const RuleID IdExprRule = ruleFor("id-expression");
  const ForestNode &LHS =
      Arena.createSequence(IdExpr, IdExprRule, {&Terminals[0]});
  const ForestNode &RHS =
      Arena.createSequence(IdExpr, IdExprRule, {&Terminals[2]});

  const ForestNode &Sum =
      Arena.createSequence(symbol("add-expression"), ruleFor("add-expression"),
                           {&LHS, &Terminals[1], &RHS});

  // NOTE(review): the expected strings are whitespace-sensitive; confirm the
  // column padding below matches what dumpRecursive() actually prints.
  EXPECT_EQ(Sum.dumpRecursive(G, /*Abbreviate=*/true),
            "[ 0, end) add-expression := id-expression + id-expression\n"
            "[ 0, 1) ├─id-expression~IDENTIFIER := tok[0]\n"
            "[ 1, 2) ├─+ := tok[1]\n"
            "[ 2, end) └─id-expression~IDENTIFIER := tok[2]\n");
  EXPECT_EQ(Sum.dumpRecursive(G, /*Abbreviate=*/false),
            "[ 0, end) add-expression := id-expression + id-expression\n"
            "[ 0, 1) ├─id-expression := IDENTIFIER\n"
            "[ 0, 1) │ └─IDENTIFIER := tok[0]\n"
            "[ 1, 2) ├─+ := tok[1]\n"
            "[ 2, end) └─id-expression := IDENTIFIER\n"
            "[ 2, end) └─IDENTIFIER := tok[2]\n");
}
|
|
|
|
// Builds an ambiguous `type` node whose two alternatives both reach the same
// shared-type node, and checks that the dump prints the shared node once and
// refers back to it the second time.
TEST_F(ForestTest, DumpAmbiguousAndRefs) {
  build(R"cpp(
_ := type EOF
type := class-type # rule 4
type := enum-type # rule 5
class-type := shared-type
enum-type := shared-type
shared-type := IDENTIFIER)cpp");
  ASSERT_TRUE(Diags.empty());

  ForestArena Arena;
  clang::LangOptions Opts;
  const auto &Tokens = cook(lex("abc", Opts), Opts);

  auto Leaves = Arena.createTerminals(Tokens);
  ASSERT_EQ(Leaves.size(), 2u);

  // The one node that both alternatives share.
  const ForestNode &Shared = Arena.createSequence(
      symbol("shared-type"), ruleFor("shared-type"), {&Leaves.front()});
  const ForestNode &AsClass = Arena.createSequence(
      symbol("class-type"), ruleFor("class-type"), {&Shared});
  const ForestNode &AsEnum = Arena.createSequence(
      symbol("enum-type"), ruleFor("enum-type"), {&Shared});

  // `type` has two rules, so ruleFor() cannot be used; the rule IDs are the
  // ones annotated in the grammar text above.
  const ForestNode &ViaClass =
      Arena.createSequence(symbol("type"), /*RuleID=*/4, {&AsClass});
  const ForestNode &ViaEnum =
      Arena.createSequence(symbol("type"), /*RuleID=*/5, {&AsEnum});
  const ForestNode &Ambiguous =
      Arena.createAmbiguous(symbol("type"), {&ViaClass, &ViaEnum});

  // NOTE(review): the expected string is whitespace-sensitive; confirm the
  // column padding below matches what dumpRecursive() actually prints.
  EXPECT_EQ(Ambiguous.dumpRecursive(G),
            "[ 0, end) type := <ambiguous>\n"
            "[ 0, end) ├─type := class-type\n"
            "[ 0, end) │ └─class-type := shared-type\n"
            "[ 0, end) │ └─shared-type := IDENTIFIER #1\n"
            "[ 0, end) │ └─IDENTIFIER := tok[0]\n"
            "[ 0, end) └─type := enum-type\n"
            "[ 0, end) └─enum-type := shared-type\n"
            "[ 0, end) └─shared-type =#1\n");
}
|
|
|
|
// Checks that the abbreviated dump still shows a node that is shared between
// two alternatives of an ambiguous node.
TEST_F(ForestTest, DumpAbbreviatedShared) {
  build(R"cpp(
_ := A
A := B
B := *
)cpp");

  ForestArena Arena;
  const ForestNode &StarTok = Arena.createTerminal(tok::star, 0);
  const ForestNode &SharedB =
      Arena.createSequence(symbol("B"), ruleFor("B"), {&StarTok});

  // Two identical but distinct A nodes wrap the same B node. The GLR parser
  // would never produce this, but it keeps the example small.
  const SymbolID SymA = symbol("A");
  const RuleID RuleA = ruleFor("A");
  const ForestNode &FirstA = Arena.createSequence(SymA, RuleA, {&SharedB});
  const ForestNode &SecondA = Arena.createSequence(SymA, RuleA, {&SharedB});
  const ForestNode &AmbiguousA =
      Arena.createAmbiguous(symbol("A"), {&FirstA, &SecondA});

  // Shared nodes must not be abbreviated away: if the dump showed A~*, there
  // would be no way to show that the intermediate B node is shared between
  // the two A nodes.
  // NOTE(review): the expected string is whitespace-sensitive; confirm the
  // column padding below matches what dumpRecursive() actually prints.
  EXPECT_EQ(AmbiguousA.dumpRecursive(G, /*Abbreviate=*/true),
            "[ 0, end) A := <ambiguous>\n"
            "[ 0, end) ├─A~B := * #1\n"
            "[ 0, end) │ └─* := tok[0]\n"
            "[ 0, end) └─A~B =#1\n");
}
|
|
|
|
// Checks that descendants() yields each reachable node exactly once, even
// when a node is reachable through more than one parent.
TEST_F(ForestTest, Iteration) {
  // Forest under test:
  //   Z is ambiguous, with alternatives X and Y;
  //   X is a sequence of (A, B);
  //   Y is a sequence of (B), so B is shared by X and Y.
  ForestArena Arena;
  const ForestNode &A = Arena.createTerminal(tok::identifier, 0);
  const ForestNode &B = Arena.createOpaque(1, 0);
  const ForestNode &X = Arena.createSequence(2, 1, {&A, &B});
  const ForestNode &Y = Arena.createSequence(2, 2, {&B});
  const ForestNode &Z = Arena.createAmbiguous(2, {&X, &Y});

  // Gathers the address of every node that descendants() visits from Root.
  const auto Collect = [](const ForestNode &Root) {
    std::vector<const ForestNode *> Visited;
    for (const ForestNode &Node : Root.descendants())
      Visited.push_back(&Node);
    return Visited;
  };

  EXPECT_THAT(Collect(Z), testing::UnorderedElementsAre(&A, &B, &X, &Y, &Z));
  EXPECT_THAT(Collect(X), testing::UnorderedElementsAre(&X, &A, &B));
}
|
|
|
|
} // namespace
|
|
} // namespace pseudo
|
|
} // namespace clang
|