[ELF] Replace inExpr with lexState. NFC

We may add another state, State::Wild, to behave more like GNU ld.
This commit is contained in:
Fangrui Song 2025-02-01 15:49:08 -08:00
parent 14776c6d13
commit 5c3c0a8cec
3 changed files with 31 additions and 24 deletions

View File

@ -105,7 +105,7 @@ void ScriptLexer::lex() {
curBuf = buffers.pop_back_val();
continue;
}
curTokState = inExpr;
curTokState = lexState;
// Quoted token. Note that double-quote characters are parts of a token
// because, in a glob match context, only unquoted tokens are interpreted
@ -142,7 +142,13 @@ void ScriptLexer::lex() {
// C-like languages, so that you can write "file-name.cpp" as one bare
// token.
size_t pos;
if (inExpr) {
switch (lexState) {
case State::Script:
pos = s.find_first_not_of(
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
"0123456789_.$/\\~=+[]*?-!^:");
break;
case State::Expr:
pos = s.find_first_not_of(
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
"0123456789_.$");
@ -150,10 +156,7 @@ void ScriptLexer::lex() {
((s[0] == s[1] && strchr("<>&|", s[0])) ||
is_contained({"==", "!=", "<=", ">=", "<<", ">>"}, s.substr(0, 2))))
pos = 2;
} else {
pos = s.find_first_not_of(
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
"0123456789_.$/\\~=+[]*?-!^:");
break;
}
if (pos == 0)
@ -208,8 +211,8 @@ StringRef ScriptLexer::next() {
}
StringRef ScriptLexer::peek() {
// curTok is invalid if curTokState and inExpr mismatch.
if (curTok.size() && curTokState != inExpr) {
// curTok is invalid if curTokState and lexState mismatch.
if (curTok.size() && curTokState != lexState) {
curBuf.s = StringRef(curTok.data(), curBuf.s.end() - curTok.data());
curTok = {};
}

View File

@ -41,6 +41,11 @@ protected:
// Used to detect INCLUDE() cycles.
llvm::DenseSet<StringRef> activeFilenames;
enum class State {
Script,
Expr,
};
struct Token {
StringRef str;
explicit operator bool() const { return !str.empty(); }
@ -54,8 +59,9 @@ protected:
// expression state changes.
StringRef curTok;
size_t prevTokLine = 1;
// The inExpr state when curTok is cached.
bool curTokState = false;
// The lex state when curTok is cached.
State curTokState = State::Script;
State lexState = State::Script;
bool eof = false;
public:
@ -75,7 +81,6 @@ public:
MemoryBufferRef getCurrentMB();
std::vector<MemoryBufferRef> mbs;
bool inExpr = false;
private:
StringRef getLine();

View File

@ -289,7 +289,7 @@ void ScriptParser::readLinkerScript() {
void ScriptParser::readDefsym() {
if (errCount(ctx))
return;
inExpr = true;
SaveAndRestore saved(lexState, State::Expr);
StringRef name = readName();
expect("=");
Expr e = readExpr();
@ -954,8 +954,8 @@ bool ScriptParser::readSectionDirective(OutputSection *cmd, StringRef tok) {
// https://sourceware.org/binutils/docs/ld/Output-Section-Type.html
void ScriptParser::readSectionAddressType(OutputSection *cmd) {
if (consume("(")) {
// Temporarily set inExpr to support TYPE=<value> without spaces.
SaveAndRestore saved(inExpr, true);
// Temporarily set lexState to support TYPE=<value> without spaces.
SaveAndRestore saved(lexState, State::Expr);
if (readSectionDirective(cmd, peek()))
return;
cmd->addrExpr = readExpr();
@ -965,7 +965,7 @@ void ScriptParser::readSectionAddressType(OutputSection *cmd) {
}
if (consume("(")) {
SaveAndRestore saved(inExpr, true);
SaveAndRestore saved(lexState, State::Expr);
StringRef tok = peek();
if (!readSectionDirective(cmd, tok))
setError("unknown section directive: " + tok);
@ -1087,10 +1087,10 @@ OutputDesc *ScriptParser::readOutputSectionDescription(StringRef outSec) {
osec->phdrs = readOutputSectionPhdrs();
if (peek() == "=" || peek().starts_with("=")) {
inExpr = true;
lexState = State::Expr;
consume("=");
osec->filler = readFill();
inExpr = false;
lexState = State::Script;
}
// Consume optional comma following output section command.
@ -1162,7 +1162,7 @@ SymbolAssignment *ScriptParser::readAssignment(StringRef tok) {
bool savedSeenRelroEnd = ctx.script->seenRelroEnd;
const StringRef op = peek();
{
SaveAndRestore saved(inExpr, true);
SaveAndRestore saved(lexState, State::Expr);
if (op.starts_with("=")) {
// Support = followed by an expression without whitespace.
cmd = readSymbolAssignment(unquote(tok));
@ -1235,7 +1235,7 @@ SymbolAssignment *ScriptParser::readSymbolAssignment(StringRef name) {
Expr ScriptParser::readExpr() {
// Our lexer is context-aware. Set the in-expression bit so that
// they apply different tokenization rules.
SaveAndRestore saved(inExpr, true);
SaveAndRestore saved(lexState, State::Expr);
Expr e = readExpr1(readPrimary(), 0);
return e;
}
@ -1452,12 +1452,11 @@ std::pair<uint64_t, uint64_t> ScriptParser::readInputSectionFlags() {
StringRef ScriptParser::readParenName() {
expect("(");
bool orig = inExpr;
inExpr = false;
StringRef tok = readName();
inExpr = orig;
auto saved = std::exchange(lexState, State::Script);
StringRef name = readName();
lexState = saved;
expect(")");
return tok;
return name;
}
static void checkIfExists(LinkerScript &script, const OutputSection &osec,