Start implementing error recovery; this implements test/Parser/recovery-1.c

llvm-svn: 38833
Chris Lattner 2006-08-06 21:55:29 +00:00
parent 944bde95ef
commit eec40f9990
4 changed files with 199 additions and 43 deletions

@@ -332,22 +332,14 @@ void Parser::ParseDirectDeclarator(Declarator &D) {
// direct-declarator: '(' attributes declarator ')' [TODO]
// Example: 'char (*X)' or 'int (*XX)(void)'
ParseParenDeclarator(D);
} else if (Tok.getKind() == tok::l_square &&
D.mayOmitIdentifier()) {
// direct-abstract-declarator[opt] '[' assignment-expression[opt] ']'
// direct-abstract-declarator[opt] '[' '*' ']'
// direct-abstract-declarator was not specified. Remember that this is the
// place where the identifier would have been.
D.SetIdentifier(0, Tok.getLocation());
// Don't consume the '[', handle it below.
} else if (D.mayOmitIdentifier()) {
// This could be something simple like "int" (in which case the declarator
// portion is empty), if an abstract-declarator is allowed.
D.SetIdentifier(0, Tok.getLocation());
} else {
// expected identifier or '(' or '['.
assert(0 && "ERROR: should recover!");
// Expected identifier or '('.
Diag(Tok, diag::err_expected_ident_lparen);
D.SetIdentifier(0, Tok.getLocation());
}
assert(D.isPastIdentifier() &&
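A hypothetical example of the kind of input this branch now handles (it is not claimed to be the contents of test/Parser/recovery-1.c, which are not shown here): instead of tripping the old assert, the parser reports err_expected_ident_lparen, records a null identifier so the declarator machinery can proceed, and leaves resynchronization to the callers.

    int 42;     /* "expected identifier or '('" is reported at '42' */
    int ok;     /* the intent is that later declarations are still parsed */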
@@ -389,6 +381,7 @@ void Parser::ParseDirectDeclarator(Declarator &D) {
/// identifier-list ',' identifier
///
void Parser::ParseParenDeclarator(Declarator &D) {
SourceLocation LParenLoc = Tok.getLocation();
ConsumeParen();
// If we haven't passed the identifier yet (or where the identifier would be
@@ -418,10 +411,14 @@ void Parser::ParseParenDeclarator(Declarator &D) {
// direct-declarator: '(' attributes declarator ')' [TODO]
if (isGrouping) {
ParseDeclarator(D);
// expected ')': skip until we find ')'.
if (Tok.getKind() != tok::r_paren)
assert(0 && "Recover!");
ConsumeParen();
if (Tok.getKind() == tok::r_paren) {
ConsumeParen();
} else {
// expected ')': skip until we find ')'.
Diag(Tok, diag::err_expected_rparen);
Diag(LParenLoc, diag::err_matching);
SkipUntil(tok::r_paren);
}
return;
}
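A hypothetical input for the grouping-paren path above: when the ')' never shows up, both halves of the paired diagnostic are emitted and SkipUntil then scans for a ')' to resynchronize on.

    char (*X;   /* "expected ')'" is reported at ';';
                   "to match" points back at the '(' saved in LParenLoc */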
@@ -520,6 +517,7 @@ void Parser::ParseParenDeclarator(Declarator &D) {
HasPrototype = true;
}
// FIXME: pop the scope.
// expected ')': skip until we find ')'.
if (Tok.getKind() != tok::r_paren)
@@ -535,7 +533,7 @@ void Parser::ParseParenDeclarator(Declarator &D) {
/// [C99] direct-declarator '[' type-qual-list[opt] '*' ']'
void Parser::ParseBracketDeclarator(Declarator &D) {
SourceLocation StartLoc = Tok.getLocation();
ConsumeSquare();
ConsumeBracket();
// If valid, this location is the position where we read the 'static' keyword.
SourceLocation StaticLoc;
@@ -570,15 +568,13 @@ void Parser::ParseBracketDeclarator(Declarator &D) {
Diag(StaticLoc, diag::err_unspecified_vla_size_with_static);
StaticLoc = SourceLocation(); // Drop the static.
isStar = true;
ConsumeToken();
} else {
// Otherwise, the * must have been some expression (such as '*ptr') that
// started an assign-expr. We already consumed the token, but now we need
// to reparse it.
// FIXME: There are two options here: first, we could push 'StarTok' and
// Tok back into the preprocessor as a macro expansion context, so they
// will be read again. Second, we could parse the rest of the assign-expr
// then apply the dereference.
// FIXME: We must push 'StarTok' and Tok back into the preprocessor as a
// macro expansion context, so they will be read again. It is basically
// impossible to handle the '*' any other way, due to cases like X[*p + 4].
assert(0 && "FIXME: int X[*p] unimplemented");
}
}
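The FIXME above exists because a '*' right after '[' is ambiguous until more tokens are seen. Two hypothetical prototypes showing the competing readings (both are C99):

    int f(int X[*]);               /* '*' alone: VLA of unspecified size */
    int g(int *p, int X[*p + 4]);  /* '*' starts the assignment-expression '*p + 4' */

Once the '*' and the token after it have already been consumed, pushing them back into the preprocessor is the only clean way to recover the second reading.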
@@ -588,7 +584,7 @@ void Parser::ParseBracketDeclarator(Declarator &D) {
assert(0 && "expr parsing not impl yet!");
}
ConsumeSquare();
ConsumeBracket();
// If C99 isn't enabled, emit an ext-warn if the arg list wasn't empty and if
// it was not a constant expression.

@@ -22,6 +22,8 @@ Parser::Parser(Preprocessor &pp, ParserActions &actions)
// Create the global scope, install it as the current scope.
CurScope = new Scope(0);
Tok.SetKind(tok::eof);
ParenCount = BracketCount = BraceCount = 0;
}
Parser::~Parser() {
@@ -34,6 +36,92 @@ void Parser::Diag(SourceLocation Loc, unsigned DiagID,
Diags.Report(Loc, DiagID, Msg);
}
//===----------------------------------------------------------------------===//
// Error recovery.
//===----------------------------------------------------------------------===//
/// SkipUntil - Read tokens until we get to the specified token, then consume
it (unless DontConsume is true). Because we cannot guarantee that the
/// token will ever occur, this skips to the next token, or to some likely
/// good stopping point. If StopAtSemi is true, skipping will stop at a ';'
/// character.
///
/// If SkipUntil finds the specified token, it returns true, otherwise it
/// returns false.
bool Parser::SkipUntil(tok::TokenKind T, bool StopAtSemi, bool DontConsume) {
while (1) {
// If we found the token, stop and return true.
if (Tok.getKind() == T) {
if (DontConsume) {
// Noop, don't consume the token.
} else if (isTokenParen()) {
ConsumeParen();
} else if (isTokenBracket()) {
ConsumeBracket();
} else if (isTokenBrace()) {
ConsumeBrace();
} else if (T == tok::string_literal) {
ConsumeStringToken();
} else {
ConsumeToken();
}
return true;
}
switch (Tok.getKind()) {
case tok::eof:
// Ran out of tokens.
return false;
case tok::l_paren:
// Recursively skip properly-nested parens.
ConsumeParen();
SkipUntil(tok::r_paren);
break;
case tok::l_square:
// Recursively skip properly-nested square brackets.
ConsumeBracket();
SkipUntil(tok::r_square);
break;
case tok::l_brace:
// Recursively skip properly-nested braces.
ConsumeBrace();
SkipUntil(tok::r_brace);
break;
// Okay, we found a ']' or '}' or ')', which we think should be balanced.
// Since the user wasn't looking for this token (if they were, it would
// already be handled), this isn't balanced. If there is a LHS token at a
// higher level, we will assume that this matches the unbalanced token
// and return it. Otherwise, this is a spurious RHS token, which we skip.
case tok::r_paren:
if (ParenCount) return false; // Matches something.
ConsumeParen();
break;
case tok::r_square:
if (BracketCount) return false; // Matches something.
ConsumeBracket();
break;
case tok::r_brace:
if (BraceCount) return false; // Matches something.
ConsumeBrace();
break;
case tok::string_literal:
ConsumeStringToken();
break;
case tok::semi:
if (StopAtSemi)
return false;
// FALL THROUGH.
default:
// Skip this token.
ConsumeToken();
break;
}
}
}
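A minimal usage sketch, modelled on the call sites added above in ParseParenDeclarator (the snippet itself is illustrative, not part of the commit):

    // Hypothetical recovery code in a parse method that expected ')'.
    if (Tok.getKind() != tok::r_paren) {
      Diag(Tok, diag::err_expected_rparen);
      Diag(LParenLoc, diag::err_matching);
      // Eat tokens up to and including a ')' if one is found; otherwise stop
      // at eof or at an unbalanced ']' or '}' that likely closes an enclosing
      // construct, so the caller above us gets a chance to recover.
      SkipUntil(tok::r_paren);
    }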
//===----------------------------------------------------------------------===//
// C99 6.9: External Definitions.
//===----------------------------------------------------------------------===//
@@ -140,9 +228,8 @@ void Parser::ParseDeclarationOrFunctionDefinition() {
ConsumeToken();
} else {
Diag(Tok, diag::err_parse_error);
// FIXME: skip to end of block or statement
while (Tok.getKind() != tok::semi && Tok.getKind() != tok::eof)
ConsumeToken();
// Skip to end of block or statement
SkipUntil(tok::r_brace, true);
if (Tok.getKind() == tok::semi)
ConsumeToken();
}
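A hypothetical top-level example of the effect: the bad declaration is skipped up to the ';' (StopAtSemi is true here), the ';' is consumed, and parsing resumes with the next external declaration.

    int x y;   /* "parse error" at 'y'; the rest of the declaration is skipped */
    int z;     /* parsing resumes here */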

@@ -255,8 +255,22 @@ DIAG(ext_ident_list_in_param, EXTENSION,
DIAG(ext_c99_array_usage, EXTENSION,
"use of c99-specific array features")
// Generic errors.
DIAG(err_parse_error, ERROR,
"parse error")
DIAG(err_expected_ident_lparen, ERROR,
"expected identifier or '('")
DIAG(err_expected_rparen, ERROR,
"expected ')'")
/// err_matching - this is used as a continuation of a previous error, e.g. to
/// specify the '(' when we expected a ')'. This should probably be some
/// special sort of diagnostic kind to indicate that it is the second half of
/// the previous diagnostic.
DIAG(err_matching, ERROR,
"to match")
DIAG(err_invalid_decl_spec_combination, ERROR,
"cannot combine with previous '%s' declaration specifier")
DIAG(err_invalid_sign_spec, ERROR,
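The err_matching entry above is really the second half of a two-part message, as its comment says. A hypothetical rendering of the pair emitted by ParseParenDeclarator when a ')' is missing (file name, line and column numbers invented for illustration):

    t.c:1:9: error: expected ')'
    t.c:1:6: error: to match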

@@ -33,6 +33,7 @@ class Parser {
ParserActions &Actions;
Diagnostic &Diags;
Scope *CurScope;
unsigned short ParenCount, BracketCount, BraceCount;
/// Tok - The current token we are peeking ahead at. All parsing methods assume
/// that this is valid.
@@ -61,36 +62,94 @@ public:
Diag(Tok, DiagID, Msg);
}
/// ConsumeToken - Consume the current 'peek token', lexing a new one and
/// returning the token kind. This does not work with all kinds of tokens:
/// strings and parens must be consumed with custom methods below.
/// isTokenParen - Return true if the cur token is '(' or ')'.
bool isTokenParen() const {
return Tok.getKind() == tok::l_paren || Tok.getKind() == tok::r_paren;
}
/// isTokenBracket - Return true if the cur token is '[' or ']'.
bool isTokenBracket() const {
return Tok.getKind() == tok::l_square || Tok.getKind() == tok::r_square;
}
/// isTokenBrace - Return true if the cur token is '{' or '}'.
bool isTokenBrace() const {
return Tok.getKind() == tok::l_brace || Tok.getKind() == tok::r_brace;
}
/// ConsumeToken - Consume the current 'peek token' and lex the next one.
/// This does not work with all kinds of tokens: strings and specific other
/// tokens must be consumed with custom methods below.
void ConsumeToken() {
// Note: update Parser::SkipUntil if any other special tokens are added.
assert(Tok.getKind() != tok::string_literal &&
Tok.getKind() != tok::l_paren &&
Tok.getKind() != tok::r_paren &&
Tok.getKind() != tok::l_square &&
Tok.getKind() != tok::r_square &&
!isTokenParen() && !isTokenBracket() && !isTokenBrace() &&
"Should consume special tokens with Consume*Token");
PP.Lex(Tok);
}
/// ConsumeParen - This consume method keeps the paren count up-to-date.
///
void ConsumeParen() {
assert((Tok.getKind() == tok::l_paren ||
Tok.getKind() == tok::r_paren) && "wrong consume method");
PP.Lex(Tok);
}
/// ConsumeSquare - This consume method keeps the bracket count up-to-date.
///
void ConsumeSquare() {
assert((Tok.getKind() == tok::l_square ||
Tok.getKind() == tok::r_square) && "wrong consume method");
assert(isTokenParen() && "wrong consume method");
if (Tok.getKind() == tok::l_paren)
++ParenCount;
else if (ParenCount)
--ParenCount; // Don't let unbalanced )'s drive the count negative.
PP.Lex(Tok);
}
/// ConsumeBracket - This consume method keeps the bracket count up-to-date.
///
void ConsumeBracket() {
assert(isTokenBracket() && "wrong consume method");
if (Tok.getKind() == tok::l_square)
++BracketCount;
else if (BracketCount)
--BracketCount; // Don't let unbalanced ]'s drive the count negative.
PP.Lex(Tok);
}
/// ConsumeBrace - This consume method keeps the brace count up-to-date.
///
void ConsumeBrace() {
assert(isTokenBrace() && "wrong consume method");
if (Tok.getKind() == tok::l_brace)
++BraceCount;
else if (BraceCount)
--BraceCount; // Don't let unbalanced }'s drive the count negative.
PP.Lex(Tok);
}
/// ConsumeStringToken - Consume the current string-literal 'peek token' and
/// lex a new one. This method is specific to strings, as it
/// handles string literal concatenation, as per C99 5.1.1.2, translation
/// phase #6.
void ConsumeStringToken() {
assert(Tok.getKind() == tok::string_literal &&
"Should only consume string literals with this method");
// Due to string literal concatenation, all consecutive string literals are
// a single token.
while (Tok.getKind() == tok::string_literal)
PP.Lex(Tok);
}
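ConsumeStringToken folds a run of adjacent string literals into one unit because of string literal concatenation (C99 5.1.1.2, translation phase 6). A small example of source where three literal tokens denote a single string:

    const char *s = "Hello, " "wor" "ld";   /* same as "Hello, world" */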
private:
//===--------------------------------------------------------------------===//
// Error recovery.
/// SkipUntil - Read tokens until we get to the specified token, then consume
/// it (unless DontConsume is true). Because we cannot guarantee that the
/// token will ever occur, this skips to the next token, or to some likely
/// good stopping point. If StopAtSemi is true, skipping will stop at a ';'
/// character.
///
/// If SkipUntil finds the specified token, it returns true, otherwise it
/// returns false.
bool SkipUntil(tok::TokenKind T, bool StopAtSemi = false,
bool DontConsume = false);
//===--------------------------------------------------------------------===//
// C99 6.9: External Definitions.
void ParseExternalDeclaration();