Start implementing error recovery; this implements test/Parser/recovery-1.c

llvm-svn: 38833
Chris Lattner 2006-08-06 21:55:29 +00:00
parent 944bde95ef
commit eec40f9990
4 changed files with 199 additions and 43 deletions

@@ -332,22 +332,14 @@ void Parser::ParseDirectDeclarator(Declarator &D) {
// direct-declarator: '(' attributes declarator ')' [TODO]
// Example: 'char (*X)' or 'int (*XX)(void)'
ParseParenDeclarator(D);
} else if (Tok.getKind() == tok::l_square &&
D.mayOmitIdentifier()) {
// direct-abstract-declarator[opt] '[' assignment-expression[opt] ']'
// direct-abstract-declarator[opt] '[' '*' ']'
// direct-abstract-declarator was not specified. Remember that this is the
// place where the identifier would have been.
D.SetIdentifier(0, Tok.getLocation());
// Don't consume the '[', handle it below.
} else if (D.mayOmitIdentifier()) {
// This could be something simple like "int" (in which case the declarator
// portion is empty), if an abstract-declarator is allowed.
D.SetIdentifier(0, Tok.getLocation());
} else {
// expected identifier or '(' or '['.
assert(0 && "ERROR: should recover!");
// Expected identifier or '('.
Diag(Tok, diag::err_expected_ident_lparen);
D.SetIdentifier(0, Tok.getLocation());
}
assert(D.isPastIdentifier() &&
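A hypothetical example of the kind of input this branch now handles (it is not claimed to be the contents of test/Parser/recovery-1.c, which are not shown here): instead of tripping the old assert, the parser reports err_expected_ident_lparen, records a null identifier so the declarator machinery can proceed, and leaves resynchronization to the callers.

    int 42;     /* "expected identifier or '('" is reported at '42' */
    int ok;     /* the intent is that later declarations are still parsed */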
@@ -389,6 +381,7 @@ void Parser::ParseDirectDeclarator(Declarator &D) {
/// identifier-list ',' identifier
///
void Parser::ParseParenDeclarator(Declarator &D) {
SourceLocation LParenLoc = Tok.getLocation();
ConsumeParen();
// If we haven't passed the identifier yet (or where the identifier would be
@@ -418,10 +411,14 @@ void Parser::ParseParenDeclarator(Declarator &D) {
// direct-declarator: '(' attributes declarator ')' [TODO]
if (isGrouping) {
ParseDeclarator(D);
// expected ')': skip until we find ')'.
if (Tok.getKind() != tok::r_paren)
assert(0 && "Recover!");
ConsumeParen();
if (Tok.getKind() == tok::r_paren) {
ConsumeParen();
} else {
// expected ')': skip until we find ')'.
Diag(Tok, diag::err_expected_rparen);
Diag(LParenLoc, diag::err_matching);
SkipUntil(tok::r_paren);
}
return;
}
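A hypothetical input for the grouping-paren path above: when the ')' never shows up, both halves of the paired diagnostic are emitted and SkipUntil then scans for a ')' to resynchronize on.

    char (*X;   /* "expected ')'" is reported at ';';
                   "to match" points back at the '(' saved in LParenLoc */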
@@ -520,6 +517,7 @@ void Parser::ParseParenDeclarator(Declarator &D) {
HasPrototype = true;
}
// FIXME: pop the scope.
// expected ')': skip until we find ')'.
if (Tok.getKind() != tok::r_paren)
@@ -535,7 +533,7 @@ void Parser::ParseParenDeclarator(Declarator &D) {
/// [C99] direct-declarator '[' type-qual-list[opt] '*' ']'
void Parser::ParseBracketDeclarator(Declarator &D) {
SourceLocation StartLoc = Tok.getLocation();
ConsumeSquare();
ConsumeBracket();
// If valid, this location is the position where we read the 'static' keyword.
SourceLocation StaticLoc;
@@ -570,15 +568,13 @@ void Parser::ParseBracketDeclarator(Declarator &D) {
Diag(StaticLoc, diag::err_unspecified_vla_size_with_static);
StaticLoc = SourceLocation(); // Drop the static.
isStar = true;
ConsumeToken();
} else {
// Otherwise, the * must have been some expression (such as '*ptr') that
// started an assign-expr. We already consumed the token, but now we need
// to reparse it.
// FIXME: There are two options here: first, we could push 'StarTok' and
// Tok back into the preprocessor as a macro expansion context, so they
// will be read again. Second, we could parse the rest of the assign-expr
// then apply the dereference.
// FIXME: We must push 'StarTok' and Tok back into the preprocessor as a
// macro expansion context, so they will be read again. It is basically
// impossible to handle the '*' any other way, due to cases like X[*p + 4].
assert(0 && "FIXME: int X[*p] unimplemented");
}
}
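The FIXME above exists because a '*' right after '[' is ambiguous until more tokens are seen. Two hypothetical prototypes showing the competing readings (both are C99):

    int f(int X[*]);               /* '*' alone: VLA of unspecified size */
    int g(int *p, int X[*p + 4]);  /* '*' starts the assignment-expression '*p + 4' */

Once the '*' and the token after it have already been consumed, pushing them back into the preprocessor is the only clean way to recover the second reading.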
@@ -588,7 +584,7 @@ void Parser::ParseBracketDeclarator(Declarator &D) {
assert(0 && "expr parsing not impl yet!");
}
ConsumeSquare();
ConsumeBracket();
// If C99 isn't enabled, emit an ext-warn if the arg list wasn't empty and if
// it was not a constant expression.

@@ -22,6 +22,8 @@ Parser::Parser(Preprocessor &pp, ParserActions &actions)
// Create the global scope, install it as the current scope.
CurScope = new Scope(0);
Tok.SetKind(tok::eof);
ParenCount = BracketCount = BraceCount = 0;
}
Parser::~Parser() {
@@ -34,6 +36,92 @@ void Parser::Diag(SourceLocation Loc, unsigned DiagID,
Diags.Report(Loc, DiagID, Msg);
}
//===----------------------------------------------------------------------===//
// Error recovery.
//===----------------------------------------------------------------------===//
/// SkipUntil - Read tokens until we get to the specified token, then consume
it (unless DontConsume is true). Because we cannot guarantee that the
/// token will ever occur, this skips to the next token, or to some likely
/// good stopping point. If StopAtSemi is true, skipping will stop at a ';'
/// character.
///
/// If SkipUntil finds the specified token, it returns true, otherwise it
/// returns false.
bool Parser::SkipUntil(tok::TokenKind T, bool StopAtSemi, bool DontConsume) {
while (1) {
// If we found the token, stop and return true.
if (Tok.getKind() == T) {
if (DontConsume) {
// Noop, don't consume the token.
} else if (isTokenParen()) {
ConsumeParen();
} else if (isTokenBracket()) {
ConsumeBracket();
} else if (isTokenBrace()) {
ConsumeBrace();
} else if (T == tok::string_literal) {
ConsumeStringToken();
} else {
ConsumeToken();
}
return true;
}
switch (Tok.getKind()) {
case tok::eof:
// Ran out of tokens.
return false;
case tok::l_paren:
// Recursively skip properly-nested parens.
ConsumeParen();
SkipUntil(tok::r_paren);
break;
case tok::l_square:
// Recursively skip properly-nested square brackets.
ConsumeBracket();
SkipUntil(tok::r_square);
break;
case tok::l_brace:
// Recursively skip properly-nested braces.
ConsumeBrace();
SkipUntil(tok::r_brace);
break;
// Okay, we found a ']' or '}' or ')', which we think should be balanced.
// Since the user wasn't looking for this token (if they were, it would
// already be handled), this isn't balanced. If there is a LHS token at a
// higher level, we will assume that this matches the unbalanced token
// and return it. Otherwise, this is a spurious RHS token, which we skip.
case tok::r_paren:
if (ParenCount) return false; // Matches something.
ConsumeParen();
break;
case tok::r_square:
if (BracketCount) return false; // Matches something.
ConsumeBracket();
break;
case tok::r_brace:
if (BraceCount) return false; // Matches something.
ConsumeBrace();
break;
case tok::string_literal:
ConsumeStringToken();
break;
case tok::semi:
if (StopAtSemi)
return false;
// FALL THROUGH.
default:
// Skip this token.
ConsumeToken();
break;
}
}
}
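A minimal usage sketch, modelled on the call sites added above in ParseParenDeclarator (the snippet itself is illustrative, not part of the commit):

    // Hypothetical recovery code in a parse method that expected ')'.
    if (Tok.getKind() != tok::r_paren) {
      Diag(Tok, diag::err_expected_rparen);
      Diag(LParenLoc, diag::err_matching);
      // Eat tokens up to and including a ')' if one is found; otherwise stop
      // at eof or at an unbalanced ']' or '}' that likely closes an enclosing
      // construct, so the caller above us gets a chance to recover.
      SkipUntil(tok::r_paren);
    }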
//===----------------------------------------------------------------------===//
// C99 6.9: External Definitions.
//===----------------------------------------------------------------------===//
@@ -140,9 +228,8 @@ void Parser::ParseDeclarationOrFunctionDefinition() {
ConsumeToken();
} else {
Diag(Tok, diag::err_parse_error);
// FIXME: skip to end of block or statement
while (Tok.getKind() != tok::semi && Tok.getKind() != tok::eof)
ConsumeToken();
// Skip to end of block or statement
SkipUntil(tok::r_brace, true);
if (Tok.getKind() == tok::semi)
ConsumeToken();
}
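A hypothetical top-level example of the effect: the bad declaration is skipped up to the ';' (StopAtSemi is true here), the ';' is consumed, and parsing resumes with the next external declaration.

    int x y;   /* "parse error" at 'y'; the rest of the declaration is skipped */
    int z;     /* parsing resumes here */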

@@ -255,8 +255,22 @@ DIAG(ext_ident_list_in_param, EXTENSION,
DIAG(ext_c99_array_usage, EXTENSION,
"use of c99-specific array features")
// Generic errors.
DIAG(err_parse_error, ERROR,
"parse error")
DIAG(err_expected_ident_lparen, ERROR,
"expected identifier or '('")
DIAG(err_expected_rparen, ERROR,
"expected ')'")
/// err_matching - this is used as a continuation of a previous error, e.g. to
/// specify the '(' when we expected a ')'. This should probably be some
/// special sort of diagnostic kind to indicate that it is the second half of
/// the previous diagnostic.
DIAG(err_matching, ERROR,
"to match")
DIAG(err_invalid_decl_spec_combination, ERROR,
"cannot combine with previous '%s' declaration specifier")
DIAG(err_invalid_sign_spec, ERROR,
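The err_matching entry above is really the second half of a two-part message, as its comment says. A hypothetical rendering of the pair emitted by ParseParenDeclarator when a ')' is missing (file name, line and column numbers invented for illustration):

    t.c:1:9: error: expected ')'
    t.c:1:6: error: to match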

@@ -33,6 +33,7 @@ class Parser {
ParserActions &Actions;
Diagnostic &Diags;
Scope *CurScope;
unsigned short ParenCount, BracketCount, BraceCount;
/// Tok - The current token we are peeking ahead at. All parsing methods assume
/// that this is valid.
@@ -61,36 +62,94 @@ public:
Diag(Tok, DiagID, Msg);
}
/// ConsumeToken - Consume the current 'peek token', lexing a new one and
/// returning the token kind. This does not work with all kinds of tokens:
/// strings and parens must be consumed with custom methods below.
/// isTokenParen - Return true if the cur token is '(' or ')'.
bool isTokenParen() const {
return Tok.getKind() == tok::l_paren || Tok.getKind() == tok::r_paren;
}
/// isTokenBracket - Return true if the cur token is '[' or ']'.
bool isTokenBracket() const {
return Tok.getKind() == tok::l_square || Tok.getKind() == tok::r_square;
}
/// isTokenBrace - Return true if the cur token is '{' or '}'.
bool isTokenBrace() const {
return Tok.getKind() == tok::l_brace || Tok.getKind() == tok::r_brace;
}
/// ConsumeToken - Consume the current 'peek token' and lex the next one.
/// This does not work with all kinds of tokens: strings and specific other
/// tokens must be consumed with custom methods below.
void ConsumeToken() {
// Note: update Parser::SkipUntil if any other special tokens are added.
assert(Tok.getKind() != tok::string_literal &&
Tok.getKind() != tok::l_paren &&
Tok.getKind() != tok::r_paren &&
Tok.getKind() != tok::l_square &&
Tok.getKind() != tok::r_square &&
!isTokenParen() && !isTokenBracket() && !isTokenBrace() &&
"Should consume special tokens with Consume*Token");
PP.Lex(Tok);
}
/// ConsumeParen - This consume method keeps the paren count up-to-date.
///
void ConsumeParen() {
assert((Tok.getKind() == tok::l_paren ||
Tok.getKind() == tok::r_paren) && "wrong consume method");
PP.Lex(Tok);
}
/// ConsumeSquare - This consume method keeps the bracket count up-to-date.
///
void ConsumeSquare() {
assert((Tok.getKind() == tok::l_square ||
Tok.getKind() == tok::r_square) && "wrong consume method");
assert(isTokenParen() && "wrong consume method");
if (Tok.getKind() == tok::l_paren)
++ParenCount;
else if (ParenCount)
--ParenCount; // Don't let unbalanced )'s drive the count negative.
PP.Lex(Tok);
}
/// ConsumeBracket - This consume method keeps the bracket count up-to-date.
///
void ConsumeBracket() {
assert(isTokenBracket() && "wrong consume method");
if (Tok.getKind() == tok::l_square)
++BracketCount;
else if (BracketCount)
--BracketCount; // Don't let unbalanced ]'s drive the count negative.
PP.Lex(Tok);
}
/// ConsumeBrace - This consume method keeps the brace count up-to-date.
///
void ConsumeBrace() {
assert(isTokenBrace() && "wrong consume method");
if (Tok.getKind() == tok::l_brace)
++BraceCount;
else if (BraceCount)
--BraceCount; // Don't let unbalanced }'s drive the count negative.
PP.Lex(Tok);
}
/// ConsumeStringToken - Consume the current string-literal 'peek token' and
/// lex a new one. This method is specific to strings, as it
/// handles string literal concatenation, as per C99 5.1.1.2, translation
/// phase #6.
void ConsumeStringToken() {
assert(Tok.getKind() == tok::string_literal &&
"Should only consume string literals with this method");
// Due to string literal concatenation, all consecutive string literals are
// a single token.
while (Tok.getKind() == tok::string_literal)
PP.Lex(Tok);
}
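ConsumeStringToken folds a run of adjacent string literals into one unit because of string literal concatenation (C99 5.1.1.2, translation phase 6). A small example of source where three literal tokens denote a single string:

    const char *s = "Hello, " "wor" "ld";   /* same as "Hello, world" */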
private:
//===--------------------------------------------------------------------===//
// Error recovery.
/// SkipUntil - Read tokens until we get to the specified token, then consume
/// it (unless DontConsume is true). Because we cannot guarantee that the
/// token will ever occur, this skips to the next token, or to some likely
/// good stopping point. If StopAtSemi is true, skipping will stop at a ';'
/// character.
///
/// If SkipUntil finds the specified token, it returns true, otherwise it
/// returns false.
bool SkipUntil(tok::TokenKind T, bool StopAtSemi = false,
bool DontConsume = false);
//===--------------------------------------------------------------------===//
// C99 6.9: External Definitions.
void ParseExternalDeclaration();