Implement initializer parsing.

llvm-svn: 38900
2025-04-26 02:16:07 +00:00 · 2006-08-13 21:54:02 +00:00 · 2006-08-13 21:54:02 +00:00 · 8693a519d4
commit 8693a519d4
parent 7bddb3fc61
7 changed files with 243 additions and 24 deletions
--- a/clang/Parse/ParseExpr.cpp
+++ b/clang/Parse/ParseExpr.cpp
@ -24,27 +24,6 @@
 using namespace llvm;
 using namespace clang;

-// C99 6.7.8
-Parser::ExprResult Parser::ParseInitializer() {
-  // FIXME: STUB.
-  if (Tok.getKind() == tok::l_brace) {
-    ConsumeBrace();
-    
-    if (Tok.getKind() == tok::numeric_constant)
-      ConsumeToken();
-    
-    // FIXME: initializer-list
-    // Match the '}'.
-    MatchRHSPunctuation(tok::r_brace, Tok.getLocation(), "{",
-                        diag::err_expected_rbrace);
-    return ExprResult(false);
-  }
-  
-  return ParseAssignmentExpression();
-}
-
-
-
 /// PrecedenceLevels - These are precedences for the binary/ternary operators in
 /// the C99 grammar.  These have been named to relate with the C99 grammar
 /// productions.  Low precedences numbers bind more weakly than high numbers.
@ -241,6 +220,36 @@ ParseExpressionWithLeadingIdentifier(const LexerToken &Tok) {
  return ParseRHSOfBinaryExpression(Res, prec::Comma);
 }

+/// ParseAssignmentExprWithLeadingIdentifier - This special purpose method is
+/// used in contexts where we have already consumed an identifier (which we
+/// saved in 'Tok'), then discovered that the identifier was really the leading
+/// token of an assignment-expression.  For example, in "A[1]+B", we consumed
+/// "A" (which is now in 'Tok') and the current token is "[".
+Parser::ExprResult Parser::
+ParseAssignmentExprWithLeadingIdentifier(const LexerToken &Tok) {
+  // We know that 'Tok' must correspond to this production:
+  //   primary-expression: identifier
+  
+  // TODO: Pass 'Tok' to the action; 'Tok' is currently unused.
+  ExprResult Res = ExprResult(false);
+  
+  // Because we have to parse an entire cast-expression before starting the
+  // ParseRHSOfBinaryExpression method (which parses any trailing binops), we
+  // need to handle the 'postfix-expression' rules.  We do this by invoking
+  // ParsePostfixExpressionSuffix to consume any postfix-expression suffixes:
+  Res = ParsePostfixExpressionSuffix(Res);
+  if (Res.isInvalid) return Res;
+  
+  // At this point, the "A[1]" part of "A[1]+B" has been consumed.  Nothing is
+  // needed for cast-expression: its only non-postfix-expression production
+  // starts with a '(' token, and we know we have an identifier.  So invoke
+  // ParseRHSOfBinaryExpression to consume any trailing operators (e.g. "+" in
+  // this example) and connected chunks of the expression, passing
+  // prec::Assignment as the minimum precedence.
+  return ParseRHSOfBinaryExpression(Res, prec::Assignment);
+}
+
+
 /// ParseAssignmentExpressionWithLeadingStar - This special purpose method is
 /// used in contexts where we have already consumed a '*' (which we saved in
 /// 'Tok'), then discovered that the '*' was really the leading token of an
--- a/clang/Parse/ParseInit.cpp
+++ b/clang/Parse/ParseInit.cpp
@ -0,0 +1,191 @@
+//===--- Initializer.cpp - Initializer Parsing ----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements initializer parsing as specified by C99 6.7.8.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Parse/Parser.h"
+#include "clang/Basic/Diagnostic.h"
+using namespace llvm;
+using namespace clang;
+
+
+/// MayBeDesignationStart - Conservative one-token check used by the
+/// initializer-list parser: return true if a token of kind K could begin a
+/// designation, false if it cannot.
+static bool MayBeDesignationStart(tok::TokenKind K) {
+  // designator: '.' identifier
+  if (K == tok::period) return true;
+  // designator: array-designator
+  if (K == tok::l_square) return true;
+  // designation: identifier ':'
+  return K == tok::identifier;
+}
+
+
+/// ParseInitializerWithPotentialDesignator - Parse an initializer-list element
+/// that may begin with a designation: any designators, then the initializer.
+///
+///       designation:
+///         designator-list '='
+/// [GNU]   array-designator
+/// [GNU]   identifier ':'
+///
+///       designator-list:
+///         designator
+///         designator-list designator
+///
+///       designator:
+///         array-designator
+///         '.' identifier
+///
+///       array-designator:
+///         '[' constant-expression ']'
+/// [GNU]   '[' constant-expression '...' constant-expression ']'
+///
+/// NOTE: [OBC] allows '[ objc-receiver objc-message-args ]' as an
+/// initializer.  We need to consider this case when parsing array designators.
+Parser::ExprResult Parser::ParseInitializerWithPotentialDesignator() {
+  // Parse each designator in the designator list until we find an initializer.
+  while (1) {
+    switch (Tok.getKind()) {
+    case tok::equal:
+      // We read one or more designators (the grammar we implement guarantees
+      // at least one) and found '='.  The initializer must come next.
+      ConsumeToken();
+      return ParseInitializer();
+
+    default: {
+      // We read one or more designators and found something that is neither
+      // '=' nor another designator.  If we have exactly one array designator
+      // [TODO CHECK], this is the GNU 'designation: array-designator'
+      // extension.  Otherwise, it is a parse error.
+      SourceLocation Loc = Tok.getLocation();  // Where the '=' is missing.
+      ExprResult Init = ParseInitializer();
+      if (Init.isInvalid) return Init;
+
+      // Report at the missing '=' rather than at the token that follows the
+      // already-parsed initializer.
+      Diag(Loc, diag::ext_gnu_missing_equal_designator);
+      return Init;
+    }
+    case tok::period:
+      // designator: '.' identifier
+      ConsumeToken();
+      if (ExpectAndConsume(tok::identifier, diag::err_expected_ident))
+        return ExprResult(true);
+      break;
+
+    case tok::l_square: {
+      // array-designator: '[' constant-expression ']'
+      // array-designator: '[' constant-expression '...' constant-expression ']'
+      SourceLocation StartLoc = Tok.getLocation();
+      ConsumeBracket();
+
+      ExprResult Idx = ParseConstantExpression();
+      if (Idx.isInvalid) {
+        SkipUntil(tok::r_square);
+        return Idx;
+      }
+
+      // Handle the gnu array range extension.
+      if (Tok.getKind() == tok::ellipsis) {
+        Diag(Tok, diag::ext_gnu_array_range);
+        ConsumeToken();
+
+        ExprResult RHS = ParseConstantExpression();
+        if (RHS.isInvalid) {
+          SkipUntil(tok::r_square);
+          return RHS;
+        }
+      }
+
+      MatchRHSPunctuation(tok::r_square, StartLoc, "[",
+                          diag::err_expected_rsquare);
+      break;
+    }
+    case tok::identifier: {
+      // Due to the GNU "designation: identifier ':'" extension, something
+      // starting with an identifier may be an assignment-expression or an
+      // old-style structure field designator; look one token ahead to decide.
+      // TODO: Check that this is the first designator.
+      LexerToken Ident = Tok;
+      ConsumeToken();
+
+      // If this is the gross GNU extension, handle it now.
+      if (Tok.getKind() == tok::colon) {
+        Diag(Ident, diag::ext_gnu_old_style_field_designator);
+        ConsumeToken();
+        return ParseInitializer();
+      }
+
+      // Otherwise the identifier began an expression; parse the rest of it.
+      return ParseAssignmentExprWithLeadingIdentifier(Ident);
+    }
+    }
+  }
+}
+
+
+/// ParseInitializer - Parse a single initializer: either a bare
+/// assignment-expression or a brace-enclosed, comma-separated initializer-list
+/// whose elements may carry designations and may themselves be braced.
+///
+///       initializer: [C99 6.7.8]
+///         assignment-expression
+///         '{' initializer-list '}'
+///         '{' initializer-list ',' '}'
+/// [GNU]   '{' '}'
+///
+///       initializer-list:
+///         designation[opt] initializer
+///         initializer-list ',' designation[opt] initializer
+///
+Parser::ExprResult Parser::ParseInitializer() {
+  // Anything that does not open with '{' is a plain assignment-expression.
+  if (Tok.getKind() != tok::l_brace)
+    return ParseAssignmentExpression();
+
+  SourceLocation OpenLoc = Tok.getLocation();
+  ConsumeBrace();
+
+  // '{' '}' is accepted, but flagged as a GNU extension (not C99-clean).
+  bool MoreElts = Tok.getKind() != tok::r_brace;
+  if (!MoreElts)
+    Diag(OpenLoc, diag::ext_gnu_empty_initializer);
+
+  while (MoreElts) {
+    // Parse: designation[opt] initializer.  Only take the designator path
+    // when the current token could actually begin a designation.
+    ExprResult Elt = MayBeDesignationStart(Tok.getKind())
+                         ? ParseInitializerWithPotentialDesignator()
+                         : ParseInitializer();
+
+    // A bad element aborts the whole list: resynchronize and propagate it.
+    if (Elt.isInvalid) {
+      SkipUntil(tok::r_brace);
+      return Elt;
+    }
+
+    // Keep going only on ", <element>"; a ',' right before '}' is a trailing
+    // comma and ends the list after being consumed.
+    MoreElts = Tok.getKind() == tok::comma;
+    if (MoreElts) {
+      ConsumeToken();
+      MoreElts = Tok.getKind() != tok::r_brace;
+    }
+  }
+
+  // Match the '}'.
+  MatchRHSPunctuation(tok::r_brace, OpenLoc, "{",
+                      diag::err_expected_rbrace);
+  return ExprResult(false);
+}
+
--- a/clang/Parse/ParseStmt.cpp
+++ b/clang/Parse/ParseStmt.cpp
@ -238,6 +238,8 @@ void Parser::ParseIdentifierStatement(bool OnlyStatement) {
 ///       labeled-statement:
 ///         'case' constant-expression ':' statement
 ///
+/// FIXME: Handle GNU case-range extension.
+///
 /// Note that this does not parse the 'statement' at the end.
 ///
 void Parser::ParseCaseStatement() {
--- a/clang/Parse/Parser.cpp
+++ b/clang/Parse/Parser.cpp
@ -299,7 +299,7 @@ void Parser::ParseFunctionDefinition(Declarator &D) {
    //if (isDeclarationSpecifier())
    //  Diag('k&r declspecs with prototype?');
    
-    // FIXME: Install the arguments into the current scope.
+    // TODO: Install the arguments into the current scope.
  }

  // We should have an opening brace now.
--- a/clang/clang.xcodeproj/project.pbxproj
+++ b/clang/clang.xcodeproj/project.pbxproj
@ -16,6 +16,7 @@
 		DE06D4300A8BB52D0050E87E /* DeclarationSemantics.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE06D42E0A8BB52D0050E87E /* DeclarationSemantics.cpp */; };
 		DE06D4310A8BB52D0050E87E /* Parser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE06D42F0A8BB52D0050E87E /* Parser.cpp */; };
 		DE06D4410A8BB55C0050E87E /* Declaration.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE06D4400A8BB55C0050E87E /* Declaration.cpp */; };
+		DE06E4D70A8FBF7A0050E87E /* Initializer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE06E4D60A8FBF7A0050E87E /* Initializer.cpp */; };
 		DE1F22030A7D852A00FBF588 /* Parser.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DE1F22020A7D852A00FBF588 /* Parser.h */; };
 		DE1F22200A7D879000FBF588 /* ParserActions.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DE1F221F0A7D879000FBF588 /* ParserActions.h */; };
 		DE1F24700A7DC99000FBF588 /* Actions.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE1F246D0A7DC99000FBF588 /* Actions.cpp */; };
@ -121,6 +122,7 @@
 		DE06D42E0A8BB52D0050E87E /* DeclarationSemantics.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = DeclarationSemantics.cpp; path = Parse/DeclarationSemantics.cpp; sourceTree = "<group>"; };
 		DE06D42F0A8BB52D0050E87E /* Parser.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = Parser.cpp; path = Parse/Parser.cpp; sourceTree = "<group>"; };
 		DE06D4400A8BB55C0050E87E /* Declaration.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = Declaration.cpp; path = Parse/Declaration.cpp; sourceTree = "<group>"; };
+		DE06E4D60A8FBF7A0050E87E /* Initializer.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = Initializer.cpp; path = Parse/Initializer.cpp; sourceTree = "<group>"; };
 		DE1F22020A7D852A00FBF588 /* Parser.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = Parser.h; path = clang/Parse/Parser.h; sourceTree = "<group>"; };
 		DE1F221F0A7D879000FBF588 /* ParserActions.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = ParserActions.h; path = clang/Parse/ParserActions.h; sourceTree = "<group>"; };
 		DE1F246D0A7DC99000FBF588 /* Actions.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = Actions.cpp; path = Parse/Actions.cpp; sourceTree = "<group>"; };
@ -231,6 +233,7 @@
 				DE06D4400A8BB55C0050E87E /* Declaration.cpp */,
 				DE06D42E0A8BB52D0050E87E /* DeclarationSemantics.cpp */,
 				DE06CEBF0A8AE7800050E87E /* Expression.cpp */,
+				DE06E4D60A8FBF7A0050E87E /* Initializer.cpp */,
 				DE06D42F0A8BB52D0050E87E /* Parser.cpp */,
 				DE06BEC80A854E390050E87E /* Scope.cpp */,
 				DE06CC170A899E110050E87E /* Statement.cpp */,
@ -390,6 +393,7 @@
 				DE06D4300A8BB52D0050E87E /* DeclarationSemantics.cpp in Sources */,
 				DE06D4310A8BB52D0050E87E /* Parser.cpp in Sources */,
 				DE06D4410A8BB55C0050E87E /* Declaration.cpp in Sources */,
+				DE06E4D70A8FBF7A0050E87E /* Initializer.cpp in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
--- a/clang/include/clang/Basic/DiagnosticKinds.def
+++ b/clang/include/clang/Basic/DiagnosticKinds.def
@ -274,7 +274,14 @@ DIAG(ext_gnu_statement_expr, EXTENSION,
     "use of GNU statement expression extension")
 DIAG(ext_gnu_conditional_expr, EXTENSION,
     "use of GNU ?: expression extension, eliding middle term")
-     
+DIAG(ext_gnu_empty_initializer, EXTENSION,
+     "use of GNU empty initializer extension")
+DIAG(ext_gnu_array_range, EXTENSION,
+     "use of GNU array range extension")
+DIAG(ext_gnu_missing_equal_designator, EXTENSION,
+     "use of GNU 'missing =' extension in designator")
+DIAG(ext_gnu_old_style_field_designator, EXTENSION,
+     "use of GNU old-style field designator extension")
 // Generic errors.
 DIAG(err_parse_error, ERROR,
     "parse error")
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@ -195,11 +195,13 @@ private:

  //===--------------------------------------------------------------------===//
  // C99 6.5: Expressions.
+  
  ExprResult ParseExpression();
  ExprResult ParseConstantExpression();
  ExprResult ParseAssignmentExpression();  // Expr that doesn't include commas.
  
  ExprResult ParseExpressionWithLeadingIdentifier(const LexerToken &Tok);
+  ExprResult ParseAssignmentExprWithLeadingIdentifier(const LexerToken &Tok);
  ExprResult ParseAssignmentExpressionWithLeadingStar(const LexerToken &Tok);

  ExprResult ParseRHSOfBinaryExpression(ExprResult LHS, unsigned MinPrec);
@ -218,10 +220,14 @@ private:
  ExprResult ParseParenExpression(ParenParseOption &ExprType);
  ExprResult ParseStringLiteralExpression();
  
-  ExprResult ParseInitializer();   // C99 6.7.8
+  //===--------------------------------------------------------------------===//
+  // C99 6.7.8: Initialization.
+  ExprResult ParseInitializer();
+  ExprResult ParseInitializerWithPotentialDesignator();
  
  //===--------------------------------------------------------------------===//
  // C99 6.8: Statements and Blocks.
+  
  void ParseStatement() { ParseStatementOrDeclaration(true); }
  void ParseStatementOrDeclaration(bool OnlyStatement = false);
  void ParseIdentifierStatement(bool OnlyStatement);