/*
 * PUBLIC DOMAIN PCCTS-BASED C++ GRAMMAR (cplusplus.g, stat.g, expr.g)
 *
 * Authors: Sumana Srinivasan, NeXT Inc.;            sumana_srinivasan@next.com
 *          Terence Parr, Parr Research Corporation; parrt@parr-research.com
 *          Russell Quong, Purdue University;        quong@ecn.purdue.edu
 *
 * VERSION 1.2
 *
 * SOFTWARE RIGHTS
 *
 * This file is a part of the ANTLR-based C++ grammar and is free
 * software. We do not reserve any LEGAL rights to its use or
 * distribution, but you may NOT claim ownership or authorship of this
 * grammar or support code. An individual or company may otherwise do
 * whatever they wish with the grammar distributed herewith including the
 * incorporation of the grammar or the output generated by ANTLR into
 * commercial software. You may redistribute in source or binary form
 * without payment of royalties to us as long as this header remains
 * in all source distributions.
 *
 * We encourage users to develop parsers/tools using this grammar.
 * In return, we ask that credit is given to us for developing this
 * grammar. By "credit", we mean that if you incorporate our grammar or
 * the generated code into one of your programs (commercial product,
 * research project, or otherwise) that you acknowledge this fact in the
 * documentation, research report, etc.... In addition, you should say nice
 * things about us at every opportunity.
 *
 * As long as these guidelines are kept, we expect to continue enhancing
 * this grammar. Feel free to send us enhancements, fixes, bug reports,
 * suggestions, or general words of encouragement at parrt@parr-research.com.
 *
 * NeXT Computer Inc.
 * 900 Chesapeake Dr.
 * Redwood City, CA 94555
 * 12/02/1994
 *
 * Restructured for public consumption by Terence Parr late February, 1995.
 *
 * DISCLAIMER: we make no guarantees that this grammar works, makes sense,
 * or can be used to do anything useful.
*/
/* 2001-2002
 * Version 1.0
 * This C++ grammar file has been converted from PCCTS to run under
 * ANTLR to generate lexer and parser in C++ code by
 * Jianguo Zuo and David Wigg at
 * The Centre for Systems and Software Engineering
 * London South Bank University
 * London, UK.
 *
 */
/* 2003
 * Version 2.0 was published by David Wigg in September 2003
 */
/* 2004
 * Version 3.0 July 2004
 * This is version 3.0 of the C++ grammar definition for ANTLR to
 * generate lexer and parser in C++ code updated by
 * David Wigg at
 * The Centre for Systems and Software Engineering
 * London South Bank University
 * London, UK.
 *
 * wiggjd@bcs.ac.uk
 * blackse@lsbu.ac.uk
 *
 * See MyReadMe.txt for further information
 *
 * This file is best viewed in courier font with tabs set to 4 spaces
 */

header
    {
    // The statements in this block appear in both CPPLexer.hpp and CPPParser.hpp
#include "antlr/CharScanner.hpp"
#include "CPPDictionary.hpp"
#include "LineObject.hpp"   // Query use. See LineObject.cpp
#include "var_types.hpp"    // Do not use. Should be deleted in the next version
    extern void process_line_directive(const char *, const char *);
    }

options
    {
    language = "Cpp";
    }

{
// The statements in this block appear only in CPPParser.cpp and not in CPPLexer.cpp

// DO NOT USE THESE CONSTANTS - THEY WILL BE DELETED IN THE NEXT VERSION
int id_type = ID_VAR_NAME;  // variable type: ID_FUN_NAME, ID_VAR_NAME, ID_INPUT_PARAMETER,
                            // ID_CLASS_DEF, ID_SYSTEM_FUNCTION, ID_CONST_DECL,
                            // ID_TYPEDEF_VAR

int statementTrace = 0;     // Used to control selected (level) tracing (see support.cpp)
                            // 1 Shows which external and member statements selected
                            // 2 Shows above plus all declarations/definitions
                            // 3 reserved for future use
                            // 4 and above available for user

// Prepares the parser state: creates the symbol dictionary, records the
// template-parameter and external scope indices, pre-defines the scope "std"
// and resets all declaration flags and parsing-state members.
void CPPParser::init()
    {
    //antlrTrace(false);    // This is a dynamic trace facility for use with -traceParser etc.
                            // It requires modification in LLkParser.cpp and LLkParser.hpp
                            // otherwise it should be commented out (see MyReadMe.txt)
                            // true shows antlr trace (can be set and reset during parsing)
                            // false stops showing antlr trace
                            // Provided the parser is always generated with -traceParser this
                            // facility allows trace output to be turned on or off by changing
                            // the setting here from false to true or vice versa and then
                            // recompiling and linking CPPParser only thus avoiding the need
                            // to use antlr.Tool to re-generate the lexer and parser again.

    // Creates a dictionary to hold symbols with 4001 buckets, 200 scopes and 800,000 characters
    // These can be changed to suit the size of program(s) being parsed
    symbols = new CPPDictionary(4001, 200, 800000);

    // Set template parameter and external scopes
    templateParameterScope = symbols->getCurrentScopeIndex();   // Set template parameter scope to 0

    symbols->saveScope();   // Advance currentScope from 0 to 1
    externalScope = symbols->getCurrentScopeIndex();    // Set external scope to 1

    // Declare predefined scope "std" in external scope
    CPPSymbol *a = new CPPSymbol("std", CPPSymbol::otTypedef);
    symbols->define("std", a);

    // Global flags to allow for nested declarations
    _td = false;        // For typedef
    _fd = false;        // For friend
    _sc = scInvalid;    // For StorageClass
    _tq = tqInvalid;    // For TypeQualifier
    _ts = tsInvalid;    // For TypeSpecifier
    _ds = dsInvalid;    // For DeclSpecifier

    functionDefinition = 0;
    qualifierPrefix[0] = '\0';
    enclosingClass = "";
    assign_stmt_RHS_found = 0;
    in_parameter_list = false;
    K_and_R = false;    // used to distinguish old K & R parameter definitions
    in_return = false;
    is_address = false;
    is_pointer = false;
    }

}   // End of CPPParser.cpp block

class CPPParser extends Parser;

options
    {
    k = 2;
    exportVocab = STDC;
    buildAST =false;
    codeGenMakeSwitchThreshold = 2;
    codeGenBitsetTestThreshold = 3;
    }

{
public:
#define CPPParser_MaxQualifiedItemSize 500

// can't bitwise-OR enum elements together, this must be an int; damn!
typedef unsigned long TypeSpecifier;   // note: must be > 16bits
// Bit flags for TypeSpecifier; simple_type_specifier ORs several of them together.
#define tsInvalid   0x0
#define tsVOID      0x1
#define tsCHAR      0x2
#define tsSHORT     0x4
#define tsINT       0x8
#define tsLONG      0x10
#define tsFLOAT     0x20
#define tsDOUBLE    0x40
#define tsSIGNED    0x80
#define tsUNSIGNED  0x100
#define tsTYPEID    0x200
#define tsSTRUCT    0x400
#define tsENUM      0x800
#define tsUNION     0x1000
#define tsCLASS     0x2000
#define tsWCHAR_T   0x4000
#define tsBOOL      0x8000

enum TypeQualifier
    {
    tqInvalid=0, tqCONST=1, tqVOLATILE, tqCDECL
    };

enum StorageClass
    {
    scInvalid=0, scAUTO=1, scREGISTER,
    scSTATIC, scEXTERN, scMUTABLE
    };

enum DeclSpecifier
    {
    dsInvalid=0,
    dsVIRTUAL, dsINLINE, dsEXPLICIT, dsFRIEND
    };

// JEL 3/26/96 changed to allow ORing of values
typedef int QualifiedItem;
// Bit flags for QualifiedItem; callers pass ORed combinations such as qiType|qiCtor.
#define qiInvalid     0x0
#define qiType        0x1   // includes enum, class, typedefs, namespace
#define qiDtor        0x2
#define qiCtor        0x4
#define qiOperator    0x8
#define qiPtrMember   0x10
#define qiVar         0x20
#define qiFun         0x40

protected:
// Symbol table management stuff
CPPDictionary *symbols;
int templateParameterScope;
int externalScope;

bool _td;           // For typedef
bool _fd;           // For friend
StorageClass _sc;   // For storage class
TypeQualifier _tq;  // For type qualifier
TypeSpecifier _ts;  // For type specifier
DeclSpecifier _ds;  // For declaration specifier

int functionDefinition; // 0 = Function definition not being parsed
                        // 1 = Parsing function name
                        // 2 = Parsing function parameter list
                        // 3 = Parsing function block

char qualifierPrefix[CPPParser_MaxQualifiedItemSize+1];
char *enclosingClass;
int assign_stmt_RHS_found;
bool in_parameter_list; // DW 13/02/04 used within CPP_parser
bool K_and_R;           // used to distinguish old K & R parameter definitions
bool in_return;
bool is_address;
bool is_pointer;

// Limit lookahead for qualifiedItemIs()
enum
    {
    MaxTemplateTokenScan = 200
    };

public:
void init();

protected:
// Semantic interface; You could subclass and redefine these functions
// so you don't have to mess with the grammar itself.

// Symbol stuff
virtual int qualifiedItemIsOneOf(QualifiedItem qiFlags, int lookahead_offset=0);
virtual QualifiedItem qualifiedItemIs(int lookahead_offset=0);
virtual int skipTemplateQualifiers(int& kInOut);
virtual int skipNestedParens(int& kInOut);
virtual int scopedItem(int k=1);
virtual int finalQualifier(const int k=1);
virtual int isTypeName(const char *s);
virtual int isClassName(const char *s);
virtual void end_of_stmt();

// Scoping stuff
virtual void enterNewLocalScope();
virtual void exitLocalScope();
virtual void enterExternalScope();
virtual void exitExternalScope();

// Aggregate stuff
virtual void classForwardDeclaration(TypeSpecifier, DeclSpecifier,const char *);
virtual void beginClassDefinition(TypeSpecifier,const char *);
virtual void endClassDefinition();
virtual void beginEnumDefinition(const char *);
virtual void endEnumDefinition();
virtual void enumElement(const char *);

// Declaration and definition stuff
virtual void declarationSpecifier(bool,bool,StorageClass,TypeQualifier,TypeSpecifier,DeclSpecifier);
virtual void beginDeclaration();
virtual void endDeclaration();
virtual void beginConstructorDeclaration(const char *);
virtual void endConstructorDeclaration();
virtual void beginDestructorDeclaration(const char *);
virtual void endDestructorDeclaration();
virtual void beginParameterDeclaration();
virtual void beginFieldDeclaration();
virtual void beginFunctionDefinition();
virtual void endFunctionDefinition();
virtual void functionParameterList();
virtual void functionEndParameterList(const int def);
virtual void beginConstructorDefinition();
virtual void endConstructorDefinition();
virtual void beginDestructorDefinition();
virtual void endDestructorDefinition();

// Declarator stuff
virtual void declaratorID(const char *, QualifiedItem); // This stores new symbol with its type.
virtual void declaratorArray();
virtual void declaratorParameterList(const int def);
virtual void declaratorEndParameterList(const int def);

// template stuff
virtual void templateTypeParameter(const char *);
virtual void beginTemplateDeclaration();
virtual void endTemplateDeclaration();
virtual void beginTemplateDefinition();
virtual void endTemplateDefinition();
virtual void beginTemplateParameterList();
virtual void endTemplateParameterList();

// exception stuff
virtual void exceptionBeginHandler();
virtual void exceptionEndHandler();
virtual void panic(const char *);

// myCode functions ready for overriding in MyCode subclass
virtual void myCode_pre_processing(int, char *[]);
virtual void myCode_post_processing();
virtual void myCode_end_of_stmt();
virtual void myCode_function_direct_declarator(const char *);
}

// One or more external declarations followed by EOF, parsed between
// enterExternalScope() and exitExternalScope().
translation_unit
    :   {enterExternalScope();}
        (external_declaration)+ EOF
        {exitExternalScope();}
    ;

// A single top-level declaration or definition. The init-action resets K_and_R
// for every declaration; each alternative prints its own label when
// statementTrace>=1.
external_declaration
    {char *s; K_and_R = false;}
    :
    (
        // Template explicit specialisation (DW 14/04/03)
        ("template" LESSTHAN GREATERTHAN)=>
        {if(statementTrace>=1)
            printf("%d external_declaration_0 template explicit-specialisation\n",LT(1)->getLine());
        }
        "template" LESSTHAN GREATERTHAN declaration
    |
        // Class definition (templates too)
        // This is separated out otherwise the next alternative
        // would look for "class A { ... } f() {...}" which is
        // an unacceptable level of backtracking.

        // JEL Note:  Rule body does not need typedef, because
        // that is internal to "declaration", and it is invalid
        // to say "typedef template..."

        // Class definition
        (("typedef")? class_head)=>
        {if (statementTrace>=1)
            printf("%d external_declaration_1a Class definition\n",LT(1)->getLine());
        }
        declaration
    |
        // Class template definition
        (template_head class_head)=>
        {if (statementTrace>=1)
            printf("%d external_declaration_1b Class template definition\n",LT(1)->getLine());
        }
        template_head declaration
    |
        // Enum definition (don't want to backtrack over this in other alts)
        ("enum" (ID)? LCURLY)=>
        {if (statementTrace>=1)
            printf("%d external_declaration_2 Enum definition\n",LT(1)->getLine());
        }
        enum_specifier (init_declarator_list)? SEMICOLON {end_of_stmt();}
    |
        // Destructor DEFINITION (templated or non-templated)
        ((template_head)? dtor_head[1] LCURLY)=>
        {if (statementTrace>=1)
            printf("%d external_declaration_3 Destructor definition\n",LT(1)->getLine());
        }
        (template_head)? dtor_head[1] dtor_body
    |
        // Constructor DEFINITION (non-templated)
        // JEL 4/3/96 Added predicate that works, once the
        // restriction is added that ctor cannot be virtual
        // and ctor_declarator uses a more restrictive id
        (   (options {warnWhenFollowAmbig = false;}:
             ctor_decl_spec)?
            {qualifiedItemIsOneOf(qiCtor)}?
        )=>
        {if (statementTrace>=1)
            printf("%d external_declaration_4 Constructor definition\n",LT(1)->getLine());
        }
        ctor_definition
    |
        // User-defined type cast
        (("inline")? scope_override conversion_function_decl_or_def)=>
        {if (statementTrace>=1)
            printf("%d external_declaration_5 Operator function\n",LT(1)->getLine());
        }
        ("inline")? s = scope_override conversion_function_decl_or_def
    |
        // Function declaration
        (declaration_specifiers function_declarator[0] SEMICOLON)=>
        {if (statementTrace>=1)
            printf("%d external_declaration_6 Function declaration\n",LT(1)->getLine());
        }
        declaration
    |
        // Function definition
        (declaration_specifiers function_declarator[1] LCURLY)=>
        {if (statementTrace>=1)
            printf("%d external_declaration_7 Function definition\n",LT(1)->getLine());
        }
        function_definition
    |
        // K & R Function definition
        (declaration_specifiers function_declarator[1] declaration)=>
        {K_and_R = true;
         if (statementTrace>=1)
            printf("%d external_declaration_8 K & R function definition\n",LT(1)->getLine());
        }
        function_definition
    |
        // templated forward class decl, init/decl of static member in template
        // NOTE(review): the action inside this predicate has no counterpart in the
        // matching predicate of member_declaration_12 - confirm it is intended.
        (template_head declaration_specifiers
            (init_declarator_list)? SEMICOLON {end_of_stmt();})=>
        {beginTemplateDeclaration();
         if (statementTrace>=1)
            printf("%d external_declaration_9 Class template declaration\n",LT(1)->getLine());
        }
        template_head declaration_specifiers
            (init_declarator_list)? SEMICOLON {end_of_stmt();}
        {endTemplateDeclaration();}
    |
        // Templated FUNCTIONS and CONSTRUCTORS matched here.
        {beginTemplateDefinition();}
        template_head
        (
            // Templated CONSTRUCTOR definition
            // JEL 4/3/96 Added predicate that works once the
            // restriction is added that ctor cannot be virtual
            (   ctor_decl_spec
                {qualifiedItemIsOneOf(qiCtor)}?
            )=>
            {if (statementTrace>=1)
                printf("%d external_declaration_10a Template constructor definition\n",LT(1)->getLine());
            }
            ctor_definition
        |
            // Templated function declaration
            (declaration_specifiers function_declarator[0] SEMICOLON)=>
            {if (statementTrace>=1)
                printf("%d external_declaration_10b Function template declaration\n",LT(1)->getLine());
            }
            declaration
        |
            // Templated function definition
            (declaration_specifiers function_declarator[1] LCURLY)=>
            {if (statementTrace>=1)
                printf("%d external_declaration_10c Function template definition\n",LT(1)->getLine());
            }
            function_definition
        )
        {endTemplateDefinition();}
    |
        {if (statementTrace>=1)
            printf("%d external_declaration_11 Namespace declaration\n",LT(1)->getLine());
        }
        decl_namespace
    |
        // everything else (except templates)
        {if (statementTrace>=1)
            printf("%d external_declaration_12 Declaration\n",LT(1)->getLine());
        }
        declaration
    |
        {if (statementTrace>=1)
            printf("%d external_declaration_13 Semicolon\n",LT(1)->getLine());
        }
        SEMICOLON {end_of_stmt();}
    )
    ;   // end of external_declaration

// A namespace definition (optionally named, with a brace-enclosed body) or a
// namespace alias "ID = qualified_id ;". Namespace names are recorded through
// declaratorID(..., qiType) with _td set.
decl_namespace
    {char *qid;}
    :
        "namespace"
        (
            (ns:ID{_td = true;declaratorID((ns->getText()).data(),qiType);})?
// The following statement can be invoked to trigger selective antlr trace
            // Also see below
            //{if (strcmp((ns->getText()).data(),"xyz")==0) antlrTrace(true);}  // Used for diagnostic trigger
            LCURLY
            {enterNewLocalScope();}
            (external_declaration)*
            {exitLocalScope();}
            RCURLY
            // The following should be implemented to match the optional statement above
            //{antlrTrace(false);}
        |
            ns2:ID{_td = true;declaratorID((ns2->getText()).data(),qiType);}
            ASSIGNEQUAL qid = qualified_id SEMICOLON {end_of_stmt();}
        )
    ;

// A single member of a class, struct or union body (class_specifier matches
// zero or more of these between LCURLY and RCURLY). Each alternative prints
// its own label when statementTrace>=1.
member_declaration
    {char *q;}
    :
    (
        // Class definition
        // This is separated out otherwise the next alternative
        // would look for "class A { ... } f() {...}" which is
        // an unacceptable level of backtracking.
        ( ("typedef")? class_head) =>
        {if (statementTrace>=1)
            printf("%d member_declaration_1 Class definition\n",LT(1)->getLine());
        }
        declaration
    |
        // Enum definition (don't want to backtrack over this in other alts)
        ("enum" (ID)? LCURLY)=>
        {if (statementTrace>=1)
            printf("%d member_declaration_2 Enum definition\n",LT(1)->getLine());
        }
        enum_specifier (member_declarator_list)? SEMICOLON {end_of_stmt();}
    |
        // Constructor declarator
        (   ctor_decl_spec
            {qualifiedItemIsOneOf(qiCtor)}?
            ctor_declarator[0] SEMICOLON
        )=>
        {if (statementTrace>=1)
            printf("%d member_declaration_3 Constructor declarator\n",LT(1)->getLine());
        }
        ctor_decl_spec
        ctor_declarator[0]
        SEMICOLON {end_of_stmt();}  // Constructor declarator
    |
        // JEL Predicate to distinguish ctor from function
        // This works now that ctor cannot have VIRTUAL
        // It unfortunately matches A::A where A is not enclosing
        // class -- this will have to be checked semantically
        (   ctor_decl_spec
            {qualifiedItemIsOneOf(qiCtor)}?
            ctor_declarator[1]
            (COLON      // DEFINITION :ctor_initializer
            |LCURLY     // DEFINITION (compound Statement) ?
            )
        )=>
        {if (statementTrace>=1)
            printf("%d member_declaration_4 Constructor definition\n",LT(1)->getLine());
        }
        ctor_definition
    |
        // No template_head allowed for dtor member
        // Backtrack if not a dtor (no TILDE)
        (dtor_head[0] SEMICOLON)=>
        {if (statementTrace>=1)
            printf("%d member_declaration_5a Destructor declaration\n",LT(1)->getLine());
        }
        dtor_head[0] SEMICOLON {end_of_stmt();}     // Declaration
    |
        // No template_head allowed for dtor member
        // Backtrack if not a dtor (no TILDE)
        (dtor_head[1] LCURLY)=>
        {if (statementTrace>=1)
            printf("%d member_declaration_5b Destructor definition\n",LT(1)->getLine());
        }
        dtor_head[1] dtor_body      // Definition
    |
        // Function declaration
        (declaration_specifiers function_declarator[0] SEMICOLON)=>
        {if (statementTrace>=1)
            printf("%d member_declaration_6 Function declaration\n",LT(1)->getLine());
        }
        declaration
    |
        // Function definition
        (declaration_specifiers function_declarator[1] LCURLY)=>
        {beginFieldDeclaration();
         if (statementTrace>=1)
            printf("%d member_declaration_7 Function definition\n",LT(1)->getLine());
        }
        function_definition
    |
        // User-defined type cast
        (("inline")? conversion_function_decl_or_def)=>
        {if (statementTrace>=1)
            printf("%d member_declaration_8 Operator function\n",LT(1)->getLine());
        }
        ("inline")? conversion_function_decl_or_def
    |
        // Hack to handle decls like "superclass::member",
        // to redefine access to private base class public members
        (qualified_id SEMICOLON)=>
        {if (statementTrace>=1)
            printf("%d member_declaration_9 Qualified ID\n",LT(1)->getLine());
        }
        q = qualified_id SEMICOLON {end_of_stmt();}
    |
        // Member with a type or just a type def
        // A::T a(), ::T a, ::B a, void a, E a (where E is the enclosing class)
        (declaration_specifiers)=>
        {beginFieldDeclaration();
         if (statementTrace>=1)
            printf("%d member_declaration_10 Declaration(s)\n",LT(1)->getLine());
        }
        declaration_specifiers (member_declarator_list)? SEMICOLON {end_of_stmt();}
    |
        // Member without a type (I guess it can only be a function declaration or definition)
        (function_declarator[0] SEMICOLON)=>
        {beginFieldDeclaration();
         printf("%d warning Function declaration found without typename\n",LT(1)->getLine());
         if (statementTrace>=1)
            printf("%d member_declaration_11a Function declaration\n",LT(1)->getLine());
        }
        function_declarator[0] SEMICOLON {end_of_stmt();}
    |
        // Member without a type (I guess it can only be a function definition)
        {
         printf("%d warning Function definition found without typename\n",LT(1)->getLine());
         if (statementTrace>=1)
            printf("%d member_declaration_11b Function definition\n",LT(1)->getLine());
        }
        function_declarator[1] compound_statement {endFunctionDefinition();}
    |
        // templated forward class decl, init/decl of static member in template
        // DW 27/06/03 Copied here from external_declaration since templates can now be nested
        (template_head declaration_specifiers (init_declarator_list)? SEMICOLON)=>
        {beginTemplateDeclaration();
         if (statementTrace>=1)
            printf("%d member_declaration_12 Class template declaration\n",LT(1)->getLine());
        }
        template_head declaration_specifiers (init_declarator_list)? SEMICOLON {end_of_stmt();}
        {endTemplateDeclaration();}
    |
        // Templated FUNCTIONS and CONSTRUCTORS matched here.
        // DW 27/06/03 Copied here from external_declaration since templates can now be nested
        {beginTemplateDefinition();}
        template_head
        (
            // Templated CONSTRUCTOR definition
            // JEL 4/3/96 Added predicate that works once the
            // restriction is added that ctor cannot be virtual
            (ctor_decl_spec {qualifiedItemIsOneOf(qiCtor)}?
            )=>
            {if (statementTrace>=1)
                printf("%d member_declaration_13a Template constructor definition\n",LT(1)->getLine());
            }
            ctor_definition
        |
            // Templated function declaration
            (declaration_specifiers function_declarator[0] SEMICOLON)=>
            {if (statementTrace>=1)
                printf("%d member_declaration_13b Function template declaration\n",LT(1)->getLine());
            }
            declaration
        |
            // Templated function definition
            // Function definition DW 2/6/97
            (declaration_specifiers function_declarator[1] LCURLY)=>
            {if (statementTrace>=1)
                printf("%d member_declaration_13c Function template definition\n",LT(1)->getLine());
            }
            function_definition
        |
            {if (statementTrace>=1)
                printf("%d member_declaration_13d Templated operator function\n",LT(1)->getLine());
            }
            conversion_function_decl_or_def
        )
        {endTemplateDefinition();}
    |
        {if (statementTrace>=1)
            printf("%d member_declaration_14 Access specifier\n",LT(1)->getLine());
        }
        access_specifier COLON
    |
        {if (statementTrace>=1)
            printf("%d member_declaration_15 Semicolon\n",LT(1)->getLine());
        }
        SEMICOLON {end_of_stmt();}
    )
    ;   // end member_declaration

// A function definition: a function declarator (with or without leading
// declaration specifiers), optional K & R style parameter declarations and the
// compound statement, bracketed by beginFunctionDefinition() and
// endFunctionDefinition().
function_definition
    :
        // don't want next action as an init-action due to (...)=> caller
        { beginFunctionDefinition(); }
        (
            // Next line is equivalent to guarded predicate in PCCTS
            // (SCOPE | ID)? => <>?
            {( !(LA(1)==SCOPE||LA(1)==ID) || qualifiedItemIsOneOf(qiType|qiCtor) )}?
            declaration_specifiers function_declarator[1]
            (   options{warnWhenFollowAmbig = false;}:
                (declaration)*  // Possible for K & R definition
                {in_parameter_list = false;}
            )?
            compound_statement
        |
            // Next line is equivalent to guarded predicate in PCCTS
            // (SCOPE | ID)? => <>?
            //{( !(LA(1)==SCOPE||LA(1)==ID) || (qualifiedItemIsOneOf(qiPtrMember)) )}?
            function_declarator[1]
            (   options{warnWhenFollowAmbig = false;}:
                (declaration)*  // Possible for K & R definition
                {in_parameter_list = false;}
            )?
            compound_statement
        )
        {endFunctionDefinition();}
    ;

// A linkage specification, an ordinary declaration or a using declaration.
declaration
    :
        ("extern" StringLiteral)=>
        linkage_specification
    |
        { beginDeclaration(); }
        // LL 31/1/97: added (COMMA) ? below.
// This allows variables to be typedef'ed more than once. DW 18/08/03 ?
        declaration_specifiers ((COMMA)? init_declarator_list)? SEMICOLON {end_of_stmt();}
        {endDeclaration();}
    |
        using_declaration   // DW 19/04/04
    ;

// extern "..." followed by either a brace-enclosed list of external
// declarations or a single declaration.
linkage_specification
    :
        "extern"
        StringLiteral
        (LCURLY (external_declaration)* RCURLY
        |declaration
        )
    ;

// Collects the specifiers that precede a declarator (storage class,
// qualifiers, inline/virtual/explicit, typedef, friend and the type specifier)
// and reports them through declarationSpecifier(). The member flags are reset
// in the init-action so that nested declarations start clean.
declaration_specifiers
    {
    // Global flags to allow for nested declarations
    _td = false;        // For typedef
    _fd = false;        // For friend
    _sc = scInvalid;    // For StorageClass
    _tq = tqInvalid;    // For TypeQualifier
    _ts = tsInvalid;    // For TypeSpecifier
    _ds = dsInvalid;    // For DeclSpecifier

    // Locals
    bool td = false;    // For typedef
    bool fd = false;    // For friend
    StorageClass sc = scInvalid;    // auto,register,static,extern,mutable
    TypeQualifier tq = tqInvalid;   // const,const_cast,volatile,cdecl
    TypeSpecifier ts = tsInvalid;   // char,int,double, etc., class,struct,union
    DeclSpecifier ds = dsInvalid;   // inline,virtual,explicit
    }
    :
    (
        (options {warnWhenFollowAmbig = false;}
        :   sc = storage_class_specifier
        |   tq = type_qualifier
        |   ("inline"|"_inline"|"__inline") {ds = dsINLINE;}
        |   "virtual"   {ds = dsVIRTUAL;}
        |   "explicit"  {ds = dsEXPLICIT;}
        |   "typedef"   {td=true;}
        |   "friend"    {fd=true;}
        |   ("_stdcall"|"__stdcall")
        )*
        ts = type_specifier[ds]
    |
        "typename"  {td=true;}  direct_declarator
    )
    {declarationSpecifier(td,fd,sc,tq,ts,ds);}
    ;

// Maps a storage-class keyword to its StorageClass value.
// The init-action gives the result a defined value (scInvalid) even when no
// alternative is matched, e.g. after a recognition error.
storage_class_specifier returns [CPPParser::StorageClass sc]
    {sc = scInvalid;}
    :   "auto"      {sc = scAUTO;}
    |   "register"  {sc = scREGISTER;}
    |   "static"    {sc = scSTATIC;}
    |   "extern"    {sc = scEXTERN;}
    |   "mutable"   {sc = scMUTABLE;}
    ;

// Maps a type-qualifier keyword to its TypeQualifier value; the result
// defaults to tqInvalid when no alternative is matched.
type_qualifier returns [CPPParser::TypeQualifier tq]    // aka cv_qualifier
    {tq = tqInvalid;}
    :   ("const"|"const_cast")  {tq = tqCONST;}
    |   "volatile"              {tq = tqVOLATILE;}
    ;

// A simple type, a class/struct/union specifier or an enum specifier; the
// result defaults to tsInvalid when no alternative is matched.
type_specifier[DeclSpecifier ds] returns [CPPParser::TypeSpecifier ts]
    {ts = tsInvalid;}
    :   ts = simple_type_specifier
    |   ts = class_specifier[ds]
    |   enum_specifier  {ts=tsENUM;}
    ;

// Either a (possibly qualified) type name, giving tsTYPEID, or one or more
// built-in type keywords whose flags are ORed into the result.
simple_type_specifier returns [CPPParser::TypeSpecifier ts]
    {char *s; ts = tsInvalid;}
    :
    (   {qualifiedItemIsOneOf(qiType|qiCtor)}?
        s = qualified_type  {ts=tsTYPEID;}
    |
        (   "char"      {ts |= tsCHAR;}
        |   "wchar_t"   {ts |= tsWCHAR_T;}
        |   "bool"      {ts |= tsBOOL;}
        |   "short"     {ts |= tsSHORT;}
        |   "int"       {ts |= tsINT;}
        |   ("_int64"|"__int64")    {ts |= tsLONG;}
        |   "__w64"     {ts |= tsLONG;}
        |   "long"      {ts |= tsLONG;}
        |   "signed"    {ts |= tsSIGNED;}
        |   "unsigned"  {ts |= tsUNSIGNED;}
        |   "float"     {ts |= tsFLOAT;}
        |   "double"    {ts |= tsDOUBLE;}
        |   "void"      {ts |= tsVOID;}
        |   ("_declspec"|"__declspec") LPAREN ID RPAREN
        )+
    |
        // Fix towards allowing us to parse *.cpp files directly
        (qualified_type qualified_id)=> s = qualified_type {ts=tsTYPEID;}
        {printf("simple_type_specifier third option entered\n");}
    )
    ;

// A type name with its scope prefix and optional template arguments.
// Returns a pointer to a static buffer holding the scope prefix followed by
// the identifier; the buffer is overwritten by the next match of this rule.
qualified_type returns [char *q]
    {char *s; static char qitem[CPPParser_MaxQualifiedItemSize+1];}
    :
        // JEL 3/29/96 removed this predicate and moved it upwards to
        // simple_type_specifier.  This was done to allow parsing of ~ID to
        // be a unary_expression, which was never reached with this
        // predicate on
        // {qualifiedItemIsOneOf(qiType|qiCtor)}?

        s = scope_override
        id:ID
        (options {warnWhenFollowAmbig = false;}:
         LESSTHAN template_argument_list GREATERTHAN
        )?
        {
        //printf("qualified_type entered\n");
        // Bounded copy: qitem holds CPPParser_MaxQualifiedItemSize characters
        // plus the terminating NUL, so an over-long name is truncated instead
        // of overrunning the static buffer.
        qitem[0] = '\0';
        strncat(qitem, s, CPPParser_MaxQualifiedItemSize);
        strncat(qitem, (id->getText()).data(), CPPParser_MaxQualifiedItemSize - strlen(qitem));
        q = qitem;
        }
    ;

// class/struct/union followed by a named definition, a forward declaration or
// an anonymous definition. enclosingClass is saved before a class body is
// parsed and restored afterwards.
class_specifier[DeclSpecifier ds] returns [CPPParser::TypeSpecifier ts]
    {char *saveClass; char *id;}
    :
        ("class"    {ts = tsCLASS;}
        |"struct"   {ts = tsSTRUCT;}
        |"union"    {ts = tsUNION;}
        )
        (   id = qualified_id
            (options{generateAmbigWarnings = false;}:
                {saveClass = enclosingClass;
                 enclosingClass = symbols->strdup(id);
                }
                (base_clause)?
LCURLY
                {beginClassDefinition(ts, id);}     // This stores class name in dictionary
                (member_declaration)*
                {endClassDefinition();}
                RCURLY
                {enclosingClass = saveClass;}
            |
                {classForwardDeclaration(ts, ds, id);}
            )
        |
            LCURLY
            {saveClass = enclosingClass; enclosingClass = "__anonymous";}
            {beginClassDefinition(ts, "anonymous");}
            (member_declaration)*
            {endClassDefinition();}
            RCURLY
            {enclosingClass = saveClass;}
        )
    ;

// "enum" followed by an anonymous enumerator list, or by a name with an
// optional enumerator list. Only the named form calls beginEnumDefinition()
// and endEnumDefinition().
enum_specifier
    :   "enum"
        (   LCURLY enumerator_list RCURLY
        |   id:ID   // DW 22/04/03 Suggest qualified_id here to satisfy elaborated_type_specifier
            {beginEnumDefinition((id->getText()).data());}
            (LCURLY enumerator_list RCURLY)?
            {endEnumDefinition();}
        )
    ;

enumerator_list
    :   enumerator (COMMA enumerator)*
    ;

// One enumerator with an optional constant initialiser; its name is reported
// through enumElement().
enumerator
    :   id:ID (ASSIGNEQUAL constant_expression)?
        {enumElement((id->getText()).data());}
    ;

/* This matches a generic qualified identifier ::T::B::foo
 * (including OPERATOR).
 * It might be a good idea to put T::~dtor in here
 * as well, but id_expression in expr.g puts it in manually.
 * Maybe not, 'cause many people use this assuming only A::B.
 * How about a 'qualified_complex_id'?
 *
 * Returns a pointer to a static buffer that is overwritten by the next match
 * of this rule. All copies into the buffer are bounded by
 * CPPParser_MaxQualifiedItemSize, so an over-long name is truncated instead
 * of overrunning the buffer.
 */
qualified_id returns [char *q]
    {
    char *so;
    static char qitem[CPPParser_MaxQualifiedItemSize+1];
    }
    :
        so = scope_override
        {qitem[0] = '\0';
         strncat(qitem, so, CPPParser_MaxQualifiedItemSize);
        }
        (
            id:ID
            (options{warnWhenFollowAmbig = false;}:
             LESSTHAN template_argument_list GREATERTHAN)?
            {strncat(qitem,(id->getText()).data(), CPPParser_MaxQualifiedItemSize - strlen(qitem));}
        |
            OPERATOR optor
            {strncat(qitem,"operator", CPPParser_MaxQualifiedItemSize - strlen(qitem));
             strncat(qitem,"NYI", CPPParser_MaxQualifiedItemSize - strlen(qitem));
            }
        |
            "this"              // DW 21/07/03 fix to pass test8.i
        |
            ("true"|"false")    // DW 21/07/03 fix to pass test8.i
        )
        {q = qitem; }
    ;

// An ID that isTypeName() accepts.
typeID
    :   {isTypeName((LT(1)->getText()).data())}?
        ID
    ;

init_declarator_list
    :   init_declarator (COMMA init_declarator)*
    ;

// A declarator optionally followed by "= initializer" or a parenthesised
// expression list.
init_declarator
    :   declarator
        (
            ASSIGNEQUAL
            initializer
        |
            LPAREN expression_list RPAREN
        )?
    ;

initializer
    :   remainder_expression    // DW 18/4/01 assignment_expression
    |   LCURLY initializer (COMMA initializer)* RCURLY
    ;

class_head
    :   // Used only by predicates
        ("struct"
        |"union"
        |"class"
        )
        (ID
            (LESSTHAN template_argument_list GREATERTHAN)?
            (base_clause)?
        )?
        LCURLY
    ;

base_clause
    :   COLON base_specifier (COMMA base_specifier)*
    ;

base_specifier
    {char *qt;}
    :   // DW 13/08/03 Should check qualified_type for class-name?
        (   "virtual" (access_specifier)? qt = qualified_type
        |   access_specifier ("virtual")? qt = qualified_type
        |   qt = qualified_type
        )
    ;

access_specifier
    :   "public"
    |   "protected"
    |   "private"
    ;

member_declarator_list
    :   member_declarator (ASSIGNEQUAL OCTALINT)?   // The value must be 0 (pure virt.)
        (COMMA member_declarator (ASSIGNEQUAL OCTALINT)? )*
    ;

// Either a bit-field style member ("ID? : constant_expression") or an
// ordinary declarator.
member_declarator
    :   ((ID)? COLON constant_expression)=>(ID)? COLON constant_expression
    |
        declarator
    ;

// "operator" followed by the target type and a parameter list, ending in
// either a body (definition) or a semicolon (declaration).
conversion_function_decl_or_def
    {CPPParser::TypeQualifier tq;}
    :   OPERATOR declaration_specifiers (STAR | AMPERSAND)? // DW 01/08/03 Use type_specifier here? see syntax
        (LESSTHAN template_parameter_list GREATERTHAN)?
        LPAREN (parameter_list)? RPAREN
        (tq = type_qualifier)?
        (exception_specification)?
        (   compound_statement
        |   SEMICOLON {end_of_stmt();}
        )
    ;

// JEL note:  does not use (const|volatile)* to avoid lookahead problems
cv_qualifier_seq
    {CPPParser::TypeQualifier tq;}
    :
        (tq = type_qualifier)*
    ;

// Zero or more pointer operators followed by a direct declarator.
declarator
    :
        //{( !(LA(1)==SCOPE||LA(1)==ID) || qualifiedItemIsOneOf(qiPtrMember) )}?
        (ptr_operator)=> ptr_operator   // AMPERSAND or STAR
        declarator
    |
        direct_declarator
    ;

// The name part of a declarator. Each alternative records the declared name
// through declaratorID() with the matching QualifiedItem flag.
direct_declarator
    {char *id;
     CPPParser::TypeQualifier tq;}
    :
        (qualified_id LPAREN (RPAREN|declaration_specifiers) )=>    // Must be function declaration
        id = qualified_id
        {id_type = ID_FUN_NAME;     // DW 28/05/04 ready to delete
         declaratorID(id,qiFun);}
        LPAREN {declaratorParameterList(0);}
        (parameter_list)?
        RPAREN {declaratorEndParameterList(0);}
        (tq = type_qualifier)*
        (exception_specification)?
| (qualified_id LPAREN qualified_id)=> // Must be class instantiation id = qualified_id {id_type = ID_VAR_NAME; // DW 26/05/04 ready to delete declaratorID(id,qiVar);} LPAREN expression_list RPAREN | (qualified_id LSQUARE)=> // Must be array declaration id = qualified_id {if (_td==true) id_type = ID_TYPEDEF_VAR; // DW 28/05/04 To be deleted in the next version else id_type = ID_VAR_NAME; // DW 28/05/04 To be deleted in the next version if (_td==true) declaratorID(id,qiType); else declaratorID(id,qiVar); is_address = false; is_pointer = false; } (options {warnWhenFollowAmbig = false;}: LSQUARE (constant_expression)? RSQUARE)+ {declaratorArray();} | id = qualified_id {if (_td==true) id_type = ID_TYPEDEF_VAR; // DW 28/05/04 To be deleted in the next version else id_type = ID_VAR_NAME; // DW 28/05/04 To be deleted in the next version if (_td==true) declaratorID(id,qiType); else declaratorID(id,qiVar); is_address = false; is_pointer = false; } | // DW 24/05/04 This block probably never entered as dtor selected out earlier // Note In fact no dictionary entries for ctor or dtor TILDE dtor:ID {declaratorID((dtor->getText()).data(),qiDtor);} // Note "class" not recorded in CPPSymbol {printf("%d warning direct_declarator5 entered unexpectedly with %s\n", LT(1)->getLine(),(dtor->getText()).data());} LPAREN {declaratorParameterList(0);} (parameter_list)? RPAREN {declaratorEndParameterList(0);} | LPAREN declarator RPAREN declarator_suffixes ; declarator_suffixes {CPPParser::TypeQualifier tq;} : ( (options {warnWhenFollowAmbig = false;}: LSQUARE (constant_expression)? RSQUARE)+ {declaratorArray();} | {(!((LA(1)==LPAREN)&&(LA(2)==ID))||(qualifiedItemIsOneOf(qiType|qiCtor,1)))}? LPAREN {declaratorParameterList(0);} (parameter_list)? RPAREN {declaratorEndParameterList(0);} (tq = type_qualifier)* (exception_specification)? 
//		| // DW 28/06/04 deleted Assume either following bracketed declaration
//		  // empty
		)
	;

/* I think something is weird with the context-guards for predicates;
 * as a result I manually hoist the appropriate pred from ptr_to_member
 *
 * TER: warning: seems that "ID::" will always bypass and go to 2nd alt :(
 */
function_declarator [int definition]
	:	//{( !(LA(1)==SCOPE||LA(1)==ID) || qualifiedItemIsOneOf(qiPtrMember) )}?
		(ptr_operator)=> ptr_operator function_declarator[definition]
	|	function_direct_declarator[definition]
	;

// Function name (optionally parenthesised), parameter list, trailing
// qualifiers, optional "= 0" and optional exception specification.
// "definition" is passed through to functionEndParameterList() and, when
// MYCODE is defined, gates the call to myCode_function_direct_declarator().
function_direct_declarator [int definition]
	{
	char *q;
	CPPParser::TypeQualifier tq;
	}
	:
		/* predicate indicate that plain ID is ok here; this counteracts any
		 * other predicate that gets hoisted (along with this one) that
		 * indicates that an ID is a type or whatever.  E.g.,
		 * another rule testing isTypeName() alone, implies that
		 * the ID *MUST* be a type name.  Combining isTypeName() and
		 * this predicate in an OR situation like this one:
		 * ( declaration_specifiers ... | function_declarator ... )
		 * would imply that ID can be a type name OR a plain ID.
		 */
		(	// fix prompted by (isdigit)() in xlocnum
			LPAREN
			q = qualified_id
			{
			id_type = ID_FUN_NAME;	// DW 28/05/04 ready to delete
			declaratorID(q,qiFun);
			}
			RPAREN
		|	q = qualified_id
			{
			id_type = ID_FUN_NAME;	// DW 28/05/04 ready to delete
			declaratorID(q,qiFun);
			}
		)

		{
		// Nothing but a comment may follow #endif; a bare label there is
		// diagnosed by compilers as extra tokens
#ifdef MYCODE
		if (definition)
			myCode_function_direct_declarator(q);
#endif // MYCODE
		}

		LPAREN
		{
		functionParameterList();
		if (K_and_R == false)
			in_parameter_list = true;
		}
		(parameter_list)?
		{
		if (K_and_R == false)
			in_parameter_list = false;
		else
			in_parameter_list = true;
		}
		RPAREN
		(options{warnWhenFollowAmbig = false;}:
			tq = type_qualifier
		)*
		(ASSIGNEQUAL OCTALINT)?	// The value of the octal must be 0
		{functionEndParameterList(definition);}
		(exception_specification)?
;

ctor_definition
	:	ctor_head
		ctor_body
		{endConstructorDefinition();}
	;

ctor_head
	:	ctor_decl_spec
		ctor_declarator[1]
	;

ctor_decl_spec
	:	(("inline"|"_inline"|"__inline")|"explicit")*
	;

ctor_declarator[int definition]
	{char *q;}
	:	// JEL 4/3/96 qualified_id too broad DW 10/06/03 ?
		q = qualified_ctor_id
		{declaratorParameterList(definition);}
		LPAREN (parameter_list)? RPAREN
		{declaratorEndParameterList(definition);}
		(exception_specification)?
	;

// This matches a generic qualified identifier ::T::B::foo
// that is satisfactory for a ctor (no operator, no trailing <>)
//
// The name is assembled in a fixed-size static buffer, so the returned
// pointer is overwritten by the next match of this rule.  Both pieces are
// appended with a length limit: text that does not fit in
// CPPParser_MaxQualifiedItemSize characters is truncated instead of being
// written past the end of the buffer.
qualified_ctor_id returns [char *q]
	{
	char *so;
	static char qitem[CPPParser_MaxQualifiedItemSize+1];
	}
	:	so = scope_override
		{
		// strncat onto an empty string is a bounded, always-terminated copy
		qitem[0] = '\0';
		strncat(qitem, so, CPPParser_MaxQualifiedItemSize);
		}
		id:ID	// DW 24/05/04 Note. Neither Ctor or Dtor recorded in dictionary
		{
		// Remaining room excludes the terminator, which strncat adds itself
		strncat(qitem, (id->getText()).data(), CPPParser_MaxQualifiedItemSize - strlen(qitem));
		q = qitem;
		}
	;

ctor_body
	:	(ctor_initializer)?
		compound_statement
	;

ctor_initializer
	:	COLON superclass_init (COMMA superclass_init)*
	;

superclass_init
	{char *q;}
	:	q = qualified_id LPAREN (expression_list)? RPAREN
	;

dtor_head[int definition]
	:	dtor_decl_spec
		dtor_declarator[definition]
	;

dtor_decl_spec
	:	(("inline"|"_inline"|"__inline")|"virtual")*
	;

dtor_declarator[int definition]
	{char *s;}
	:	s = scope_override
		TILDE ID
		{declaratorParameterList(definition);}
		LPAREN RPAREN
		{declaratorEndParameterList(definition);}
		(exception_specification)?
	;

dtor_body
	:	compound_statement
		{endDestructorDefinition();}
	;

parameter_list
	:	parameter_declaration_list (ELLIPSIS)?
	;

parameter_declaration_list
	:	(	parameter_declaration
			(	// Have not been able to find way of stopping warning of non-determinism between alt 1 and exit branch of block
				COMMA parameter_declaration
			)*
		)
	;

parameter_declaration
	:	{beginParameterDeclaration();}
		(
			{!((LA(1)==SCOPE) && (LA(2)==STAR||LA(2)==OPERATOR))&&( !(LA(1)==SCOPE||LA(1)==ID) || qualifiedItemIsOneOf(qiType|qiCtor) )}?
declaration_specifiers	// DW 24/3/98 Mods for K & R
			(
				(declarator)=> declarator	// if arg name given
			|
				abstract_declarator	// if arg name not given  // can be empty
			)
		|
			(declarator)=> declarator	// DW 24/3/98 Mods for K & R
		|
			ELLIPSIS
		)
		(ASSIGNEQUAL
		 remainder_expression	// DW 18/4/01 assignment_expression
		)?
	;

type_name	// aka type_id
	:	declaration_specifiers abstract_declarator
	;

/* This rule looks a bit weird because (...) can happen in two
 * places within the declaration such as "void (*)()" (ptr to
 * function returning nothing).  However, the () of a function
 * can only occur after having seen either a (abstract_declarator)
 * and not after a [..] or simple '*'.  These are the only two
 * valid () func-groups:
 *
 *    int (*)();     // ptr to func
 *    int (*[])();   // array of ptr to func
 */
abstract_declarator
	:	//{( !(LA(1)==SCOPE||LA(1)==ID) || qualifiedItemIsOneOf(qiPtrMember) )}?
		ptr_operator abstract_declarator
	|	LPAREN abstract_declarator RPAREN
		(abstract_declarator_suffix)+
	|	(LSQUARE (constant_expression )? RSQUARE {declaratorArray();}
		)+
	|	/* empty */
	;

abstract_declarator_suffix
	:	LSQUARE (constant_expression)? RSQUARE
		{declaratorArray();}
	|	LPAREN
		{declaratorParameterList(0);}
		(parameter_list)?
		RPAREN
		cv_qualifier_seq
		{declaratorEndParameterList(0);}
		(exception_specification)?
	;

// throw(), throw(...) or throw(T1, T2, ..., Tn).
// The comma-separated tail is a loop so that any number of (optionally
// scope-qualified) names is accepted, not just one or two.
exception_specification
	{char *so;}
	:	"throw"
		LPAREN
		(	(so = scope_override ID (COMMA so = scope_override ID)* )?
		|	ELLIPSIS
		)
		RPAREN
	;

template_head
	:	"template"
		LESSTHAN template_parameter_list GREATERTHAN
	;

template_parameter_list
	:	{beginTemplateParameterList();}
		template_parameter (COMMA template_parameter)*
		{endTemplateParameterList();}
	;

/* Rule requires >2 lookahead tokens. The ambiguity is resolved
 * correctly, however. According to the manual "...A template argument
 * that can be interpreted either as a parameter-declaration or a
 * type-argument (because its identifier is the name of an
 * already existing class) is taken as type-argument."
* Therefore, any "class ID" that is seen on the input, should
 * match the first alternative here (it should be a type-argument).
 *
 * The name after "class"/"typename" is optional, so the label "id" is
 * still the null token when it was omitted (e.g. template <class>).
 * templateTypeParameter() is therefore only called when a name was
 * actually matched.
 */
template_parameter
	:	(options{generateAmbigWarnings = false;}:
			("class"|"typename")
			(id:ID (ASSIGNEQUAL assigned_type_name)? )?
			{
			// Dereferencing the null token would crash the parser
			if (id != ANTLR_USE_NAMESPACE(antlr)nullToken)
				templateTypeParameter((id->getText()).data());
			}
		|	parameter_declaration	// DW 30/06/03 This doesn't seem to match the current standard
		)
	;

/* This is to allow an assigned type_name in a template parameter
 * list to be defined previously in the same parameter list,
 * as type setting is ineffective whilst guessing
 */
assigned_type_name
	{
	char* s;
	TypeSpecifier ts;
	}
	:	(options{generateAmbigWarnings = false;}:
			s = qualified_type abstract_declarator
		|	ts = simple_type_specifier abstract_declarator
		)
	;

// This rule refers to an instance of a template class or function
template_id	// aka template_class_name
	:	ID LESSTHAN template_argument_list GREATERTHAN
	;

template_argument_list
	:	template_argument (COMMA template_argument)*
	;

/* Here assignment_expression was changed to shift_expression to rule out
 * x< 1<2 > which causes ambiguities. As a result, these can be used only
 * by enclosing parentheses x<(1<2)>. This is true for x<1+2> ==> bad,
 * x<(1+2)> ==> ok.
 */
template_argument
	:	{( !(LA(1)==SCOPE||LA(1)==ID) || qualifiedItemIsOneOf(qiType|qiCtor) )}?
type_name
	|	shift_expression	// failed in iosfwd
//	|	assignment_expression	// Inserted as per grammar summary
	;

///////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////
//////////////////////////////  STATEMENTS ////////////////////////////
///////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////

statement_list
	:	(statement)+
	;

// A declaration is tried first (by syntactic predicate); every alternative
// that consumes a terminating SEMICOLON itself reports it via end_of_stmt()
statement
	:
		(	(declaration)=> declaration
		|	labeled_statement
		|	case_statement
		|	default_statement
		|	expression SEMICOLON {end_of_stmt();}
		|	compound_statement
		|	selection_statement
		|	iteration_statement
		|	jump_statement
		|	SEMICOLON {end_of_stmt();}
		|	try_block
		|	throw_statement
		|	asm_block
		)
	;

labeled_statement
	:	ID COLON statement
	;

case_statement
	:	"case"
		constant_expression COLON statement
	;

default_statement
	:	"default" COLON statement
	;

// A new local scope is entered after the opening brace and left after the
// closing one
compound_statement
	:	LCURLY
		{
		end_of_stmt();
		enterNewLocalScope();
		}
		(statement_list)?
		RCURLY
		{exitLocalScope();}
	;

/* NOTE: cannot remove ELSE ambiguity, but it parses correctly.
 * The warning is removed with the options statement
 */
selection_statement
	:	"if" LPAREN
		expression RPAREN
		statement
		(options {warnWhenFollowAmbig = false;}:
			"else" statement
		)?
	|	"switch" LPAREN expression RPAREN statement
	;

iteration_statement
	:	"while"
		LPAREN expression RPAREN
		statement
	|	"do"
		statement "while"
		LPAREN expression RPAREN
		SEMICOLON {end_of_stmt();}
	|	"for" LPAREN
		(	(declaration)=> declaration
		|	expression SEMICOLON {end_of_stmt();}
		|	SEMICOLON {end_of_stmt();}
		)
		(expression)? SEMICOLON {end_of_stmt();}
		(expression)?
RPAREN statement
	;

// in_return is set while the operand of "return" is being parsed and
// cleared again at the terminating semicolon
jump_statement
	:
		(	"goto" ID SEMICOLON {end_of_stmt();}
		|	"continue" SEMICOLON {end_of_stmt();}
		|	"break" SEMICOLON {end_of_stmt();}
			// DW 16/05/03 May be problem here if return is followed by a cast expression
		|	"return" {in_return = true;}
			(	options{warnWhenFollowAmbig = false;}:
				(LPAREN {(qualifiedItemIsOneOf(qiType) )}? ID RPAREN)=>
				LPAREN ID RPAREN (expression)?	// This is an unsatisfactory fix for problem in xstring re "return (allocator);"
												//  and in xlocale re return (_E)(_Tolower((unsigned char)_C, &_Ctype));
				//{printf("%d CPP_parser.g jump_statement Return fix used\n",LT(1)->getLine());}
			|	expression
			)?
			SEMICOLON
			{
			in_return = false;
			end_of_stmt();
			}
		)
	;

try_block
	:	"try" compound_statement (handler)*
	;

handler
	:	"catch"
		{exceptionBeginHandler();}
		{declaratorParameterList(1);}
		LPAREN exception_declaration RPAREN
		{declaratorEndParameterList(1);}
		compound_statement
		{exceptionEndHandler();}
	;

exception_declaration
	:	parameter_declaration_list
	;

/* This is an expression of type void according to the ARM, which
 * to me means "statement"; it removes some ambiguity to put it in
 * as a statement also.
 */
throw_statement
	:	"throw" (assignment_expression) ?
SEMICOLON { end_of_stmt();}
	;

using_declaration
	{char *qid;}
	:	"using"
		(	"namespace" qid = qualified_id	// Using-directive
		|	qid = qualified_id				// Using-declaration
		)
		SEMICOLON {end_of_stmt();}
	;

// Everything between the braces is skipped token by token
asm_block
	:	("_asm"|"__asm") LCURLY (~RCURLY)* RCURLY
	;

///////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////
//////////////////////////////  EXPRESSIONS ///////////////////////////
///////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////

expression
	:	assignment_expression (COMMA assignment_expression)*
	;

/* right-to-left for assignment op */
assignment_expression
	:	conditional_expression
		(	(	ASSIGNEQUAL
			|	TIMESEQUAL
			|	DIVIDEEQUAL
			|	MINUSEQUAL
			|	PLUSEQUAL
			|	MODEQUAL
			|	SHIFTLEFTEQUAL
			|	SHIFTRIGHTEQUAL
			|	BITWISEANDEQUAL
			|	BITWISEXOREQUAL
			|	BITWISEOREQUAL
			)
			remainder_expression
		)?
	;

// assign_stmt_RHS_found is incremented before, and decremented after, an
// assignment_expression chosen by the predicate; finding the counter at
// zero or below afterwards is reported and waits for a key press
remainder_expression
	:
		(	(conditional_expression (COMMA|SEMICOLON|RPAREN) )=>
			{++assign_stmt_RHS_found;}
			assignment_expression
			{
			if (assign_stmt_RHS_found <= 0)
				{
				printf("%d warning Error in assign_stmt_RHS_found = %d\n",
					LT(1)->getLine(),assign_stmt_RHS_found);
				printf("Press return to continue\n");
				getchar();
				}
			else
				{
				--assign_stmt_RHS_found;
				}
			}
		|	assignment_expression
		)
	;

conditional_expression
	:	logical_or_expression
		(QUESTIONMARK expression COLON conditional_expression)?
;

constant_expression
	:	conditional_expression
	;

logical_or_expression
	:	logical_and_expression (OR logical_and_expression)*
	;

logical_and_expression
	:	inclusive_or_expression (AND inclusive_or_expression)*
	;

inclusive_or_expression
	:	exclusive_or_expression (BITWISEOR exclusive_or_expression)*
	;

exclusive_or_expression
	:	and_expression (BITWISEXOR and_expression)*
	;

and_expression
	:	equality_expression (AMPERSAND equality_expression)*
	;

equality_expression
	:	relational_expression ((NOTEQUAL | EQUAL) relational_expression)*
	;

relational_expression
	:	shift_expression
		(options {warnWhenFollowAmbig = false;}:
			(	LESSTHAN
			|	GREATERTHAN
			|	LESSTHANOREQUALTO
			|	GREATERTHANOREQUALTO
			)
			shift_expression
		)*
	;

shift_expression
	:	additive_expression ((SHIFTLEFT | SHIFTRIGHT) additive_expression)*
	;

/* See comment for multiplicative_expression regarding #pragma */
additive_expression
	:	multiplicative_expression
		(options{warnWhenFollowAmbig = false;}:
			(PLUS | MINUS) multiplicative_expression
		)*
	;

/* ANTLR has trouble dealing with the analysis of the confusing unary/binary
 * operators such as STAR, AMPERSAND, PLUS, etc...  With the #pragma (now
 * "(options{warnWhenFollowAmbig = false;}:" etc.) we simply tell ANTLR to
 * use the "quick-to-analyze" approximate lookahead as full LL(k) lookahead
 * will not resolve the ambiguity anyway.  Might as well not bother.  This
 * has the side-benefit that ANTLR doesn't go off to lunch here (take
 * infinite time to read grammar).
 */
multiplicative_expression
	:	pm_expression
		(options{warnWhenFollowAmbig = false;}:
			(STAR | DIVIDE | MOD) pm_expression
		)*
	;

pm_expression
	:	cast_expression ((DOTMBR | POINTERTOMBR) cast_expression)*
	;

/* The string "( ID" can be either the start of a cast or
 * the start of a unary_expression.  However, the ID must
 * be a type name for it to be a cast.
Since ANTLR can only hoist
 * semantic predicates that are visible without consuming a token,
 * the semantic predicate in rule type_name is not hoisted--hence, the
 * rule is reported to be ambiguous.  I am manually putting in the
 * correctly hoisted predicate.
 *
 * Ack! Actually "( ID" might be the start of "(T(expr))" which makes
 * the first parens just an ordinary expression grouping.  The solution
 * is to look at what follows the type, T.  Note, this could be a
 * qualified type.  Yucko.  I believe that "(T(" can only imply
 * function-style type cast in an expression (...) grouping.
 *
 * We DO NOT handle the following situation correctly at the moment:
 * Suppose you have
 *    struct rusage rusage;
 *    return (rusage.fp);
 *    return (rusage*)p;
 * Now essentially there is an ambiguity here. If rusage is followed by any
 * postfix operators then it is an identifier else it is a type name. This
 * problem does not occur in C because, unless the tag struct is attached,
 * rusage is not a type name. However in C++ that restriction is removed.
 * No *real* programmer would do this, but it's in the C++ standard just for
 * fun..
 *
 * Another fun one (from an LL standpoint):
 *
 *   (A::B::T *)v;	// that's a cast of v to type A::B::T
 *   (A::B::foo);	// that's a simple member access
 *
 * The qualifiedItemIs(1) function scans ahead to what follows the
 * final "::" and returns qiType if the item is a type.  The offset of
 * '1' makes it ignore the initial LPAREN; normally, the offset is 0.
 */
cast_expression
	{
	TypeQualifier tq;
	TypeSpecifier ts;
	}
	:	// DW 23/06/03
		(LPAREN (type_qualifier)? simple_type_specifier (ptr_operator)? RPAREN)=>
		LPAREN (tq = type_qualifier)? ts = simple_type_specifier (ptr_operator)? RPAREN
		cast_expression
	|	unary_expression	// handles outer (...) of "(T(expr))"
	;

unary_expression
	:
		(	//{!(LA(1)==TILDE && LA(2)==ID)||qualifiedItemIsOneOf(qiVar|qiFun|qiDtor|qiCtor)}?
(postfix_expression)=> postfix_expression
		|	PLUSPLUS unary_expression
		|	MINUSMINUS unary_expression
		|	unary_operator cast_expression
		|	"sizeof"
			(	// see comment for rule cast_expression for info on predicate
				// JEL NOTE 3/31/96 -- This won't work -- you really need to
				// call qualifiedItemIsOneOf(qiType|qiCtor,1)
				// The context should also be ( LPAREN (SCOPE|ID) )
				// ( LPAREN ID ) => {isTypeName((LT(2)->getText()).data())}?
				{(!(((LA(1)==LPAREN&&(LA(2)==ID))))||(isTypeName((LT(2)->getText()).data())))}?
				LPAREN type_name RPAREN
			|	unary_expression
			)
		|	(SCOPE)?
			(	new_expression
			|	delete_expression
			)
		)
	;

postfix_expression
	{
	TypeSpecifier ts;
	DeclSpecifier ds = dsInvalid;	// Purpose ?
	}
	:
		(	options {warnWhenFollowAmbig = false;}:
			// Function-style cast must have a leading type
			{!(LA(1)==LPAREN)}?
			(ts = simple_type_specifier LPAREN RPAREN LPAREN)=>	// DW 01/08/03 To cope with problem in xtree (see test10.i)
			ts = simple_type_specifier LPAREN RPAREN LPAREN (expression_list)? RPAREN
		|	{!(LA(1)==LPAREN)}?
			(ts = simple_type_specifier LPAREN)=>
			ts = simple_type_specifier LPAREN (expression_list)? RPAREN
		|	primary_expression
			(options {warnWhenFollowAmbig = false;}:
				LSQUARE expression RSQUARE
			|	LPAREN (expression_list)? RPAREN
			|	DOT id_expression
			|	POINTERTO id_expression
			|	PLUSPLUS
			|	MINUSMINUS
			)*
		|	("dynamic_cast"|"static_cast"|"reinterpret_cast"|"const_cast")	// Note const_cast in elsewhere
			LESSTHAN ts = type_specifier[ds] (ptr_operator)? GREATERTHAN
			LPAREN expression RPAREN
		)
	;

primary_expression
	:	id_expression
	|	constant
	|	"this"
	|	LPAREN expression RPAREN
	;

id_expression
	{char *s;}
	:	s = scope_override
		(	ID
		|	OPERATOR optor
		|	TILDE (STAR)? ID	// DW 29/07/03 STAR included to allow for *_S = ~*_S; seen in vector
		)
	;

unary_operator
	:	AMPERSAND
	|	STAR
	|	PLUS
	|	MINUS
	|	TILDE
	|	NOT
	;

/* JEL The first ()? is used to resolve "new (expr) (type)" because both
 * (expr) and (type) look identical until you've seen the whole thing.
*
 * new_initializer appears to be conflicting with function arguments as
 * function arguments can follow a primary_expression.  [This is a full
 * LL(k) versus LALL(k) problem.  Enhancing context by duplication of
 * some rules might handle this.]
 */
new_expression
	:
		(	"new"
			((LPAREN expression_list RPAREN)=> LPAREN expression_list RPAREN)?
			(new_type_id | LPAREN type_name RPAREN)
			(options{warnWhenFollowAmbig = false;}:
				(new_initializer)=> new_initializer
			)?
		)
	;

new_initializer
	:	LPAREN (expression_list)? RPAREN
	;

new_type_id
	:	declaration_specifiers
		(options {warnWhenFollowAmbig = false;}:
			//{( !(LA(1)==SCOPE||LA(1)==ID) || qualifiedItemIsOneOf(qiPtrMember) )}?
			new_declarator
		)?
	;

new_declarator
	:	//{( !(LA(1)==SCOPE||LA(1)==ID) || qualifiedItemIsOneOf(qiPtrMember) )}?
		//ptr_to_member cv_qualifier_seq
		ptr_operator
		(options {warnWhenFollowAmbig = false;}:
			new_declarator
		) ?
	|	direct_new_declarator
	;

ptr_operator
	:
		(	AMPERSAND {is_address = true;}
		|	("_cdecl"|"__cdecl")
		|	("_near"|"__near")
		|	("_far"|"__far")
		|	"__interrupt"
		|	("pascal"|"_pascal"|"__pascal")
		|	("_stdcall"|"__stdcall")
		|	ptr_to_member	// e.g. STAR
		)
	;

// Match A::B::*
ptr_to_member
	{char *s;}
	:	s = scope_override STAR {is_pointer = true;} cv_qualifier_seq
	;

// Match the A::B::C:: or nothing
//
// The text is assembled in a buffer local to this invocation and copied to
// the static result buffer only when the rule has finished.  The rule is
// re-entered while template_argument_list is being matched (every type or
// id_expression in the arguments goes through scope_override again); had
// the text been built directly in the static buffer, those nested matches
// would wipe what had been collected so far, e.g. "std::" would be lost
// from std::vector<T>::iterator.
// Appends are length-limited: text beyond CPPParser_MaxQualifiedItemSize
// characters is dropped instead of overrunning the buffer.
// The returned pointer refers to the static buffer and is overwritten by
// the next completed match of this rule.
scope_override returns [char *s]
	{
	static char sitem[CPPParser_MaxQualifiedItemSize+1];	// result handed to the caller
	char item[CPPParser_MaxQualifiedItemSize+1];			// work area of this invocation
	item[0] = '\0';
	}
	:	//{!(qualifiedItemIsOneOf(qiType))}?
		(SCOPE {strncat(item, "::", CPPParser_MaxQualifiedItemSize - strlen(item));} )?
		(	options {warnWhenFollowAmbig = false;}:
			{scopedItem()}?
			id:ID (LESSTHAN template_argument_list GREATERTHAN)? SCOPE
			{
			//printf("scope_override entered\n");
			strncat(item, (id->getText()).data(), CPPParser_MaxQualifiedItemSize - strlen(item));
			strncat(item, "::", CPPParser_MaxQualifiedItemSize - strlen(item));
			}
		)*
		{
		// item never exceeds the capacity of sitem, so a plain copy is safe
		strcpy(sitem, item);
		s = sitem;
		}
	;

/* The "[expression]" construct conflicts with the "new []" construct
 * (and possibly others).  We used approximate lookahead for the "new []"
 * construct so that it would not try to compute full LL(2) lookahead.
* Here, we use #pragma approx again because anytime we see a [ followed
 * by token that can begin an expression, we always want to loop.
 * Approximate lookahead handles this correctly.  In fact, approximate
 * lookahead is the same as full lookahead when all but the last lookahead
 * depth are singleton sets; e.g., {"["} followed by FIRST(expression).
 */
direct_new_declarator
	:	(options {warnWhenFollowAmbig = false;}:
			LSQUARE expression RSQUARE
		)+
	;

delete_expression
	:	"delete" (LSQUARE RSQUARE)?
		cast_expression
	;

expression_list
	:	assignment_expression (COMMA assignment_expression)*
	;

// Adjacent string literals are accepted as a single constant
constant
	:	OCTALINT
	|	DECIMALINT
	|	HEXADECIMALINT
	|	CharLiteral
	|	(StringLiteral)+
	|	FLOATONE
	|	FLOATTWO
	|	"true"
	|	"false"
	;

// The operator symbol that may follow the OPERATOR keyword
optor
	:	"new"
		(options {warnWhenFollowAmbig = false;}:
			LSQUARE RSQUARE
		|
		)	// check syntax
	|	"delete"
		(options {warnWhenFollowAmbig = false;}:
			LSQUARE RSQUARE
		|
		)	// check syntax
	|	LPAREN RPAREN
	|	LSQUARE RSQUARE
	|	optor_simple_tokclass	//OPTOR_SIMPLE_TOKCLASS
	;

//Zuo 5/11/2001
// This is the equivalent to "#tokclass OPTOR_SIMPLE_TOKCLASS" in cplusplus.g
optor_simple_tokclass
	:
		(	PLUS|MINUS|STAR|DIVIDE|MOD|BITWISEXOR|AMPERSAND|BITWISEOR|TILDE|NOT|
			SHIFTLEFT|SHIFTRIGHT|
			ASSIGNEQUAL|TIMESEQUAL|DIVIDEEQUAL|MODEQUAL|PLUSEQUAL|MINUSEQUAL|
			SHIFTLEFTEQUAL|SHIFTRIGHTEQUAL|BITWISEANDEQUAL|BITWISEXOREQUAL|BITWISEOREQUAL|
			EQUAL|NOTEQUAL|LESSTHAN|GREATERTHAN|LESSTHANOREQUALTO|GREATERTHANOREQUALTO|OR|AND|
			PLUSPLUS|MINUSMINUS|COMMA|POINTERTO|POINTERTOMBR
		)
	;

// Zuo 19/11/01 from next line, the Lexer is derived from stdCParser.g

class CPPLexer extends Lexer;

options
	{
	k = 3;
	exportVocab = STDC;
	testLiterals = true;
	}

// DW 4/11/02 put in to support manual hoisting
tokens
	{
	OPERATOR = "operator";
	}

{
ANTLR_USE_NAMESPACE(antlr)LineObject lineObject;
ANTLR_USE_NAMESPACE(std)string originalSource;
int deferredLineCount;
int _line;

// Remembers the source name and forwards it to lineObject
void setOriginalSource(ANTLR_USE_NAMESPACE(std)string src)
	{
	originalSource = src;
	lineObject.setSource(src);
	}

void
setSource(ANTLR_USE_NAMESPACE(std)string src)
	{
	lineObject.setSource(src);
	}

// Counts a line break that is not reported through newline()
void deferredNewline()
	{
	++deferredLineCount;
	}

void newline()
	{
	CharScanner::newline();
	}
}

/* Operators: */

ASSIGNEQUAL          : '=' ;
COLON                : ':' ;
COMMA                : ',' ;
QUESTIONMARK         : '?' ;
SEMICOLON            : ';' ;
POINTERTO            : "->" ;

/*
// DOT & ELLIPSIS are commented out since they are generated as part of
// the Number rule below due to some bizarre lexical ambiguity shme.
// DOT  :       '.' ;
// ELLIPSIS      : "..." ;
*/

LPAREN               : '(' ;
RPAREN               : ')' ;
LSQUARE              : '[' ;
RSQUARE              : ']' ;
LCURLY               : '{' ;
RCURLY               : '}' ;

EQUAL                : "==" ;
NOTEQUAL             : "!=" ;
LESSTHANOREQUALTO    : "<=" ;
LESSTHAN             : "<" ;
GREATERTHANOREQUALTO : ">=" ;
GREATERTHAN          : ">" ;

DIVIDE               : '/' ;
DIVIDEEQUAL          : "/=" ;
PLUS                 : '+' ;
PLUSEQUAL            : "+=" ;
PLUSPLUS             : "++" ;
MINUS                : '-' ;
MINUSEQUAL           : "-=" ;
MINUSMINUS           : "--" ;
STAR                 : '*' ;
TIMESEQUAL           : "*=" ;
MOD                  : '%' ;
MODEQUAL             : "%=" ;
SHIFTRIGHT           : ">>" ;
SHIFTRIGHTEQUAL      : ">>=" ;
SHIFTLEFT            : "<<" ;
SHIFTLEFTEQUAL       : "<<=" ;

AND                  : "&&" ;
NOT                  : '!' ;
OR                   : "||" ;

AMPERSAND            : '&' ;
BITWISEANDEQUAL      : "&=" ;
TILDE                : '~' ;
BITWISEOR            : '|' ;
BITWISEOREQUAL       : "|=" ;
BITWISEXOR           : '^' ;
BITWISEXOREQUAL      : "^=" ;

// Zuo: the following tokens come from cplusplus.g

POINTERTOMBR         : "->*" ;
DOTMBR               : ".*" ;

SCOPE                : "::" ;

// DW 10/10/02
// Whitespace -- ignored
Whitespace
	:
		(	(' ' |'\t' | '\f')
			// handle newlines
		|	(	"\r\n"	// MS
			|	'\r'	// Mac
			|	'\n'	// Unix
			)
			{ newline(); }
			// handle continuation lines
		|	(	"\\\r\n"	// MS
			|	"\\\r"		// Mac
			|	"\\\n"		// Unix
			)
			{deferredNewline();}
		)
		{_ttype = ANTLR_USE_NAMESPACE(antlr)Token::SKIP;}
	;

Comment
	:	"/*"
		(	{LA(2) != '/'}?
'*'
		|	EndOfLine {deferredNewline();}
		|	~('*'| '\r' | '\n')
		)*
		"*/"
		{_ttype = ANTLR_USE_NAMESPACE(antlr)Token::SKIP;}
	;

CPPComment
	:	"//" (~('\n' | '\r'))* EndOfLine
		{
		_ttype = ANTLR_USE_NAMESPACE(antlr)Token::SKIP;
		newline();
		}
	;

PREPROC_DIRECTIVE
	options{paraphrase = "a line directive";}
	:	'#' LineDirective
		{
		_ttype = ANTLR_USE_NAMESPACE(antlr)Token::SKIP;
		newline();
		}
	;

// Body of a line directive: optional "line", a decimal line number and an
// optional quoted file name.  Because the file name is optional, the label
// "sl" is still the null token when it is absent; it is only dereferenced
// when a string literal was actually matched.
protected LineDirective
	:	("line")?	// this would be for if the directive started "#line"
		(Space)+
		n:Decimal
		(Space)+
		(sl:StringLiteral)?
		{
		// NOTE(review): without a file name the directive is skipped rather
		// than forwarded - confirm how process_line_directive() should be
		// told "same file, new line number"
		if (sl != ANTLR_USE_NAMESPACE(antlr)nullToken)
			process_line_directive((sl->getText()).data(), (n->getText()).data());	// see main()
		}
		EndOfLine
	;

protected Space
	:	(' ' | '\t' | '\f')
	;

Pragma
	:	('#' "pragma" (~('\r' | '\n'))* EndOfLine)
		{
		_ttype = ANTLR_USE_NAMESPACE(antlr)Token::SKIP;
		newline();
		}
	;

Error
	:	('#' "error" (~('\r' | '\n'))* EndOfLine)
		{
		_ttype = ANTLR_USE_NAMESPACE(antlr)Token::SKIP;
		newline();
		}
	;

/* Literals: */

/*
 * Note that we do NOT handle tri-graphs nor multi-byte sequences.
 */

/*
 * Note that we can't have empty character constants (even though we
 * can have empty strings :-).
 */
CharLiteral
	:	'\'' (Escape | ~( '\'' )) '\''
	;

/*
 * Can't have raw embedded newlines in string constants.  Strict reading of
 * the standard gives odd dichotomy between newlines & carriage returns.
 * Go figure.
 */
StringLiteral
	:	'"'
		(	Escape
		|	(	"\\\r\n"	// MS
			|	"\\\r"		// MAC
			|	"\\\n"		// Unix
			)
			{deferredNewline();}
		|	~('"' | '\r' | '\n' | '\\')
		)*
		'"'
	;

protected EndOfLine
	:	(	options{generateAmbigWarnings = false;}:
			"\r\n"	// MS
		|	'\r'	// Mac
		|	'\n'	// Unix
		)
	;

/*
 * Handle the various escape sequences.
 *
 * Note carefully that these numeric escape *sequences* are *not* of the
 * same form as the C language numeric *constants*.
 *
 * There is no such thing as a binary numeric escape sequence.
 *
 * Octal escape sequences are either 1, 2, or 3 octal digits exactly.
 *
 * There is no such thing as a decimal escape sequence.
*
 * Hexadecimal escape sequences are begun with a leading \x and continue
 * until a non-hexadecimal character is found.
 *
 * No real handling of tri-graph sequences, yet.
 */
// NOTE(review): the digits following the first octal digit are matched with
// Digit ('0'..'9'), so '8' and '9' are accepted there as well
protected Escape
	:	'\\'
		(	options{warnWhenFollowAmbig=false;}:
			'a'
		|	'b'
		|	'f'
		|	'n'
		|	'r'
		|	't'
		|	'v'
		|	'"'
		|	'\''
		|	'\\'
		|	'?'
		|	('0'..'3')
			(options{warnWhenFollowAmbig=false;}:
				Digit
				(options{warnWhenFollowAmbig=false;}:
					Digit
				)?
			)?
		|	('4'..'7')
			(options{warnWhenFollowAmbig=false;}:
				Digit
			)?
		|	'x'
			(options{warnWhenFollowAmbig=false;}:
				Digit
			|	'a'..'f'
			|	'A'..'F'
			)+
		)
	;

/* Numeric Constants: */

protected Digit
	:	'0'..'9'
	;

protected Decimal
	:	('0'..'9')+
	;

protected LongSuffix
	:	'l'
	|	'L'
	;

protected UnsignedSuffix
	:	'u'
	|	'U'
	;

protected FloatSuffix
	:	'f'
	|	'F'
	;

protected Exponent
	:	('e' | 'E') ('+' | '-')? (Digit)+
	;

protected Vocabulary
	:	'\3'..'\377'
	;

// Also produces DOT and ELLIPSIS, since both start with '.' just like a
// floating constant without an integer part
Number
	:	( (Digit)+ ('.' | 'e' | 'E') )=>
		(Digit)+
		(	'.' (Digit)* (Exponent)?
			{_ttype = FLOATONE;}	//Zuo 3/12/01
		|	Exponent
			{_ttype = FLOATTWO;}	//Zuo 3/12/01
		)	//{_ttype = DoubleDoubleConst;}
		(	FloatSuffix		//{_ttype = FloatDoubleConst;}
		|	LongSuffix		//{_ttype = LongDoubleConst;}
		)?

	|	("...")=> "..."
		{_ttype = ELLIPSIS;}

	|	'.'
		{_ttype = DOT;}
		(	(Digit)+ (Exponent)?
			{_ttype = FLOATONE;}	//Zuo 3/12/01
			//{_ttype = DoubleDoubleConst;}
			(	FloatSuffix		//{_ttype = FloatDoubleConst;}
			|	LongSuffix		//{_ttype = LongDoubleConst;}
			)?
		)?

	|	'0' ('0'..'7')*			//{_ttype = IntOctalConst;}
		(	LongSuffix			//{_ttype = LongOctalConst;}
		|	UnsignedSuffix		//{_ttype = UnsignedOctalConst;}
		)*
		{_ttype = OCTALINT;}

	|	'1'..'9' (Digit)*		//{_ttype = IntIntConst;}
		(	LongSuffix			//{_ttype = LongIntConst;}
		|	UnsignedSuffix		//{_ttype = UnsignedIntConst;}
		)*
		{_ttype = DECIMALINT;}

	|	'0' ('x' | 'X') ('a'..'f' | 'A'..'F' | Digit)+
								//{_ttype = IntHexConst;}
		(	LongSuffix			//{_ttype = LongHexConst;}
		|	UnsignedSuffix		//{_ttype = UnsignedHexConst;}
		)*
		{_ttype = HEXADECIMALINT;}
	;

ID
	options {testLiterals = true;}
	:	( 'a'..'z' | 'A'..'Z' | '_' )
		( 'a'..'z' | 'A'..'Z' | '_' | '0'..'9' )*
	;