grammar Ctree; //think Iwill remove k=3 here and just make it parse in normal antlr way :) options{ //in theory this is unlimited look ahead.. should therefore work I guess backtrack = true; //memoize = true; language = C; output = AST; ASTLabelType= pANTLR3_BASE_TREE; } /* TODO : find specific stuff for code_coverage - this was the whole point :) make include statements visible */ tokens{ INT='int'; BREAK= 'break'; CHAR='char'; CONTINUE= 'continue'; FLOAT='float'; IF='if'; DOUBLE='double'; VOID='void'; SHORT='short'; LONG='long'; SIGNED='signed'; UNSIGNED='unsigned'; ELSE='else'; STRUCT='struct'; UNION='union'; FOR='for'; AUTO='auto'; DO='do'; EXTERN='extern'; WHILE='while'; REGISTER='register'; SWITCH='switch'; STATIC='static'; CASE='case'; GOTO='goto'; DEFAULT='default'; RETURN='return'; ENTRY='entry'; //apparently reserved for future use :/ SIZEOF='sizeof'; CONST='const'; VOLATILE='volatile'; TYPEDEF='typedef'; ENUM='enum'; DECLARATION; FUNCTION; FUNCBODY; DECLARATORID; COMPOUND; LVALUE; ASSIGNMENT; } //line : (expression ';')+ ; //line : (declaration )+; //line : (statement)+; line : (extern_def)+ ; //( declaration_specifiers* declarator declaration* '{' )=> extern_def : EXTERN^ STRING_CONSTANT '{' line '}' //| (declaration_specifiers* declarator declaration* '{' )=> extern_func_def | declaration; extern_func_def : declaration_specifiers* func_declarator func_body -> ^(FUNCTION declaration_specifiers* func_declarator func_body); func_declarator : declarator; func_body //: ';' //for forward declarations = no body -> don't think this is ever needed here. : declaration* compound_statement -> ^(FUNCBODY declaration compound_statement); statement : ';' //null statement | ('{')=> compound_statement | (WHILE)=>while_statement | (DO)=>do_statement | (FOR)=>for_statement | (SWITCH)=>switch_statement | (CASE|DEFAULT)=>case_statement | (BREAK|CONTINUE)=>break_statement | (RETURN)=>return_statement | (GOTO)=>goto_statement | (IF)=>conditional_statement | expression ';' ; compound_statement : '{' declaration* statement* '}' -> ^(COMPOUND '{' declaration* statement* '}'); conditional_statement : IF^ '(' expression ')' statement ((ELSE)=>ELSE^ statement)?; while_statement : WHILE^ '(' expression ')' statement; do_statement : DO^ statement WHILE '(' expression ')' ';'; for_statement : FOR^ '(' expression? ';' expression? ';' expression? ')' statement; switch_statement : SWITCH^ '(' expression ')' statement; //technically this must be constant expression :/ case_statement : CASE^ unary_expression ':' | DEFAULT^ ':'; break_statement : BREAK^ ';' | CONTINUE^ ';'; return_statement //: RETURN^ ('(' expression ')')? ';' : RETURN^ expression? ';'; goto_statement : GOTO^ expression ';' | IDENTIFIER ':'; //label declaration : typedef_statement | declaration_normal; // ( declaration_specifiers (declaration_specifiers | init_declarator_list) ) => declaration_normal : ( ( declaration_specifiers (declaration_specifiers | init_declarator_list | ';' | '{') ) =>declaration_specifiers)+ init_declarator_list? (';' | func_body) -> ^(DECLARATION declaration_specifiers+ init_declarator_list func_body); typedef_statement :TYPEDEF^ ( (declaration_specifiers (declaration_specifiers | init_declarator_list))=> declaration_specifiers)* init_declarator_list ';'; /* type_decl_list : declarator_list ; // syntactically this is the same as declaration* I think.. // yet somehow it just doesn't budge like that ?!?! declarator_list :( typedef_statement | declaration_normal // ((decl_specifiers init_declarator_list? ';')=> // decl_specifiers init_declarator_list? ';' ) )*; //: (declaration)*; declaration_normal :((decl_specifiers init_declarator_list? ';')=> ( decl_specifiers init_declarator_list? ';' -> ^(DECLARATION decl_specifiers init_declarator_list ';'))); */ //note: currnt decl_specifiers1 is a fix for the grammar here :( // in theory we can throw this out maybe somehow.. // typedef is still a problem /* declaration : typedef_statement | ( (decl_specifiers init_declarator_list? ';')=> decl_specifiers init_declarator_list? ';' -> ^(DECLARATION decl_specifiers init_declarator_list ';')); */ //this is not 100% correct with CONST and VOLATILE like this .. but it should work //figuring out the predicate thing here is a mission! //I figure maybe '(' is still a problem? //posibility one: (decl_posibilities ~(','|'='|'['))=> //Maybe be more specific to IDENTIFIER as that is where the problem lies! // (~( IDENTIFIER (','|';'|'='|'[')) // decl_suffic remain the problem! // '(' was a big mistake! should maybe get red of pred here? // it seems other problems result if I take this away :( /* decl_specifiers1 : ( ( (STRUCT | UNION) | (decl_posibilities (decl_posibilities | declarator)) )=> decl_posibilities)*; //:decl_posibilities*; decl_specifiers : ( ( (STRUCT | UNION) | (decl_posibilities (decl_posibilities | declarator)) )=> decl_posibilities)+; //:decl_posibilities+; decl_posibilities : CONST | VOLATILE | type_specifier | sc_specifier; declarator : (declarator_id | '*' declarator | '(' declarator ')') (('[' | '(')=> decl_suffix )*; declarator_id : IDENTIFIER -> ^(DECLARATORID IDENTIFIER); //parameter_type_list will accept IDENTIFIERS :) decl_suffix : '[' constant_expression? ']' | '(' (parameter_type_list )? ')' ; */ declaration_specifiers : ( sc_specifier | type_specifier | type_qualifier ) ; init_declarator_list : init_declarator (',' init_declarator)*; init_declarator : declarator ('=' initializer)?; declarator : direct_declarator | pointer direct_declarator? ; direct_declarator : (IDENTIFIER | '(' declarator ')' ) declarator_suffix*; declarator_suffix : '[' constant_expression ']' | '[' ']' | '(' parameter_type_list ')' | '(' identifier_list ')' | '(' ')' ; pointer : '*' type_qualifier+ pointer? | '*' pointer | '*' ; identifier_list : IDENTIFIER (',' IDENTIFIER)* ; type_qualifier : 'const' | 'volatile' ; //I have a feeling this should rather just = andorxor_expression constant_expression : andorxor_expression; /* : IDENTIFIER //can potentially be a define or something | INTEGER_CONSTANT | HEX_CONSTANT ;*/ parameter_type_list : parameter_declaration (',' parameter_declaration)* (',' '...')?; parameter_declaration : declaration_specifiers+ (declarator | abstract_declarator)*; abstract_declarator : pointer direct_abstract_declarator? | direct_abstract_declarator ; direct_abstract_declarator : ( '(' abstract_declarator ')' | abstract_declarator_suffix ) abstract_declarator_suffix* ; abstract_declarator_suffix : '[' ']' | '[' constant_expression ']' | '(' ')' | '(' parameter_type_list ')' ; initializer : assignment_expression | '{' initializer_list ','? '}'; initializer_list : initializer (',' initializer)* ; sc_specifier : AUTO | EXTERN | STATIC | REGISTER; type_specifier_fixed : VOID | INT | CHAR | LONG | FLOAT | DOUBLE | SIGNED | UNSIGNED |SHORT ; type_specifier : VOID | INT | CHAR | LONG | FLOAT | DOUBLE | SIGNED | UNSIGNED |SHORT | IDENTIFIER //I need this as I can't figure out if an identifier is a legal type or not | struct_type | enum_type; struct_type : (STRUCT | UNION)^ IDENTIFIER? '{' declaration* '}' | (STRUCT | UNION) IDENTIFIER; //( '{' type_decl_list '}' | IDENTIFIER | IDENTIFIER '{' type_decl_list '}'); enum_type : ENUM IDENTIFIER '{' enumerator_list '}' | ENUM IDENTIFIER; enumerator_list : enumerator (',' enumerator)*; enumerator : IDENTIFIER ('=' constant_expression)?; expression : assignment_expression (',' assignment_expression)*; // technically the parser is incapable of really checking for valid lvalue expressions // so unary_expression is used as that is as close as we can get. lvalue_expression : unary_expression -> ^(LVALUE unary_expression); primary_expression : IDENTIFIER | INTEGER_CONSTANT | HEX_CONSTANT | FLOAT_CONSTANT | CHAR_CONSTANT | STRING_CONSTANT | '(' expression ')' ; //requires expansion: type_id : type_specifier; argument_expression_list : assignment_expression (',' assignment_expression)*; //I'm not sure if the synpred is working correctly! unary_expression : (cast_expression unary_expression)=> cast_expression unary_expression | plus_or_minus_one unary_expression | unary_operator unary_expression // | SIZEOF unary_expression | SIZEOF '(' unary_expression ')' //should change this to type_id later and uncomment previous // | primary_expression //| sc_specifier unary_expression //| type_specifier_fixed unary_expression //| struct_type unary_expression //| (IDENTIFIER ~(','|'='|';'|'['|'('))=> IDENTIFIER unary_expression | postfix_expression ; postfix_expression : primary_expression ( '[' ']' | '[' expression ']' | '(' ')' | '(' argument_expression_list ')' | '.' IDENTIFIER //| '*' IDENTIFIER // what is this?? | '->' IDENTIFIER | plus_or_minus_one )* ; multiplication_expression : unary_expression (mult_operator^ unary_expression)*; additive_expression : multiplication_expression (add_operator^ multiplication_expression)*; shift_expression : additive_expression (shift_operator^ additive_expression)*; relation_expression : shift_expression (relation_operator^ shift_expression)*; equality_expression : relation_expression (equal_operator^ relation_expression)*; andorxor_expression : equality_expression ( andorxor_operator^ equality_expression)*; conditional_expression : andorxor_expression ('?'^ andorxor_expression ':' andorxor_expression )*; assignment_expression : ((lvalue_expression assignment_operator)=> lvalue_expression assignment_operator initializer -> ^(ASSIGNMENT assignment_operator lvalue_expression initializer)) | conditional_expression; //todo: this should accept things like "int" ect! //finish up when doing declerations! cast_expression : '(' specifier_qualifier_list abstract_declarator? ')' ; specifier_qualifier_list : ( type_qualifier | type_specifier )+ ; unary_operator : '*' | '&' | '!' | '-' | '+' | '~' ; mult_operator : '*' | '/' | '%' ; add_operator : '+' | '-' ; shift_operator : '<<' | '>>' ; relation_operator : '>' | '<' | '>=' | '<=' ; equal_operator : '==' | '!='; //thecnially this must be seperated but who cares andorxor_operator : '&' | '|' | '^' | '&&' | '||'; plus_or_minus_one : '++' | '--' ; assignment_operator : '=' | '+=' | '-=' | '*=' | '/=' | '%=' | '<<=' | '>>=' | '&=' | '|=' | '^=' ; INTEGER_CONSTANT : ('0'.. '9')+ IntegerTypeSuffix?; HEX_CONSTANT : ('0x' | '0X') ('0'..'9' | 'A'..'F' | 'a'..'f')+ IntegerTypeSuffix?; FLOAT_CONSTANT : ('0'..'9')+ '.' ('0'..'9')* Exponent? FloatTypeSuffix? | '.' ('0'..'9')+ Exponent? FloatTypeSuffix? | ('0'..'9')+ ( Exponent FloatTypeSuffix? | FloatTypeSuffix) ; //technically this is wrong.. but it's good enough! CHAR_CONSTANT : '\'' ( EscapeSequence | ~('\''|'\\') ) '\''; fragment EscapeSequence : '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\') | OctalEscape ; fragment OctalEscape : '\\' ('0'..'3') ('0'..'7') ('0'..'7') | '\\' ('0'..'7') ('0'..'7') | '\\' ('0'..'7') ; STRING_CONSTANT : '"' ( EscapeSequence | ~('\\'|'"') )* '"'; fragment Exponent : ('e'|'E') ('+'|'-')? ('0'..'9')+ ; fragment FloatTypeSuffix : ('f'|'F'|'d'|'D') ; fragment IntegerTypeSuffix : ('l'|'L') | ('u'|'U') ('l'|'L')? ; IDENTIFIER : LETTER (LETTER|'0'..'9')* ; fragment LETTER : '$' // not sure if $ is really legal? | 'A'..'Z' | 'a'..'z' | '_' ; WHITESPACE : (' '|'\r'|'\t'|'\u000C'|'\n') {$channel=HIDDEN;} ; COMMENT : '/*' ( options {greedy=false;} : . )* '*/' {$channel=HIDDEN;} ; LINE_COMMENT : '//' ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;} ; //pre-processor directives are ignored by this scanner/parser LINE_COMMAND : '#' (('\\' .* '\n' ) |~('\n'|'\r'|'\\'))* '\r'? '\n' {$channel=HIDDEN;} ;