grammar Arith;

options {
    language  = Java;
    output    = AST;
    backtrack = true;
}

// Imaginary tokens: no lexer rule produces them; they only serve as AST node
// types in the rewrite rules below. In the rewrites, I* nodes are built from
// int constructs, F* nodes from float constructs and BASSIGN from bool
// assignments. The declaration order is kept stable on purpose.
tokens {
    UMINUS;
    PROG;
    IPLUS;
    IMINUS;
    IMULT;
    IDIV;
    IUMINUS;
    IREAD;
    IWRITE;
    IASSIGN;
    FPLUS;
    FMINUS;
    FMULT;
    FDIV;
    FUMINUS;
    FREAD;
    FWRITE;
    FASSIGN;
    CAST_I2F;
    BASSIGN;
}

@header {
package de.hshn.se.akse.ss09.parser;
import java.util.HashMap;
}

@members {
    // Symbol table: variable name -> declared type ("int", "float" or "bool").
    // Filled by the actions in 'input' and 'assignment', queried by the
    // semantic predicates in the *_factor and bool_atom rules.
    HashMap<String, String> types = new HashMap<String, String>();

    // Fail fast: instead of attempting single-token insertion/deletion,
    // report the mismatch to the caller immediately.
    @Override
    protected Object recoverFromMismatchedToken(IntStream input, int ttype, BitSet follow)
            throws RecognitionException {
        throw new MismatchedTokenException(ttype, input);
    }

    // Fail fast for set mismatches as well: rethrow the original exception.
    @Override
    public Object recoverFromMismatchedSet(IntStream input, RecognitionException e, BitSet follow)
            throws RecognitionException {
        throw e;
    }
}

// Every rule rethrows recognition errors instead of recovering from them.
@rulecatch {
    catch (RecognitionException e) {
        throw e;
    }
}

@lexer::header {
package de.hshn.se.akse.ss09.parser;
}

//----------------------- Lexer Rules --------------------------//

// Whitespace characters are not important to the parser, so the tokenizer
// moves them to the hidden channel.
WS  : (' ' | '\t' | '\n' | '\r')+ { $channel = HIDDEN; }
    ;

// Natural numbers: '1' , '123' , '1000'   NOT: '01'
INT : '1'..'9' DIGIT*
    | '0'
    ;

// Positive floating point numbers: '1.0' , '0.234'   NOT: '.0' , '1.0.1'
FLOAT
    : DIGIT+ '.' DIGIT+
    ;

fragment DIGIT : '0'..'9' ;

// Mathematical operators: '+', '-', '*', '/'
PLUS  : '+' ;
MINUS : '-' ;
MULT  : '*' ;
DIV   : '/' ;

// Assignment operator
ASSIGN : ':=' ;

// Comparison operators
EQ  : '==' ;
NEQ : '!=' ;
LE  : '<=' ;
LT  : '<' ;
GE  : '>=' ;
GT  : '>' ;

// Logical operators
AND : '&&' ;
OR  : '||' ;
NOT : '!' ;

// Additionally, the language supports parentheses: '(' , ')'
LPAREN : '(' ;
RPAREN : ')' ;

// Keywords need to be filtered out before we come to the identifier rule,
// otherwise 'read' would be matched as an ID, which is not what we want.
READ  : R E A D ;
WRITE : W R I T E ;
TRUE  : T R U E ;
FALSE : F A L S E ;

// One fragment per letter, matching both cases, so that keywords such as
// READ can be written case-insensitively. (There is no fragment for Q.)
fragment A : ('a' | 'A') ;
fragment B : ('b' | 'B') ;
fragment C : ('c' | 'C') ;
fragment D : ('d' | 'D') ;
fragment E : ('e' | 'E') ;
fragment F : ('f' | 'F') ;
fragment G : ('g' | 'G') ;
fragment H : ('h' | 'H') ;
fragment I : ('i' | 'I') ;
fragment J : ('j' | 'J') ;
fragment K : ('k' | 'K') ;
fragment L : ('l' | 'L') ;
fragment M : ('m' | 'M') ;
fragment N : ('n' | 'N') ;
fragment O : ('o' | 'O') ;
fragment P : ('p' | 'P') ;
fragment R : ('r' | 'R') ;
fragment S : ('s' | 'S') ;
fragment T : ('t' | 'T') ;
fragment U : ('u' | 'U') ;
fragment V : ('v' | 'V') ;
fragment W : ('w' | 'W') ;
fragment X : ('x' | 'X') ;
fragment Y : ('y' | 'Y') ;
fragment Z : ('z' | 'Z') ;

// Variable identifiers start with a letter, possibly followed by a sequence
// of letters, digits or the special symbol '_'.
// Examples: 'x' , 'd_x1' , 'x1' , 'account_balance'   NOT: 'x!' , '1x' , '_1'
// DIGIT (not the INT token rule) is used for the trailing characters: it
// accepts the same digit sequences without nesting a second loop inside ID.
ID  : LETTER (LETTER | DIGIT | '_')*
    ;

fragment LETTER : ('A'..'Z' | 'a'..'z') ;

//----------------------- Parser Rules --------------------------//

// NOTE: The original parser grammar contained empty productions which
// have been resolved here.

// A program consists of zero or more statements.
// EOF is matched explicitly so that input which cannot start a statement is
// reported as an error instead of being silently ignored after the last
// statement that did parse.
program
    : statement* EOF -> ^(PROG statement*)
    ;

// A statement can either be an assignment or an I/O operation.
// FIXME: This language currently does not accept standalone arithmetic
// expressions like '3+3'.
statement
    : assignment
    | input
    | output
    ;

// 'read int A' / 'read float A' prompts the user for input and binds A to the
// supplied input; the declared type of A is recorded in 'types'.
input
    : READ 'int' ID   {types.put($ID.text, "int");}   -> ^(IREAD ID)
    | READ 'float' ID {types.put($ID.text, "float");} -> ^(FREAD ID)
    ;

// 'write 42' prints 42. The int alternative is tried first; the float
// alternative is the fallback.
output
    : WRITE int_expr   -> ^(IWRITE int_expr)
    | WRITE float_expr -> ^(FWRITE float_expr)
    ;

// Typed assignment; the declared type of the variable is recorded in 'types'
// once the right-hand side has been parsed.
// NOTE(review): the third alternative looks unreachable, because float_expr
// already accepts INT literals and int variables, so the second alternative
// should match first -- confirm before removing.
assignment
    : 'int' ID ASSIGN int_expr        {types.put($ID.text, "int");}   -> ^(IASSIGN ID int_expr)       // example: 'int X := 1'
    | 'float' ID ASSIGN float_expr    {types.put($ID.text, "float");} -> ^(FASSIGN ID float_expr)     // example: 'float X := 1.0'
    | 'float' ID ASSIGN int_expr      {types.put($ID.text, "float");} -> ^(FASSIGN ID int_expr)       // example: 'float X := 1'
    | 'bool' ID ASSIGN or_expression  {types.put($ID.text, "bool");}  -> ^(BASSIGN ID or_expression)  // example: 'bool X := 3 < 4'
    ;

// An arithmetic expression is either int-typed or float-typed; int is tried
// first.
expr
    : int_expr
    | float_expr
    ;

// An expression is broken down into terms.
// That is done to preserve operator precedence and remove ambiguity.
// An expression consists of at least one term, which can be followed by
// further terms joined by additive operators.
int_expr
    : ( int_term -> int_term )
      ( int_add_op i=int_term -> ^(int_add_op $int_expr $i) )*
    ;

// Multiplicative level for ints. Each further operand wraps the tree built
// so far ($int_term) as the left child of the new operator node.
int_term
    : ( unary_int_expr -> unary_int_expr )
      ( int_mult_op i=unary_int_expr -> ^(int_mult_op $int_term $i) )*
    ;

// Optional unary minus in front of an int factor.
unary_int_expr
    : MINUS int_factor -> ^(IUMINUS int_factor)
    | int_factor
    ;

// Int operands: a parenthesised int expression, an identifier that 'types'
// records as "int", or an INT literal.
int_factor
    : LPAREN int_expr RPAREN -> int_expr
    | {types.containsKey(input.LT(1).getText())}?
      {types.get(input.LT(1).getText()).equals("int")}?
      ID
    | INT
    ;

// The operator rules replace the lexer token by the typed AST node type.
fragment int_mult_op
    : MULT -> IMULT
    | DIV  -> IDIV
    ;

fragment int_add_op
    : PLUS  -> IPLUS
    | MINUS -> IMINUS
    ;

// Additive level for floats; same tree-building scheme as int_expr.
float_expr
    : ( float_term -> float_term )
      ( float_add_op i=float_term -> ^(float_add_op $float_expr $i) )*
    ;

// Multiplicative level for floats.
float_term
    : ( unary_float_expr -> unary_float_expr )
      ( float_mult_op i=unary_float_expr -> ^(float_mult_op $float_term $i) )*
    ;

// Optional unary minus in front of a float factor.
unary_float_expr
    : MINUS float_factor -> ^(FUMINUS float_factor)
    | float_factor
    ;

// Float operands. Besides float variables and FLOAT literals, int operands
// are accepted and widened: an int variable is wrapped in a CAST_I2F node and
// an INT literal is rewritten into a FLOAT token with ".0" appended.
float_factor
    : LPAREN float_expr RPAREN -> float_expr
    | {types.containsKey(input.LT(1).getText())}?
      {types.get(input.LT(1).getText()).equals("float")}?
      ID
    | {types.containsKey(input.LT(1).getText())}?
      {types.get(input.LT(1).getText()).equals("int")}?
      ID -> ^(CAST_I2F ID)
    | INT -> {new CommonTree(new CommonToken(FLOAT,$INT.text+".0"))}
    | FLOAT
    ;

fragment float_add_op
    : PLUS  -> FPLUS
    | MINUS -> FMINUS
    ;

fragment float_mult_op
    : MULT -> FMULT
    | DIV  -> FDIV
    ;

// Boolean expressions, loosest binding first: OR, then AND, then ==/!=.
// The '^' suffix makes the operator token the root of the subtree.
or_expression
    : and_expression (OR^ and_expression)*
    ;

and_expression
    : euqality_expression (AND^ euqality_expression)*
    ;

euqality_expression
    : unary_bool_expr (equality_op^ unary_bool_expr)*
    ;

// Arithmetic operands optionally chained with <, <=, >, >=. A single expr
// without any operator is also matched by this rule.
relational_expression
    : expr (relational_op^ expr)*
    ;

fragment relational_op
    : GT
    | GE
    | LT
    | LE
    ;

fragment equality_op
    : EQ
    | NEQ
    ;

// Optional logical negation of a boolean atom.
unary_bool_expr
    : NOT^ bool_atom
    | bool_atom
    ;

// Boolean operands: a parenthesised boolean expression, an identifier that
// 'types' records as "bool", a relational expression, or a boolean literal.
bool_atom
    : LPAREN or_expression RPAREN -> or_expression
    | {types.containsKey(input.LT(1).getText())}?
      {types.get(input.LT(1).getText()).equals("bool")}?
      ID
    | relational_expression
    | TRUE
    | FALSE
    ;