// Arith: combined ANTLR 3 lexer/parser grammar for a small arithmetic language with
// assignments ('X := 1'), input ('read A') and output ('write 42').
// The parser builds an AST (output = AST); the generated code targets Java.
grammar Arith;

options {
    language = Java;
    output = AST;
}

@header {
package de.hshn.se.akse.ss09.parser;
}

@lexer::header {
package de.hshn.se.akse.ss09.parser;
}

//----------------------- Lexer Rules --------------------------//

// Whitespace characters are not important to the parser, so the tokenizer
// moves them to the hidden channel.
WS : (' '|'\t'|'\n'|'\r')+ { $channel=HIDDEN; };

// Zero gets its own token so that special conditions in arithmetic operations
// (division by zero, n * 0, n + 0, n - 0) can be identified.
// Zero: '0'   NOT: '00'
// FIXME: Should we also allow 0.0? (As written, '0.0' matches the FLOAT rule, not ZERO.)
ZERO : '0';

// Natural numbers: '1' , '123' , '1000'   NOT: '01'
INT : '1'..'9' DIGIT* ;

// Positive floating point numbers: '1.0' , '0.234'   NOT: '.0' , '1.0.1'
FLOAT : DIGIT+ '.' DIGIT+;

// A single decimal digit; helper only, never emitted as a token of its own.
fragment DIGIT : '0'..'9';

// Mathematical operators: '+', '-', '*', '/'
PLUS  : '+';
MINUS : '-';
MULT  : '*';
DIV   : '/';

// Assignment operator
ASSIGN : ':=';

// Comparison operators
// NOTE: no parser rule in this grammar references the comparison tokens.
EQ : '==';
NE : '!=';
LE : '<=';
LT : '<';
GE : '>=';
GT : '>';

// Logical operators
// NOTE: no parser rule in this grammar references the logical tokens.
AND : '&&';
OR  : '||';
NOT : '!';

// Additionally, the language supports parentheses: '(' , ')'
LPAREN : '(';
RPAREN : ')';

// Keywords must be listed before the identifier rule; otherwise 'read' would be
// matched as an ID, which is not what we want.
// NOTE: TRUE and FALSE are tokenized but not referenced by any parser rule in this grammar.
READ  : R E A D;
WRITE : W R I T E;
TRUE  : T R U E;
FALSE : F A L S E;

// One fragment per letter, each matching its lower- and upper-case form.
// They make the language keywords (such as READ) case insensitive.
fragment A : ('a'|'A') ;
fragment B : ('b'|'B') ;
fragment C : ('c'|'C') ;
fragment D : ('d'|'D') ;
fragment E : ('e'|'E') ;
fragment F : ('f'|'F') ;
fragment G : ('g'|'G') ;
fragment H : ('h'|'H') ;
fragment I : ('i'|'I') ;
fragment J : ('j'|'J') ;
fragment K : ('k'|'K') ;
fragment L : ('l'|'L') ;
fragment M : ('m'|'M') ;
fragment N : ('n'|'N') ;
fragment O : ('o'|'O') ;
fragment P : ('p'|'P') ;
fragment Q : ('q'|'Q') ;
fragment R : ('r'|'R') ;
fragment S : ('s'|'S') ;
fragment T : ('t'|'T') ;
fragment U : ('u'|'U') ;
fragment V : ('v'|'V') ;
fragment W : ('w'|'W') ;
fragment X : ('x'|'X') ;
fragment Y : ('y'|'Y') ;
fragment Z : ('z'|'Z') ;

// Variable identifiers start with a letter, possibly followed by a sequence of
// letters, digits or the special symbol '_'.
// Examples: 'x' , 'd_x1' , 'x1' , 'account_balance'   NOT: 'x!' , '1x' , '_1'
// The DIGIT fragment is used for the digit alternative so that no complete
// token rule (INT, ZERO) is invoked from inside another token.
ID : LETTER (LETTER | DIGIT | '_')*;

// A single ASCII letter; helper only, never emitted as a token of its own.
fragment LETTER : ('A'..'Z'|'a'..'z');

//----------------------- Parser Rules --------------------------//
// NOTE: The original parser grammar contained empty productions, which
// have been resolved here.

// A program consists of zero or more statements.
// EOF forces the parser to consume the complete input instead of stopping
// quietly at the first token that cannot start a statement; the '!' suffix
// keeps the EOF token out of the AST.
program
    : statement* EOF!
    ;

// A statement is either an assignment or an I/O operation.
// FIXME: This language currently does not accept standalone arithmetic expressions like '3+3'.
statement
    : ID ASSIGN expr -> ^(ASSIGN ID expr)   // assignment, example: 'X := 1'
    | READ ID        -> ^(READ ID)          // 'read A' prompts the user for input and binds A to the supplied input
    | WRITE expr     -> ^(WRITE expr)       // 'write 42' prints 42; 'write A' prints the value of the variable A
    ;

// An expression is broken down into terms to preserve operator precedence and
// remove ambiguity. It consists of at least one term, optionally followed by any
// number of (add_op term) pairs; each add_op becomes the root of the subtree
// built so far.
expr
    : term (add_op^ term)*
    ;

// Terms are the operands of the additive operators. A term consists of at least
// one factor, optionally followed by any number of (mult_op factor) pairs, so
// multiplication and division bind tighter than addition and subtraction.
term
    : factor (mult_op^ factor)*
    ;

// Factors are the elements of an expression that have the highest precedence and
// need to be evaluated first: a parenthesized expression (the parentheses are
// dropped from the AST), a variable, or a numeric literal.
factor
    : LPAREN expr RPAREN -> expr
    | ID
    | number
    ;

// Multiplicative operators.
mult_op
    : MULT
    | DIV
    ;

// Additive operators.
add_op
    : PLUS
    | MINUS
    ;

// Numeric literals.
number
    : INT
    | FLOAT
    | ZERO
    ;