
|
If you were logged in you would be able to see more operations.
|
|
|
ANTLR v3
Created: 22/Aug/07 04:47 PM
Updated: 14/Dec/07 01:23 PM
|
|
| Component/s: |
ANTLR Core
|
| Affects Version/s: |
3.0.1
|
| Fix Version/s: |
3.1
|
|
|
grammar Mantra;
options {
output=AST;
}
tokens {
UNIT;
SLIST; // statement list
ELIST; // expression list; args of method call, array ref
CLOSURE; // x:{...}
LIST; // [1,2]
ARG;
FILTER;
MAP; // [x=y, a="hi"]
APPLY; // apply code to expression
SINK; // stream => sink
EXPR; // indicates root of an expression
PROGRAM; // indicates direct execution of code; usually main
METHOD;
FIELD;
CALL;
INDEX;
FIELDACCESS;// o.field
UNARY_MINUS;// unary operators
UNARY_PLUS;
UNARY_NOT;
UNARY_BNOT;
}
@header {
package tool;
}
@lexer::header {
package tool;
}
compilationUnit
: typeDefinition+ -> ^(UNIT typeDefinition+)
| statement+ -> ^(PROGRAM statement+)
;
typeDefinition
: modifier!? classDefinition
| modifier!? interfaceDefinition
;
classDefinition
: 'class' ID
('extends' sup=typename)?
('implements' i+=classname (',' i+=classname)*)?
'{'
field*
methodDefinition[true]*
'}'
-> ^('class' ID ^('extends' $sup)? ^('implements' $i+)?
methodDefinition*
)
;
interfaceDefinition
: 'interface' ID ('extends' sup+=classname (',' sup+=classname)* )?
'{'
( constant
| methodDefinition[false]
)*
'}'
-> ^('interface' ID ^('extends' $sup+)? constant* methodDefinition* )
;
field
: ID '=' expression ';' -> ^(FIELD ID expression)
;
constant
: 'const' ID '=' expression ';' -> ^('const' ID expression)
;
methodDefinition[boolean needBody]
: modifier? mname=ID
'(' idList? ')'
( {needBody}? compoundStatement
| {!needBody}? ';'
)
-> ^(METHOD ID idList? compoundStatement?)
;
modifier
: 'public'
| 'static'
| 'const'
| 'abstract'
;
typename
: classname
| builtInType
;
builtInType
: 'object'
| 'void'
| 'char'
| 'boolean'
| 'int'
| 'float'
| 'long'
| 'double'
| 'stream'
| 'string'
| datastructure
;
datastructure
: 'set'
| 'linkedlist'
| 'list'
| 'map'
;
compoundStatement
: '{' statement+ '}' -> ^(SLIST statement+)
;
statement
: (primary '++')=> primary '++'^ ';'!
| (primary '--')=> primary '--'^ ';'!
| (assignment)=> assignment
| (stream_sink)=> stream_sink
| (completeExpression ';')=> completeExpression ';'! // can this only be a function call?
| 'if' '(' equalityExpression ')' s1=statement
( 'else' s2=statement -> ^('if' ^(EXPR equalityExpression) $s1 $s2)
| -> ^('if' ^(EXPR equalityExpression) $s1)
)
| 'return'^ completeExpression ';'!
| 'break' ';' -> 'break'
| ';'!
;
expressionList
: completeExpression (',' completeExpression)* -> ^(ELIST completeExpression+)
| -> ELIST
;
// flip to var def if first assignment?
assignment
: lvalue
( '='^
| '+='^
| '-='^
| '*='^
| '/='^
)
completeExpression
';'!
;
/** lhs of an assignment */
lvalue
: postfixExpression -> ^(EXPR postfixExpression)
;
completeExpression
: expression -> ^(EXPR expression)
;
expression
: conditionalExpression
;
conditionalExpression
: logicalOrExpression
// ( '?'^ conditionalExpression ':'! conditionalExpression )?
// do we really need this? Ambig with apply closure ':'
;
logicalOrExpression
: logicalAndExpression ('||'^ logicalAndExpression)*
;
logicalAndExpression
: inclusiveOrExpression ('&&'^ inclusiveOrExpression)*
;
inclusiveOrExpression
: exclusiveOrExpression ('|'^ exclusiveOrExpression)*
;
exclusiveOrExpression
: andExpression ('^'^ andExpression)*
;
// bitwise or non-short-circuiting and (&)
andExpression
: equalityExpression ('&'^ equalityExpression)*
;
equalityExpression
: relationalExpression (('!='^ | '=='^ | 'is'^ | 'isnt'^) relationalExpression)*
;
relationalExpression
: shiftExpression
( ( ( '<'^
| '>'^
| '<='^
| '>='^
)
shiftExpression
)*
)
;
shiftExpression
: additiveExpression (('<<'^ | '>>'^) additiveExpression)*
;
additiveExpression
: multiplicativeExpression (('+'^ | '-'^) multiplicativeExpression)*
;
multiplicativeExpression
: unaryExpression (('*'^ | '/'^ | '%'^) unaryExpression)*
;
unaryExpression
: '-' unaryExpression -> ^(UNARY_MINUS unaryExpression)
| '+' unaryExpression -> ^(UNARY_PLUS unaryExpression)
| '!' unaryExpression -> ^(UNARY_NOT unaryExpression)
| '~' unaryExpression -> ^(UNARY_BNOT unaryExpression)
| postfixExpression
;
postfixExpression
: (primary->primary) // set return tree
( '(' args=expressionList ')' -> ^(CALL $postfixExpression $args)
| '[' ie=expression ']' -> ^(INDEX $postfixExpression $ie)
| '.' p=primary -> ^(FIELDACCESS $postfixExpression $p)
| ':' ID -> ^(APPLY ^(EXPR $postfixExpression) ID)
| ':' cl=closure -> ^(APPLY ^(EXPR $postfixExpression) $cl)
)*
;
primary
options {k=1;}
: ID
| 'new' typename '(' expressionList ')' -> ^('new' typename expressionList)
| ( '[' mapelement ) => mapliteral
| listliteral
| NUM_INT
| NUM_FLOAT
| STRING
| CHAR
| 'null'
| 'true'
| 'false'
| 'super'
| 'this'
| '(' e=expression ')' -> $e
| closure
;
/** { x; x!="ter" | println(x); } */
closure
: lc='{' ID
(';' equalityExpression)?
('|' statement+
-> ^(CLOSURE[$lc]
^(ARG ID)
^(FILTER ^(EXPR equalityExpression))?
^(SLIST statement+)
)
| -> ^(CLOSURE[$lc]
^(ARG ID)
^(FILTER ^(EXPR equalityExpression))?
)
)
'}'
;
stream_sink
: completeExpression '=>' completeExpression ';' -> ^(SINK completeExpression+)
;
listliteral
: '[' expressionList ']' -> ^(LIST expressionList)
;
mapliteral
: '[' mapelement (',' mapelement)* ']' -> ^(MAP mapelement+)
;
mapelement
: completeExpression '='^ completeExpression
;
classname
: ID
;
idList
: ID (',' ID)* -> ID+
;
ID : ('a'..'z'|'A'..'Z'|'_'|'$') ('a'..'z'|'A'..'Z'|'_'|'0'..'9'|'$')*
;
NUM_INT
: DECIMAL_LITERAL
| HEX_LITERAL
| OCTAL_LITERAL
;
fragment
DECIMAL_LITERAL: '1'..'9' ('0'..'9')* ('l'|'L')? ;
fragment
HEX_LITERAL: '0' ('x'|'X') ('0'..'9'|'a'..'f'|'A'..'F')+ ('l'|'L')? ;
fragment
OCTAL_LITERAL: '0' ('0'..'7')* ('l'|'L')? ;
NUM_FLOAT
: DIGITS '.' (DIGITS)? (EXPONENT_PART)? (FLOAT_TYPE_SUFFIX)?
| '.' DIGITS (EXPONENT_PART)? (FLOAT_TYPE_SUFFIX)?
| DIGITS EXPONENT_PART FLOAT_TYPE_SUFFIX
| DIGITS EXPONENT_PART
| DIGITS FLOAT_TYPE_SUFFIX
;
fragment
DIGITS : ('0'..'9')+ ;
fragment
EXPONENT_PART: ('e'|'E') ('+'|'-')? DIGITS ;
fragment
FLOAT_TYPE_SUFFIX : ('f'|'F'|'d'|'D') ;
STRING
: '\"'
( options {greedy=false;}
: ESCAPE_SEQUENCE
| ~'\\'
)*
'\"'
;
CHAR
: '\''
( ESCAPE_SEQUENCE
| ~'\''
)
'\''
;
fragment
ESCAPE_SEQUENCE
: '\\' 'b'
| '\\' 't'
| '\\' 'n'
| '\\' 'f'
| '\\' 'r'
| '\\' '\"'
| '\\' '\''
| '\\' '\\'
| '\\' '0'..'3' OCTAL_DIGIT OCTAL_DIGIT
| '\\' OCTAL_DIGIT OCTAL_DIGIT
| '\\' OCTAL_DIGIT
| UNICODE_CHAR
;
fragment
HEX_DIGIT
: '0'..'9'|'a'..'f'|'A'..'F'
;
fragment
OCTAL_DIGIT
: '0'..'7'
;
fragment
UNICODE_CHAR
: '\\' 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
;
SL_COMMENT
: '//' ~'\n'* '\n' {$channel=HIDDEN; }
;
ML_COMMENT
: '/*'
( options {greedy=false;} : . )*
'*/'
{$channel=HIDDEN;}
;
WS : ( ' '
| '\t'
| '\r'? '\n'
)+
{ $channel=HIDDEN; }
;
|
|
Description
|
grammar Mantra;
options {
output=AST;
}
tokens {
UNIT;
SLIST; // statement list
ELIST; // expression list; args of method call, array ref
CLOSURE; // x:{...}
LIST; // [1,2]
ARG;
FILTER;
MAP; // [x=y, a="hi"]
APPLY; // apply code to expression
SINK; // stream => sink
EXPR; // indicates root of an expression
PROGRAM; // indicates direct execution of code; usually main
METHOD;
FIELD;
CALL;
INDEX;
FIELDACCESS;// o.field
UNARY_MINUS;// unary operators
UNARY_PLUS;
UNARY_NOT;
UNARY_BNOT;
}
@header {
package tool;
}
@lexer::header {
package tool;
}
compilationUnit
: typeDefinition+ -> ^(UNIT typeDefinition+)
| statement+ -> ^(PROGRAM statement+)
;
typeDefinition
: modifier!? classDefinition
| modifier!? interfaceDefinition
;
classDefinition
: 'class' ID
('extends' sup=typename)?
('implements' i+=classname (',' i+=classname)*)?
'{'
field*
methodDefinition[true]*
'}'
-> ^('class' ID ^('extends' $sup)? ^('implements' $i+)?
methodDefinition*
)
;
interfaceDefinition
: 'interface' ID ('extends' sup+=classname (',' sup+=classname)* )?
'{'
( constant
| methodDefinition[false]
)*
'}'
-> ^('interface' ID ^('extends' $sup+)? constant* methodDefinition* )
;
field
: ID '=' expression ';' -> ^(FIELD ID expression)
;
constant
: 'const' ID '=' expression ';' -> ^('const' ID expression)
;
methodDefinition[boolean needBody]
: modifier? mname=ID
'(' idList? ')'
( {needBody}? compoundStatement
| {!needBody}? ';'
)
-> ^(METHOD ID idList? compoundStatement?)
;
modifier
: 'public'
| 'static'
| 'const'
| 'abstract'
;
typename
: classname
| builtInType
;
builtInType
: 'object'
| 'void'
| 'char'
| 'boolean'
| 'int'
| 'float'
| 'long'
| 'double'
| 'stream'
| 'string'
| datastructure
;
datastructure
: 'set'
| 'linkedlist'
| 'list'
| 'map'
;
compoundStatement
: '{' statement+ '}' -> ^(SLIST statement+)
;
statement
: (primary '++')=> primary '++'^ ';'!
| (primary '--')=> primary '--'^ ';'!
| (assignment)=> assignment
| (stream_sink)=> stream_sink
| (completeExpression ';')=> completeExpression ';'! // can this only be a function call?
| 'if' '(' equalityExpression ')' s1=statement
( 'else' s2=statement -> ^('if' ^(EXPR equalityExpression) $s1 $s2)
| -> ^('if' ^(EXPR equalityExpression) $s1)
)
| 'return'^ completeExpression ';'!
| 'break' ';' -> 'break'
| ';'!
;
expressionList
: completeExpression (',' completeExpression)* -> ^(ELIST completeExpression+)
| -> ELIST
;
// flip to var def if first assignment?
assignment
: lvalue
( '='^
| '+='^
| '-='^
| '*='^
| '/='^
)
completeExpression
';'!
;
/** lhs of an assignment */
lvalue
: postfixExpression -> ^(EXPR postfixExpression)
;
completeExpression
: expression -> ^(EXPR expression)
;
expression
: conditionalExpression
;
conditionalExpression
: logicalOrExpression
// ( '?'^ conditionalExpression ':'! conditionalExpression )?
// do we really need this? Ambig with apply closure ':'
;
logicalOrExpression
: logicalAndExpression ('||'^ logicalAndExpression)*
;
logicalAndExpression
: inclusiveOrExpression ('&&'^ inclusiveOrExpression)*
;
inclusiveOrExpression
: exclusiveOrExpression ('|'^ exclusiveOrExpression)*
;
exclusiveOrExpression
: andExpression ('^'^ andExpression)*
;
// bitwise or non-short-circuiting and (&)
andExpression
: equalityExpression ('&'^ equalityExpression)*
;
equalityExpression
: relationalExpression (('!='^ | '=='^ | 'is'^ | 'isnt'^) relationalExpression)*
;
relationalExpression
: shiftExpression
( ( ( '<'^
| '>'^
| '<='^
| '>='^
)
shiftExpression
)*
)
;
shiftExpression
: additiveExpression (('<<'^ | '>>'^) additiveExpression)*
;
additiveExpression
: multiplicativeExpression (('+'^ | '-'^) multiplicativeExpression)*
;
multiplicativeExpression
: unaryExpression (('*'^ | '/'^ | '%'^) unaryExpression)*
;
unaryExpression
: '-' unaryExpression -> ^(UNARY_MINUS unaryExpression)
| '+' unaryExpression -> ^(UNARY_PLUS unaryExpression)
| '!' unaryExpression -> ^(UNARY_NOT unaryExpression)
| '~' unaryExpression -> ^(UNARY_BNOT unaryExpression)
| postfixExpression
;
postfixExpression
: (primary->primary) // set return tree
( '(' args=expressionList ')' -> ^(CALL $postfixExpression $args)
| '[' ie=expression ']' -> ^(INDEX $postfixExpression $ie)
| '.' p=primary -> ^(FIELDACCESS $postfixExpression $p)
| ':' ID -> ^(APPLY ^(EXPR $postfixExpression) ID)
| ':' cl=closure -> ^(APPLY ^(EXPR $postfixExpression) $cl)
)*
;
primary
options {k=1;}
: ID
| 'new' typename '(' expressionList ')' -> ^('new' typename expressionList)
| ( '[' mapelement ) => mapliteral
| listliteral
| NUM_INT
| NUM_FLOAT
| STRING
| CHAR
| 'null'
| 'true'
| 'false'
| 'super'
| 'this'
| '(' e=expression ')' -> $e
| closure
;
/** { x; x!="ter" | println(x); } */
closure
: lc='{' ID
(';' equalityExpression)?
('|' statement+
-> ^(CLOSURE[$lc]
^(ARG ID)
^(FILTER ^(EXPR equalityExpression))?
^(SLIST statement+)
)
| -> ^(CLOSURE[$lc]
^(ARG ID)
^(FILTER ^(EXPR equalityExpression))?
)
)
'}'
;
stream_sink
: completeExpression '=>' completeExpression ';' -> ^(SINK completeExpression+)
;
listliteral
: '[' expressionList ']' -> ^(LIST expressionList)
;
mapliteral
: '[' mapelement (',' mapelement)* ']' -> ^(MAP mapelement+)
;
mapelement
: completeExpression '='^ completeExpression
;
classname
: ID
;
idList
: ID (',' ID)* -> ID+
;
ID : ('a'..'z'|'A'..'Z'|'_'|'$') ('a'..'z'|'A'..'Z'|'_'|'0'..'9'|'$')*
;
NUM_INT
: DECIMAL_LITERAL
| HEX_LITERAL
| OCTAL_LITERAL
;
fragment
DECIMAL_LITERAL: '1'..'9' ('0'..'9')* ('l'|'L')? ;
fragment
HEX_LITERAL: '0' ('x'|'X') ('0'..'9'|'a'..'f'|'A'..'F')+ ('l'|'L')? ;
fragment
OCTAL_LITERAL: '0' ('0'..'7')* ('l'|'L')? ;
NUM_FLOAT
: DIGITS '.' (DIGITS)? (EXPONENT_PART)? (FLOAT_TYPE_SUFFIX)?
| '.' DIGITS (EXPONENT_PART)? (FLOAT_TYPE_SUFFIX)?
| DIGITS EXPONENT_PART FLOAT_TYPE_SUFFIX
| DIGITS EXPONENT_PART
| DIGITS FLOAT_TYPE_SUFFIX
;
fragment
DIGITS : ('0'..'9')+ ;
fragment
EXPONENT_PART: ('e'|'E') ('+'|'-')? DIGITS ;
fragment
FLOAT_TYPE_SUFFIX : ('f'|'F'|'d'|'D') ;
STRING
: '\"'
( options {greedy=false;}
: ESCAPE_SEQUENCE
| ~'\\'
)*
'\"'
;
CHAR
: '\''
( ESCAPE_SEQUENCE
| ~'\''
)
'\''
;
fragment
ESCAPE_SEQUENCE
: '\\' 'b'
| '\\' 't'
| '\\' 'n'
| '\\' 'f'
| '\\' 'r'
| '\\' '\"'
| '\\' '\''
| '\\' '\\'
| '\\' '0'..'3' OCTAL_DIGIT OCTAL_DIGIT
| '\\' OCTAL_DIGIT OCTAL_DIGIT
| '\\' OCTAL_DIGIT
| UNICODE_CHAR
;
fragment
HEX_DIGIT
: '0'..'9'|'a'..'f'|'A'..'F'
;
fragment
OCTAL_DIGIT
: '0'..'7'
;
fragment
UNICODE_CHAR
: '\\' 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
;
SL_COMMENT
: '//' ~'\n'* '\n' {$channel=HIDDEN; }
;
ML_COMMENT
: '/*'
( options {greedy=false;} : . )*
'*/'
{$channel=HIDDEN;}
;
WS : ( ' '
| '\t'
| '\r'? '\n'
)+
{ $channel=HIDDEN; }
;
|
Show » |
|
parser grammar DFABug;
options {
// language=C++;
tokenVocab=DFABugTokens;
output=AST;
}
delimiter
: null_lines empty_lines?
| empty_lines
;
null_lines
: null_line+
;
null_line
: empty_lines? ( COMMA | SEMI )!
;
empty_lines
: line+
;
line : ( COMMENT | NEWLINES )!
;
statement_list
: statement ( delimiter statement )* delimiter?
;
statement
options {
k=2;
}
: (reference ASSIGN)=> reference ASSIGN^ expr
| expr
;
expr : reference
| INTEGER
| DOUBLE
| IMAGINARY
| TEXT
;
reference
: IDENTIFIER^ LPAREN! argument_list RPAREN!
;
argument_list
: expr ( COMMA! expr )*
;
He comments:
In this grammar, I'm using a syntactic predicate to differentiate between an assignment and expression-evaluation statements (see the "statement" rule below). Antlr accepts the parser definition without error.
The problem occurs when the parser attempts to parse a rather simple file:
y(1) = 0
y(1)
That is, it gives the error
line 1:5 no viable alternative at input '='
Perhaps there is an error in the parser definition, but the peculiar thing is that when, on the "statement" rule, I reduce "k" to 2, i.e.
statement:
options {
k=2;
}
: ...
The parser works correctly.
I also tried k at various other values. When it is set at a value of 10, I get the following message:
ANTLR Parser Generator Version 3.0.1 (August 13, 2007) 1989-2007
warning(205): DFABug.g:37:2: ANTLR could not analyze this decision in rule statement; often this is because of recursive rule references visible from the left edge of alternatives. ANTLR will re-analyze the decision with a fixed lookahead of k=1. Consider using "options {k=1;}" for that decision and possibly adding a syntactic predicate.
The warning message isn't really applicable, as the rule is not recursive and already has a syntactic predicate.
Is this normal behavior? I.e. when using syntactic predicates, is it normal to have to give "k" a small finite value to get the lookahead to work properly, rather than simply to enhance efficiency?
TJP: I get no errors emitted from 3.1 (vs 3.0.1) and yet I get the bad DFA for decision 8. (I will try to attach the DFA view).