//;-*- mode: antlr -*- /* DotParser Copyright (C) 2006 Jose San Leandro Armendariz chous@acm-sl.org This library is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Thanks to ACM S.L. for distributing this library under the GPL license. Contact info: chous@acm-sl.org Postal Address: c/Playa de Lagoa, 1 Urb. Valdecabanas Boadilla del monte 28660 Madrid Spain ***************************************************************************** * * Filename: $RCSfile: DotLexer.g,v $ * * Author: Jose San Leandro Armendariz * * Description: Clean ANTLR-based lexer according to GraphViz's Dot grammar. 
/*
 * See http://www.graphviz.org/doc/info/lang.html
 */
header
{
    package antlr.grammars.dot;
}

/**
 * Lexer for GraphViz's Dot language.
 *
 * All public (non-protected) rules are LITERALS, WS and COMMENT; every other
 * rule is a protected helper whose name doubles as a token type exported
 * through the "Dot" vocabulary.
 */
class DotLexer extends Lexer;

options
{
    k=1;
    charVocabulary = '\3'..'\377';
    exportVocab=Dot;
}

/*
 * Entry rule for every significant token.
 *
 * Punctuation and edge operators are selected with syntactic predicates
 * because k=1 cannot tell "--"/"->" apart from a negative NUMBER.
 *
 * Keywords are NOT predicted by prefix: a predicate such as ("node") =>
 * also succeeds on identifiers like "node1" or "graphics" and would split
 * them into a keyword followed by a stray ID.  Instead the complete ID is
 * matched first and re-typed only when its whole text is exactly one of
 * the keywords.  Quoted strings keep their quotes in the token text, so
 * "graph" (quoted) stays an ID.
 */
LITERALS
    : ( ("--") => EDGEOP_LITERAL {$setType(EDGEOP_LITERAL);}
      | ("->") => EDGEOP_LITERAL {$setType(EDGEOP_LITERAL);}
      | ("{")  => O_BRACKET      {$setType(O_BRACKET);}
      | ("}")  => C_BRACKET      {$setType(C_BRACKET);}
      | ("[")  => O_SQR_BRACKET  {$setType(O_SQR_BRACKET);}
      | ("]")  => C_SQR_BRACKET  {$setType(C_SQR_BRACKET);}
      | (";")  => SEMI_COLON     {$setType(SEMI_COLON);}
      | ("=")  => EQUAL          {$setType(EQUAL);}
      | (",")  => COMMA          {$setType(COMMA);}
      | (":")  => COLON          {$setType(COLON);}
      | ID
        {
            // Whole-word keyword check (see rule comment above).
            String word = $getText;
            if (word.equals("graph"))
            {
                $setType(GRAPH_LITERAL);
            }
            else if (word.equals("digraph"))
            {
                $setType(DIGRAPH_LITERAL);
            }
            else if (word.equals("strict"))
            {
                $setType(STRICT_LITERAL);
            }
            else if (word.equals("node"))
            {
                $setType(NODE_LITERAL);
            }
            else if (word.equals("edge"))
            {
                $setType(EDGE_LITERAL);
            }
            else
            {
                $setType(ID);
            }
        }
      )
    ;

// Keyword rules.  They are kept so that their token types remain part of
// the exported vocabulary (and for COMPASS_PT, which references one).
protected GRAPH_LITERAL   : "graph";
protected DIGRAPH_LITERAL : "digraph";
protected STRICT_LITERAL  : "strict";
protected NODE_LITERAL    : "node";
protected EDGE_LITERAL    : "edge";

// Punctuation.
protected O_BRACKET     : '{';
protected C_BRACKET     : '}';
protected O_SQR_BRACKET : '[';
protected C_SQR_BRACKET : ']';
protected SEMI_COLON    : ';';
protected EQUAL         : '=';
protected COMMA         : ',';
protected COLON         : ':';

// Edge operators: directed "->" or undirected "--".
protected EDGEOP_LITERAL
    : ( ("->") => "->"
      | ("--") => "--"
      )
    ;

// An identifier: bare word, numeral, double-quoted string or HTML string.
protected ID
    : ( VALIDSTR
      | NUMBER
      | QUOTEDSTR
      | HTMLSTR
      )
    ;

// Compass points.  Not referenced by any other rule in this grammar.
protected COMPASS_PT
    : ( ("ne") => "ne"
      | ("nw") => "nw"
      | ("node") => NODE_LITERAL
      | "n"
      | "e"
      | ("se") => "se"
      | ("sw") => "sw"
      | "s"
      | "w"
      )
    ;

protected ALPHACHAR
    : ( 'a'..'z'
      | 'A'..'Z'
      | '_'
      )
    ;

// A bare identifier: a letter or underscore followed by letters,
// underscores or digits.
protected VALIDSTR
    : ALPHACHAR ( ALPHACHAR | '0'..'9' )*
    ;

// Optionally negative integer or decimal numeral.
protected NUMBER
    : ('-')? ('0'..'9')+ ('.' ('0'..'9')+)?
    ;

// Double-quoted string; \" is the only escape sequence recognised.
protected QUOTEDSTR
    : '"' ( ("\\\"") => "\\\"" | ~('"') )* '"'
    ;

// HTML string.  Angle brackets must come in matched pairs, so nested
// <...> groups are consumed recursively; stopping at the first '>' would
// cut a label such as <<b>text</b>> short.
protected HTMLSTR
    : '<' ( HTMLSTR | ~('<'|'>') )* '>'
    ;

// Whitespace; skipped.  Line breaks update the lexer's line counter.
WS
    : ( ' '
      | '\t'
      | '\r' '\n' { newline(); }
      | '\n'      { newline(); }
      )
      {$setType(Token.SKIP);} // ignore this token
    ;

// Comments (both single-line and multiple-line); skipped.
// newline() is deliberately NOT called here: SL_COMMENT and ML_COMMENT
// already count every line break they consume, and counting again would
// make reported line numbers drift by one per comment.
COMMENT
    : ( ("/*") => ML_COMMENT
      | ("//") => SL_COMMENT
      )
      {$setType(Token.SKIP);}
    ;

// Taken from ANTLR's Java grammar.
// Single-line comments.  The line terminator is optional so that a comment
// on the very last line of the input (no trailing newline) is accepted;
// the line counter only advances when a terminator was actually consumed.
protected SL_COMMENT
    : "//" (~('\n'|'\r'))*
      ( ( '\n' | '\r' ('\n')? ) {newline();} )?
      {$setType(Token.SKIP);}
    ;

// multiple-line comments
protected ML_COMMENT
    : "/*"
      ( /* '\r' '\n' can be matched in one alternative or by matching
           '\r' in one iteration and '\n' in another.  I am trying to
           handle any flavor of newline that comes in, but the language
           that allows both "\r\n" and "\r" and "\n" to all be valid
           newline is ambiguous.  Consequently, the resulting grammar
           must be ambiguous.  I'm shutting this warning off.
         */
        options
        {
            generateAmbigWarnings=false;
        }
      : {LA(2)!='/'}? '*'
      | '\r' '\n' {newline();}
      | '\r'      {newline();}
      | '\n'      {newline();}
      | ~('*'|'\n'|'\r')
      )*
      "*/"
      {$setType(Token.SKIP);}
    ;