options {
    language="CSharp";
}

// Lexer for the D programming language (ANTLR 2 grammar, C# target).
//
// Whitespace and all three comment forms are skipped. Identifiers are
// checked against the keyword table declared in the tokens section below.
class DLexer extends Lexer;

options {
    k = 4;                          // 4 characters of lookahead
    testLiterals = false;           // only ID looks itself up in the keyword table
    caseSensitiveLiterals = true;
    charVocabulary = '\u0003'..'\uFFFE';
    classHeaderPrefix = "public";
}

// *** L.1.1 KEYWORDS ***
tokens {
    // grouped alphabetically
    ABSTRACT="abstract"; ALIAS="alias"; ALIGN="align"; ASM="asm";
    ASSERT="assert"; AUTO="auto";

    BIT="bit"; BODY="body"; BOOL="bool"; BREAK="break"; BYTE="byte";

    CASE="case"; CAST="cast"; CATCH="catch"; CDOUBLE="cdouble"; CENT="cent";
    CFLOAT="cfloat"; CLASS="class"; CONST="const"; CONTINUE="continue";
    CREAL="creal";

    DCHAR="dchar"; DEBUG="debug"; DEFAULT="default"; DELEGATE="delegate";
    DELETE="delete";
    // The token keeps its historical (misspelled) name so that existing
    // references to DEPRECIATED still resolve; the literal is the actual
    // D keyword.
    DEPRECIATED="deprecated";
    DO="do"; DOUBLE="double";

    ELSE="else"; ENUM="enum"; EXPORT="export"; EXTERN="extern";

    FALSE="false"; FINAL="final"; FINALLY="finally"; FLOAT="float";
    FOR="for"; FOREACH="foreach"; FUNCTION="function";

    GOTO="goto";

    IDOUBLE="idouble"; IF="if"; IFLOAT="ifloat"; IMPORT="import"; IN="in";
    INOUT="inout"; INT="int"; INTERFACE="interface"; INTERNAL="internal";
    INVARIANT="invariant"; IREAL="ireal"; IS="is";

    LONG="long";

    MIXIN="mixin"; MODULE="module";

    NEW="new"; NULL="null";

    OUT="out"; OVERRIDE="override";

    PACKAGE="package"; PRAGMA="pragma"; PRIVATE="private";
    PROTECTED="protected"; PUBLIC="public";

    REAL="real"; RETURN="return";

    SHORT="short"; STATIC="static"; STRUCT="struct"; SUPER="super";
    SWITCH="switch"; SYNCHRONIZED="synchronized";

    TEMPLATE="template"; THIS="this"; THROW="throw"; TRUE="true"; TRY="try";
    TYPEDEF="typedef"; TYPEID="typeid"; TYPEOF="typeof";

    UBYTE="ubyte"; UCENT="ucent"; UINT="uint"; ULONG="ulong"; UNION="union";
    UNITTEST="unittest"; USHORT="ushort";

    VERSION="version"; VOID="void"; VOLATILE="volatile";

    WCHAR="wchar"; WHILE="while"; WITH="with";

    // imaginary tokens: assigned via $setType, not matched by a rule of
    // the same name (BIN_LITERAL additionally has its own rule below)
    INT_LITERAL;
    FLOAT_LITERAL;
    HEX_FLOAT_LITERAL;
    BIN_LITERAL;
}

// *** L.1.2 WHITESPACE ***

// One line terminator: CR LF, a lone CR, LF, U+2028 or U+2029.
// A lone CR is accepted so that this rule agrees with NEWLINE_CHAR,
// NOT_NEWLINE_CHAR and the string rules, which all treat CR as a
// line terminator. Bumps the lexer's line counter.
protected
NEWLINE
    :   (   options { generateAmbigWarnings=false; } :
            // the predicate makes CR LF count as a single line break
            { LA(2)=='\u000A' }? '\u000D' '\u000A'
        |   '\u000D'
        |   '\n'
        |   '\u2028'    // line separator character
        |   '\u2029'    // paragraph separator character
        )
        {newline();}
    ;

// Any single character that can start a line terminator.
protected
NEWLINE_CHAR
    :   ('\r'|'\n'|'\u2028'|'\u2029')
    ;

// Any single character that is not part of a line terminator.
protected
NOT_NEWLINE_CHAR
    :   ~('\r'|'\n'|'\u2028'|'\u2029')
    ;

// Runs of blanks, tabs, form feeds and line terminators; skipped.
WS
    :   (   ' '
        |   '\t'
        |   '\f'
        |   NEWLINE
        )+
        { $setType(Token.SKIP); }
    ;

// *** L.1.3 LITERALS ***

// Decimal numbers. The token type is rewritten to FLOAT_LITERAL or
// INT_LITERAL depending on which alternative matched.
NUMERIC_LITERAL
    // float: digits '.' digits [exponent] [suffix] [imaginary]
    :   (DIGIT (DIGIT_GROUP)? '.' DIGIT_GROUP) =>
        DIGIT (DIGIT_GROUP)? '.' (DIGIT_GROUP)
        (EXPONENT_PART)? (FLOAT_LITERAL_SUFFIX)? (IMAGINARY_SUFFIX)?
        {$setType(FLOAT_LITERAL);}

    // float: digits exponent [suffix] [imaginary]
    |   (DIGIT (DIGIT_GROUP)? (EXPONENT_PART)) =>
        DIGIT (DIGIT_GROUP)? (EXPONENT_PART)
        (FLOAT_LITERAL_SUFFIX)? (IMAGINARY_SUFFIX)?
        {$setType(FLOAT_LITERAL);}

    // float: digits followed by a suffix that can only belong to a float.
    // A bare 'l'/'L' is deliberately NOT accepted here: "10L" is a long
    // integer and must fall through to the integer alternative below.
    |   (DIGIT (DIGIT_GROUP)?
            ( 'f' | 'F' | ('l'|'L') IMAGINARY_SUFFIX | IMAGINARY_SUFFIX )) =>
        DIGIT (DIGIT_GROUP)?
        (   ('f'|'F') (IMAGINARY_SUFFIX)?
        |   ('l'|'L') IMAGINARY_SUFFIX
        |   IMAGINARY_SUFFIX
        )
        {$setType(FLOAT_LITERAL);}

    // decimal int
    |   DIGIT (DIGIT_GROUP)? (INT_LITERAL_SUFFIX)?
        {$setType(INT_LITERAL);}
    ;

// Hexadecimal integers (type stays HEX_LITERAL) and hexadecimal floats
// (type becomes HEX_FLOAT_LITERAL). The binary exponent accepts either
// case of 'p' and an optional sign, e.g. 0x1.8p-3.
HEX_LITERAL
    :   '0' ('x'|'X') (HEXDIGIT_GROUP)
        (   (INT_LITERAL_SUFFIX)
        |   '.' HEXDIGIT_GROUP ('p'|'P') ('+'|'-')? (DIGIT_GROUP)
            (FLOAT_LITERAL_SUFFIX)? (IMAGINARY_SUFFIX)?
            {$setType(HEX_FLOAT_LITERAL);}
        )?
    ;

// Binary integers, e.g. 0b1010.
BIN_LITERAL
    :   '0' ('b'|'B') (BINDIGIT_GROUP) (INT_LITERAL_SUFFIX)?
    ;

// A single character or escape sequence between single quotes.
// The quotes are dropped from the token text.
CHAR_LITERAL
    :   '\''!
        (   ~('\''|'\\'|'\u000D'|'\u000A'|'\u2028'|'\u2029')
        |   ESCAPE_SEQUENCE
        )
        '\''!
    ;

// Regular (double-quoted) strings. They may span multiple lines; every
// line terminator is normalised to "\n" in the token text. Escape
// sequences are decoded through ESCAPE_SEQUENCE, so an escaped quote
// (\") does not terminate the string.
REGULAR_STRING_LITERAL
{string s="";}
    :   '"'!
        (   { LA(2)=='\n' }? '\r' '\n'  {s+=("\n"); newline();}
        |   '\r'                        {s+=('\n'); newline();}
        |   '\n'                        {s+=('\n'); newline();}
        |   '\u2028'                    {s+=('\n'); newline();}
        |   '\u2029'                    {s+=('\n'); newline();}
        |   esc:ESCAPE_SEQUENCE         {s+=(esc.getText());}
        |   ch:~('"'|'\\'|'\r'|'\n'|'\u2028'|'\u2029') {s+=(ch);}
        )*
        '"'!
        {$setText(s);}
    ;

// The difference between regular and wysiwyg strings is that wysiwyg
// strings don't do escape sequences, except for a doubled double-quote
// meaning '"'. Backslashes are kept literally.
WYSIWYG_STRING_LITERAL
{string s="";}
    :   ('r' '"')!
        (   "\"\""                      {s+=("\"");}
        |   '\\'                        {s+=("\\");}
        |   { LA(2)=='\n' }? '\r' '\n'  {s+=("\n"); newline();}
        |   '\r'                        {s+=('\n'); newline();}
        |   '\n'                        {s+=('\n'); newline();}
        |   '\u2028'                    {s+=('\n'); newline();}
        |   '\u2029'                    {s+=('\n'); newline();}
        |   ch:~('"'|'\\'|'\r'|'\n'|'\u2028'|'\u2029') {s+=(ch);}
        )*
        '"'!
        {$setText(s);}
    ;

// Back-quoted wysiwyg strings; a doubled back-quote stands for '`'.
ALT_WYSIWYG_STRING_LITERAL
{string s="";}
    :   '`'!
        (   "``"                        {s+=("`");}
        |   '\\'                        {s+=("\\");}
        |   { LA(2)=='\n' }? '\r' '\n'  {s+=("\n"); newline();}
        |   '\r'                        {s+=('\n'); newline();}
        |   '\n'                        {s+=('\n'); newline();}
        |   '\u2028'                    {s+=('\n'); newline();}
        |   '\u2029'                    {s+=('\n'); newline();}
        |   ch:~('`'|'\\'|'\r'|'\n'|'\u2028'|'\u2029') {s+=(ch);}
        )*
        '`'!
        {$setText(s);}
    ;

//TODO: hex string literals. ex: char[] foo = x"00 FBCD 32FD 0A";
//TODO: standalone escaped string literals, ex: char[] foo = "test" ~ \r\n;
//TODO: string literal postfix. ex: "abc"c "abc"w "abc"d

// ===== literal helpers ============

//TODO: can't seem to get digit grouping (123_456) to work without ambiguities

// nums
protected
DIGIT : '0'..'9';

protected
DIGIT_GROUP
    :   //('_'! | DIGIT)+   // digit grouping disabled for now
        (DIGIT)+
    ;

//protected
//REVERSE_DIGIT_GROUP : ('_'! | DIGIT)*;

protected
HEXDIGIT : ('a'..'f' | 'A'..'F' | '0'..'9');

protected
HEXDIGIT_GROUP
    :   //('_'! | HEXDIGIT)+   // digit grouping disabled for now
        (HEXDIGIT)+
    ;

//protected
//REVERSE_HEXDIGIT_GROUP : ('_'! | HEXDIGIT)*;

protected
OCTALDIGIT : '0'..'7';

protected
OCTALDIGIT_GROUP
    :   //('_'! | OCTALDIGIT)+   // digit grouping disabled for now
        (OCTALDIGIT)+
    ;

//protected
//REVERSE_OCTALDIGIT_GROUP : ('_'! | OCTALDIGIT)*;

protected
BINDIGIT : ('0'|'1');

protected
BINDIGIT_GROUP
    :   //('_'! | BINDIGIT)
        (BINDIGIT)+
    ;

// Integer suffix: any combination of one 'l'/'L' and one 'u'/'U'.
// The alternatives share prefixes, so ambiguity warnings are switched off.
protected
INT_LITERAL_SUFFIX
    :   (   options {generateAmbigWarnings=false;} :
            "l"  | "L"  | "u"  | "U"
        |   "lu" | "Lu" | "lU" | "LU"
        |   "ul" | "Ul" | "uL" | "UL"
        )
    ;

// Decimal exponent: 'e' or 'E', an optional sign, then digits.
protected
EXPONENT_PART
    :   "e" ('+'|'-')? (DIGIT_GROUP)
    |   "E" ('+'|'-')? (DIGIT_GROUP)
    ;

protected
FLOAT_LITERAL_SUFFIX
    :   "f" | "F" | "l" | "L"
    ;

protected
IMAGINARY_SUFFIX
    :   ('i'|'I')
    ;

// One backslash escape. The rule's text is replaced by the character(s)
// the escape denotes.
//
// The leading backslash is matched with '!' so that it never enters the
// text buffer: the \x, \u and \U actions read their hex digits starting
// at _begin, which is only correct when the backslash is not stored.
protected
ESCAPE_SEQUENCE
    :   '\\'!
        (   ( 'r'!  {$setText("\r"); })
        |   ( 'n'!  {$setText("\n"); })
        |   ( 't'!  {$setText("\t"); })
        |   ( '\\'! {$setText("\\"); })
        |   ( '\''! {$setText("'"); })
        |   ( '"'!  {$setText("\""); })
        |   ( 'v'!  {text.Length = _begin; text.Append("\u000B"); })
        |   ( 'a'!  {text.Length = _begin; text.Append("\a"); })
        |   ( 'b'!  {text.Length = _begin; text.Append("\b"); })
        |   ( 'f'!  {text.Length = _begin; text.Append("\f"); })
        |   ( '0'!  {text.Length = _begin; text.Append("\0"); })
        |   ( 'x'! HEXDIGIT HEXDIGIT
                {
                    char ch = (char)int.Parse(text.ToString(_begin, 2), System.Globalization.NumberStyles.HexNumber);
                    text.Length = _begin;
                    text.Append(ch);
                }
            )
        |   ( 'u'! HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
                {
                    char ch = (char)int.Parse(text.ToString(_begin, 4), System.Globalization.NumberStyles.HexNumber);
                    text.Length = _begin;
                    text.Append(ch);
                }
            )
        |   ( 'U'! HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
                {
                    // Decode the 32-bit code point into UTF-16. Values that
                    // are not valid Unicode scalar values become U+FFFD.
                    uint cp = uint.Parse(text.ToString(_begin, 8), System.Globalization.NumberStyles.HexNumber);
                    text.Length = _begin;
                    if (cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF))
                    {
                        text.Append((char)0xFFFD);
                    }
                    else if (cp < 0x10000)
                    {
                        text.Append((char)cp);
                    }
                    else
                    {
                        cp -= 0x10000;
                        text.Append((char)(0xD800 + (cp >> 10)));
                        text.Append((char)(0xDC00 + (cp & 0x3FF)));
                    }
                }
            )
        // BUG: doesn't handle octal escapes
        )
    ;

// *** L.1.4 COMMENTS ***

// "//" up to the end of the line; skipped. The terminating newline is
// optional so that a comment on the last line of a file (no trailing
// newline) still lexes.
SINGLE_LINE_COMMENT
    :   "//" (NOT_NEWLINE_CHAR)*
        ( options { greedy = true; } : NEWLINE! )?
        { $setType(Token.SKIP); }
    ;

// "/+ ... +/" comments, which may nest; skipped.
NESTED_ML_COMMENT
    :   "/+"
        (   options {warnWhenFollowAmbig = false;} :
            // a '+' only continues the comment when it does not start "+/"
            { LA(2) != '/' }? '+'
        |   ("/+") => NESTED_ML_COMMENT
        |   NEWLINE
        |   ~('+'|'\r'|'\n'|'\u2028'|'\u2029')
        )*
        "+/"
        { $setType(Token.SKIP); }
    ;

// "/* ... */" comments (non-nesting); skipped.
ML_COMMENT
    :   "/*"
        (   // a '*' only continues the comment when it does not start "*/"
            { LA(2) != '/' }? '*'
        |   NEWLINE
        |   ~('*'|'\r'|'\n'|'\u2028'|'\u2029')
        )*
        "*/"
        { $setType(Token.SKIP); }
    ;

// *** L.1.5 IDENTIFIERS

// Identifiers; testLiterals makes the matched text be looked up in the
// keyword table so keywords get their own token types.
ID
options { testLiterals = true; }
    :   ID_LETTER (ID_LETTER | DIGIT)*
    ;

protected
ID_LETTER : ('_' | 'a'..'z' | 'A'..'Z' );

// *** L.1.9 OPERATORS AND PUNCTUATION ***
//TODO: this is not a complete list of D operators and punctuation
// see: http://www.digitalmars.com/d/lex.html

LPAREN    : '('    ;
RPAREN    : ')'    ;
LBRACK    : '['    ;
RBRACK    : ']'    ;
LBRACE    : '{'    ;
RBRACE    : '}'    ;

PLUS      : '+'    ;
PLUS_ASN  : "+="   ;
MINUS     : '-'    ;
MINUS_ASN : "-="   ;
STAR      : '*'    ;
STAR_ASN  : "*="   ;
DIV       : '/'    ;
DIV_ASN   : "/="   ;
MOD       : '%'    ;
MOD_ASN   : "%="   ;
APPEND    : '~'    ;
APPEND_ASN: "~="   ;
INC       : "++"   ;
DEC       : "--"   ;

SL        : "<<"   ;
SL_ASN    : "<<="  ;
SR        : ">>"   ;
SR_ASN    : ">>="  ;
BSR       : ">>>"  ;
BSR_ASN   : ">>>=" ;

BAND      : '&'    ;
BAND_ASN  : "&="   ;
BOR       : '|'    ;
BOR_ASN   : "|="   ;
BXOR      : '^'    ;
BXOR_ASN  : "^="   ;

ASSIGN    : '='    ;
CMP_EQ    : "=="   ;
CMP_LT    : '<'    ;
CMP_LE    : "<="   ;
CMP_GT    : ">"    ;
CMP_GE    : ">="   ;
NOT       : '!'    ;
CMP_NEQ   : "!="   ;
LOR       : "||"   ;
LAND      : "&&"   ;

COMMA     : ','    ;
COLON     : ':'    ;
SEMI      : ';'    ;
HASH      : "#"    ;
QUOTE     : "\""   ;
QMARK     : '?'    ;
DOT       : '.'    ;
DOTDOT    : ".."   ;
DOTDOTDOT : "..."  ;
//ARROW   : "->"   ;