diff --git a/tool/src/main/java/org/antlr/codegen/CodeGenerator.java b/tool/src/main/java/org/antlr/codegen/CodeGenerator.java index b279bd565..df940ebf7 100644 --- a/tool/src/main/java/org/antlr/codegen/CodeGenerator.java +++ b/tool/src/main/java/org/antlr/codegen/CodeGenerator.java @@ -861,10 +861,11 @@ protected void genTokenTypeConstants(ST code) { protected void genTokenTypeNames(ST code) { for (int t=Label.MIN_TOKEN_TYPE; t<=grammar.getMaxTokenType(); t++) { String tokenName = grammar.getTokenDisplayName(t); - if ( tokenName!=null ) { + if (tokenName != null) { tokenName=target.getTargetStringLiteralFromString(tokenName, true); - code.add("tokenNames", tokenName); } + + code.add("tokenNames", tokenName); } } diff --git a/tool/src/main/java/org/antlr/tool/AssignTokenTypesBehavior.java b/tool/src/main/java/org/antlr/tool/AssignTokenTypesBehavior.java index cc7c0f4e1..6b56b6a27 100644 --- a/tool/src/main/java/org/antlr/tool/AssignTokenTypesBehavior.java +++ b/tool/src/main/java/org/antlr/tool/AssignTokenTypesBehavior.java @@ -271,9 +271,14 @@ protected void aliasTokenIDsAndLiterals(Grammar root) { String tokenID = entry.getKey(); String literal = entry.getValue(); if ( literal.charAt(0)=='\'' && stringLiterals.get(literal)!=null ) { - stringLiterals.put(literal, tokens.get(tokenID)); - // an alias still means you need a lexer rule for it Integer typeI = tokens.get(tokenID); + if (typeI == null) { + // must have been imported from a tokenVocab + typeI = grammar.composite.tokenIDToTypeMap.get(tokenID); + } + + stringLiterals.put(literal, typeI); + // an alias still means you need a lexer rule for it if ( !tokenRuleDefs.contains(tokenID) ) { root.defineLexerRuleForAliasedStringLiteral(tokenID, literal, typeI); } diff --git a/tool/src/main/resources/org/antlr/codegen/templates/ActionScript/ActionScript.stg b/tool/src/main/resources/org/antlr/codegen/templates/ActionScript/ActionScript.stg index 1fdeadc95..436061ca8 100644 --- a/tool/src/main/resources/org/antlr/codegen/templates/ActionScript/ActionScript.stg +++ b/tool/src/main/resources/org/antlr/codegen/templates/ActionScript/ActionScript.stg @@ -173,7 +173,7 @@ genericParser(grammar, name, scopes, tokens, tokenNames, rules, numRules, public class extends <@superClassName><@end> { public static const tokenNames:Array = [ - "\", "\", "\", "\", + "\", "\", "\", "\", \""> ];<\n> :int=;}; separator="\n"> diff --git a/tool/src/main/resources/org/antlr/codegen/templates/C/C.stg b/tool/src/main/resources/org/antlr/codegen/templates/C/C.stg index 63646a707..5e7ae0f40 100644 --- a/tool/src/main/resources/org/antlr/codegen/templates/C/C.stg +++ b/tool/src/main/resources/org/antlr/codegen/templates/C/C.stg @@ -1195,7 +1195,7 @@ pANTLR3_UINT8 TokenNames[+4] (pANTLR3_UINT8) "\", (pANTLR3_UINT8) "\", (pANTLR3_UINT8) "\", - }; separator=",\n"> + }; separator=",\n", null="(pANTLR3_UINT8) \"\""> }; diff --git a/tool/src/main/resources/org/antlr/codegen/templates/CSharp2/CSharp2.stg b/tool/src/main/resources/org/antlr/codegen/templates/CSharp2/CSharp2.stg index 8f15ef271..c037dc6b2 100644 --- a/tool/src/main/resources/org/antlr/codegen/templates/CSharp2/CSharp2.stg +++ b/tool/src/main/resources/org/antlr/codegen/templates/CSharp2/CSharp2.stg @@ -287,7 +287,7 @@ genericParser(grammar, name, scopes, tokens, tokenNames, rules, numRules, { internal static readonly string[] tokenNames = new string[] { - "\", "\", "\", "\", + "\", "\", "\", "\", \""> }; =;}; separator="\n"> diff --git a/tool/src/main/resources/org/antlr/codegen/templates/CSharp3/CSharp3.stg b/tool/src/main/resources/org/antlr/codegen/templates/CSharp3/CSharp3.stg index c9229e176..247e9e0d6 100644 --- a/tool/src/main/resources/org/antlr/codegen/templates/CSharp3/CSharp3.stg +++ b/tool/src/main/resources/org/antlr/codegen/templates/CSharp3/CSharp3.stg @@ -282,7 +282,7 @@ genericParser(grammar, name, scopes, tokens, tokenNames, rules, numRules, { internal static readonly string[] tokenNames = new string[] { - "\", "\", "\", "\", + "\", "\", "\", "\", \""> }; =;}; separator="\n"> diff --git a/tool/src/main/resources/org/antlr/codegen/templates/Cpp/Cpp.stg b/tool/src/main/resources/org/antlr/codegen/templates/Cpp/Cpp.stg index d94024b7d..74f935f0a 100755 --- a/tool/src/main/resources/org/antlr/codegen/templates/Cpp/Cpp.stg +++ b/tool/src/main/resources/org/antlr/codegen/templates/Cpp/Cpp.stg @@ -821,7 +821,7 @@ ANTLR_UINT8* TokenNames[+4] (ANTLR_UINT8*) "\", (ANTLR_UINT8*) "\", (ANTLR_UINT8*) "\", - }; separator=",\n"> + }; separator=",\n", null="(ANTLR_UINT8*) \"\""> }; diff --git a/tool/src/main/resources/org/antlr/codegen/templates/Delphi/Delphi.stg b/tool/src/main/resources/org/antlr/codegen/templates/Delphi/Delphi.stg index 7390fde6b..cafe524d9 100644 --- a/tool/src/main/resources/org/antlr/codegen/templates/Delphi/Delphi.stg +++ b/tool/src/main/resources/org/antlr/codegen/templates/Delphi/Delphi.stg @@ -321,7 +321,7 @@ type '\', '\', '\', - );<\n> + );<\n> public const diff --git a/tool/src/main/resources/org/antlr/codegen/templates/Java/Java.stg b/tool/src/main/resources/org/antlr/codegen/templates/Java/Java.stg index c4cc8d405..dbf98004e 100644 --- a/tool/src/main/resources/org/antlr/codegen/templates/Java/Java.stg +++ b/tool/src/main/resources/org/antlr/codegen/templates/Java/Java.stg @@ -208,7 +208,7 @@ genericParser(grammar, name, scopes, tokens, tokenNames, rules, numRules, public class extends <@superClassName><@end> { public static final String[] tokenNames = new String[] { - "\", "\", "\", "\", + "\", "\", "\", "\", \"", separator=", ", wrap="\n\t\t"> }; =;}; separator="\n"> diff --git a/tool/src/main/resources/org/antlr/codegen/templates/JavaScript/JavaScript.stg b/tool/src/main/resources/org/antlr/codegen/templates/JavaScript/JavaScript.stg index 73cbaf42c..f63e1aa32 100755 --- a/tool/src/main/resources/org/antlr/codegen/templates/JavaScript/JavaScript.stg +++ b/tool/src/main/resources/org/antlr/codegen/templates/JavaScript/JavaScript.stg @@ -217,7 +217,7 @@ org.antlr.lang.augmentObject(.prototype, { // public class variables org.antlr.lang.augmentObject(, { - tokenNames: ["\", "\", "\", "\", ],<\n> + tokenNames: ["\", "\", "\", "\", \"">],<\n> _in_}, words64=it.bits); separator=",\n"> diff --git a/tool/src/main/resources/org/antlr/codegen/templates/ObjC/ObjC.stg b/tool/src/main/resources/org/antlr/codegen/templates/ObjC/ObjC.stg index 369310a4c..dc0b55010 100644 --- a/tool/src/main/resources/org/antlr/codegen/templates/ObjC/ObjC.stg +++ b/tool/src/main/resources/org/antlr/codegen/templates/ObjC/ObjC.stg @@ -712,7 +712,7 @@ genericParser( grammar, { #pragma mark Bitsets _in_}, words64=it.bits)>}> - [BaseRecognizer setTokenNames:[[AMutableArray arrayWithObjects:@"\", @"\", @"\", @"\", }; separator=", ", wrap="\n ">, nil] retain]]; + [BaseRecognizer setTokenNames:[[AMutableArray arrayWithObjects:@"\", @"\", @"\", @"\", }; separator=", ", null="@\"\"", wrap="\n ">, nil] retain]]; [BaseRecognizer setGrammarFileName:@""]; }> } diff --git a/tool/src/main/resources/org/antlr/codegen/templates/Perl5/Perl5.stg b/tool/src/main/resources/org/antlr/codegen/templates/Perl5/Perl5.stg index 1eb08ee2d..d6645067f 100644 --- a/tool/src/main/resources/org/antlr/codegen/templates/Perl5/Perl5.stg +++ b/tool/src/main/resources/org/antlr/codegen/templates/Perl5/Perl5.stg @@ -201,7 +201,7 @@ use Moose; extends '<@superClassName><@end>'; Readonly my $token_names => [ - "\", "\", "\", "\", + "\", "\", "\", "\", \""> ]; use constant { diff --git a/tool/src/main/resources/org/antlr/codegen/templates/Python/Python.stg b/tool/src/main/resources/org/antlr/codegen/templates/Python/Python.stg index cffdf8677..f4c43cac2 100644 --- a/tool/src/main/resources/org/antlr/codegen/templates/Python/Python.stg +++ b/tool/src/main/resources/org/antlr/codegen/templates/Python/Python.stg @@ -227,7 +227,7 @@ genericParser(grammar, name, scopes, tokens, tokenNames, rules, numRules, # token names tokenNames = [ "\", "\", "\", "\", - + \""> ]<\n> from import tokenNames<\n> diff --git a/tool/src/main/resources/org/antlr/codegen/templates/Python3/Python3.stg b/tool/src/main/resources/org/antlr/codegen/templates/Python3/Python3.stg index ad2e2abf7..8f399e0e8 100644 --- a/tool/src/main/resources/org/antlr/codegen/templates/Python3/Python3.stg +++ b/tool/src/main/resources/org/antlr/codegen/templates/Python3/Python3.stg @@ -229,7 +229,7 @@ genericParser(grammar, name, scopes, tokens, tokenNames, rules, numRules, # token names tokenNames = [ "\", "\", "\", "\", - + \""> ] }> diff --git a/tool/src/main/resources/org/antlr/codegen/templates/Ruby/Ruby.stg b/tool/src/main/resources/org/antlr/codegen/templates/Ruby/Ruby.stg index d74362374..75f3363e6 100644 --- a/tool/src/main/resources/org/antlr/codegen/templates/Ruby/Ruby.stg +++ b/tool/src/main/resources/org/antlr/codegen/templates/Ruby/Ruby.stg @@ -118,7 +118,7 @@ module TokenData # this is necessary because anonymous tokens, which are # created from literal values in the grammar, do not # have descriptive names - register_names( }; separator=", ", anchor, wrap="\n"> ) + register_names( }; separator=", ", null="\"\"", anchor, wrap="\n"> ) diff --git a/tool/src/main/resources/org/antlr/codegen/templates/Scala/Scala.stg b/tool/src/main/resources/org/antlr/codegen/templates/Scala/Scala.stg index bfd7c44aa..96066d608 100644 --- a/tool/src/main/resources/org/antlr/codegen/templates/Scala/Scala.stg +++ b/tool/src/main/resources/org/antlr/codegen/templates/Scala/Scala.stg @@ -185,7 +185,7 @@ genericParser(grammar, name, scopes, tokens, tokenNames, rules, numRules, object { val tokenNames = Array( - "\", "\", "\", "\", + "\", "\", "\", "\", \""> )<\n> diff --git a/tool/src/test/java/org/antlr/test/TestJavaCodeGeneration.java b/tool/src/test/java/org/antlr/test/TestJavaCodeGeneration.java index 5a189cdd4..b8dd7ecae 100644 --- a/tool/src/test/java/org/antlr/test/TestJavaCodeGeneration.java +++ b/tool/src/test/java/org/antlr/test/TestJavaCodeGeneration.java @@ -158,4 +158,136 @@ public void testSemanticPredicateAnalysisStackOverflow() throws Exception { boolean success = rawGenerateAndBuildRecognizer("T.g", grammar, "TParser", "TLexer", false); assertTrue(success); } + + /** + * This is a regression test for antlr/antlr3#157 "missing lexer rules if + * tokenVocab defined" + * https://github.com/antlr/antlr3/pull/157 + */ + @Test public void testImportedTokensInCombinedGrammar() { + String tokensFile = + "TypeArray=394\n" + + "Null=395\n"; + System.out.println("dir " + tmpdir); + mkdir(tmpdir); + writeFile(tmpdir, "CustomVocab.tokens", tokensFile); + + String grammar = + "grammar T;\n" + + "options { output = AST; tokenVocab = CustomVocab; }\n" + + "tokens { TypeArray = 'array'; }\n" + + "a : ('array' TypeArray 'null' Null ID)* EOF\n" + + " {System.out.println(tokenNames[TypeArray] + \"=\" + TypeArray);\n" + + " System.out.println(tokenNames[Null] + \"=\" + Null);};\n" + + "Null : 'null';\n" + + "ID : 'a'..'z'+;\n" + + "WS : ' '+ {skip();};\n"; + String input = "array array null null foo"; + String found = execParser("T.g", grammar, "TParser", "TLexer", "a", input, false); + String expected = + "TypeArray=394\n" + + "Null=395\n" + + "array array null null foo \n"; + assertEquals(expected, found); + assertNull(stderrDuringParse); + } + + /** + * This is a regression test for antlr/antlr3#157 "missing lexer rules if + * tokenVocab defined" + * https://github.com/antlr/antlr3/pull/157 + */ + @Test public void testImportedTokensInCombinedGrammarNoReferences() { + String tokensFile = + "TypeArray=394\n" + + "Null=395\n"; + System.out.println("dir " + tmpdir); + mkdir(tmpdir); + writeFile(tmpdir, "CustomVocab.tokens", tokensFile); + + String grammar = + "grammar T;\n" + + "options { output = AST; tokenVocab = CustomVocab; }\n" + + "tokens { TypeArray = 'array'; }\n" + + "a : (ID)* EOF\n" + + " {System.out.println(tokenNames[TypeArray] + \"=\" + TypeArray);\n" + + " System.out.println(tokenNames[Null] + \"=\" + Null);};\n" + + "Null : 'null';\n" + + "ID : 'a'..'z'+;\n" + + "WS : ' '+ {skip();};\n"; + String input = "foo"; + String found = execParser("T.g", grammar, "TParser", "TLexer", "a", input, false); + String expected = + "TypeArray=394\n" + + "Null=395\n" + + "foo \n"; + assertEquals(expected, found); + assertNull(stderrDuringParse); + } + + /** + * This is a regression test for antlr/antlr3#157 "missing lexer rules if + * tokenVocab defined" + * https://github.com/antlr/antlr3/pull/157 + */ + @Test public void testImportedTokensInCombinedGrammarLiteralReferencesOnly() { + String tokensFile = + "TypeArray=394\n" + + "Null=395\n"; + System.out.println("dir " + tmpdir); + mkdir(tmpdir); + writeFile(tmpdir, "CustomVocab.tokens", tokensFile); + + String grammar = + "grammar T;\n" + + "options { output = AST; tokenVocab = CustomVocab; }\n" + + "tokens { TypeArray = 'array'; }\n" + + "a : ('array' 'null' ID)* EOF\n" + + " {System.out.println(tokenNames[TypeArray] + \"=\" + TypeArray);\n" + + " System.out.println(tokenNames[Null] + \"=\" + Null);};\n" + + "Null : 'null';\n" + + "ID : 'a'..'z'+;\n" + + "WS : ' '+ {skip();};\n"; + String input = "array null foo"; + String found = execParser("T.g", grammar, "TParser", "TLexer", "a", input, false); + String expected = + "TypeArray=394\n" + + "Null=395\n" + + "array null foo \n"; + assertEquals(expected, found); + assertNull(stderrDuringParse); + } + + /** + * This is a regression test for antlr/antlr3#157 "missing lexer rules if + * tokenVocab defined" + * https://github.com/antlr/antlr3/pull/157 + */ + @Test public void testImportedTokensInCombinedGrammarSymbolicReferencesOnly() { + String tokensFile = + "TypeArray=394\n" + + "Null=395\n"; + System.out.println("dir " + tmpdir); + mkdir(tmpdir); + writeFile(tmpdir, "CustomVocab.tokens", tokensFile); + + String grammar = + "grammar T;\n" + + "options { output = AST; tokenVocab = CustomVocab; }\n" + + "tokens { TypeArray = 'array'; }\n" + + "a : (TypeArray Null ID)* EOF\n" + + " {System.out.println(tokenNames[TypeArray] + \"=\" + TypeArray);\n" + + " System.out.println(tokenNames[Null] + \"=\" + Null);};\n" + + "Null : 'null';\n" + + "ID : 'a'..'z'+;\n" + + "WS : ' '+ {skip();};\n"; + String input = "array null foo"; + String found = execParser("T.g", grammar, "TParser", "TLexer", "a", input, false); + String expected = + "TypeArray=394\n" + + "Null=395\n" + + "array null foo \n"; + assertEquals(expected, found); + assertNull(stderrDuringParse); + } }