Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support explicit token assignments in combined grammars #165

Open
Wants to merge 3 commits into base branch "master" (choose a different base branch if needed).
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions tool/src/main/java/org/antlr/codegen/CodeGenerator.java
Original file line number Diff line number Diff line change
Expand Up @@ -861,10 +861,11 @@ protected void genTokenTypeConstants(ST code) {
protected void genTokenTypeNames(ST code) {
for (int t=Label.MIN_TOKEN_TYPE; t<=grammar.getMaxTokenType(); t++) {
String tokenName = grammar.getTokenDisplayName(t);
if ( tokenName!=null ) {
if (tokenName != null) {
tokenName=target.getTargetStringLiteralFromString(tokenName, true);
code.add("tokenNames", tokenName);
}

code.add("tokenNames", tokenName);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -271,9 +271,14 @@ protected void aliasTokenIDsAndLiterals(Grammar root) {
String tokenID = entry.getKey();
String literal = entry.getValue();
if ( literal.charAt(0)=='\'' && stringLiterals.get(literal)!=null ) {
stringLiterals.put(literal, tokens.get(tokenID));
// an alias still means you need a lexer rule for it
Integer typeI = tokens.get(tokenID);
if (typeI == null) {
// must have been imported from a tokenVocab
typeI = grammar.composite.tokenIDToTypeMap.get(tokenID);
}

stringLiterals.put(literal, typeI);
// an alias still means you need a lexer rule for it
if ( !tokenRuleDefs.contains(tokenID) ) {
root.defineLexerRuleForAliasedStringLiteral(tokenID, literal, typeI);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ genericParser(grammar, name, scopes, tokens, tokenNames, rules, numRules,
public class <grammar.recognizerName> extends <if(actions.(actionScope).superClass)><actions.(actionScope).superClass><else><@superClassName><superClass><@end><endif> {
<if(grammar.grammarIsRoot)>
public static const tokenNames:Array = [
"\<invalid>", "\<EOR>", "\<DOWN>", "\<UP>", <tokenNames; separator=", ">
"\<invalid>", "\<EOR>", "\<DOWN>", "\<UP>", <tokenNames; separator=", ", null="\"<invalid>\"">
];<\n>
<endif>
<tokens:{it |public static const <it.name>:int=<it.type>;}; separator="\n">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1195,7 +1195,7 @@ pANTLR3_UINT8 <name>TokenNames[<length(tokenNames)>+4]
(pANTLR3_UINT8) "\<EOR>",
(pANTLR3_UINT8) "\<DOWN>",
(pANTLR3_UINT8) "\<UP>",
<tokenNames:{it |(pANTLR3_UINT8) <it>}; separator=",\n">
<tokenNames:{it |(pANTLR3_UINT8) <it>}; separator=",\n", null="(pANTLR3_UINT8) \"<invalid>\"">
};
<endif>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ genericParser(grammar, name, scopes, tokens, tokenNames, rules, numRules,
{
<if(grammar.grammarIsRoot)>
internal static readonly string[] tokenNames = new string[] {
"\<invalid>", "\<EOR>", "\<DOWN>", "\<UP>", <tokenNames; separator=", ">
"\<invalid>", "\<EOR>", "\<DOWN>", "\<UP>", <tokenNames; separator=", ", null="\"<invalid>\"">
};
<endif>
<tokens:{it|public const int <it.name; format="id">=<it.type>;}; separator="\n">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ genericParser(grammar, name, scopes, tokens, tokenNames, rules, numRules,
{
<if(grammar.grammarIsRoot)>
internal static readonly string[] tokenNames = new string[] {
"\<invalid>", "\<EOR>", "\<DOWN>", "\<UP>", <tokenNames; separator=", ">
"\<invalid>", "\<EOR>", "\<DOWN>", "\<UP>", <tokenNames; separator=", ", null="\"<invalid>\"">
};
<endif>
<tokens:{it|public const int <it.name; format="id">=<it.type>;}; separator="\n">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -821,7 +821,7 @@ ANTLR_UINT8* <name>TokenNames[<length(tokenNames)>+4]
(ANTLR_UINT8*) "\<EOR>",
(ANTLR_UINT8*) "\<DOWN>",
(ANTLR_UINT8*) "\<UP>",
<tokenNames:{it |(ANTLR_UINT8*) <it>}; separator=",\n">
<tokenNames:{it |(ANTLR_UINT8*) <it>}; separator=",\n", null="(ANTLR_UINT8*) \"<invalid>\"">
};
<endif>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ type
'\<EOR>',
'\<DOWN>',
'\<UP>',
<tokenNames; separator=",\n">);<\n>
<tokenNames; separator=",\n", null="'<invalid>'">);<\n>
<endif>
public
const
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ genericParser(grammar, name, scopes, tokens, tokenNames, rules, numRules,
public class <grammar.recognizerName> extends <@superClassName><superClass><@end> {
<if(grammar.grammarIsRoot)>
public static final String[] tokenNames = new String[] {
"\<invalid>", "\<EOR>", "\<DOWN>", "\<UP>", <tokenNames; separator=", ", wrap="\n\t\t">
"\<invalid>", "\<EOR>", "\<DOWN>", "\<UP>", <tokenNames; null="\"<invalid>\"", separator=", ", wrap="\n\t\t">
};
<endif>
<tokens:{it |public static final int <it.name>=<it.type>;}; separator="\n">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ org.antlr.lang.augmentObject(<grammar.recognizerName>.prototype, {
// public class variables
org.antlr.lang.augmentObject(<grammar.recognizerName>, {
<if(grammar.grammarIsRoot)>
tokenNames: ["\<invalid>", "\<EOR>", "\<DOWN>", "\<UP>", <tokenNames; separator=", ">],<\n>
tokenNames: ["\<invalid>", "\<EOR>", "\<DOWN>", "\<UP>", <tokenNames; separator=", ", null="\"<invalid>\"">],<\n>
<endif>
<bitsets:bitset(name={FOLLOW_<it.name>_in_<it.inName><it.tokenIndex>},
words64=it.bits); separator=",\n">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -712,7 +712,7 @@ genericParser( grammar,
{
#pragma mark Bitsets
<bitsets:{it | <bitsetInit(name={FOLLOW_<it.name>_in_<it.inName><it.tokenIndex>}, words64=it.bits)>}>
[BaseRecognizer setTokenNames:[[AMutableArray arrayWithObjects:@"\<invalid>", @"\<EOR>", @"\<DOWN>", @"\<UP>", <tokenNames:{it | @<it>}; separator=", ", wrap="\n ">, nil] retain]];
[BaseRecognizer setTokenNames:[[AMutableArray arrayWithObjects:@"\<invalid>", @"\<EOR>", @"\<DOWN>", @"\<UP>", <tokenNames:{it | @<it>}; separator=", ", null="@\"<invalid>\"", wrap="\n ">, nil] retain]];
[BaseRecognizer setGrammarFileName:@"<fileName>"];
<synpreds:{pred | <synpred(pred)>}>
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ use Moose;
extends '<@superClassName><superClass><@end>';

Readonly my $token_names => [
"\<invalid>", "\<EOR>", "\<DOWN>", "\<UP>", <tokenNames; separator=", ">
"\<invalid>", "\<EOR>", "\<DOWN>", "\<UP>", <tokenNames; separator=", ", null="\"<invalid>\"">
];

use constant {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ genericParser(grammar, name, scopes, tokens, tokenNames, rules, numRules,
# token names
tokenNames = [
"\<invalid>", "\<EOR>", "\<DOWN>", "\<UP>",
<tokenNames; wrap, separator=", ">
<tokenNames; wrap, separator=", ", null="\"<invalid>\"">
]<\n>
<else>
from <grammar.composite.rootGrammar.recognizerName> import tokenNames<\n>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ genericParser(grammar, name, scopes, tokens, tokenNames, rules, numRules,
# token names
tokenNames = [
"\<invalid>", "\<EOR>", "\<DOWN>", "\<UP>",
<tokenNames; wrap, separator=", ">
<tokenNames; wrap, separator=", ", null="\"<invalid>\"">
]

<scopes:{it|<if(it.isDynamicGlobalScope)><globalAttributeScopeClass(scope=it)><endif>}>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ module TokenData
# this is necessary because anonymous tokens, which are
# created from literal values in the grammar, do not
# have descriptive names
register_names( <tokenNames:{it | <it>}; separator=", ", anchor, wrap="\n"> )
register_names( <tokenNames:{it | <it>}; separator=", ", null="\"<invalid>\"", anchor, wrap="\n"> )

<endif>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ genericParser(grammar, name, scopes, tokens, tokenNames, rules, numRules,
object <grammar.recognizerName> {
<if(grammar.grammarIsRoot)>
val tokenNames = Array(
"\<invalid>", "\<EOR>", "\<DOWN>", "\<UP>", <tokenNames; separator=", ">
"\<invalid>", "\<EOR>", "\<DOWN>", "\<UP>", <tokenNames; separator=", ", null="\"<invalid>\"">
)<\n>
<endif>

Expand Down
132 changes: 132 additions & 0 deletions tool/src/test/java/org/antlr/test/TestJavaCodeGeneration.java
Original file line number Diff line number Diff line change
Expand Up @@ -158,4 +158,136 @@ public void testSemanticPredicateAnalysisStackOverflow() throws Exception {
boolean success = rawGenerateAndBuildRecognizer("T.g", grammar, "TParser", "TLexer", false);
assertTrue(success);
}

/**
 * This is a regression test for antlr/antlr3#157 "missing lexer rules if
 * tokenVocab defined"
 * https://github.com/antlr/antlr3/pull/157
 *
 * Checks that a token aliased in the tokens{} section (TypeArray = 'array')
 * keeps the token type assigned by the imported CustomVocab.tokens file,
 * when the parser rule references tokens both as string literals and by
 * symbolic name.
 */
@Test public void testImportedTokensInCombinedGrammar() {
// The vocab file assigns types (394/395) far above anything the grammar
// itself would allocate, so a lost import shows up as different numbers.
String tokensFile =
"TypeArray=394\n" +
"Null=395\n";
System.out.println("dir " + tmpdir);
mkdir(tmpdir);
writeFile(tmpdir, "CustomVocab.tokens", tokensFile);

// Combined grammar: rule 'a' references the imported tokens BOTH as
// literals ('array', 'null') and as symbolic names (TypeArray, Null).
// The embedded action prints name=type for each imported token.
String grammar =
"grammar T;\n" +
"options { output = AST; tokenVocab = CustomVocab; }\n" +
"tokens { TypeArray = 'array'; }\n" +
"a : ('array' TypeArray 'null' Null ID)* EOF\n" +
" {System.out.println(tokenNames[TypeArray] + \"=\" + TypeArray);\n" +
" System.out.println(tokenNames[Null] + \"=\" + Null);};\n" +
"Null : 'null';\n" +
"ID : 'a'..'z'+;\n" +
"WS : ' '+ {skip();};\n";
String input = "array array null null foo";
String found = execParser("T.g", grammar, "TParser", "TLexer", "a", input, false);
// Expected: the two action println lines, then the AST-mode echo of the
// matched input ending in <EOF>.
String expected =
"TypeArray=394\n" +
"Null=395\n" +
"array array null null foo <EOF>\n";
assertEquals(expected, found);
// NOTE(review): assumes execParser in the base test class captures stderr
// into stderrDuringParse — no warnings/errors expected during the parse.
assertNull(stderrDuringParse);
}

/**
 * This is a regression test for antlr/antlr3#157 "missing lexer rules if
 * tokenVocab defined"
 * https://github.com/antlr/antlr3/pull/157
 *
 * Same scenario as testImportedTokensInCombinedGrammar, but the parser rule
 * never references the imported tokens at all — the alias in tokens{} plus
 * the tokenVocab import alone must still produce the correct token types
 * and entries in tokenNames.
 */
@Test public void testImportedTokensInCombinedGrammarNoReferences() {
// Types from the vocab file; values chosen so a regression is obvious.
String tokensFile =
"TypeArray=394\n" +
"Null=395\n";
System.out.println("dir " + tmpdir);
mkdir(tmpdir);
writeFile(tmpdir, "CustomVocab.tokens", tokensFile);

// Rule 'a' matches only ID — no literal or symbolic reference to
// TypeArray or Null; the action still reads both via tokenNames.
String grammar =
"grammar T;\n" +
"options { output = AST; tokenVocab = CustomVocab; }\n" +
"tokens { TypeArray = 'array'; }\n" +
"a : (ID)* EOF\n" +
" {System.out.println(tokenNames[TypeArray] + \"=\" + TypeArray);\n" +
" System.out.println(tokenNames[Null] + \"=\" + Null);};\n" +
"Null : 'null';\n" +
"ID : 'a'..'z'+;\n" +
"WS : ' '+ {skip();};\n";
String input = "foo";
String found = execParser("T.g", grammar, "TParser", "TLexer", "a", input, false);
// Action output followed by the AST-mode echo of the single ID token.
String expected =
"TypeArray=394\n" +
"Null=395\n" +
"foo <EOF>\n";
assertEquals(expected, found);
// NOTE(review): stderrDuringParse presumably captured by execParser;
// the parse must complete without warnings or errors.
assertNull(stderrDuringParse);
}

/**
 * This is a regression test for antlr/antlr3#157 "missing lexer rules if
 * tokenVocab defined"
 * https://github.com/antlr/antlr3/pull/157
 *
 * Variant where the parser rule references the imported tokens only via
 * their string literals ('array', 'null'), never by symbolic name. The
 * literals must still resolve to the vocab-assigned types 394/395.
 */
@Test public void testImportedTokensInCombinedGrammarLiteralReferencesOnly() {
String tokensFile =
"TypeArray=394\n" +
"Null=395\n";
System.out.println("dir " + tmpdir);
mkdir(tmpdir);
writeFile(tmpdir, "CustomVocab.tokens", tokensFile);

// Only literal references in rule 'a'; the symbolic names appear solely
// in the tokens{} alias, the Null lexer rule, and the action.
String grammar =
"grammar T;\n" +
"options { output = AST; tokenVocab = CustomVocab; }\n" +
"tokens { TypeArray = 'array'; }\n" +
"a : ('array' 'null' ID)* EOF\n" +
" {System.out.println(tokenNames[TypeArray] + \"=\" + TypeArray);\n" +
" System.out.println(tokenNames[Null] + \"=\" + Null);};\n" +
"Null : 'null';\n" +
"ID : 'a'..'z'+;\n" +
"WS : ' '+ {skip();};\n";
String input = "array null foo";
String found = execParser("T.g", grammar, "TParser", "TLexer", "a", input, false);
// Action output, then AST-mode echo of the matched input.
String expected =
"TypeArray=394\n" +
"Null=395\n" +
"array null foo <EOF>\n";
assertEquals(expected, found);
// Parse must produce no stderr output (see base-class capture helper).
assertNull(stderrDuringParse);
}

/**
 * This is a regression test for antlr/antlr3#157 "missing lexer rules if
 * tokenVocab defined"
 * https://github.com/antlr/antlr3/pull/157
 *
 * Variant where the parser rule references the imported tokens only by
 * symbolic name (TypeArray, Null), never as string literals. The symbolic
 * references must resolve to the vocab-assigned types 394/395 and the
 * aliased literal 'array' must still get a generated lexer rule.
 */
@Test public void testImportedTokensInCombinedGrammarSymbolicReferencesOnly() {
String tokensFile =
"TypeArray=394\n" +
"Null=395\n";
System.out.println("dir " + tmpdir);
mkdir(tmpdir);
writeFile(tmpdir, "CustomVocab.tokens", tokensFile);

// Rule 'a' uses only symbolic names; 'array' appears only as the alias
// in the tokens{} section, so its lexer rule must be auto-generated.
String grammar =
"grammar T;\n" +
"options { output = AST; tokenVocab = CustomVocab; }\n" +
"tokens { TypeArray = 'array'; }\n" +
"a : (TypeArray Null ID)* EOF\n" +
" {System.out.println(tokenNames[TypeArray] + \"=\" + TypeArray);\n" +
" System.out.println(tokenNames[Null] + \"=\" + Null);};\n" +
"Null : 'null';\n" +
"ID : 'a'..'z'+;\n" +
"WS : ' '+ {skip();};\n";
String input = "array null foo";
String found = execParser("T.g", grammar, "TParser", "TLexer", "a", input, false);
// Action output, then AST-mode echo of the matched input.
String expected =
"TypeArray=394\n" +
"Null=395\n" +
"array null foo <EOF>\n";
assertEquals(expected, found);
// Parse must produce no stderr output (see base-class capture helper).
assertNull(stderrDuringParse);
}
}