diff --git a/jena-arq/Grammar/Final/README.txt b/jena-arq/Grammar/Final/README.txt index 5308eb58ed5..9d653ad57fc 100644 --- a/jena-arq/Grammar/Final/README.txt +++ b/jena-arq/Grammar/Final/README.txt @@ -1 +1,16 @@ Final-for-spec-publication versions of the grammar. + +sparql_10-final.jj - SPARQL 1.0 "sparql_10.jj" ("main.jj" after cpp) + +sparql_11-final.jj - SPARQL 1.1 "sparql_11.jj" ("main.jj" after cpp) + +sparql-main-11.jj - SPARQL 1.1 "main.jj" (com.hp) + + +sparql_11-dev-final.jj - End SPARQL 1.1 development. (org.apache.jena.graph "main.jj" at SPARQLParser11) +sparql_11-dev-final.txt - jjdoc +tokens_11.txt - Tokens file. + +-- Coming soon. +sparql_12-final.jj - SPARQL 1.2 "sparql_12.jj" ("main.jj" after cpp) +sparql-main-12.jj - SPARQL 1.2 "main.jj" diff --git a/jena-arq/Grammar/arq.jj b/jena-arq/Grammar/arq.jj index 96c41746932..f566195a025 100644 --- a/jena-arq/Grammar/arq.jj +++ b/jena-arq/Grammar/arq.jj @@ -1910,8 +1910,8 @@ String String() : { Token t ; String lex ; } | t = { lex = stripQuotes3(t.image) ; } | t = { lex = stripQuotes3(t.image) ; } ) - { checkString(lex, t.beginLine, t.beginColumn) ; - lex = unescapeStr(lex, t.beginLine, t.beginColumn) ; + { lex = unescapeStr(lex, t.beginLine, t.beginColumn) ; + checkString(lex, t.beginLine, t.beginColumn) ; return lex ; } } diff --git a/jena-arq/Grammar/main.jj b/jena-arq/Grammar/main.jj index 928d7d94cad..5f34cb3a10b 100644 --- a/jena-arq/Grammar/main.jj +++ b/jena-arq/Grammar/main.jj @@ -2621,8 +2621,7 @@ String String() : { Token t ; String lex ; } | t = { lex = stripQuotes3(t.image) ; } | t = { lex = stripQuotes3(t.image) ; } ) - { checkString(lex, t.beginLine, t.beginColumn) ; - lex = unescapeStr(lex, t.beginLine, t.beginColumn) ; + { lex = prepareLexicalForm(lex, t.beginLine, t.beginColumn) ; return lex ; } } diff --git a/jena-arq/Grammar/sparql_12.jj b/jena-arq/Grammar/sparql_12.jj index 8154839f003..ba9e68a2dd5 100644 --- a/jena-arq/Grammar/sparql_12.jj +++ b/jena-arq/Grammar/sparql_12.jj @@ 
-1625,8 +1625,8 @@ String String() : { Token t ; String lex ; } | t = { lex = stripQuotes3(t.image) ; } | t = { lex = stripQuotes3(t.image) ; } ) - { checkString(lex, t.beginLine, t.beginColumn) ; - lex = unescapeStr(lex, t.beginLine, t.beginColumn) ; + { lex = unescapeStr(lex, t.beginLine, t.beginColumn) ; + checkString(lex, t.beginLine, t.beginColumn) ; return lex ; } } diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/lang/QueryParserBase.java b/jena-arq/src/main/java/org/apache/jena/sparql/lang/QueryParserBase.java index 006c9f8fda1..9fcfa24502b 100644 --- a/jena-arq/src/main/java/org/apache/jena/sparql/lang/QueryParserBase.java +++ b/jena-arq/src/main/java/org/apache/jena/sparql/lang/QueryParserBase.java @@ -209,10 +209,15 @@ protected Node stripSign(Node node) { } protected void checkString(String string, int line, int column) { + // Checks for bare (unpaired) surrogates. for ( int i = 0 ; i < string.length() ; i++ ) { // Not "codePointAt" which does surrogate processing. char ch = string.charAt(i); - // Check surrogate pairs are pairs. + + // Check surrogates come in pairs. Pairs are high-low. + if ( Character.isLowSurrogate(ch) ) + throw new QueryParseException("Bad surrogate pair (low surrogate without high surrogate)", line, column); + if ( Character.isHighSurrogate(ch) ) { i++; if ( i == string.length() ) @@ -221,8 +226,6 @@ protected void checkString(String string, int line, int column) { if ( !Character.isLowSurrogate(ch1) ) { throw new QueryParseException("Bad surrogate pair (high surrogate not followed by low surrogate)", line, column); } - } else if ( Character.isLowSurrogate(ch) ) { - throw new QueryParseException("Bad surrogate pair (low surrogate without high surrogate)", line, column); } } } @@ -345,7 +348,6 @@ protected static String stripChars(String s, int n) { protected Var createVariable(String s, int line, int column) { s = s.substring(1); // Drop the marker - // This is done by the parser input stream nowadays. 
// s = unescapeCodePoint(s, line, column); // Check \ u did not put in any illegals. @@ -361,6 +363,8 @@ protected Node createTripleTerm(Node s, Node p, Node o, int line, int column) { protected String resolveQuotedIRI(String iriStr, int line, int column) { iriStr = stripQuotes(iriStr); iriStr = unescapeUnicode(iriStr, line, column); + // Check for Unicode surrogates + checkString(iriStr, line, column); return resolveIRI(iriStr, line, column); } diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/lang/arq/ARQParser.java b/jena-arq/src/main/java/org/apache/jena/sparql/lang/arq/ARQParser.java index 51ece29bc05..ccb9d9095d1 100644 --- a/jena-arq/src/main/java/org/apache/jena/sparql/lang/arq/ARQParser.java +++ b/jena-arq/src/main/java/org/apache/jena/sparql/lang/arq/ARQParser.java @@ -7442,8 +7442,8 @@ final public Node BooleanLiteral() throws ParseException { jj_consume_token(-1); throw new ParseException(); } -checkString(lex, t.beginLine, t.beginColumn) ; - lex = unescapeStr(lex, t.beginLine, t.beginColumn) ; +lex = unescapeStr(lex, t.beginLine, t.beginColumn) ; + checkString(lex, t.beginLine, t.beginColumn) ; {if ("" != null) return lex ;} throw new Error("Missing return statement in function"); } diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/lang/sparql_12/SPARQLParser12.java b/jena-arq/src/main/java/org/apache/jena/sparql/lang/sparql_12/SPARQLParser12.java index 5bc894bc102..64270c70cd5 100644 --- a/jena-arq/src/main/java/org/apache/jena/sparql/lang/sparql_12/SPARQLParser12.java +++ b/jena-arq/src/main/java/org/apache/jena/sparql/lang/sparql_12/SPARQLParser12.java @@ -5837,8 +5837,8 @@ final public Node BooleanLiteral() throws ParseException { jj_consume_token(-1); throw new ParseException(); } -checkString(lex, t.beginLine, t.beginColumn) ; - lex = unescapeStr(lex, t.beginLine, t.beginColumn) ; +lex = unescapeStr(lex, t.beginLine, t.beginColumn) ; + checkString(lex, t.beginLine, t.beginColumn) ; {if ("" != null) return lex ;} throw new 
Error("Missing return statement in function"); } diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/syntax/TestQueryParser.java b/jena-arq/src/test/java/org/apache/jena/sparql/syntax/TestQueryParser.java index 4b93c8dcca5..1221f7fa27d 100644 --- a/jena-arq/src/test/java/org/apache/jena/sparql/syntax/TestQueryParser.java +++ b/jena-arq/src/test/java/org/apache/jena/sparql/syntax/TestQueryParser.java @@ -19,6 +19,7 @@ package org.apache.jena.sparql.syntax; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import org.junit.jupiter.api.Test; @@ -40,6 +41,36 @@ private static void silent(Runnable action) { LogCtl.withLevel(loggerSPARQL, "fatal", action); } + // Single backslash so a Java string escape, raw surrogate in the string. + @Test + public void syntax_unicode_raw_surrogate_uri() { + QueryParseException ex = assertThrows(QueryParseException.class, ()->testParse("SELECT * { ?p ?o}")); + assertTrue(ex.getMessage().contains("surrogate")); + } + + @Test + public void syntax_unicode_raw_surrogate_string() { + QueryParseException ex = assertThrows(QueryParseException.class, ()->testParse("SELECT * { ?s ?p '\uD800' }")); + assertTrue(ex.getMessage().contains("surrogate")); + } + + // Double backslash so the query string has an escape in it. 
+ @Test + public void syntax_unicode_escaped_surrogate_uri() { + QueryParseException ex = assertThrows(QueryParseException.class, ()->testParse("SELECT * { ?p ?o}")); + assertTrue(ex.getMessage().contains("surrogate")); + } + + @Test + public void syntax_unicode_escaped_surrogate_strings() { + QueryParseException ex = assertThrows(QueryParseException.class, ()->testParse("SELECT * { ?s ?p '\\uD800'}")); + assertTrue(ex.getMessage().contains("surrogate")); + } + + private static void testParse(String string) { + QueryFactory.create(string); + } + @Test public void syntax_uri_brackets_1() { testParseIRIs(""); } diff --git a/jena-cmds/src/test/java/arq/rdftests.java b/jena-cmds/src/test/java/arq/rdftests.java index 8655942732a..70f2246445b 100644 --- a/jena-cmds/src/test/java/arq/rdftests.java +++ b/jena-cmds/src/test/java/arq/rdftests.java @@ -81,6 +81,8 @@ public class rdftests extends CmdGeneral RIOT.getContext().set(RIOT.symTurtleDirectiveStyle, "sparql"); } + // Test runners are in jena-arq, package org.apache.jena.arq.junit.runners + public static void main(String...argv) { try { new rdftests(argv).mainRun(); } catch (TerminationException ex) {