
Commit

ghislainfourny committed Jun 22, 2020
2 parents 7b868a2 + b0306a3 commit fafca54
Showing 692 changed files with 7,404 additions and 4,953 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -1,6 +1,6 @@
# Rumble

Getting started: you will find a Jupyter notebook that introduces the JSONiq language on top of Rumble [here](https://github.com/ghislainfourny/jsoniq-tutorial/blob/master/JSONiq-tutorial.ipynb). You can use it by installing the all-in-one Data Science platform [Anaconda](https://www.anaconda.com), unless you prefer to install Python+Spark+PySpark+Jupyter manually (brew, apt...).
Getting started: you will find a Jupyter notebook that introduces the JSONiq language on top of Rumble [here](https://github.com/ghislainfourny/jsoniq-tutorial). You can use it by installing the all-in-one Data Science platform [Anaconda](https://www.anaconda.com), unless you prefer to install Python+Spark+PySpark+Jupyter manually (brew, apt...).

The documentation also contains an introduction specific to Rumble and how you can read input datasets, but we have not converted it to Jupyter notebooks yet (this will follow).

2 changes: 1 addition & 1 deletion build_antlr_parser.xml
@@ -25,7 +25,7 @@
<!-- <property name="src.dir" value="${basedir}/src/main/java/sparksoniq"/> prepends full path as comment-->
<property name="src.dir" value="./src/main/java/org/rumbledb"/>
<property name="parser.dir" value="${src.dir}/parser"/>
<property name="antlr.jar" value="./lib/antlr-4.5.3-complete.jar"/>
<property name="antlr.jar" value="./lib/antlr-4.7-complete.jar"/>


<target name="clean-parser">
16 changes: 16 additions & 0 deletions docs/Error codes.md
@@ -1,5 +1,7 @@
# Error codes

- [FOAR0001] - Division by zero.

- [FOCA0002] - A value that is not lexically valid for a particular type has been encountered.

- [FODC0002] - Error retrieving resource.
@@ -104,6 +106,9 @@ or a module import.
if the version number specified in a version declaration
is not supported by the implementation. For now, only version 1.0 is supported.

- [XQST0033] - Namespace prefix bound twice. It is a static error if
a module contains multiple bindings for the same namespace prefix.

- [XQST0034] - Function already exists. It is a static error
if multiple functions declared or imported by a module have
the same number of arguments and their expanded QNames are equal
@@ -113,13 +118,24 @@ the same number of arguments and their expanded QNames are equal
for a function declaration or an inline function expression
to have more than one parameter with the same name.

- [XQST0047] - It is a static error if multiple module imports
in the same Prolog specify the same target namespace.

- [XQST0049] - It is a static error if two or more variables
declared or imported by a module have the same name.

- [XQST0052] - Simple type error. The type must be
the name of a type defined in the in-scope schema types,
and the {variety} of the type must be simple.

- [XQST0059] - It is a static error if an implementation is unable
to process a schema or module import by finding a schema or module with
the specified target namespace.

- [XQST0088] - It is a static error if the literal that specifies
the target namespace in a module import or a module declaration is of
zero length.

- [XQST0094] - Invalid variable in group-by clause.
The name of each grouping variable must be equal
(by the eq operator on expanded QNames) to the name of a
12 changes: 11 additions & 1 deletion docs/Function library.md
@@ -635,7 +635,17 @@ Serializes the supplied input sequence, returning the serialized representation
```
serialize({hello: "world"})
```
returns "[ \\"hello\\", \\"world\\" ]"
returns { "hello" : "world" }
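
As a further hedged example (the exact whitespace in the serialized string may differ), the same function applied to an array:

```
serialize([ 1, 2, 3 ])
```

returns the string "[ 1, 2, 3 ]".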

### normalize-unicode

Returns the value of the input after applying Unicode normalization.

```
normalize-unicode("hello world", "NFC")
```

returns the Unicode-normalized version of the input string. Normalization forms NFC, NFD, NFKC, and NFKD are supported. "FULLY-NORMALIZED" is also supported, but should be used with caution: the only composition exclusion characters supported are those that are uncommented in the [following file](https://www.unicode.org/Public/UCD/latest/ucd/CompositionExclusions.txt).
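
For illustration, a sketch of what normalization does (assuming JSON-style \u escapes in string literals; the expected results follow standard Unicode normalization and are not output captured from Rumble):

```
normalize-unicode("cafe\u0301", "NFC")
```

returns "café" with a single precomposed code point (U+00E9) for the last letter, whereas passing "NFD" instead keeps the decomposed sequence "e" followed by the combining accent U+0301.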

## Date and time functions

2 changes: 1 addition & 1 deletion docs/Getting started.md
@@ -49,7 +49,7 @@ Create, in the same directory as Rumble, a file data.json and put the following

In a shell, from the directory where the rumble .jar lies, type, all on one line:

spark-submit spark-rumble-1.6.2.jar --shell yes
spark-submit spark-rumble-1.6.4.jar --shell yes
The Rumble shell appears:

6 changes: 3 additions & 3 deletions docs/HTTPServer.md
@@ -4,7 +4,7 @@

Rumble can be run as an HTTP server that listens for queries. In order to do so, you can use the --server and --port parameters:

spark-submit spark-rumble-1.6.2.jar --server yes --port 8001
spark-submit spark-rumble-1.6.4.jar --server yes --port 8001

This command will not return until you force it to (Ctrl+C on Linux and Mac). This is because the server has to run permanently to listen to incoming requests.

@@ -86,10 +86,10 @@ You can also let Rumble run as an HTTP server on the master node of a cluster, e
- Connect to the master with SSH with an extra parameter for securely tunneling the HTTP connection (for example `-L 8001:localhost:8001` or any port of your choosing)
- Download the Rumble jar to the master node

wget https://github.com/RumbleDB/rumble/releases/download/v1.6.2/spark-rumble-1.6.2.jar
wget https://github.com/RumbleDB/rumble/releases/download/v1.6.4/spark-rumble-1.6.4.jar

- Launch the HTTP server on the master node

spark-submit spark-rumble-1.6.2.jar --server yes --port 8001
spark-submit spark-rumble-1.6.4.jar --server yes --port 8001

- And then use Jupyter notebooks in the same way you would do it locally (it magically works because of the tunneling)
45 changes: 35 additions & 10 deletions docs/JSONiq.md
@@ -68,23 +68,33 @@ When an expression does not support pushdown, it will materialize automatically.
Prologs with user-defined functions and global variables are now fully supported. Global external variables with string values are supported (use "--variable:foo bar" on the command line to assign values to them).
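
For illustration, a minimal sketch of such a query (the variable name foo and the way it is bound are taken from the flag above; the file name query.jq and the greeting are made up for this example):

```
(: query.jq, run e.g. with: spark-submit spark-rumble-1.6.4.jar --query-path query.jq --variable:foo bar :)
declare variable $foo external;
"Hello, " || $foo
```

With the binding above, this returns "Hello, bar".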


## Unsupported/Unimplemented features (beta release)
## Library modules

Many core features of JSONiq are in place, but please be aware that some features (less and less, though) are not yet implemented. We are working on them for subsequent releases. We prioritize the implementation of the remaining features based on user requests.
Library modules are now supported (experimental, please report bugs), and their namespace URI is used for resolution. If it is relative, it is resolved against the importing module location.

### Settings and modules
The same schemes are supported as for reading queries and data: file, hdfs, and so on. HTTP is not supported yet, but may be supported in the future.

Prolog settings and library modules are not supported yet.
Example of a library module (the file name is library-module.jq):

```
module namespace m = "library-module.jq";

declare variable $m:x := 2;

declare function m:func($v) {
  $m:x + $v
};
```

Example of an importing module (assuming it is in the same directory):

```
import module namespace mod = "library-module.jq";
mod:func($mod:x)
```

### Supported types

The type system is not quite complete yet, although a lot of progress was made. Below is a complete list of JSONiq types and their support status.

Expand Down Expand Up @@ -124,11 +134,26 @@ The type system is not quite complete yet, although a lot of progress was made.
| unsignedShort | not supported |
| yearMonthDuration | supported |

## Unsupported/Unimplemented features (beta release)

Many core features of JSONiq are in place, but please be aware that some features (less and less at every release) are not yet implemented. We are working on them for subsequent releases. We prioritize the implementation of the remaining features based on user requests.

### Settings

Some prolog settings (mostly about changing the default behavior) are not supported yet.

### Try/catch

Try/catch expressions are not supported yet, but this is planned.

### Nested expressions in object lookups (rhs)

Nested object lookup keys: nested expressions on the rhs of the dot syntax are not supported yet, but this is planned.

### Builtin functions

Not all JSONiq builtin functions are supported yet, even though more get added continuously. Please take a look at the function library documentation to see which functions are available.

### Updates and scripting

JSON updates are not supported. Scripting features (assignment, while loop, ...) are not supported yet either.
JSON updates are not supported yet. Scripting features (assignment, while loop, ...) are not supported yet.
12 changes: 6 additions & 6 deletions docs/Run on a cluster.md
@@ -5,21 +5,21 @@ simply by modifying the command line parameters as documented [here for spark-su

If the Spark cluster is running on yarn, then the --master option can be changed from local[\*] (used in the getting started guide) to yarn. Most of the time, though (e.g., on Amazon EMR), it need not be specified, as this is already set up in the environment.

spark-submit spark-rumble-1.6.2.jar --shell yes
spark-submit spark-rumble-1.6.4.jar --shell yes
or explicitly:

spark-submit --master yarn --deploy-mode client spark-rumble-1.6.2.jar --shell yes
spark-submit --master yarn --deploy-mode client spark-rumble-1.6.4.jar --shell yes

You can also adapt the number of executors, etc.

spark-submit --num-executors 30 --executor-cores 3 --executor-memory 10g
spark-rumble-1.6.2.jar --shell yes
spark-rumble-1.6.4.jar --shell yes

The size limit for materialization can also be made higher with --result-size (the default is 100). This affects the number of items displayed on the shell as the answer to a query, as well as any materializations happening within the query when push-down is not supported. Warnings are issued if the cap is reached.

spark-submit --num-executors 30 --executor-cores 3 --executor-memory 10g
spark-rumble-1.6.2.jar
spark-rumble-1.6.4.jar
--shell yes --result-size 10000

## Creation functions
@@ -59,15 +59,15 @@ Note that by default only the first 1000 items in the output will be displayed o
Rumble also supports executing a single query from the command line, reading from HDFS and outputting the results to HDFS, with the query file being either local or on HDFS. For this, use the --query-path, --output-path and --log-path parameters.

spark-submit --num-executors 30 --executor-cores 3 --executor-memory 10g
spark-rumble-1.6.2.jar
spark-rumble-1.6.4.jar
--query-path "hdfs:///user/me/query.jq"
--output-path "hdfs:///user/me/results/output"
--log-path "hdfs:///user/me/logging/mylog"

The query path, output path and log path can be any of the supported schemes (HDFS, file, S3, WASB...) and can be relative or absolute.

spark-submit --num-executors 30 --executor-cores 3 --executor-memory 10g
spark-rumble-1.6.2.jar
spark-rumble-1.6.4.jar
--query-path "/home/me/my-local-machine/query.jq"
--output-path "/user/me/results/output"
--log-path "hdfs:///user/me/logging/mylog"
2 changes: 1 addition & 1 deletion docs/install.md
@@ -64,7 +64,7 @@ After successful completion, you can check the `target` directory, which should

The most straightforward way to test whether the above steps were successful is to run the Rumble shell locally, like so:

$ spark-submit target/spark-rumble-1.6.2.jar --shell yes
$ spark-submit target/spark-rumble-1.6.4.jar --shell yes

The Rumble shell should start:

2 changes: 1 addition & 1 deletion pom.xml
@@ -26,7 +26,7 @@

<groupId>com.github.rumbledb</groupId>
<artifactId>spark-rumble</artifactId>
<version>1.6.2</version>
<version>1.6.4</version>
<packaging>jar</packaging>
<name>Rumble</name>
<description>A JSONiq engine to query large-scale JSON datasets stored on HDFS. Spark under the hood.</description>
3 changes: 2 additions & 1 deletion src/main/java/org/rumbledb/api/Item.java
@@ -22,6 +22,7 @@

import org.joda.time.DateTime;
import org.joda.time.Period;
import org.rumbledb.context.Name;
import org.rumbledb.exceptions.ExceptionMetadata;
import org.rumbledb.exceptions.IteratorFlowException;
import org.rumbledb.exceptions.OurBadException;
@@ -620,7 +621,7 @@ public FunctionIdentifier getIdentifier() {
throw new UnsupportedOperationException("Operation not defined");
}

public List<String> getParameterNames() {
public List<Name> getParameterNames() {
throw new UnsupportedOperationException("Operation not defined");
}

39 changes: 4 additions & 35 deletions src/main/java/org/rumbledb/api/Rumble.java
@@ -1,18 +1,8 @@
package org.rumbledb.api;

import org.antlr.v4.runtime.BailErrorStrategy;
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.misc.ParseCancellationException;
import org.rumbledb.compiler.TranslationVisitor;
import org.rumbledb.compiler.VisitorHelpers;
import org.rumbledb.config.RumbleRuntimeConfiguration;
import org.rumbledb.exceptions.ExceptionMetadata;
import org.rumbledb.exceptions.ParsingException;
import org.rumbledb.expressions.module.MainModule;
import org.rumbledb.parser.JsoniqLexer;
import org.rumbledb.parser.JsoniqParser;
import org.rumbledb.runtime.RuntimeIterator;
import sparksoniq.spark.SparkSessionManager;

@@ -47,31 +37,10 @@ public Rumble(RumbleConf conf) {
* @return the resulting sequence as an ItemIterator.
*/
public SequenceOfItems runQuery(String query) {
CharStream charStream = CharStreams.fromString(query);
JsoniqLexer lexer = new JsoniqLexer(charStream);
JsoniqParser parser = new JsoniqParser(new CommonTokenStream(lexer));
parser.setErrorHandler(new BailErrorStrategy());
TranslationVisitor visitor = new TranslationVisitor();
MainModule mainModule = null;
try {
// TODO Handle module extras
JsoniqParser.ModuleContext module = parser.module();
JsoniqParser.MainModuleContext main = module.main;
mainModule = (MainModule) visitor.visit(main);
} catch (ParseCancellationException ex) {
ParsingException e = new ParsingException(
lexer.getText(),
new ExceptionMetadata(
lexer.getLine(),
lexer.getCharPositionInLine()
)
);
e.initCause(ex);
throw e;
}
VisitorHelpers.resolveDependencies(mainModule, RumbleRuntimeConfiguration.getDefaultConfiguration());
VisitorHelpers.populateStaticContext(mainModule);

MainModule mainModule = VisitorHelpers.parseMainModuleFromQuery(
query,
RumbleRuntimeConfiguration.getDefaultConfiguration()
);
RuntimeIterator iterator = VisitorHelpers.generateRuntimeIterator(mainModule);
return new SequenceOfItems(iterator);
}