From afa8ea6dac14e271136bd1d2b2ab17bc28297c67 Mon Sep 17 00:00:00 2001 From: Oscar Westra van Holthe - Kind Date: Tue, 19 Sep 2023 17:13:08 +0200 Subject: [PATCH] AVRO-3404: Add IDL syntax for schema definitions (#1589) Add schema syntax to the IDL syntax. This allows a full schema file (.avsc) equivalent: the IDL can now define any named or unnamed schema to be returned by the parser. The IDL documentation also includes examples. --- .../docs/++version++/IDL Language/_index.md | 164 +++++++++++++----- .../docs/++version++/Specification/_index.md | 6 +- .../java/org/apache/avro/idl/IdlFile.java | 21 ++- .../java/org/apache/avro/idl/IdlReader.java | 25 ++- .../org/apache/avro/idl/SchemaResolver.java | 13 +- .../idl/src/test/idl/extra/schemaSyntax.avdl | 8 + .../test/idl/input/schema_syntax_schema.avdl | 48 +++++ .../idl/src/test/idl/input/status_schema.avdl | 3 + .../src/test/idl/output/schema_syntax.avsc | 36 ++++ lang/java/idl/src/test/idl/output/status.avsc | 9 + .../org/apache/avro/idl/IdlReaderTest.java | 17 ++ .../java/org/apache/avro/mojo/IDLMojo.java | 130 ++++++++++++++ .../org/apache/avro/mojo/IDLProtocolMojo.java | 84 +-------- .../src/test/avro/AvdlClasspathImport.avdl | 16 +- ...tIDLProtocolMojo.java => TestIDLMojo.java} | 15 +- .../java/org/apache/avro/tool/IdlTool.java | 13 +- lang/java/tools/src/test/idl/schema.avdl | 36 ++++ lang/java/tools/src/test/idl/schema.avsc | 36 ++++ .../org/apache/avro/tool/TestIdlTool.java | 16 ++ share/idl_grammar/org/apache/avro/idl/Idl.g4 | 11 +- 20 files changed, 552 insertions(+), 155 deletions(-) create mode 100644 lang/java/idl/src/test/idl/extra/schemaSyntax.avdl create mode 100644 lang/java/idl/src/test/idl/input/schema_syntax_schema.avdl create mode 100644 lang/java/idl/src/test/idl/input/status_schema.avdl create mode 100644 lang/java/idl/src/test/idl/output/schema_syntax.avsc create mode 100644 lang/java/idl/src/test/idl/output/status.avsc create mode 100644 
lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/IDLMojo.java rename lang/java/maven-plugin/src/test/java/org/apache/avro/mojo/{TestIDLProtocolMojo.java => TestIDLMojo.java} (82%) create mode 100644 lang/java/tools/src/test/idl/schema.avdl create mode 100644 lang/java/tools/src/test/idl/schema.avsc diff --git a/doc/content/en/docs/++version++/IDL Language/_index.md b/doc/content/en/docs/++version++/IDL Language/_index.md index 8fe3d0592ac..be6375005da 100644 --- a/doc/content/en/docs/++version++/IDL Language/_index.md +++ b/doc/content/en/docs/++version++/IDL Language/_index.md @@ -34,7 +34,7 @@ This document defines Avro IDL, a higher-level language for authoring Avro schem The aim of the Avro IDL language is to enable developers to author schemata in a way that feels more similar to common programming languages like Java, C++, or Python. Additionally, the Avro IDL language may feel more familiar for those users who have previously used the interface description languages (IDLs) in other frameworks like Thrift, Protocol Buffers, or CORBA. ### Usage -Each Avro IDL file defines a single Avro Protocol, and thus generates as its output a JSON-format Avro Protocol file with extension .avpr. +Each Avro IDL file defines either a single Avro Protocol, or an Avro Schema with supporting named schemata in a namespace. When parsed, it thus yields either a Protocol or a Schema. These can be respectively written to JSON-format Avro Protocol files with extension .avpr or JSON-format Avro Schema files with extension .avsc. To convert a _.avdl_ file into a _.avpr_ file, it may be processed by the `idl` tool. For example: ```shell @@ -44,6 +44,16 @@ $ head /tmp/namespaces.avpr "protocol" : "TestNamespace", "namespace" : "avro.test.protocol", ``` +To convert a _.avdl_ file into a _.avsc_ file, it may be processed by the `idl` tool too. 
For example: +```shell +$ java -jar avro-tools.jar idl src/test/idl/input/schema_syntax_schema.avdl /tmp/schema_syntax.avsc +$ head /tmp/schema_syntax.avsc +{ + "type": "array", + "items": { + "type": "record", + "name": "StatusUpdate", +``` The `idl` tool can also process input to and from _stdin_ and _stdout_. See `idl --help` for full usage information. A Maven plugin is also provided to compile .avdl files. To use it, add something like the following to your pom.xml: @@ -56,7 +66,7 @@ A Maven plugin is also provided to compile .avdl files. To use it, add something - idl-protocol + idl @@ -65,6 +75,48 @@ A Maven plugin is also provided to compile .avdl files. To use it, add something ``` +## Defining a Schema in Avro IDL +An Avro IDL file consists of exactly one (main) schema definition. The minimal schema is defined by the following code: +```java +schema int; +``` +This is equivalent to (and generates) the following JSON schema definition: +```json +{ + "type": "int" +} +``` +More complex schemata can also be defined, for example by adding named schemata like this: +```java +namespace default.namespace.for.named.schemata; +schema Message; + +record Message { + string? title = null; + string message; +} +``` +This is equivalent to (and generates) the following JSON schema definition: +```json +{ + "type" : "record", + "name" : "Message", + "namespace" : "default.namespace.for.named.schemata", + "fields" : [ { + "name" : "title", + "type" : [ "null", "string" ], + "default": null + }, { + "name" : "message", + "type" : "string" + } ] +} +``` +Schemata in Avro IDL can contain the following items: + +* Imports of external protocol and schema files (only named schemata are imported). +* Definitions of named schemata, including records, errors, enums, and fixeds. + ## Defining a Protocol in Avro IDL An Avro IDL file consists of exactly one protocol definition. 
The minimal protocol is defined by the following code: ```java @@ -109,7 +161,7 @@ Files may be imported in one of three formats: `import schema "foo.avsc";` -Messages and types in the imported file are added to this file's protocol. +When importing into an IDL schema file, only (named) types are imported into this file. When importing into an IDL protocol, messages are imported into the protocol as well. Imported file names are resolved relative to the current IDL file. @@ -135,7 +187,7 @@ Fixed fields are defined using the following syntax: ``` fixed MD5(16); ``` -This example defines a fixed-length type called MD5 which contains 16 bytes. +This example defines a fixed-length type called MD5, which contains 16 bytes. ## Defining Records and Errors Records are defined in Avro IDL using a syntax similar to a struct definition in C: @@ -161,19 +213,20 @@ A type reference in Avro IDL must be one of: * A primitive type * A logical type -* A named schema defined prior to this usage in the same Protocol +* A named schema (either defined or imported) * A complex type (array, map, or union) ### Primitive Types The primitive types supported by Avro IDL are the same as those supported by Avro's JSON format. This list includes _int_, _long_, _string_, _boolean_, _float_, _double_, _null_, and _bytes_. ### Logical Types -Some of the logical types supported by Avro's JSON format are also supported by Avro IDL. The currently supported types are: +Some of the logical types supported by Avro's JSON format are directly supported by Avro IDL. 
The currently supported types are: * _decimal_ (logical type [decimal]({{< relref "specification#decimal" >}})) * _date_ (logical type [date]({{< relref "specification#date" >}})) * _time_ms_ (logical type [time-millis]({{< relref "specification#time-millisecond-precision" >}})) * _timestamp_ms_ (logical type [timestamp-millis]({{< relref "specification#timestamp-millisecond-precision" >}})) +* _local_timestamp_ms_ (logical type [local-timestamp-millis]({{< relref "specification#local_timestamp_ms" >}})) * _uuid_ (logical type [uuid]({{< relref "specification#uuid" >}})) For example: @@ -226,23 +279,25 @@ record RecordWithUnion { union { decimal(12, 6), float } number; } ``` -Note that the same restrictions apply to Avro IDL unions as apply to unions defined in the JSON format; namely, a record may not contain multiple elements of the same type. Also, fields/parameters that use the union type and have a default parameter must specify a default value of the same type as the **first** union type. +Note that the same restrictions apply to Avro IDL unions as apply to unions defined in the JSON format; namely, a union may not contain multiple elements of the same type. Also, fields/parameters that use the union type and have a default parameter must specify a default value of the same type as the **first** union type. -Because it occurs so often, there is a special shorthand to denote a union of `null` with another type. In the following snippet, the first three fields have identical types: +Because it occurs so often, there is a special shorthand to denote a union of `null` with one other schema. The first three fields in the following snippet have identical schemata, as do the last two fields: ```java record RecordWithUnion { union { null, string } optionalString1 = null; string? optionalString2 = null; string? optionalString3; // No default value - string? optionalString4 = "something"; + + union { string, null } optionalString4 = "something"; + string? 
optionalString5 = "something else"; } ``` -Note that unlike explicit unions, the position of the `null` type is fluid; it will be the first or last type depending on the default value (if any). So in the example above, all fields are valid. +Note that unlike explicit unions, the position of the `null` type is fluid; it will be the first or last type depending on the default value (if any). So all fields are valid in the example above. ## Defining RPC Messages -The syntax to define an RPC message within a Avro IDL protocol is similar to the syntax for a method declaration within a C header file or a Java interface. To define an RPC message add which takes two arguments named _foo_ and _bar_, returning an _int_, simply include the following definition within the protocol: +The syntax to define an RPC message within a Avro IDL protocol is similar to the syntax for a method declaration within a C header file or a Java interface. To define an RPC message _add_ which takes two arguments named _foo_ and _bar_, returning an _int_, simply include the following definition within the protocol: ```java int add(int foo, int bar = 0); ``` @@ -252,7 +307,7 @@ To define a message with no response, you may use the alias _void_, equivalent t ```java void logMessage(string message); ``` -If you have previously defined an error type within the same protocol, you may declare that a message can throw this error using the syntax: +If you have defined or imported an error type within the same protocol, you may declare that a message can throw this error using the syntax: ```java void goKaboom() throws Kaboom; ``` @@ -263,20 +318,22 @@ void fireAndForget(string message) oneway; ## Other Language Features -### Comments +### Comments and documentation All Java-style comments are supported within a Avro IDL file. Any text following _//_ on a line is ignored, as is any text between _/*_ and _*/_, possibly spanning multiple lines. 
Comments that begin with _/**_ are used as the documentation string for the type or field definition that follows the comment. ### Escaping Identifiers -Occasionally, one will need to use a reserved language keyword as an identifier. In order to do so, backticks (`) may be used to escape the identifier. For example, to define a message with the literal name error, you may write: +Occasionally, one may want to distinguish between identifiers and language keywords. In order to do so, backticks (`) may be used to escape +the identifier. For example, to define a message with the literal name error, you may write: ```java void `error`(); ``` This syntax is allowed anywhere an identifier is expected. ### Annotations for Ordering and Namespaces -Java-style annotations may be used to add additional properties to types and fields throughout Avro IDL. +Java-style annotations may be used to add additional properties to types and fields throughout Avro IDL. These can be custom properties, or +special properties as used in the JSON-format Avro Schema and Protocol files. For example, to specify the sort order of a field within a record, one may use the `@order` annotation before the field name as follows: ```java @@ -319,46 +376,64 @@ record MyRecord { string @aliases(["oldField", "ancientField"]) myNewField; } ``` -Some annotations like those listed above are handled specially. All other annotations are added as properties to the protocol, message, schema or field. +Some annotations like those listed above are handled specially. All other annotations are added as properties to the protocol, message, schema or field. You can use any identifier or series of identifiers separated by dots and/or dashes as a property name. 
## Complete Example -The following is an example of an Avro IDL file that shows most of the above features: +The following is an example of two Avro IDL files that together show most of the above features: + +### schema.avdl ```java /* -* Header with license information. -*/ - -/** - * An example protocol in Avro IDL + * Header with license information. */ -@namespace("org.apache.avro.test") -protocol Simple { - /** Documentation for the enum type Kind */ - @aliases(["org.foo.KindOf"]) - enum Kind { - FOO, - BAR, // the bar enum value - BAZ - } = FOO; // For schema evolution purposes, unmatched values do not throw an error, but are resolved to FOO. +// Optional default namespace (if absent, the default namespace is the null namespace). +namespace org.apache.avro.test; +// Optional main schema definition; if used, the IDL file is equivalent to a .avsc file. +schema TestRecord; + +/** Documentation for the enum type Kind */ +@aliases(["org.foo.KindOf"]) +enum Kind { + FOO, + BAR, // the bar enum value + BAZ +} = FOO; // For schema evolution purposes, unmatched values do not throw an error, but are resolved to FOO. + +/** MD5 hash; good enough to avoid most collisions, and smaller than (for example) SHA256. */ +fixed MD5(16); - /** MD5 hash; good enough to avoid most collisions, and smaller than (for example) SHA256. */ - fixed MD5(16); +record TestRecord { + /** Record name; has no intrinsic order */ + string @order("ignore") name; - record TestRecord { - /** Record name; has no intrinsic order */ - string @order("ignore") name; + Kind @order("descending") kind; - Kind @order("descending") kind; + MD5 hash; - MD5 hash; + /* + Note that 'null' is the first union type. Just like .avsc / .avpr files, the default value must be of the first union type. + */ + union { null, MD5 } /** Optional field */ @aliases(["hash"]) nullableHash = null; + // Shorthand syntax; the null in this union is placed based on the default value (or first if there's no default). + MD5? 
anotherNullableHash = null; - /* - Note that 'null' is the first union type. Just like .avsc / .avpr files, the default value must be of the first union type. - */ - union { null, MD5 } /** Optional field */ @aliases(["hash"]) nullableHash = null; + array<long> arrayOfLongs; +} +``` - array<long> arrayOfLongs; - } +### protocol.avdl +```java +/* + * Header with license information. + */ + +/** + * An example protocol in Avro IDL + */ +@namespace("org.apache.avro.test") +protocol Simple { + // Import the example file above + import idl "schema.avdl"; /** Errors are records that can be thrown from a method */ error TestError { @@ -375,6 +450,7 @@ protocol Simple { void ping() oneway; } ``` + Additional examples may be found in the Avro source tree under the `src/test/idl/input` directory. ## IDE support diff --git a/doc/content/en/docs/++version++/Specification/_index.md b/doc/content/en/docs/++version++/Specification/_index.md index 4772b001086..9761cdc2922 100755 --- a/doc/content/en/docs/++version++/Specification/_index.md +++ b/doc/content/en/docs/++version++/Specification/_index.md @@ -812,7 +812,7 @@ The following schema represents a date: } ``` -### Time (millisecond precision) +### Time (millisecond precision) {#time_ms} The `time-millis` logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one millisecond. A `time-millis` logical type annotates an Avro `int`, where the int stores the number of milliseconds after midnight, 00:00:00.000. @@ -822,7 +822,7 @@ The `time-micros` logical type represents a time of day, with no reference to a A `time-micros` logical type annotates an Avro `long`, where the long stores the number of microseconds after midnight, 00:00:00.000000. 
-### Timestamp (millisecond precision) +### Timestamp (millisecond precision) {#timestamp_ms} The `timestamp-millis` logical type represents an instant on the global timeline, independent of a particular time zone or calendar, with a precision of one millisecond. Please note that time zone information gets lost in this process. Upon reading a value back, we can only reconstruct the instant, but not the original representation. In practice, such timestamps are typically displayed to users in their local time zones, therefore they may be displayed differently depending on the execution environment. A `timestamp-millis` logical type annotates an Avro `long`, where the long stores the number of milliseconds from the unix epoch, 1 January 1970 00:00:00.000 UTC. @@ -832,7 +832,7 @@ The `timestamp-micros` logical type represents an instant on the global timeline A `timestamp-micros` logical type annotates an Avro `long`, where the long stores the number of microseconds from the unix epoch, 1 January 1970 00:00:00.000000 UTC. -### Local timestamp (millisecond precision) +### Local timestamp (millisecond precision) {#local_timestamp_ms} The `local-timestamp-millis` logical type represents a timestamp in a local timezone, regardless of what specific time zone is considered local, with a precision of one millisecond. A `local-timestamp-millis` logical type annotates an Avro `long`, where the long stores the number of milliseconds, from 1 January 1970 00:00:00.000. diff --git a/lang/java/idl/src/main/java/org/apache/avro/idl/IdlFile.java b/lang/java/idl/src/main/java/org/apache/avro/idl/IdlFile.java index 56627b5821b..b3777c9f790 100644 --- a/lang/java/idl/src/main/java/org/apache/avro/idl/IdlFile.java +++ b/lang/java/idl/src/main/java/org/apache/avro/idl/IdlFile.java @@ -32,25 +32,39 @@ * the protocol containing the schemas. 
*/ public class IdlFile { + private final Schema mainSchema; private final Protocol protocol; private final String namespace; private final Map namedSchemas; private final List warnings; IdlFile(Protocol protocol, List warnings) { - this(protocol.getNamespace(), protocol.getTypes(), protocol, warnings); + this(protocol.getNamespace(), protocol.getTypes(), null, protocol, warnings); } - private IdlFile(String namespace, Iterable schemas, Protocol protocol, List warnings) { + IdlFile(String namespace, Schema mainSchema, Iterable schemas, List warnings) { + this(namespace, schemas, mainSchema, null, warnings); + } + + private IdlFile(String namespace, Iterable schemas, Schema mainSchema, Protocol protocol, + List warnings) { this.namespace = namespace; this.namedSchemas = new LinkedHashMap<>(); for (Schema namedSchema : schemas) { this.namedSchemas.put(namedSchema.getFullName(), namedSchema); } + this.mainSchema = mainSchema; this.protocol = protocol; this.warnings = Collections.unmodifiableList(new ArrayList<>(warnings)); } + /** + * The (main) schema defined by the IDL file. + */ + public Schema getMainSchema() { + return mainSchema; + } + /** * The protocol defined by the IDL file. 
*/ @@ -106,6 +120,9 @@ String outputString() { if (protocol != null) { return protocol.toString(); } + if (mainSchema != null) { + return mainSchema.toString(); + } if (namedSchemas.isEmpty()) { return "[]"; } else { diff --git a/lang/java/idl/src/main/java/org/apache/avro/idl/IdlReader.java b/lang/java/idl/src/main/java/org/apache/avro/idl/IdlReader.java index ec9f698819a..f2419f5f551 100644 --- a/lang/java/idl/src/main/java/org/apache/avro/idl/IdlReader.java +++ b/lang/java/idl/src/main/java/org/apache/avro/idl/IdlReader.java @@ -57,6 +57,7 @@ import org.apache.avro.idl.IdlParser.JsonValueContext; import org.apache.avro.idl.IdlParser.MapTypeContext; import org.apache.avro.idl.IdlParser.MessageDeclarationContext; +import org.apache.avro.idl.IdlParser.NamespaceDeclarationContext; import org.apache.avro.idl.IdlParser.NullableTypeContext; import org.apache.avro.idl.IdlParser.PrimitiveTypeContext; import org.apache.avro.idl.IdlParser.ProtocolDeclarationBodyContext; @@ -259,6 +260,7 @@ private class IdlParserListener extends IdlBaseListener { private final List warnings; private IdlFile result; + private Schema mainSchema; private Protocol protocol; private final Deque namespaces; private final List enumSymbols; @@ -278,6 +280,7 @@ public IdlParserListener(URI inputDir, CommonTokenStream tokenStream) { warnings = new ArrayList<>(); result = null; + mainSchema = null; protocol = null; namespaces = new ArrayDeque<>(); enumSymbols = new ArrayList<>(); @@ -335,8 +338,8 @@ private void pushNamespace(String namespace) { } private String currentNamespace() { - String namespace = namespaces.element(); - return namespace.isEmpty() ? null : namespace; + String namespace = namespaces.peek(); + return namespace == null || namespace.isEmpty() ? 
null : namespace; } private void popNamespace() { @@ -345,7 +348,12 @@ private void popNamespace() { @Override public void exitIdlFile(IdlFileContext ctx) { - IdlFile unresolved = new IdlFile(protocol, warnings); + IdlFile unresolved; + if (protocol == null) { + unresolved = new IdlFile(currentNamespace(), mainSchema, getTypes().values(), warnings); + } else { + unresolved = new IdlFile(protocol, warnings); + } result = SchemaResolver.resolve(unresolved, OPTIONAL_NULLABLE_TYPE_PROPERTY); } @@ -375,6 +383,17 @@ public void exitProtocolDeclaration(ProtocolDeclarationContext ctx) { popNamespace(); } + @Override + public void exitNamespaceDeclaration(NamespaceDeclarationContext ctx) { + pushNamespace(namespace("", identifier(ctx.namespace))); + } + + @Override + public void exitMainSchemaDeclaration(IdlParser.MainSchemaDeclarationContext ctx) { + mainSchema = typeStack.pop(); + assert typeStack.isEmpty(); + } + @Override public void enterSchemaProperty(SchemaPropertyContext ctx) { assert jsonValues.isEmpty(); diff --git a/lang/java/idl/src/main/java/org/apache/avro/idl/SchemaResolver.java b/lang/java/idl/src/main/java/org/apache/avro/idl/SchemaResolver.java index 315f32221ba..8c9a9c15b99 100644 --- a/lang/java/idl/src/main/java/org/apache/avro/idl/SchemaResolver.java +++ b/lang/java/idl/src/main/java/org/apache/avro/idl/SchemaResolver.java @@ -22,7 +22,9 @@ import org.apache.avro.Schema; import java.util.Collections; +import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Function; import java.util.stream.Collectors; @@ -100,7 +102,16 @@ static boolean isFullyResolvedSchema(final Schema schema) { * @return a copy of idlFile with all schemas resolved */ static IdlFile resolve(final IdlFile idlFile, String... 
schemaPropertiesToRemove) { - return new IdlFile(resolve(idlFile.getProtocol(), schemaPropertiesToRemove), idlFile.getWarnings()); + if (idlFile.getProtocol() != null) { + return new IdlFile(resolve(idlFile.getProtocol(), schemaPropertiesToRemove), idlFile.getWarnings()); + } + + ResolvingVisitor visitor = new ResolvingVisitor(null, idlFile::getNamedSchema, schemaPropertiesToRemove); + Function resolver = schema -> Schemas.visit(schema, visitor.withRoot(schema)); + + List namedSchemata = idlFile.getNamedSchemas().values().stream().map(resolver).collect(Collectors.toList()); + Schema mainSchema = Optional.ofNullable(idlFile.getMainSchema()).map(resolver).orElse(null); + return new IdlFile(idlFile.getNamespace(), mainSchema, namedSchemata, idlFile.getWarnings()); } /** diff --git a/lang/java/idl/src/test/idl/extra/schemaSyntax.avdl b/lang/java/idl/src/test/idl/extra/schemaSyntax.avdl new file mode 100644 index 00000000000..1d88ba6e43b --- /dev/null +++ b/lang/java/idl/src/test/idl/extra/schemaSyntax.avdl @@ -0,0 +1,8 @@ +namespace communication; + +schema array; + +record Message { + string? title; + string message; +} diff --git a/lang/java/idl/src/test/idl/input/schema_syntax_schema.avdl b/lang/java/idl/src/test/idl/input/schema_syntax_schema.avdl new file mode 100644 index 00000000000..1df43f7a656 --- /dev/null +++ b/lang/java/idl/src/test/idl/input/schema_syntax_schema.avdl @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: the correct extension for this new syntax is also '.avdl'. The test + * files end with '_schema.avdl' only to distinguish them from .avdl files using + * the protocol syntax, because the result is a schema file instead of a protocol file. + */ +namespace monitoring; +schema array; + +record StatusUpdate { + /** + * The moment of the status change. + */ + timestamp_ms timestamp; + /** + * The process whose status changed. + */ + string processName; + /** + * The new status of the process. + */ + Status newStatus; + /** + * A description why this status change occurred (optional). + */ + string? 
description; +} + +import idl "status_schema.avdl"; +import schema "foo.avsc"; +import protocol "bar.avpr"; diff --git a/lang/java/idl/src/test/idl/input/status_schema.avdl b/lang/java/idl/src/test/idl/input/status_schema.avdl new file mode 100644 index 00000000000..504218a4fcb --- /dev/null +++ b/lang/java/idl/src/test/idl/input/status_schema.avdl @@ -0,0 +1,3 @@ +enum Status { + UNKNOWN, NEW, STARTUP, RUNNING, TERMINATING, SHUTDOWN, CRASHED +} = UNKNOWN; diff --git a/lang/java/idl/src/test/idl/output/schema_syntax.avsc b/lang/java/idl/src/test/idl/output/schema_syntax.avsc new file mode 100644 index 00000000000..06042446188 --- /dev/null +++ b/lang/java/idl/src/test/idl/output/schema_syntax.avsc @@ -0,0 +1,36 @@ +{ + "type": "array", + "items": { + "type": "record", + "name": "StatusUpdate", + "namespace": "monitoring", + "fields": [ + { + "name": "timestamp", + "type": { + "type": "long", + "logicalType": "timestamp-millis" + }, + "doc": "The moment of the status change." + }, { + "name": "processName", + "type": "string", + "doc": "The process whose status changed." + }, { + "name": "newStatus", + "type": { + "type": "enum", + "name": "Status", + "namespace": "system", + "symbols": ["UNKNOWN", "NEW", "STARTUP", "RUNNING", "TERMINATING", "SHUTDOWN", "CRASHED"], + "default": "UNKNOWN" + }, + "doc": "The new status of the process." + }, { + "name": "description", + "type": ["null", "string"], + "doc": "A description why this status change occurred (optional)." 
+ } + ] + } +} diff --git a/lang/java/idl/src/test/idl/output/status.avsc b/lang/java/idl/src/test/idl/output/status.avsc new file mode 100644 index 00000000000..82710b84137 --- /dev/null +++ b/lang/java/idl/src/test/idl/output/status.avsc @@ -0,0 +1,9 @@ +[ + { + "type": "enum", + "name": "Status", + "namespace": "system", + "symbols": [ "UNKNOWN", "NEW", "STARTUP", "RUNNING", "TERMINATING", "SHUTDOWN", "CRASHED" ], + "default": "UNKNOWN" + } +] diff --git a/lang/java/idl/src/test/java/org/apache/avro/idl/IdlReaderTest.java b/lang/java/idl/src/test/java/org/apache/avro/idl/IdlReaderTest.java index e609e1ff865..8e9f187f4ce 100644 --- a/lang/java/idl/src/test/java/org/apache/avro/idl/IdlReaderTest.java +++ b/lang/java/idl/src/test/java/org/apache/avro/idl/IdlReaderTest.java @@ -100,6 +100,23 @@ public void validateProtocolParsingResult() throws IOException { assertNotNull(idlFile.getNamedSchema("Message")); assertNotNull(idlFile.getProtocol()); + assertNull(idlFile.getMainSchema()); + } + + @Test + public void validateSchemaParsingResult() throws IOException { + // runTests already tests the actual parsing; this tests the result object. 
+ IdlFile idlFile = parseExtraIdlFile("schemaSyntax.avdl"); + + assertEquals(1, idlFile.getNamedSchemas().size()); + idlFile.getNamedSchemas().keySet().forEach(System.out::println); + assertNotNull(idlFile.getNamedSchema("communication.Message")); + assertNotNull(idlFile.getNamedSchema("Message")); + + assertNull(idlFile.getProtocol()); + Schema mainSchema = idlFile.getMainSchema(); + assertEquals(Schema.Type.ARRAY, mainSchema.getType()); + assertEquals(idlFile.getNamedSchema("Message"), mainSchema.getElementType()); } @Test diff --git a/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/IDLMojo.java b/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/IDLMojo.java new file mode 100644 index 00000000000..15f6a6c0c4e --- /dev/null +++ b/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/IDLMojo.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.avro.mojo; + +import java.io.File; +import java.io.IOException; +import java.net.URL; +import java.net.URLClassLoader; +import java.util.ArrayList; +import java.util.List; + +import org.apache.avro.Protocol; +import org.apache.avro.compiler.specific.SpecificCompiler; +import org.apache.avro.generic.GenericData; + +import org.apache.avro.idl.IdlFile; +import org.apache.avro.idl.IdlReader; +import org.apache.maven.artifact.DependencyResolutionRequiredException; + +/** + * Generate Java classes and interfaces from AvroIDL files (.avdl) + * + * @goal idl + * @requiresDependencyResolution runtime + * @phase generate-sources + * @threadSafe + */ +public class IDLMojo extends AbstractAvroMojo { + /** + * A set of Ant-like inclusion patterns used to select files from the source + * directory for processing. By default, the pattern <code>**&#47;*.avdl</code> + * is used to select IDL files. + * + * @parameter + */ + private String[] includes = new String[] { "**/*.avdl" }; + + /** + * A set of Ant-like inclusion patterns used to select files from the source + * directory for processing. By default, the pattern <code>**&#47;*.avdl</code> + * is used to select IDL files. + * + * @parameter + */ + private String[] testIncludes = new String[] { "**/*.avdl" }; + + @Override + protected void doCompile(String filename, File sourceDirectory, File outputDirectory) throws IOException { + try { + @SuppressWarnings("rawtypes") + List runtimeClasspathElements = project.getRuntimeClasspathElements(); + + List runtimeUrls = new ArrayList<>(); + + // Add the source directory of avro files to the classpath so that + // imports can refer to other idl files as classpath resources + runtimeUrls.add(sourceDirectory.toURI().toURL()); + + // If runtimeClasspathElements is not empty, add its values to the Idl path. 
+ if (runtimeClasspathElements != null && !runtimeClasspathElements.isEmpty()) { + for (Object runtimeClasspathElement : runtimeClasspathElements) { + String element = (String) runtimeClasspathElement; + runtimeUrls.add(new File(element).toURI().toURL()); + } + } + + final ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader(); + URLClassLoader projPathLoader = new URLClassLoader(runtimeUrls.toArray(new URL[0]), contextClassLoader); + Thread.currentThread().setContextClassLoader(projPathLoader); + try { + IdlReader parser = new IdlReader(); + IdlFile idlFile = parser.parse(sourceDirectory.toPath().resolve(filename)); + for (String warning : idlFile.getWarnings()) { + getLog().warn(warning); + } + final SpecificCompiler compiler; + final Protocol protocol = idlFile.getProtocol(); + if (protocol != null) { + compiler = new SpecificCompiler(protocol); + } else { + compiler = new SpecificCompiler(idlFile.getNamedSchemas().values()); + } + compiler.setStringType(GenericData.StringType.valueOf(stringType)); + compiler.setTemplateDir(templateDirectory); + compiler.setFieldVisibility(getFieldVisibility()); + compiler.setCreateOptionalGetters(createOptionalGetters); + compiler.setGettersReturnOptional(gettersReturnOptional); + compiler.setOptionalGettersForNullableFieldsOnly(optionalGettersForNullableFieldsOnly); + compiler.setCreateSetters(createSetters); + compiler.setAdditionalVelocityTools(instantiateAdditionalVelocityTools()); + compiler.setEnableDecimalLogicalType(enableDecimalLogicalType); + for (String customConversion : customConversions) { + compiler.addCustomConversion(projPathLoader.loadClass(customConversion)); + } + compiler.setOutputCharacterEncoding(project.getProperties().getProperty("project.build.sourceEncoding")); + compiler.compileToDestination(null, outputDirectory); + } finally { + Thread.currentThread().setContextClassLoader(contextClassLoader); + } + } catch (ClassNotFoundException | DependencyResolutionRequiredException e) 
{ + throw new IOException(e); + } + } + + @Override + protected String[] getIncludes() { + return includes; + } + + @Override + protected String[] getTestIncludes() { + return testIncludes; + } +} diff --git a/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/IDLProtocolMojo.java b/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/IDLProtocolMojo.java index eceaff88ded..a6dd9cf24ee 100644 --- a/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/IDLProtocolMojo.java +++ b/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/IDLProtocolMojo.java @@ -18,17 +18,6 @@ package org.apache.avro.mojo; -import org.apache.avro.Protocol; -import org.apache.avro.compiler.idl.Idl; -import org.apache.avro.compiler.idl.ParseException; -import org.apache.avro.idl.IdlFile; -import org.apache.avro.idl.IdlReader; - -import java.io.File; -import java.io.IOException; -import java.net.URL; -import java.net.URLClassLoader; - /** * Generate Java classes and interfaces from AvroIDL files (.avdl) * @@ -37,75 +26,6 @@ * @phase generate-sources * @threadSafe */ -public class IDLProtocolMojo extends AbstractAvroMojo { - /** - * Use the classic JavaCC parser for .avdl files. If - * false (the default), use the new ANTLR parser instead. - * - * @parameter - */ - private boolean useJavaCC = false; - - /** - * A set of Ant-like inclusion patterns used to select files from the source - * directory for processing. By default, the pattern **/*.avdl - * is used to select IDL files. - * - * @parameter - */ - private String[] includes = new String[] { "**/*.avdl" }; - - /** - * A set of Ant-like inclusion patterns used to select files from the source - * directory for processing. By default, the pattern **/*.avdl - * is used to select IDL files. 
- * - * @parameter - */ - private String[] testIncludes = new String[] { "**/*.avdl" }; - - @Override - protected void doCompile(String filename, File sourceDirectory, File outputDirectory) throws IOException { - File sourceFile = new File(sourceDirectory, filename); - - ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader(); - URL[] extraClasspath = new URL[] { sourceDirectory.toURI().toURL() }; - ClassLoader classLoader = new URLClassLoader(extraClasspath, contextClassLoader); - - Protocol protocol; - if (useJavaCC) { - try (Idl idl = new Idl(sourceFile, classLoader)) { - final Protocol p = idl.CompilationUnit(); - String json = p.toString(true); - protocol = Protocol.parse(json); - } catch (ParseException e) { - throw new IOException(e); - } - } else { - try { - Thread.currentThread().setContextClassLoader(classLoader); - - IdlReader parser = new IdlReader(); - IdlFile idlFile = parser.parse(sourceFile.toPath()); - for (String warning : idlFile.getWarnings()) { - getLog().warn(warning); - } - protocol = idlFile.getProtocol(); - } finally { - Thread.currentThread().setContextClassLoader(contextClassLoader); - } - } - - doCompile(sourceFile, protocol, outputDirectory); - } - - @Override - protected String[] getIncludes() { - return includes; - } - - @Override - protected String[] getTestIncludes() { - return testIncludes; - } +public class IDLProtocolMojo extends IDLMojo { + // Empty; kept for backwards compatibility. } diff --git a/lang/java/maven-plugin/src/test/avro/AvdlClasspathImport.avdl b/lang/java/maven-plugin/src/test/avro/AvdlClasspathImport.avdl index fd799d3dda5..81bdb609445 100644 --- a/lang/java/maven-plugin/src/test/avro/AvdlClasspathImport.avdl +++ b/lang/java/maven-plugin/src/test/avro/AvdlClasspathImport.avdl @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -15,14 +15,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -@namespace("test") -protocol IdlClasspathImportTest { - import idl "avro/User.avdl"; +namespace test; - /** Ignored Doc Comment */ - /** IDL User */ - record IdlUserWrapper { - union { null, test.IdlUser } wrapped; - } +import idl "avro/User.avdl"; +/** Ignored Doc Comment */ +/** IDL User */ +record IdlUserWrapper { + union { null, test.IdlUser } wrapped; } diff --git a/lang/java/maven-plugin/src/test/java/org/apache/avro/mojo/TestIDLProtocolMojo.java b/lang/java/maven-plugin/src/test/java/org/apache/avro/mojo/TestIDLMojo.java similarity index 82% rename from lang/java/maven-plugin/src/test/java/org/apache/avro/mojo/TestIDLProtocolMojo.java rename to lang/java/maven-plugin/src/test/java/org/apache/avro/mojo/TestIDLMojo.java index c8cc22316e4..94cc5b29e52 100644 --- a/lang/java/maven-plugin/src/test/java/org/apache/avro/mojo/TestIDLProtocolMojo.java +++ b/lang/java/maven-plugin/src/test/java/org/apache/avro/mojo/TestIDLMojo.java @@ -19,6 +19,7 @@ import java.io.File; import java.util.Collections; +import java.util.Collections; import java.util.HashSet; import java.util.Set; @@ -30,7 +31,7 @@ /** * Test the IDL Protocol Mojo. 
*/ -public class TestIDLProtocolMojo extends AbstractAvroMojoTest { +public class TestIDLMojo extends AbstractAvroMojoTest { private File testPom = new File(getBasedir(), "src/test/resources/unit/idl/pom.xml"); private File injectingVelocityToolsTestPom = new File(getBasedir(), @@ -38,7 +39,7 @@ public class TestIDLProtocolMojo extends AbstractAvroMojoTest { @Test public void testIdlProtocolMojo() throws Exception { - final IDLProtocolMojo mojo = (IDLProtocolMojo) lookupMojo("idl-protocol", testPom); + final IDLMojo mojo = (IDLMojo) lookupMojo("idl", testPom); final TestLog log = new TestLog(); mojo.setLog(log); @@ -46,14 +47,14 @@ public void testIdlProtocolMojo() throws Exception { mojo.execute(); final File outputDir = new File(getBasedir(), "target/test-harness/idl/test/"); - final Set generatedFiles = new HashSet<>(asList("IdlPrivacy.java", "IdlTest.java", "IdlUser.java", - "IdlUserWrapper.java", "IdlClasspathImportTest.java")); + final Set generatedFiles = new HashSet<>( + asList("IdlPrivacy.java", "IdlTest.java", "IdlUser.java", "IdlUserWrapper.java")); assertFilesExist(outputDir, generatedFiles); final String idlUserContent = FileUtils.fileRead(new File(outputDir, "IdlUser.java")); assertTrue(idlUserContent.contains("java.time.Instant")); - assertEquals(Collections.singletonList("[WARN] Line 22, char 5: Ignoring out-of-place documentation comment.\n" + assertEquals(Collections.singletonList("[WARN] Line 22, char 1: Ignoring out-of-place documentation comment.\n" + "Did you mean to use a multiline comment ( /* ... 
*/ ) instead?"), log.getLogEntries()); } @@ -67,8 +68,8 @@ public void testSetCompilerVelocityAdditionalTools() throws Exception { mojo.execute(); final File outputDir = new File(getBasedir(), "target/test-harness/idl-inject/test"); - final Set generatedFiles = new HashSet<>(asList("IdlPrivacy.java", "IdlTest.java", "IdlUser.java", - "IdlUserWrapper.java", "IdlClasspathImportTest.java")); + final Set generatedFiles = new HashSet<>( + asList("IdlPrivacy.java", "IdlTest.java", "IdlUser.java", "IdlUserWrapper.java")); assertFilesExist(outputDir, generatedFiles); diff --git a/lang/java/tools/src/main/java/org/apache/avro/tool/IdlTool.java b/lang/java/tools/src/main/java/org/apache/avro/tool/IdlTool.java index dfdaac966c8..d20226b8e77 100644 --- a/lang/java/tools/src/main/java/org/apache/avro/tool/IdlTool.java +++ b/lang/java/tools/src/main/java/org/apache/avro/tool/IdlTool.java @@ -19,6 +19,7 @@ package org.apache.avro.tool; import org.apache.avro.Protocol; +import org.apache.avro.Schema; import org.apache.avro.compiler.idl.Idl; import org.apache.avro.idl.IdlFile; import org.apache.avro.idl.IdlReader; @@ -54,7 +55,8 @@ public int run(InputStream in, PrintStream out, PrintStream err, List ar String outputName = getArg(args, useJavaCC ? 2 : 1, "-"); File outputFile = "-".equals(outputName) ? 
null : new File(outputName); - Protocol p; + Schema m = null; + Protocol p = null; if (useJavaCC) { try (Idl parser = new Idl(inputFile)) { p = parser.CompilationUnit(); @@ -69,6 +71,7 @@ public int run(InputStream in, PrintStream out, PrintStream err, List ar err.println("Warning: " + warning); } p = idlFile.getProtocol(); + m = idlFile.getMainSchema(); } PrintStream parseOut = out; @@ -76,8 +79,12 @@ public int run(InputStream in, PrintStream out, PrintStream err, List ar parseOut = new PrintStream(new FileOutputStream(outputFile)); } + if (m == null && p == null) { + err.println("Error: the IDL file does not contain a schema nor a protocol."); + return 1; + } try { - parseOut.print(p.toString(true)); + parseOut.print(m == null ? p.toString(true) : m.toString(true)); } finally { if (parseOut != out) // Close only the newly created FileOutputStream parseOut.close(); @@ -100,6 +107,6 @@ public String getName() { @Override public String getShortDescription() { - return "Generates a JSON protocol from an Avro IDL file"; + return "Generates a JSON schema or protocol from an Avro IDL file"; } } diff --git a/lang/java/tools/src/test/idl/schema.avdl b/lang/java/tools/src/test/idl/schema.avdl new file mode 100644 index 00000000000..312bd5d9ac5 --- /dev/null +++ b/lang/java/tools/src/test/idl/schema.avdl @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +namespace org.apache.avro.test; +schema TestRecord; + +record TestRecord { + string @order("ignore") name; + Kind @order("descending") kind; + MD5 hash; + MD5? @aliases(["hash"]) nullableHash; + array arrayOfLongs; +} + +@aliases(["org.foo.KindOf"]) +enum Kind { + FOO, + BAR, // the bar enum value + BAZ +} + +fixed MD5(16); diff --git a/lang/java/tools/src/test/idl/schema.avsc b/lang/java/tools/src/test/idl/schema.avsc new file mode 100644 index 00000000000..aa34f5b694b --- /dev/null +++ b/lang/java/tools/src/test/idl/schema.avsc @@ -0,0 +1,36 @@ +{ + "type" : "record", + "name" : "TestRecord", + "namespace" : "org.apache.avro.test", + "fields" : [ { + "name" : "name", + "type" : "string", + "order" : "ignore" + }, { + "name" : "kind", + "type" : { + "type" : "enum", + "name" : "Kind", + "symbols" : [ "FOO", "BAR", "BAZ" ], + "aliases" : [ "org.foo.KindOf" ] + }, + "order" : "descending" + }, { + "name" : "hash", + "type" : { + "type" : "fixed", + "name" : "MD5", + "size" : 16 + } + }, { + "name" : "nullableHash", + "type" : [ "null", "MD5" ], + "aliases" : [ "hash" ] + }, { + "name" : "arrayOfLongs", + "type" : { + "type" : "array", + "items" : "long" + } + } ] +} diff --git a/lang/java/tools/src/test/java/org/apache/avro/tool/TestIdlTool.java b/lang/java/tools/src/test/java/org/apache/avro/tool/TestIdlTool.java index 4cfe6eddaf4..136344bc15a 100644 --- a/lang/java/tools/src/test/java/org/apache/avro/tool/TestIdlTool.java +++ b/lang/java/tools/src/test/java/org/apache/avro/tool/TestIdlTool.java @@ -34,6 +34,22 @@ import 
java.util.stream.Collectors; public class TestIdlTool { + @Test + public void testWriteIdlAsSchema() throws Exception { + String idl = "src/test/idl/schema.avdl"; + String protocol = "src/test/idl/schema.avsc"; + String outfile = "target/test-schema.avsc"; + + ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + List arglist = Arrays.asList(idl, outfile); + new IdlTool().run(null, null, new PrintStream(buffer), arglist); + + assertEquals(readFileAsString(protocol), readFileAsString(outfile)); + + String warnings = readPrintStreamBuffer(buffer); + assertEquals("Warning: Line 1, char 1: Ignoring out-of-place documentation comment." + + "\nDid you mean to use a multiline comment ( /* ... */ ) instead?", warnings); + } @Test void writeIdlAsProtocol() throws Exception { diff --git a/share/idl_grammar/org/apache/avro/idl/Idl.g4 b/share/idl_grammar/org/apache/avro/idl/Idl.g4 index 01017a26184..7572e9cc33b 100644 --- a/share/idl_grammar/org/apache/avro/idl/Idl.g4 +++ b/share/idl_grammar/org/apache/avro/idl/Idl.g4 @@ -38,18 +38,26 @@ grammar Idl; // \u001a is the ascii 'sub'(stitute) character, used as end-of-file marker in older systems. It was also used as "end of character stream". // Thus, accept it at end of the input and ignore anything that comes after it. (See: https://en.wikipedia.org/wiki/Substitute_character) -idlFile: protocol=protocolDeclaration ('\u001a' .*?)? EOF; +idlFile: ( + protocol=protocolDeclaration | + namespace=namespaceDeclaration? mainSchema=mainSchemaDeclaration? (imports+=importStatement|namedSchemas+=namedSchemaDeclaration)* +) ('\u001a' .*?)? EOF; protocolDeclaration: (doc=DocComment)? 
schemaProperties+=schemaProperty* Protocol name=identifier body=protocolDeclarationBody; protocolDeclarationBody : LBrace (imports+=importStatement|namedSchemas+=namedSchemaDeclaration|messages+=messageDeclaration)* RBrace ; +namespaceDeclaration: Namespace namespace=identifier Semicolon; + +mainSchemaDeclaration: Schema mainSchema=fullType Semicolon; + /** * The parser accepts anything that's not a symbol as an identifier. That is, it accepts both an IdentifierToken and all keywords. Which * identifiers are actually allowed is context dependent and decided when transforming the parse tree. */ identifier: word=(IdentifierToken | Protocol + | Namespace | Import | IDL | Schema @@ -164,6 +172,7 @@ WS: [ \t\n\r\f] -> skip; ** Simple tokens */ Protocol: 'protocol'; +Namespace: 'namespace'; Import: 'import'; IDL: 'idl'; Schema: 'schema';