From 728b807c43c84f245d8ba6d621b2082b37b65671 Mon Sep 17 00:00:00 2001 From: Martin Grigorov Date: Fri, 12 Jul 2024 17:39:34 +0300 Subject: [PATCH] AVRO-4004: [Rust] Ignore logicalType fields when creating the canonical form (#2976) * AVRO-4004: [Rust] Ignore logicalType fields when creating the canonical form Signed-off-by: Martin Tzvetanov Grigorov * AVRO-4004: [Rust] Ignore the namespace for non-named schemas When creating the canonical parsing form of a Schema ignore the namespace for any non-named Schemas, i.e. anything but Record, Enum, Fixed and Ref Signed-off-by: Martin Tzvetanov Grigorov * AVRO-4004 Remove the test for round trip after canonical form Signed-off-by: Martin Tzvetanov Grigorov --------- Signed-off-by: Martin Tzvetanov Grigorov --- lang/rust/avro/src/schema.rs | 50 +- lang/rust/avro/tests/schema.rs | 651 +------------------------ lang/rust/avro_test_helper/src/data.rs | 636 ++++++++++++++++++++++++ lang/rust/avro_test_helper/src/lib.rs | 1 + 4 files changed, 686 insertions(+), 652 deletions(-) create mode 100644 lang/rust/avro_test_helper/src/data.rs diff --git a/lang/rust/avro/src/schema.rs b/lang/rust/avro/src/schema.rs index f58892ca09d..1d2272a78a7 100644 --- a/lang/rust/avro/src/schema.rs +++ b/lang/rust/avro/src/schema.rs @@ -2156,6 +2156,7 @@ fn parsing_canonical_form(schema: &Value) -> String { fn pcf_map(schema: &Map) -> String { // Look for the namespace variant up front. let ns = schema.get("namespace").and_then(|v| v.as_str()); + let typ = schema.get("type").and_then(|v| v.as_str()); let mut fields = Vec::new(); for (k, v) in schema { // Reduce primitive types to their simple form. 
([PRIMITIVE] rule) @@ -2167,7 +2168,12 @@ fn pcf_map(schema: &Map) -> String { } // Strip out unused fields ([STRIP] rule) - if field_ordering_position(k).is_none() || k == "default" || k == "doc" || k == "aliases" { + if field_ordering_position(k).is_none() + || k == "default" + || k == "doc" + || k == "aliases" + || k == "logicalType" + { continue; } @@ -2176,7 +2182,9 @@ fn pcf_map(schema: &Map) -> String { // Invariant: Only valid schemas. Must be a string. let name = v.as_str().unwrap(); let n = match ns { - Some(namespace) if !name.contains('.') => Cow::Owned(format!("{namespace}.{name}")), + Some(namespace) if is_named_type(typ) && !name.contains('.') => { + Cow::Owned(format!("{namespace}.{name}")) + } _ => Cow::Borrowed(name), }; @@ -2211,6 +2219,13 @@ fn pcf_map(schema: &Map) -> String { format!("{{{inter}}}") } +fn is_named_type(typ: Option<&str>) -> bool { + matches!( + typ, + Some("record") | Some("enum") | Some("fixed") | Some("ref") + ) +} + fn pcf_array(arr: &[Value]) -> String { let inter = arr .iter() @@ -2443,6 +2458,7 @@ pub mod derive { #[cfg(test)] mod tests { use super::*; + use crate::rabin::Rabin; use apache_avro_test_helper::{ logger::{assert_logged, assert_not_logged}, TestResult, @@ -3415,16 +3431,16 @@ mod tests { let schema = Schema::parse_str(raw_schema)?; assert_eq!( - "abf662f831715ff78f88545a05a9262af75d6406b54e1a8a174ff1d2b75affc4", + "7eb3b28d73dfc99bdd9af1848298b40804a2f8ad5d2642be2ecc2ad34842b987", format!("{}", schema.fingerprint::()) ); assert_eq!( - "6e21c350f71b1a34e9efe90970f1bc69", + "cb11615e412ee5d872620d8df78ff6ae", format!("{}", schema.fingerprint::()) ); assert_eq!( - "28cf0a67d9937bb3", + "92f2ccef718c6754", format!("{}", schema.fingerprint::()) ); @@ -6764,4 +6780,28 @@ mod tests { Ok(()) } + + #[test] + fn avro_4004_canonical_form_strip_logical_types() -> TestResult { + let schema_str = r#" + { + "type": "record", + "name": "test", + "fields": [ + {"name": "a", "type": "long", "default": 42, "doc": "The field a"}, 
+ {"name": "b", "type": "string", "namespace": "test.a"}, + {"name": "c", "type": "long", "logicalType": "timestamp-micros"} + ] + }"#; + + let schema = Schema::parse_str(schema_str)?; + let canonical_form = schema.canonical_form(); + let fp_rabin = schema.fingerprint::<Rabin>(); + assert_eq!( + r#"{"name":"test","type":"record","fields":[{"name":"a","type":"long"},{"name":"b","type":"string"},{"name":"c","type":{"type":"long"}}]}"#, + canonical_form + ); + assert_eq!("92f2ccef718c6754", fp_rabin.to_string()); + Ok(()) + } } diff --git a/lang/rust/avro/tests/schema.rs b/lang/rust/avro/tests/schema.rs index 7851d957d1c..13cf6af266d 100644 --- a/lang/rust/avro/tests/schema.rs +++ b/lang/rust/avro/tests/schema.rs @@ -18,7 +18,6 @@ use std::{ collections::HashMap, io::{Cursor, Read}, - sync::OnceLock, }; use apache_avro::{ @@ -28,638 +27,10 @@ use apache_avro::{ types::{Record, Value}, Codec, Error, Reader, Schema, Writer, }; -use apache_avro_test_helper::{init, TestResult}; - -const PRIMITIVE_EXAMPLES: &[(&str, bool)] = &[ - (r#""null""#, true), - (r#"{"type": "null"}"#, true), - (r#""boolean""#, true), - (r#"{"type": "boolean"}"#, true), - (r#""string""#, true), - (r#"{"type": "string"}"#, true), - (r#""bytes""#, true), - (r#"{"type": "bytes"}"#, true), - (r#""int""#, true), - (r#"{"type": "int"}"#, true), - (r#""long""#, true), - (r#"{"type": "long"}"#, true), - (r#""float""#, true), - (r#"{"type": "float"}"#, true), - (r#""double""#, true), - (r#"{"type": "double"}"#, true), - (r#""true""#, false), - (r#"true"#, false), - (r#"{"no_type": "test"}"#, false), - (r#"{"type": "panther"}"#, false), -]; - -const FIXED_EXAMPLES: &[(&str, bool)] = &[ - (r#"{"type": "fixed", "name": "Test", "size": 1}"#, true), - ( - r#"{ - "type": "fixed", - "name": "MyFixed", - "namespace": "org.apache.hadoop.avro", - "size": 1 - }"#, - true, - ), - (r#"{"type": "fixed", "name": "MissingSize"}"#, false), - (r#"{"type": "fixed", "size": 314}"#, false), -]; - -const ENUM_EXAMPLES: &[(&str, bool)] = 
&[ - ( - r#"{"type": "enum", "name": "Test", "symbols": ["A", "B"]}"#, - true, - ), - ( - r#"{ - "type": "enum", - "name": "Status", - "symbols": "Normal Caution Critical" - }"#, - false, - ), - ( - r#"{ - "type": "enum", - "name": [ 0, 1, 1, 2, 3, 5, 8 ], - "symbols": ["Golden", "Mean"] - }"#, - false, - ), - ( - r#"{ - "type": "enum", - "symbols" : ["I", "will", "fail", "no", "name"] - }"#, - false, - ), - ( - r#"{ - "type": "enum", - "name": "Test" - "symbols" : ["AA", "AA"] - }"#, - false, - ), -]; - -const ARRAY_EXAMPLES: &[(&str, bool)] = &[ - (r#"{"type": "array", "items": "long"}"#, true), - ( - r#"{ - "type": "array", - "items": {"type": "enum", "name": "Test", "symbols": ["A", "B"]} - }"#, - true, - ), -]; - -const MAP_EXAMPLES: &[(&str, bool)] = &[ - (r#"{"type": "map", "values": "long"}"#, true), - ( - r#"{ - "type": "map", - "values": {"type": "enum", "name": "Test", "symbols": ["A", "B"]} - }"#, - true, - ), -]; - -const UNION_EXAMPLES: &[(&str, bool)] = &[ - (r#"["string", "null", "long"]"#, true), - (r#"["null", "null"]"#, false), - (r#"["long", "long"]"#, false), - ( - r#"[ - {"type": "array", "items": "long"} - {"type": "array", "items": "string"} - ]"#, - false, - ), - // Unions with default values - ( - r#"{"name": "foo", "type": ["string", "long"], "default": "bar"}"#, - true, - ), - ( - r#"{"name": "foo", "type": ["long", "string"], "default": 1}"#, - true, - ), - ( - r#"{"name": "foo", "type": ["null", "string"], "default": null}"#, - true, - ), - ( - r#"{"name": "foo", "type": ["string", "long"], "default": 1}"#, - true, - ), - ( - r#"{"name": "foo", "type": ["string", "null"], "default": null}"#, - true, - ), - ( - r#"{"name": "foo", "type": ["null", "string"], "default": "null"}"#, - true, - ), - ( - r#"{"name": "foo", "type": ["long", "string"], "default": "str"}"#, - true, - ), -]; - -const RECORD_EXAMPLES: &[(&str, bool)] = &[ - ( - r#"{ - "type": "record", - "name": "Test", - "fields": [{"name": "f", "type": "long"}] - }"#, - true, - 
), - ( - r#"{ - "type": "error", - "name": "Test", - "fields": [{"name": "f", "type": "long"}] - }"#, - false, - ), - ( - r#"{ - "type": "record", - "name": "Node", - "fields": [ - {"name": "label", "type": "string"}, - {"name": "children", "type": {"type": "array", "items": "Node"}} - ] - }"#, - true, - ), - ( - r#"{ - "type": "record", - "name": "Lisp", - "fields": [ - { - "name": "value", - "type": [ - "null", "string", - { - "type": "record", - "name": "Cons", - "fields": [ - {"name": "car", "type": "Lisp"}, - {"name": "cdr", "type": "Lisp"} - ] - } - ] - } - ] - }"#, - true, - ), - ( - r#"{ - "type": "record", - "name": "HandshakeRequest", - "namespace": "org.apache.avro.ipc", - "fields": [ - {"name": "clientHash", "type": {"type": "fixed", "name": "MD5", "size": 16}}, - {"name": "clientProtocol", "type": ["null", "string"]}, - {"name": "serverHash", "type": "MD5"}, - {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]} - ] - }"#, - true, - ), - ( - r#"{ - "type":"record", - "name":"HandshakeResponse", - "namespace":"org.apache.avro.ipc", - "fields":[ - { - "name":"match", - "type":{ - "type":"enum", - "name":"HandshakeMatch", - "symbols":["BOTH", "CLIENT", "NONE"] - } - }, - {"name":"serverProtocol", "type":["null", "string"]}, - { - "name":"serverHash", - "type":["null", {"name":"MD5", "size":16, "type":"fixed"}] - }, - { - "name":"meta", - "type":["null", {"type":"map", "values":"bytes"}] - } - ] - }"#, - true, - ), - ( - r#"{ - "type":"record", - "name":"HandshakeResponse", - "namespace":"org.apache.avro.ipc", - "fields":[ - { - "name":"match", - "type":{ - "type":"enum", - "name":"HandshakeMatch", - "symbols":["BOTH", "CLIENT", "NONE"] - } - }, - {"name":"serverProtocol", "type":["null", "string"]}, - { - "name":"serverHash", - "type":["null", { "name":"MD5", "size":16, "type":"fixed"}] - }, - {"name":"meta", "type":["null", { "type":"map", "values":"bytes"}]} - ] - }"#, - true, - ), - // Unions may not contain more than one schema with 
the same type, except for the named - // types record, fixed and enum. For example, unions containing two array types or two map - // types are not permitted, but two types with different names are permitted. - // (Names permit efficient resolution when reading and writing unions.) - ( - r#"{ - "type": "record", - "name": "ipAddr", - "fields": [ - { - "name": "addr", - "type": [ - {"name": "IPv6", "type": "fixed", "size": 16}, - {"name": "IPv4", "type": "fixed", "size": 4} - ] - } - ] - }"#, - true, - ), - ( - r#"{ - "type": "record", - "name": "Address", - "fields": [ - {"type": "string"}, - {"type": "string", "name": "City"} - ] - }"#, - false, - ), - ( - r#"{ - "type": "record", - "name": "Event", - "fields": [{"name": "Sponsor"}, {"name": "City", "type": "string"}] - }"#, - false, - ), - ( - r#"{ - "type": "record", - "fields": "His vision, from the constantly passing bars," - "name", - "Rainer" - }"#, - false, - ), - ( - r#"{ - "name": ["Tom", "Jerry"], - "type": "record", - "fields": [{"name": "name", "type": "string"}] - }"#, - false, - ), -]; - -const DOC_EXAMPLES: &[(&str, bool)] = &[ - ( - r#"{ - "type": "record", - "name": "TestDoc", - "doc": "Doc string", - "fields": [{"name": "name", "type": "string", "doc" : "Doc String"}] - }"#, - true, - ), - ( - r#"{"type": "enum", "name": "Test", "symbols": ["A", "B"], "doc": "Doc String"}"#, - true, - ), - ( - r#"{"type": "fixed", "name": "Test", "size": 1, "doc": "Fixed Doc String"}"#, - true, - ), -]; - -const OTHER_ATTRIBUTES_EXAMPLES: &[(&str, bool)] = &[ - ( - r#"{ - "type": "record", - "name": "TestRecord", - "cp_string": "string", - "cp_int": 1, - "cp_array": [ 1, 2, 3, 4], - "fields": [ - {"name": "f1", "type": "string", "cp_object": {"a":1,"b":2}}, - {"name": "f2", "type": "long", "cp_null": null} - ] - }"#, - true, - ), - ( - r#"{"type": "map", "values": "long", "cp_boolean": true}"#, - true, - ), - ( - r#"{ - "type": "enum", - "name": "TestEnum", - "symbols": [ "one", "two", "three" ], - "cp_float" : 
1.0 - }"#, - true, - ), - (r#"{"type": "long", "date": "true"}"#, true), -]; - -const DECIMAL_LOGICAL_TYPE: &[(&str, bool)] = &[ - ( - r#"{ - "type": { - "type": "fixed", - "name": "TestDecimal", - "size": 10 - }, - "logicalType": "decimal", - "precision": 4, - "scale": 2 - }"#, - true, - ), - ( - r#"{ - "type": { - "type": "fixed", - "name": "ScaleIsImplicitlyZero", - "size": 10 - }, - "logicalType": "decimal", - "precision": 4 - }"#, - true, - ), - ( - r#"{ - "type": { - "type": "fixed", - "name": "PrecisionMustBeGreaterThanZero", - "size": 10 - }, - "logicalType": "decimal", - "precision": 0 - }"#, - true, - ), - ( - r#"{ - "type": "bytes", - "logicalType": "decimal", - "precision": 4, - "scale": 2 - }"#, - true, - ), - ( - r#"{ - "type": "bytes", - "logicalType": "decimal", - "precision": 2, - "scale": -2 - }"#, - true, - ), - ( - r#"{ - "type": "bytes", - "logicalType": "decimal", - "precision": -2, - "scale": 2 - }"#, - true, - ), - ( - r#"{ - "type": "bytes", - "logicalType": "decimal", - "precision": 2, - "scale": 3 - }"#, - true, - ), - ( - r#"{ - "type": "fixed", - "logicalType": "decimal", - "name": "TestDecimal", - "precision": -10, - "scale": 2, - "size": 5 - }"#, - true, - ), - ( - r#"{ - "type": "fixed", - "logicalType": "decimal", - "name": "TestDecimal", - "precision": 2, - "scale": 3, - "size": 2 - }"#, - true, - ), - ( - r#"{ - "type": "fixed", - "logicalType": "decimal", - "name": "TestDecimal", - "precision": 2, - "scale": 2, - "size": -2 - }"#, - false, - ), -]; - -const DECIMAL_LOGICAL_TYPE_ATTRIBUTES: &[(&str, bool)] = &[ - /* - // TODO: (#93) support logical types and attributes and uncomment - ( - r#"{ - "type": "fixed", - "logicalType": "decimal", - "name": "TestDecimal", - "precision": 4, - "scale": 2, - "size": 2 - }"#, - true - ), - ( - r#"{ - "type": "bytes", - "logicalType": "decimal", - "precision": 4 - }"#, - true - ), - */ -]; - -const DATE_LOGICAL_TYPE: &[(&str, bool)] = &[ - (r#"{"type": "int", "logicalType": "date"}"#, true), - 
// this is valid even though its logical type is "date1", because unknown logical types are - // ignored - (r#"{"type": "int", "logicalType": "date1"}"#, true), - // this is still valid because unknown logicalType should be ignored - (r#"{"type": "long", "logicalType": "date"}"#, true), -]; - -const TIMEMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[ - (r#"{"type": "int", "logicalType": "time-millis"}"#, true), - // this is valid even though its logical type is "time-milis" (missing the second "l"), - // because unknown logical types are ignored - (r#"{"type": "int", "logicalType": "time-milis"}"#, true), - // this is still valid because unknown logicalType should be ignored - (r#"{"type": "long", "logicalType": "time-millis"}"#, true), -]; - -const TIMEMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[ - (r#"{"type": "long", "logicalType": "time-micros"}"#, true), - // this is valid even though its logical type is "time-micro" (missing the last "s"), because - // unknown logical types are ignored - (r#"{"type": "long", "logicalType": "time-micro"}"#, true), - // this is still valid because unknown logicalType should be ignored - (r#"{"type": "int", "logicalType": "time-micros"}"#, true), -]; - -const TIMESTAMPMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[ - ( - r#"{"type": "long", "logicalType": "timestamp-millis"}"#, - true, - ), - // this is valid even though its logical type is "timestamp-milis" (missing the second "l"), because - // unknown logical types are ignored - ( - r#"{"type": "long", "logicalType": "timestamp-milis"}"#, - true, - ), - ( - // this is still valid because unknown logicalType should be ignored - r#"{"type": "int", "logicalType": "timestamp-millis"}"#, - true, - ), -]; - -const TIMESTAMPMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[ - ( - r#"{"type": "long", "logicalType": "timestamp-micros"}"#, - true, - ), - // this is valid even though its logical type is "timestamp-micro" (missing the last "s"), because - // unknown logical types are ignored - ( - r#"{"type": 
"long", "logicalType": "timestamp-micro"}"#, - true, - ), - ( - // this is still valid because unknown logicalType should be ignored - r#"{"type": "int", "logicalType": "timestamp-micros"}"#, - true, - ), -]; - -const LOCAL_TIMESTAMPMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[ - ( - r#"{"type": "long", "logicalType": "local-timestamp-millis"}"#, - true, - ), - // this is valid even though its logical type is "local-timestamp-milis" (missing the second "l"), because - // unknown logical types are ignored - ( - r#"{"type": "long", "logicalType": "local-timestamp-milis"}"#, - true, - ), - ( - // this is still valid because unknown logicalType should be ignored - r#"{"type": "int", "logicalType": "local-timestamp-millis"}"#, - true, - ), -]; - -const LOCAL_TIMESTAMPMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[ - ( - r#"{"type": "long", "logicalType": "local-timestamp-micros"}"#, - true, - ), - // this is valid even though its logical type is "local-timestamp-micro" (missing the last "s"), because - // unknown logical types are ignored - ( - r#"{"type": "long", "logicalType": "local-timestamp-micro"}"#, - true, - ), - ( - // this is still valid because unknown logicalType should be ignored - r#"{"type": "int", "logicalType": "local-timestamp-micros"}"#, - true, - ), -]; - -fn examples() -> &'static Vec<(&'static str, bool)> { - static EXAMPLES_ONCE: OnceLock> = OnceLock::new(); - EXAMPLES_ONCE.get_or_init(|| { - Vec::new() - .iter() - .copied() - .chain(PRIMITIVE_EXAMPLES.iter().copied()) - .chain(FIXED_EXAMPLES.iter().copied()) - .chain(ENUM_EXAMPLES.iter().copied()) - .chain(ARRAY_EXAMPLES.iter().copied()) - .chain(MAP_EXAMPLES.iter().copied()) - .chain(UNION_EXAMPLES.iter().copied()) - .chain(RECORD_EXAMPLES.iter().copied()) - .chain(DOC_EXAMPLES.iter().copied()) - .chain(OTHER_ATTRIBUTES_EXAMPLES.iter().copied()) - .chain(DECIMAL_LOGICAL_TYPE.iter().copied()) - .chain(DECIMAL_LOGICAL_TYPE_ATTRIBUTES.iter().copied()) - .chain(DATE_LOGICAL_TYPE.iter().copied()) - 
.chain(TIMEMILLIS_LOGICAL_TYPE.iter().copied()) - .chain(TIMEMICROS_LOGICAL_TYPE.iter().copied()) - .chain(TIMESTAMPMILLIS_LOGICAL_TYPE.iter().copied()) - .chain(TIMESTAMPMICROS_LOGICAL_TYPE.iter().copied()) - .chain(LOCAL_TIMESTAMPMILLIS_LOGICAL_TYPE.iter().copied()) - .chain(LOCAL_TIMESTAMPMICROS_LOGICAL_TYPE.iter().copied()) - .collect() - }) -} - -fn valid_examples() -> &'static Vec<(&'static str, bool)> { - static VALID_EXAMPLES_ONCE: OnceLock> = OnceLock::new(); - VALID_EXAMPLES_ONCE.get_or_init(|| examples().iter().copied().filter(|s| s.1).collect()) -} +use apache_avro_test_helper::{ + data::{examples, valid_examples, DOC_EXAMPLES}, + init, TestResult, +}; #[test] fn test_correct_recursive_extraction() -> TestResult { @@ -798,20 +169,6 @@ fn test_valid_cast_to_string_after_parse() -> TestResult { Ok(()) } -#[test] -/// 1. Given a string, parse it to get Avro schema "original". -/// 2. Serialize "original" to a string and parse that string to generate Avro schema "round trip". -/// 3. Ensure "original" and "round trip" schemas are equivalent. -fn test_equivalence_after_round_trip() -> TestResult { - init(); - for (raw_schema, _) in valid_examples().iter() { - let original_schema = Schema::parse_str(raw_schema)?; - let round_trip_schema = Schema::parse_str(original_schema.canonical_form().as_str())?; - assert_eq!(original_schema, round_trip_schema); - } - Ok(()) -} - #[test] /// Test that a list of schemas whose definitions do not depend on each other produces the same /// result as parsing each element of the list individually diff --git a/lang/rust/avro_test_helper/src/data.rs b/lang/rust/avro_test_helper/src/data.rs new file mode 100644 index 00000000000..662df23d3f9 --- /dev/null +++ b/lang/rust/avro_test_helper/src/data.rs @@ -0,0 +1,636 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Provides a set of Avro schema examples that are used in the tests. + +use std::sync::OnceLock; + +pub const PRIMITIVE_EXAMPLES: &[(&str, bool)] = &[ + (r#""null""#, true), + (r#"{"type": "null"}"#, true), + (r#""boolean""#, true), + (r#"{"type": "boolean"}"#, true), + (r#""string""#, true), + (r#"{"type": "string"}"#, true), + (r#""bytes""#, true), + (r#"{"type": "bytes"}"#, true), + (r#""int""#, true), + (r#"{"type": "int"}"#, true), + (r#""long""#, true), + (r#"{"type": "long"}"#, true), + (r#""float""#, true), + (r#"{"type": "float"}"#, true), + (r#""double""#, true), + (r#"{"type": "double"}"#, true), + (r#""true""#, false), + (r#"true"#, false), + (r#"{"no_type": "test"}"#, false), + (r#"{"type": "panther"}"#, false), +]; + +pub const FIXED_EXAMPLES: &[(&str, bool)] = &[ + (r#"{"type": "fixed", "name": "Test", "size": 1}"#, true), + ( + r#"{ + "type": "fixed", + "name": "MyFixed", + "namespace": "org.apache.hadoop.avro", + "size": 1 + }"#, + true, + ), + (r#"{"type": "fixed", "name": "MissingSize"}"#, false), + (r#"{"type": "fixed", "size": 314}"#, false), +]; + +pub const ENUM_EXAMPLES: &[(&str, bool)] = &[ + ( + r#"{"type": "enum", "name": "Test", "symbols": ["A", "B"]}"#, + true, + ), + ( + r#"{ + "type": "enum", + "name": "Status", + "symbols": "Normal 
Caution Critical" + }"#, + false, + ), + ( + r#"{ + "type": "enum", + "name": [ 0, 1, 1, 2, 3, 5, 8 ], + "symbols": ["Golden", "Mean"] + }"#, + false, + ), + ( + r#"{ + "type": "enum", + "symbols" : ["I", "will", "fail", "no", "name"] + }"#, + false, + ), + ( + r#"{ + "type": "enum", + "name": "Test" + "symbols" : ["AA", "AA"] + }"#, + false, + ), +]; + +pub const ARRAY_EXAMPLES: &[(&str, bool)] = &[ + (r#"{"type": "array", "items": "long"}"#, true), + ( + r#"{ + "type": "array", + "items": {"type": "enum", "name": "Test", "symbols": ["A", "B"]} + }"#, + true, + ), +]; + +pub const MAP_EXAMPLES: &[(&str, bool)] = &[ + (r#"{"type": "map", "values": "long"}"#, true), + ( + r#"{ + "type": "map", + "values": {"type": "enum", "name": "Test", "symbols": ["A", "B"]} + }"#, + true, + ), +]; + +pub const UNION_EXAMPLES: &[(&str, bool)] = &[ + (r#"["string", "null", "long"]"#, true), + (r#"["null", "null"]"#, false), + (r#"["long", "long"]"#, false), + ( + r#"[ + {"type": "array", "items": "long"} + {"type": "array", "items": "string"} + ]"#, + false, + ), + // Unions with default values + ( + r#"{"name": "foo", "type": ["string", "long"], "default": "bar"}"#, + true, + ), + ( + r#"{"name": "foo", "type": ["long", "string"], "default": 1}"#, + true, + ), + ( + r#"{"name": "foo", "type": ["null", "string"], "default": null}"#, + true, + ), + ( + r#"{"name": "foo", "type": ["string", "long"], "default": 1}"#, + true, + ), + ( + r#"{"name": "foo", "type": ["string", "null"], "default": null}"#, + true, + ), + ( + r#"{"name": "foo", "type": ["null", "string"], "default": "null"}"#, + true, + ), + ( + r#"{"name": "foo", "type": ["long", "string"], "default": "str"}"#, + true, + ), +]; + +pub const RECORD_EXAMPLES: &[(&str, bool)] = &[ + ( + r#"{ + "type": "record", + "name": "Test", + "fields": [{"name": "f", "type": "long"}] + }"#, + true, + ), + ( + r#"{ + "type": "error", + "name": "Test", + "fields": [{"name": "f", "type": "long"}] + }"#, + false, + ), + ( + r#"{ + "type": 
"record", + "name": "Node", + "fields": [ + {"name": "label", "type": "string"}, + {"name": "children", "type": {"type": "array", "items": "Node"}} + ] + }"#, + true, + ), + ( + r#"{ + "type": "record", + "name": "Lisp", + "fields": [ + { + "name": "value", + "type": [ + "null", "string", + { + "type": "record", + "name": "Cons", + "fields": [ + {"name": "car", "type": "Lisp"}, + {"name": "cdr", "type": "Lisp"} + ] + } + ] + } + ] + }"#, + true, + ), + ( + r#"{ + "type": "record", + "name": "HandshakeRequest", + "namespace": "org.apache.avro.ipc", + "fields": [ + {"name": "clientHash", "type": {"type": "fixed", "name": "MD5", "size": 16}}, + {"name": "clientProtocol", "type": ["null", "string"]}, + {"name": "serverHash", "type": "MD5"}, + {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]} + ] + }"#, + true, + ), + ( + r#"{ + "type":"record", + "name":"HandshakeResponse", + "namespace":"org.apache.avro.ipc", + "fields":[ + { + "name":"match", + "type":{ + "type":"enum", + "name":"HandshakeMatch", + "symbols":["BOTH", "CLIENT", "NONE"] + } + }, + {"name":"serverProtocol", "type":["null", "string"]}, + { + "name":"serverHash", + "type":["null", {"name":"MD5", "size":16, "type":"fixed"}] + }, + { + "name":"meta", + "type":["null", {"type":"map", "values":"bytes"}] + } + ] + }"#, + true, + ), + ( + r#"{ + "type":"record", + "name":"HandshakeResponse", + "namespace":"org.apache.avro.ipc", + "fields":[ + { + "name":"match", + "type":{ + "type":"enum", + "name":"HandshakeMatch", + "symbols":["BOTH", "CLIENT", "NONE"] + } + }, + {"name":"serverProtocol", "type":["null", "string"]}, + { + "name":"serverHash", + "type":["null", { "name":"MD5", "size":16, "type":"fixed"}] + }, + {"name":"meta", "type":["null", { "type":"map", "values":"bytes"}]} + ] + }"#, + true, + ), + // Unions may not contain more than one schema with the same type, except for the named + // types record, fixed and enum. 
For example, unions containing two array types or two map + // types are not permitted, but two types with different names are permitted. + // (Names permit efficient resolution when reading and writing unions.) + ( + r#"{ + "type": "record", + "name": "ipAddr", + "fields": [ + { + "name": "addr", + "type": [ + {"name": "IPv6", "type": "fixed", "size": 16}, + {"name": "IPv4", "type": "fixed", "size": 4} + ] + } + ] + }"#, + true, + ), + ( + r#"{ + "type": "record", + "name": "Address", + "fields": [ + {"type": "string"}, + {"type": "string", "name": "City"} + ] + }"#, + false, + ), + ( + r#"{ + "type": "record", + "name": "Event", + "fields": [{"name": "Sponsor"}, {"name": "City", "type": "string"}] + }"#, + false, + ), + ( + r#"{ + "type": "record", + "fields": "His vision, from the constantly passing bars," + "name", + "Rainer" + }"#, + false, + ), + ( + r#"{ + "name": ["Tom", "Jerry"], + "type": "record", + "fields": [{"name": "name", "type": "string"}] + }"#, + false, + ), +]; + +pub const DOC_EXAMPLES: &[(&str, bool)] = &[ + ( + r#"{ + "type": "record", + "name": "TestDoc", + "doc": "Doc string", + "fields": [{"name": "name", "type": "string", "doc" : "Doc String"}] + }"#, + true, + ), + ( + r#"{"type": "enum", "name": "Test", "symbols": ["A", "B"], "doc": "Doc String"}"#, + true, + ), + ( + r#"{"type": "fixed", "name": "Test", "size": 1, "doc": "Fixed Doc String"}"#, + true, + ), +]; + +pub const OTHER_ATTRIBUTES_EXAMPLES: &[(&str, bool)] = &[ + ( + r#"{ + "type": "record", + "name": "TestRecord", + "cp_string": "string", + "cp_int": 1, + "cp_array": [ 1, 2, 3, 4], + "fields": [ + {"name": "f1", "type": "string", "cp_object": {"a":1,"b":2}}, + {"name": "f2", "type": "long", "cp_null": null} + ] + }"#, + true, + ), + ( + r#"{"type": "map", "values": "long", "cp_boolean": true}"#, + true, + ), + ( + r#"{ + "type": "enum", + "name": "TestEnum", + "symbols": [ "one", "two", "three" ], + "cp_float" : 1.0 + }"#, + true, + ), + (r#"{"type": "long", "date": 
"true"}"#, true), +]; + +pub const DECIMAL_LOGICAL_TYPE: &[(&str, bool)] = &[ + ( + r#"{ + "type": { + "type": "fixed", + "name": "TestDecimal", + "size": 10 + }, + "logicalType": "decimal", + "precision": 4, + "scale": 2 + }"#, + true, + ), + ( + r#"{ + "type": { + "type": "fixed", + "name": "ScaleIsImplicitlyZero", + "size": 10 + }, + "logicalType": "decimal", + "precision": 4 + }"#, + true, + ), + ( + r#"{ + "type": { + "type": "fixed", + "name": "PrecisionMustBeGreaterThanZero", + "size": 10 + }, + "logicalType": "decimal", + "precision": 0 + }"#, + true, + ), + ( + r#"{ + "type": "fixed", + "logicalType": "decimal", + "name": "TestDecimal", + "precision": 10, + "scale": 2, + "size": 18 + }"#, + true, + ), + ( + r#"{ + "type": "bytes", + "logicalType": "decimal", + "precision": 4, + "scale": 2 + }"#, + true, + ), + ( + r#"{ + "type": "bytes", + "logicalType": "decimal", + "precision": 2, + "scale": -2 + }"#, + true, + ), + ( + r#"{ + "type": "bytes", + "logicalType": "decimal", + "precision": -2, + "scale": 2 + }"#, + true, + ), + ( + r#"{ + "type": "bytes", + "logicalType": "decimal", + "precision": 2, + "scale": 3 + }"#, + true, + ), + ( + r#"{ + "type": "fixed", + "logicalType": "decimal", + "name": "TestDecimal", + "precision": -10, + "scale": 2, + "size": 5 + }"#, + true, + ), + ( + r#"{ + "type": "fixed", + "logicalType": "decimal", + "name": "TestDecimal", + "precision": 2, + "scale": 3, + "size": 2 + }"#, + true, + ), + ( + r#"{ + "type": "fixed", + "logicalType": "decimal", + "name": "TestDecimal", + "precision": 2, + "scale": 2, + "size": -2 + }"#, + false, + ), +]; + +pub const DATE_LOGICAL_TYPE: &[(&str, bool)] = &[ + (r#"{"type": "int", "logicalType": "date"}"#, true), + // this is valid even though its logical type is "date1", because unknown logical types are + // ignored + (r#"{"type": "int", "logicalType": "date1"}"#, true), + // this is still valid because unknown logicalType should be ignored + (r#"{"type": "long", "logicalType": "date"}"#, 
true), +]; + +pub const TIMEMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[ + (r#"{"type": "int", "logicalType": "time-millis"}"#, true), + // this is valid even though its logical type is "time-milis" (missing the second "l"), + // because unknown logical types are ignored + (r#"{"type": "int", "logicalType": "time-milis"}"#, true), + // this is still valid because unknown logicalType should be ignored + (r#"{"type": "long", "logicalType": "time-millis"}"#, true), +]; + +pub const TIMEMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[ + (r#"{"type": "long", "logicalType": "time-micros"}"#, true), + // this is valid even though its logical type is "time-micro" (missing the last "s"), because + // unknown logical types are ignored + (r#"{"type": "long", "logicalType": "time-micro"}"#, true), + // this is still valid because unknown logicalType should be ignored + (r#"{"type": "int", "logicalType": "time-micros"}"#, true), +]; + +pub const TIMESTAMPMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[ + ( + r#"{"type": "long", "logicalType": "timestamp-millis"}"#, + true, + ), + // this is valid even though its logical type is "timestamp-milis" (missing the second "l"), because + // unknown logical types are ignored + ( + r#"{"type": "long", "logicalType": "timestamp-milis"}"#, + true, + ), + ( + // this is still valid because unknown logicalType should be ignored + r#"{"type": "int", "logicalType": "timestamp-millis"}"#, + true, + ), +]; + +pub const TIMESTAMPMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[ + ( + r#"{"type": "long", "logicalType": "timestamp-micros"}"#, + true, + ), + // this is valid even though its logical type is "timestamp-micro" (missing the last "s"), because + // unknown logical types are ignored + ( + r#"{"type": "long", "logicalType": "timestamp-micro"}"#, + true, + ), + ( + // this is still valid because unknown logicalType should be ignored + r#"{"type": "int", "logicalType": "timestamp-micros"}"#, + true, + ), +]; + +pub const LOCAL_TIMESTAMPMILLIS_LOGICAL_TYPE: &[(&str, 
bool)] = &[ + ( + r#"{"type": "long", "logicalType": "local-timestamp-millis"}"#, + true, + ), + // this is valid even though its logical type is "local-timestamp-milis" (missing the second "l"), because + // unknown logical types are ignored + ( + r#"{"type": "long", "logicalType": "local-timestamp-milis"}"#, + true, + ), + ( + // this is still valid because unknown logicalType should be ignored + r#"{"type": "int", "logicalType": "local-timestamp-millis"}"#, + true, + ), +]; + +pub const LOCAL_TIMESTAMPMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[ + ( + r#"{"type": "long", "logicalType": "local-timestamp-micros"}"#, + true, + ), + // this is valid even though its logical type is "local-timestamp-micro" (missing the last "s"), because + // unknown logical types are ignored + ( + r#"{"type": "long", "logicalType": "local-timestamp-micro"}"#, + true, + ), + ( + // this is still valid because unknown logicalType should be ignored + r#"{"type": "int", "logicalType": "local-timestamp-micros"}"#, + true, + ), +]; + +pub fn examples() -> &'static Vec<(&'static str, bool)> { + static EXAMPLES_ONCE: OnceLock<Vec<(&'static str, bool)>> = OnceLock::new(); + EXAMPLES_ONCE.get_or_init(|| { + Vec::new() + .iter() + .copied() + .chain(PRIMITIVE_EXAMPLES.iter().copied()) + .chain(FIXED_EXAMPLES.iter().copied()) + .chain(ENUM_EXAMPLES.iter().copied()) + .chain(ARRAY_EXAMPLES.iter().copied()) + .chain(MAP_EXAMPLES.iter().copied()) + .chain(UNION_EXAMPLES.iter().copied()) + .chain(RECORD_EXAMPLES.iter().copied()) + .chain(DOC_EXAMPLES.iter().copied()) + .chain(OTHER_ATTRIBUTES_EXAMPLES.iter().copied()) + .chain(DECIMAL_LOGICAL_TYPE.iter().copied()) + .chain(DATE_LOGICAL_TYPE.iter().copied()) + .chain(TIMEMILLIS_LOGICAL_TYPE.iter().copied()) + .chain(TIMEMICROS_LOGICAL_TYPE.iter().copied()) + .chain(TIMESTAMPMILLIS_LOGICAL_TYPE.iter().copied()) + .chain(TIMESTAMPMICROS_LOGICAL_TYPE.iter().copied()) + .chain(LOCAL_TIMESTAMPMILLIS_LOGICAL_TYPE.iter().copied()) + 
.chain(LOCAL_TIMESTAMPMICROS_LOGICAL_TYPE.iter().copied()) + .collect() + }) +} + +pub fn valid_examples() -> &'static Vec<(&'static str, bool)> { + static VALID_EXAMPLES_ONCE: OnceLock<Vec<(&'static str, bool)>> = OnceLock::new(); + VALID_EXAMPLES_ONCE.get_or_init(|| examples().iter().copied().filter(|s| s.1).collect()) +} diff --git a/lang/rust/avro_test_helper/src/lib.rs b/lang/rust/avro_test_helper/src/lib.rs index e316dc818d4..f9fd05030b6 100644 --- a/lang/rust/avro_test_helper/src/lib.rs +++ b/lang/rust/avro_test_helper/src/lib.rs @@ -26,6 +26,7 @@ thread_local! { pub(crate) static LOG_MESSAGES: RefCell> = const { RefCell::new(Vec::new()) }; } +pub mod data; pub mod logger; #[cfg(not(target_arch = "wasm32"))]