From 00426d60f9868671b01c555aa66bc7c864baf9c2 Mon Sep 17 00:00:00 2001 From: Martin Grigorov Date: Thu, 25 Jan 2024 09:21:15 +0200 Subject: [PATCH] AVRO-3900: [Rust] Make it possible to use custom names validators (#2643) * AVRO-3900: [Rust] Make it possible to use custom validators * AVRO-3900: Add validator trait for enum symbol names * AVRO-3900: Introduce validator for RecordField's name * AVRO-3900: Extract the custom validators tests as IT tests This way they use the public APIs of the crate, as a real user application. * AVRO-3900: Polish APIs * AVRO-3900: Split the validator integration tests Otherwise there are timing issues with the initializations of the OnceLock's for the different validators. Parsing a Schema leads to initialization of the default validator and later it is impossible to set a custom one * AVRO-3900: Simplify generate_interop_data.rs example to not use avro_test_helper crate For some reason clearing the log messages in the tests destructor (#[dtor]) was failing for this example. 
Simplify the TestLogger to use the std thread_local!() instead of the third party ref_thread_local!() * AVRO-3900: Merge the validators integration tests in one test Just make sure the setup of the custom validators is done before the parsing of any schema to prevent registering the default validator (SpecificationValidator) * AVRO-3900: Improve the documentation Signed-off-by: Martin Tzvetanov Grigorov --- lang/rust/Cargo.lock | 7 - lang/rust/avro/README.md | 34 ++ .../avro/examples/generate_interop_data.rs | 6 +- lang/rust/avro/examples/test_interop_data.rs | 4 +- lang/rust/avro/src/lib.rs | 36 ++ lang/rust/avro/src/schema.rs | 84 +---- lang/rust/avro/src/validator.rs | 318 ++++++++++++++++++ lang/rust/avro/tests/validators.rs | 85 +++++ lang/rust/avro_test_helper/Cargo.toml | 1 - lang/rust/avro_test_helper/src/lib.rs | 7 +- lang/rust/avro_test_helper/src/logger.rs | 15 +- 11 files changed, 504 insertions(+), 93 deletions(-) create mode 100644 lang/rust/avro/src/validator.rs create mode 100644 lang/rust/avro/tests/validators.rs diff --git a/lang/rust/Cargo.lock b/lang/rust/Cargo.lock index b2d6ab81213..5c0a5cfaf6c 100644 --- a/lang/rust/Cargo.lock +++ b/lang/rust/Cargo.lock @@ -120,7 +120,6 @@ dependencies = [ "ctor", "env_logger", "log", - "ref_thread_local", ] [[package]] @@ -999,12 +998,6 @@ dependencies = [ "bitflags 1.3.2", ] -[[package]] -name = "ref_thread_local" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0d51660a68078997855ba5602f73ab3a5031bd7ad480a9d4c90fbbf04e1fff0" - [[package]] name = "regex" version = "1.10.2" diff --git a/lang/rust/avro/README.md b/lang/rust/avro/README.md index 75789964e24..fca788f6c55 100644 --- a/lang/rust/avro/README.md +++ b/lang/rust/avro/README.md @@ -651,6 +651,40 @@ let writers_schema = Schema::parse_str(r#"{"type": "array", "items":"long"}"#).u let readers_schema = Schema::parse_str(r#"{"type": "array", "items":"int"}"#).unwrap(); 
assert!(SchemaCompatibility::can_read(&writers_schema, &readers_schema).is_err()); ``` +### Custom names validators + +By default the library follows the rules by the +[Avro specification](https://avro.apache.org/docs/1.11.1/specification/#names)! + +Some of the other Apache Avro language SDKs are not that strict and allow more +characters in names. For interoperability with those SDKs, the library provides +a way to customize the names validation. + +```rust +use apache_avro::AvroResult; +use apache_avro::schema::Namespace; +use apache_avro::validator::{SchemaNameValidator, set_schema_name_validator}; + +struct MyCustomValidator; + +impl SchemaNameValidator for MyCustomValidator { + fn validate(&self, name: &str) -> AvroResult<(String, Namespace)> { + todo!() + } +} + +// don't parse any schema before registering the custom validator(s) ! + +set_schema_name_validator(Box::new(MyCustomValidator)); + +// ... use the library +``` + +Similar logic could be applied to the schema namespace, enum symbols and field names validation. + +**Note**: the library allows to set a validator only once per the application lifetime! +If the application parses schemas before setting a validator, the default validator will be +registered and used! 
diff --git a/lang/rust/avro/examples/generate_interop_data.rs b/lang/rust/avro/examples/generate_interop_data.rs index 35a6dc7c090..29d50a144de 100644 --- a/lang/rust/avro/examples/generate_interop_data.rs +++ b/lang/rust/avro/examples/generate_interop_data.rs @@ -20,9 +20,9 @@ use apache_avro::{ types::{Record, Value}, Codec, Writer, }; -use apache_avro_test_helper::TestResult; use std::{ collections::HashMap, + error::Error, io::{BufWriter, Write}, }; use strum::IntoEnumIterator; @@ -75,7 +75,7 @@ fn create_datum(schema: &Schema) -> Record { datum } -fn main() -> TestResult { +fn main() -> Result<(), Box> { let schema_str = std::fs::read_to_string("../../share/test/schemas/interop.avsc") .expect("Unable to read the interop Avro schema"); let schema = Schema::parse_str(schema_str.as_str())?; @@ -105,7 +105,7 @@ fn main() -> TestResult { Ok(()) } -fn write_user_metadata(writer: &mut Writer>) -> TestResult { +fn write_user_metadata(writer: &mut Writer>) -> Result<(), Box> { writer.add_user_metadata("user_metadata".to_string(), b"someByteArray")?; Ok(()) diff --git a/lang/rust/avro/examples/test_interop_data.rs b/lang/rust/avro/examples/test_interop_data.rs index 736b1fd7d03..39c97d0649e 100644 --- a/lang/rust/avro/examples/test_interop_data.rs +++ b/lang/rust/avro/examples/test_interop_data.rs @@ -16,14 +16,14 @@ // under the License. use apache_avro::Reader; -use apache_avro_test_helper::TestResult; use std::{ collections::HashMap, + error::Error, ffi::OsStr, io::{BufReader, Read}, }; -fn main() -> TestResult { +fn main() -> Result<(), Box> { let mut expected_user_metadata: HashMap> = HashMap::new(); expected_user_metadata.insert("user_metadata".to_string(), b"someByteArray".to_vec()); diff --git a/lang/rust/avro/src/lib.rs b/lang/rust/avro/src/lib.rs index 2d9d79a7688..012bcb5e7e9 100644 --- a/lang/rust/avro/src/lib.rs +++ b/lang/rust/avro/src/lib.rs @@ -764,6 +764,41 @@ //! 
let readers_schema = Schema::parse_str(r#"{"type": "array", "items":"int"}"#).unwrap(); //! assert!(SchemaCompatibility::can_read(&writers_schema, &readers_schema).is_err()); //! ``` +//! ## Custom names validators +//! +//! By default the library follows the rules by the +//! [Avro specification](https://avro.apache.org/docs/1.11.1/specification/#names)! +//! +//! Some of the other Apache Avro language SDKs are not that strict and allow more +//! characters in names. For interoperability with those SDKs, the library provides +//! a way to customize the names validation. +//! +//! ```rust +//! use apache_avro::AvroResult; +//! use apache_avro::schema::Namespace; +//! use apache_avro::validator::{SchemaNameValidator, set_schema_name_validator}; +//! +//! struct MyCustomValidator; +//! +//! impl SchemaNameValidator for MyCustomValidator { +//! fn validate(&self, name: &str) -> AvroResult<(String, Namespace)> { +//! todo!() +//! } +//! } +//! +//! // don't parse any schema before registering the custom validator(s) ! +//! +//! set_schema_name_validator(Box::new(MyCustomValidator)); +//! +//! // ... use the library +//! ``` +//! +//! Similar logic could be applied to the schema namespace, enum symbols and field names validation. +//! +//! **Note**: the library allows to set a validator only once per the application lifetime! +//! If the application parses schemas before setting a validator, the default validator will be +//! registered and used! +//! mod bigdecimal; mod codec; @@ -782,6 +817,7 @@ pub mod rabin; pub mod schema; pub mod schema_compatibility; pub mod types; +pub mod validator; pub use codec::Codec; pub use de::from_value; diff --git a/lang/rust/avro/src/schema.rs b/lang/rust/avro/src/schema.rs index a7c6223354c..d911fa74635 100644 --- a/lang/rust/avro/src/schema.rs +++ b/lang/rust/avro/src/schema.rs @@ -16,9 +16,17 @@ // under the License. //! Logic for parsing and interacting with schemas in Avro format. 
-use crate::{error::Error, types, util::MapHelper, AvroResult}; +use crate::{ + error::Error, + types, + util::MapHelper, + validator::{ + validate_enum_symbol_name, validate_namespace, validate_record_field_name, + validate_schema_name, + }, + AvroResult, +}; use digest::Digest; -use regex_lite::Regex; use serde::{ ser::{SerializeMap, SerializeSeq}, Deserialize, Serialize, Serializer, @@ -33,37 +41,9 @@ use std::{ hash::Hash, io::Read, str::FromStr, - sync::OnceLock, }; use strum_macros::{EnumDiscriminants, EnumString}; -fn enum_symbol_name_r() -> &'static Regex { - static ENUM_SYMBOL_NAME_ONCE: OnceLock = OnceLock::new(); - ENUM_SYMBOL_NAME_ONCE.get_or_init(|| Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap()) -} - -// An optional namespace (with optional dots) followed by a name without any dots in it. -fn schema_name_r() -> &'static Regex { - static SCHEMA_NAME_ONCE: OnceLock = OnceLock::new(); - SCHEMA_NAME_ONCE.get_or_init(|| { - Regex::new( - r"^((?P([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?)\.)?(?P[A-Za-z_][A-Za-z0-9_]*)$", - ).unwrap() - }) -} - -fn field_name_r() -> &'static Regex { - static FIELD_NAME_ONCE: OnceLock = OnceLock::new(); - FIELD_NAME_ONCE.get_or_init(|| Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap()) -} - -fn namespace_r() -> &'static Regex { - static NAMESPACE_ONCE: OnceLock = OnceLock::new(); - NAMESPACE_ONCE.get_or_init(|| { - Regex::new(r"^([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?$").unwrap() - }) -} - /// Represents an Avro schema fingerprint /// More information about Avro schema fingerprints can be found in the /// [Avro Schema Fingerprint documentation](https://avro.apache.org/docs/current/spec.html#schema_fingerprints) @@ -279,13 +259,7 @@ impl Name { } fn get_name_and_namespace(name: &str) -> AvroResult<(String, Namespace)> { - let caps = schema_name_r() - .captures(name) - .ok_or_else(|| Error::InvalidSchemaName(name.to_string(), schema_name_r().as_str()))?; - Ok(( - caps["name"].to_string(), - 
caps.name("namespace").map(|s| s.as_str().to_string()), - )) + validate_schema_name(name) } /// Parse a `serde_json::Value` into a `Name`. @@ -312,12 +286,7 @@ impl Name { .filter(|ns| !ns.is_empty()); if let Some(ref ns) = namespace { - if !namespace_r().is_match(ns) { - return Err(Error::InvalidNamespace( - ns.to_string(), - namespace_r().as_str(), - )); - } + validate_namespace(ns)?; } Ok(Self { @@ -686,9 +655,7 @@ impl RecordField { ) -> AvroResult { let name = field.name().ok_or(Error::GetNameFieldFromRecord)?; - if !field_name_r().is_match(&name) { - return Err(Error::FieldName(name)); - } + validate_record_field_name(&name)?; // TODO: "type" = "" let schema = parser.parse_complex(field, &enclosing_record.namespace)?; @@ -1713,10 +1680,7 @@ impl Parser { let mut existing_symbols: HashSet<&String> = HashSet::with_capacity(symbols.len()); for symbol in symbols.iter() { - // Ensure enum symbol names match [A-Za-z_][A-Za-z0-9_]* - if !enum_symbol_name_r().is_match(symbol) { - return Err(Error::EnumSymbolName(symbol.to_string())); - } + validate_enum_symbol_name(symbol)?; // Ensure there are no duplicate symbols if existing_symbols.contains(&symbol) { @@ -6277,26 +6241,6 @@ mod tests { Ok(()) } - #[test] - fn test_avro_3897_disallow_invalid_namespaces_in_fully_qualified_name() -> TestResult { - let full_name = "ns.0.record1"; - let name = Name::new(full_name); - assert!(name.is_err()); - let expected = - Error::InvalidSchemaName(full_name.to_string(), schema_name_r().as_str()).to_string(); - let err = name.map_err(|e| e.to_string()).err().unwrap(); - assert_eq!(expected, err); - - let full_name = "ns..record1"; - let name = Name::new(full_name); - assert!(name.is_err()); - let expected = - Error::InvalidSchemaName(full_name.to_string(), schema_name_r().as_str()).to_string(); - let err = name.map_err(|e| e.to_string()).err().unwrap(); - assert_eq!(expected, err); - Ok(()) - } - /// A test cases showing that names and namespaces can be constructed /// entirely by 
underscores. #[test] diff --git a/lang/rust/avro/src/validator.rs b/lang/rust/avro/src/validator.rs new file mode 100644 index 00000000000..2b4967d7f8b --- /dev/null +++ b/lang/rust/avro/src/validator.rs @@ -0,0 +1,318 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::{schema::Namespace, AvroResult, Error}; +use regex_lite::Regex; +use std::sync::OnceLock; + +/// A validator that validates names and namespaces according to the Avro specification. +struct SpecificationValidator; + +/// A trait that validates schema names. +/// To register a custom one use [set_schema_name_validator]. +pub trait SchemaNameValidator: Send + Sync { + /// Returns the regex used to validate the schema name + /// according to the Avro specification. + fn regex(&self) -> &'static Regex { + static SCHEMA_NAME_ONCE: OnceLock = OnceLock::new(); + SCHEMA_NAME_ONCE.get_or_init(|| { + Regex::new( + // An optional namespace (with optional dots) followed by a name without any dots in it. 
+ r"^((?P([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?)\.)?(?P[A-Za-z_][A-Za-z0-9_]*)$", + ) + .unwrap() + }) + } + + /// Validates the schema name and returns the name and the optional namespace, + /// or [Error::InvalidSchemaName] if it is invalid. + fn validate(&self, schema_name: &str) -> AvroResult<(String, Namespace)>; +} + +impl SchemaNameValidator for SpecificationValidator { + fn validate(&self, schema_name: &str) -> AvroResult<(String, Namespace)> { + let regex = SchemaNameValidator::regex(self); + let caps = regex + .captures(schema_name) + .ok_or_else(|| Error::InvalidSchemaName(schema_name.to_string(), regex.as_str()))?; + Ok(( + caps["name"].to_string(), + caps.name("namespace").map(|s| s.as_str().to_string()), + )) + } +} + +static NAME_VALIDATOR_ONCE: OnceLock> = OnceLock::new(); + +/// Sets a custom schema name validator. +/// +/// Returns a unit if the registration was successful, or gives back the rejected +/// `validator` argument if a validator (custom or default) is already registered. +/// +/// **Note**: This function must be called before parsing any schema, because parsing +/// registers the default validator and the registration is one time only! +pub fn set_schema_name_validator( + validator: Box, +) -> Result<(), Box> { + debug!("Setting a custom schema name validator."); + NAME_VALIDATOR_ONCE.set(validator) +} + +pub(crate) fn validate_schema_name(schema_name: &str) -> AvroResult<(String, Namespace)> { + NAME_VALIDATOR_ONCE + .get_or_init(|| { + debug!("Going to use the default name validator."); + Box::new(SpecificationValidator) + }) + .validate(schema_name) +} + +/// A trait that validates schema namespaces. +/// To register a custom one use [set_schema_namespace_validator]. +pub trait SchemaNamespaceValidator: Send + Sync { + /// Returns the regex used to validate the schema namespace + /// according to the Avro specification. 
+ fn regex(&self) -> &'static Regex { + static NAMESPACE_ONCE: OnceLock = OnceLock::new(); + NAMESPACE_ONCE.get_or_init(|| { + Regex::new(r"^([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?$").unwrap() + }) + } + + /// Validates the schema namespace, returning [Error::InvalidNamespace] if it is invalid. + fn validate(&self, namespace: &str) -> AvroResult<()>; +} + +impl SchemaNamespaceValidator for SpecificationValidator { + fn validate(&self, ns: &str) -> AvroResult<()> { + let regex = SchemaNamespaceValidator::regex(self); + if !regex.is_match(ns) { + return Err(Error::InvalidNamespace(ns.to_string(), regex.as_str())); + } else { + Ok(()) + } + } +} + +static NAMESPACE_VALIDATOR_ONCE: OnceLock> = + OnceLock::new(); + +/// Sets a custom schema namespace validator. +/// +/// Returns a unit if the registration was successful, or gives back the rejected +/// `validator` argument if a validator (custom or default) is already registered. +/// +/// **Note**: This function must be called before parsing any schema, because parsing +/// registers the default validator and the registration is one time only! +pub fn set_schema_namespace_validator( + validator: Box, +) -> Result<(), Box> { + NAMESPACE_VALIDATOR_ONCE.set(validator) +} + +pub(crate) fn validate_namespace(ns: &str) -> AvroResult<()> { + NAMESPACE_VALIDATOR_ONCE + .get_or_init(|| { + debug!("Going to use the default namespace validator."); + Box::new(SpecificationValidator) + }) + .validate(ns) +} + +/// A trait that validates enum symbol names. +/// To register a custom one use [set_enum_symbol_name_validator]. +pub trait EnumSymbolNameValidator: Send + Sync { + /// Returns the regex used to validate the symbols of an enum schema + /// according to the Avro specification. 
+ fn regex(&self) -> &'static Regex { + static ENUM_SYMBOL_NAME_ONCE: OnceLock = OnceLock::new(); + ENUM_SYMBOL_NAME_ONCE.get_or_init(|| Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap()) + } + + /// Validates a symbol name of an Enum schema and returns nothing (unit), + /// or [Error::EnumSymbolName] if it is invalid. + fn validate(&self, name: &str) -> AvroResult<()>; +} + +impl EnumSymbolNameValidator for SpecificationValidator { + fn validate(&self, symbol: &str) -> AvroResult<()> { + let regex = EnumSymbolNameValidator::regex(self); + if !regex.is_match(symbol) { + return Err(Error::EnumSymbolName(symbol.to_string())); + } + + Ok(()) + } +} + +static ENUM_SYMBOL_NAME_VALIDATOR_ONCE: OnceLock> = + OnceLock::new(); + +/// Sets a custom enum symbol name validator. +/// +/// Returns a unit if the registration was successful, or gives back the rejected +/// `validator` argument if a validator (custom or default) is already registered. +/// +/// **Note**: This function must be called before parsing any schema, because parsing +/// registers the default validator and the registration is one time only! +pub fn set_enum_symbol_name_validator( + validator: Box, +) -> Result<(), Box> { + ENUM_SYMBOL_NAME_VALIDATOR_ONCE.set(validator) +} + +pub(crate) fn validate_enum_symbol_name(symbol: &str) -> AvroResult<()> { + ENUM_SYMBOL_NAME_VALIDATOR_ONCE + .get_or_init(|| { + debug!("Going to use the default enum symbol name validator."); + Box::new(SpecificationValidator) + }) + .validate(symbol) +} + +/// A trait that validates record field names. +/// To register a custom one use [set_record_field_name_validator]. +pub trait RecordFieldNameValidator: Send + Sync { + /// Returns the regex used to validate the record field names + /// according to the Avro specification. 
+ fn regex(&self) -> &'static Regex { + static FIELD_NAME_ONCE: OnceLock = OnceLock::new(); + FIELD_NAME_ONCE.get_or_init(|| Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap()) + } + + /// Validates the record field's name and returns nothing (unit), + /// or [Error::FieldName] if it is invalid. + fn validate(&self, name: &str) -> AvroResult<()>; +} + +impl RecordFieldNameValidator for SpecificationValidator { + fn validate(&self, field_name: &str) -> AvroResult<()> { + let regex = RecordFieldNameValidator::regex(self); + if !regex.is_match(field_name) { + return Err(Error::FieldName(field_name.to_string())); + } + + Ok(()) + } +} + +static RECORD_FIELD_NAME_VALIDATOR_ONCE: OnceLock> = + OnceLock::new(); + +/// Sets a custom record field name validator. +/// +/// Returns a unit if the registration was successful, or gives back the rejected +/// `validator` argument if a validator (custom or default) is already registered. +/// +/// **Note**: This function must be called before parsing any schema, because parsing +/// registers the default validator and the registration is one time only! 
+pub fn set_record_field_name_validator( + validator: Box, +) -> Result<(), Box> { + RECORD_FIELD_NAME_VALIDATOR_ONCE.set(validator) +} + +pub(crate) fn validate_record_field_name(field_name: &str) -> AvroResult<()> { + RECORD_FIELD_NAME_VALIDATOR_ONCE + .get_or_init(|| { + debug!("Going to use the default record field name validator."); + Box::new(SpecificationValidator) + }) + .validate(field_name) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::schema::Name; + use apache_avro_test_helper::TestResult; + + #[test] + fn avro_3900_default_name_validator_with_valid_ns() -> TestResult { + validate_schema_name("example")?; + Ok(()) + } + + #[test] + fn avro_3900_default_name_validator_with_invalid_ns() -> TestResult { + assert!(validate_schema_name("com-example").is_err()); + Ok(()) + } + + #[test] + fn test_avro_3897_disallow_invalid_namespaces_in_fully_qualified_name() -> TestResult { + let full_name = "ns.0.record1"; + let name = Name::new(full_name); + assert!(name.is_err()); + let validator = SpecificationValidator; + let expected = Error::InvalidSchemaName( + full_name.to_string(), + SchemaNameValidator::regex(&validator).as_str(), + ) + .to_string(); + let err = name.map_err(|e| e.to_string()).err().unwrap(); + pretty_assertions::assert_eq!(expected, err); + + let full_name = "ns..record1"; + let name = Name::new(full_name); + assert!(name.is_err()); + let expected = Error::InvalidSchemaName( + full_name.to_string(), + SchemaNameValidator::regex(&validator).as_str(), + ) + .to_string(); + let err = name.map_err(|e| e.to_string()).err().unwrap(); + pretty_assertions::assert_eq!(expected, err); + Ok(()) + } + + #[test] + fn avro_3900_default_namespace_validator_with_valid_ns() -> TestResult { + validate_namespace("com.example")?; + Ok(()) + } + + #[test] + fn avro_3900_default_namespace_validator_with_invalid_ns() -> TestResult { + assert!(validate_namespace("com-example").is_err()); + Ok(()) + } + + #[test] + fn 
avro_3900_default_enum_symbol_validator_with_valid_symbol_name() -> TestResult { + validate_enum_symbol_name("spades")?; + Ok(()) + } + + #[test] + fn avro_3900_default_enum_symbol_validator_with_invalid_symbol_name() -> TestResult { + assert!(validate_enum_symbol_name("com-example").is_err()); + Ok(()) + } + + #[test] + fn avro_3900_default_record_field_validator_with_valid_name() -> TestResult { + validate_record_field_name("test")?; + Ok(()) + } + + #[test] + fn avro_3900_default_record_field_validator_with_invalid_name() -> TestResult { + assert!(validate_record_field_name("com-example").is_err()); + Ok(()) + } +} diff --git a/lang/rust/avro/tests/validators.rs b/lang/rust/avro/tests/validators.rs new file mode 100644 index 00000000000..941ffc3e7d7 --- /dev/null +++ b/lang/rust/avro/tests/validators.rs @@ -0,0 +1,85 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use apache_avro::{ + schema::Namespace, + validator::{ + set_enum_symbol_name_validator, set_record_field_name_validator, set_schema_name_validator, + set_schema_namespace_validator, EnumSymbolNameValidator, RecordFieldNameValidator, + SchemaNameValidator, SchemaNamespaceValidator, + }, + AvroResult, +}; +use apache_avro_test_helper::TestResult; + +struct CustomValidator; + +#[test] +fn avro_3900_custom_validator_with_spec_invalid_names() -> TestResult { + // Setup the custom validators before the schema is parsed + // because the parsing will trigger the validation and will + // setup the default validator (SpecificationValidator)! + impl SchemaNameValidator for CustomValidator { + fn validate(&self, schema_name: &str) -> AvroResult<(String, Namespace)> { + Ok((schema_name.to_string(), None)) + } + } + + impl SchemaNamespaceValidator for CustomValidator { + fn validate(&self, _ns: &str) -> AvroResult<()> { + Ok(()) + } + } + + impl EnumSymbolNameValidator for CustomValidator { + fn validate(&self, _ns: &str) -> AvroResult<()> { + Ok(()) + } + } + + impl RecordFieldNameValidator for CustomValidator { + fn validate(&self, _ns: &str) -> AvroResult<()> { + Ok(()) + } + } + + assert!(set_schema_name_validator(Box::new(CustomValidator)).is_ok()); + assert!(set_schema_namespace_validator(Box::new(CustomValidator)).is_ok()); + assert!(set_enum_symbol_name_validator(Box::new(CustomValidator)).is_ok()); + assert!(set_record_field_name_validator(Box::new(CustomValidator)).is_ok()); + + let invalid_schema = r#"{ + "name": "invalid-schema-name", + "namespace": "invalid-namespace", + "type": "record", + "fields": [ + { + "name": "invalid-field-name", + "type": "int" + }, + { + "type": "enum", + "name": "Test", + "symbols": ["A-B", "B-A"] + } + ] + }"#; + + apache_avro::Schema::parse_str(invalid_schema)?; + + Ok(()) +} diff --git a/lang/rust/avro_test_helper/Cargo.toml b/lang/rust/avro_test_helper/Cargo.toml index 376dce33bc9..37d31c63ddf 100644 --- 
a/lang/rust/avro_test_helper/Cargo.toml +++ b/lang/rust/avro_test_helper/Cargo.toml @@ -36,4 +36,3 @@ better-panic = { default-features = false, version = "0.3.0" } ctor = { default-features = false, version = "0.2.6" } env_logger = { default-features = false, version = "0.10.2" } log = { workspace = true } -ref_thread_local = { default-features = false, version = "0.1.1" } diff --git a/lang/rust/avro_test_helper/src/lib.rs b/lang/rust/avro_test_helper/src/lib.rs index 7e56c1261a3..5e1e04b50ae 100644 --- a/lang/rust/avro_test_helper/src/lib.rs +++ b/lang/rust/avro_test_helper/src/lib.rs @@ -17,14 +17,13 @@ #[cfg(not(target_arch = "wasm32"))] use ctor::{ctor, dtor}; +use std::cell::RefCell; -use ref_thread_local::ref_thread_local; - -ref_thread_local! { +thread_local! { // The unit tests run in parallel // We need to keep the log messages in a thread-local variable // and clear them after assertion - pub(crate) static managed LOG_MESSAGES: Vec = Vec::new(); + pub(crate) static LOG_MESSAGES: RefCell> = RefCell::new(Vec::new()); } pub mod logger; diff --git a/lang/rust/avro_test_helper/src/logger.rs b/lang/rust/avro_test_helper/src/logger.rs index 505e4254190..f1bb5f84d0f 100644 --- a/lang/rust/avro_test_helper/src/logger.rs +++ b/lang/rust/avro_test_helper/src/logger.rs @@ -17,7 +17,6 @@ use crate::LOG_MESSAGES; use log::{LevelFilter, Log, Metadata}; -use ref_thread_local::RefThreadLocal; use std::sync::OnceLock; struct TestLogger { @@ -32,7 +31,7 @@ impl Log for TestLogger { fn log(&self, record: &log::Record) { if self.enabled(record.metadata()) { - LOG_MESSAGES.borrow_mut().push(format!("{}", record.args())); + LOG_MESSAGES.with(|msgs| msgs.borrow_mut().push(format!("{}", record.args()))); self.delegate.log(record); } @@ -53,20 +52,24 @@ fn test_logger() -> &'static TestLogger { } pub fn clear_log_messages() { - LOG_MESSAGES.borrow_mut().clear(); + LOG_MESSAGES.with(|msgs| match msgs.try_borrow_mut() { + Ok(mut log_messages) => log_messages.clear(), + Err(err) => 
panic!("Failed to clear log messages: {err:?}"), + }); } pub fn assert_not_logged(unexpected_message: &str) { - match LOG_MESSAGES.borrow().last() { + LOG_MESSAGES.with(|msgs| match msgs.borrow().last() { Some(last_log) if last_log == unexpected_message => { panic!("The following log message should not have been logged: '{unexpected_message}'") } _ => (), - } + }); } pub fn assert_logged(expected_message: &str) { - assert_eq!(LOG_MESSAGES.borrow_mut().pop().unwrap(), expected_message); + let last_message = LOG_MESSAGES.with(|msgs| msgs.borrow_mut().pop().unwrap()); + assert_eq!(last_message, expected_message); } #[cfg(not(target_arch = "wasm32"))]