From c277f413c427f3ff00fb6d9dde33286307a95b81 Mon Sep 17 00:00:00 2001 From: Martin Tzvetanov Grigorov Date: Thu, 21 Dec 2023 12:05:45 +0200 Subject: [PATCH] AVRO-3900: Polish APIs Signed-off-by: Martin Tzvetanov Grigorov --- lang/rust/avro/README.md | 26 ++++++++ lang/rust/avro/src/lib.rs | 27 ++++++++ lang/rust/avro/src/validator.rs | 103 +++++++++++++++++------------ lang/rust/avro/tests/validators.rs | 35 ++++------ 4 files changed, 125 insertions(+), 66 deletions(-) diff --git a/lang/rust/avro/README.md b/lang/rust/avro/README.md index 75789964e24..28d8aee7f8f 100644 --- a/lang/rust/avro/README.md +++ b/lang/rust/avro/README.md @@ -651,6 +651,32 @@ let writers_schema = Schema::parse_str(r#"{"type": "array", "items":"long"}"#).u let readers_schema = Schema::parse_str(r#"{"type": "array", "items":"int"}"#).unwrap(); assert!(SchemaCompatibility::can_read(&writers_schema, &readers_schema).is_err()); ``` +### Custom names validators + +By default the library follows the naming rules defined by the +[Avro specification](https://avro.apache.org/docs/1.11.1/specification/#names). + +Some of the other Apache Avro language SDKs are not that strict and allow more +characters in names. For interoperability with those SDKs, the library provides +a way to customize name validation. + +```rust +use apache_avro::AvroResult; +use apache_avro::schema::Namespace; +use apache_avro::validator::{SchemaNameValidator, set_schema_name_validator}; + +struct MyCustomValidator; + +impl SchemaNameValidator for MyCustomValidator { + fn validate(&self, name: &str) -> AvroResult<(String, Namespace)> { + todo!() + } +} + +set_schema_name_validator(Box::new(MyCustomValidator)); +``` + +The same approach can be used to customize the validation of schema namespaces, enum symbols and record field names. diff --git a/lang/rust/avro/src/lib.rs b/lang/rust/avro/src/lib.rs index 64d7d824005..2c3b0309cd4 100644 --- a/lang/rust/avro/src/lib.rs +++ b/lang/rust/avro/src/lib.rs @@ -764,6 +764,33 @@ //! 
let readers_schema = Schema::parse_str(r#"{"type": "array", "items":"int"}"#).unwrap(); //! assert!(SchemaCompatibility::can_read(&writers_schema, &readers_schema).is_err()); //! ``` +//! ## Custom names validators +//! +//! By default the library follows the naming rules defined by the +//! [Avro specification](https://avro.apache.org/docs/1.11.1/specification/#names). +//! +//! Some of the other Apache Avro language SDKs are not that strict and allow more +//! characters in names. For interoperability with those SDKs, the library provides +//! a way to customize name validation. +//! +//! ```rust +//! use apache_avro::AvroResult; +//! use apache_avro::schema::Namespace; +//! use apache_avro::validator::{SchemaNameValidator, set_schema_name_validator}; +//! +//! struct MyCustomValidator; +//! +//! impl SchemaNameValidator for MyCustomValidator { +//! fn validate(&self, name: &str) -> AvroResult<(String, Namespace)> { +//! todo!() +//! } +//! } +//! +//! set_schema_name_validator(Box::new(MyCustomValidator)); +//! ``` +//! +//! The same approach can be used to customize the validation of schema namespaces, enum symbols and record field names. +//! mod bigdecimal; mod codec; diff --git a/lang/rust/avro/src/validator.rs b/lang/rust/avro/src/validator.rs index d8413af6ca1..d05722cbf6f 100644 --- a/lang/rust/avro/src/validator.rs +++ b/lang/rust/avro/src/validator.rs @@ -17,12 +17,12 @@ use crate::{schema::Namespace, AvroResult, Error}; use regex_lite::Regex; -use std::{fmt::Debug, sync::OnceLock}; +use std::sync::OnceLock; -#[derive(Debug)] -struct DefaultValidator; +/// The default validator: checks schema names, namespaces, enum symbols and record field names against the Avro specification. 
+struct SpecificationValidator; -pub trait NameValidator: Send + Sync { +pub trait SchemaNameValidator: Send + Sync { fn regex(&self) -> &'static Regex { static SCHEMA_NAME_ONCE: OnceLock = OnceLock::new(); SCHEMA_NAME_ONCE.get_or_init(|| { @@ -37,9 +37,9 @@ pub trait NameValidator: Send + Sync { fn validate(&self, name: &str) -> AvroResult<(String, Namespace)>; } -impl NameValidator<(String, Namespace)> for DefaultValidator { +impl SchemaNameValidator for SpecificationValidator { fn validate(&self, schema_name: &str) -> AvroResult<(String, Namespace)> { - let regex = NameValidator::regex(self); + let regex = SchemaNameValidator::regex(self); let caps = regex .captures(schema_name) .ok_or_else(|| Error::InvalidSchemaName(schema_name.to_string(), regex.as_str()))?; @@ -50,13 +50,12 @@ impl NameValidator<(String, Namespace)> for DefaultValidator { } } -static NAME_VALIDATOR_ONCE: OnceLock + Send + Sync>> = - OnceLock::new(); +static NAME_VALIDATOR_ONCE: OnceLock> = OnceLock::new(); #[allow(dead_code)] pub fn set_schema_name_validator( - validator: Box + Send + Sync>, -) -> Result<(), Box + Send + Sync>> { + validator: Box, +) -> Result<(), Box> { NAME_VALIDATOR_ONCE.set(validator) } @@ -64,12 +63,12 @@ pub(crate) fn validate_schema_name(schema_name: &str) -> AvroResult<(String, Nam NAME_VALIDATOR_ONCE .get_or_init(|| { debug!("Going to use the default name validator."); - Box::new(DefaultValidator) + Box::new(SpecificationValidator) }) .validate(schema_name) } -pub trait NamespaceValidator: Sync + Debug { +pub trait SchemaNamespaceValidator: Send + Sync { fn regex(&self) -> &'static Regex { static NAMESPACE_ONCE: OnceLock = OnceLock::new(); NAMESPACE_ONCE.get_or_init(|| { @@ -80,9 +79,9 @@ pub trait NamespaceValidator: Sync + Debug { fn validate(&self, name: &str) -> AvroResult<()>; } -impl NamespaceValidator for DefaultValidator { +impl SchemaNamespaceValidator for SpecificationValidator { fn validate(&self, ns: &str) -> AvroResult<()> { - let regex = 
NamespaceValidator::regex(self); + let regex = SchemaNamespaceValidator::regex(self); if !regex.is_match(ns) { return Err(Error::InvalidNamespace(ns.to_string(), regex.as_str())); } else { @@ -91,13 +90,13 @@ impl NamespaceValidator for DefaultValidator { } } -static NAMESPACE_VALIDATOR_ONCE: OnceLock> = +static NAMESPACE_VALIDATOR_ONCE: OnceLock> = OnceLock::new(); #[allow(dead_code)] -pub fn set_namespace_validator( - validator: Box, -) -> Result<(), Box> { +pub fn set_schema_namespace_validator( + validator: Box, +) -> Result<(), Box> { NAMESPACE_VALIDATOR_ONCE.set(validator) } @@ -105,21 +104,21 @@ pub(crate) fn validate_namespace(ns: &str) -> AvroResult<()> { NAMESPACE_VALIDATOR_ONCE .get_or_init(|| { debug!("Going to use the default namespace validator."); - Box::new(DefaultValidator) + Box::new(SpecificationValidator) }) .validate(ns) } -pub trait EnumSymbolNameValidator { +pub trait EnumSymbolNameValidator: Send + Sync { fn regex(&self) -> &'static Regex { static ENUM_SYMBOL_NAME_ONCE: OnceLock = OnceLock::new(); ENUM_SYMBOL_NAME_ONCE.get_or_init(|| Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap()) } - fn validate(&self, name: &str) -> AvroResult; + fn validate(&self, name: &str) -> AvroResult<()>; } -impl EnumSymbolNameValidator<()> for DefaultValidator { +impl EnumSymbolNameValidator for SpecificationValidator { fn validate(&self, symbol: &str) -> AvroResult<()> { let regex = EnumSymbolNameValidator::regex(self); if !regex.is_match(symbol) { @@ -130,14 +129,13 @@ impl EnumSymbolNameValidator<()> for DefaultValidator { } } -static ENUM_SYMBOL_NAME_VALIDATOR_ONCE: OnceLock< - Box + Send + Sync>, -> = OnceLock::new(); +static ENUM_SYMBOL_NAME_VALIDATOR_ONCE: OnceLock> = + OnceLock::new(); #[allow(dead_code)] pub fn set_enum_symbol_name_validator( - validator: Box + Send + Sync>, -) -> Result<(), Box + Send + Sync>> { + validator: Box, +) -> Result<(), Box> { ENUM_SYMBOL_NAME_VALIDATOR_ONCE.set(validator) } @@ -145,21 +143,21 @@ pub(crate) fn 
validate_enum_symbol_name(symbol: &str) -> AvroResult<()> { ENUM_SYMBOL_NAME_VALIDATOR_ONCE .get_or_init(|| { debug!("Going to use the default enum symbol name validator."); - Box::new(DefaultValidator) + Box::new(SpecificationValidator) }) .validate(symbol) } -pub trait RecordFieldNameValidator { +pub trait RecordFieldNameValidator: Send + Sync { fn regex(&self) -> &'static Regex { static FIELD_NAME_ONCE: OnceLock = OnceLock::new(); FIELD_NAME_ONCE.get_or_init(|| Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap()) } - fn validate(&self, name: &str) -> AvroResult; + fn validate(&self, name: &str) -> AvroResult<()>; } -impl RecordFieldNameValidator<()> for DefaultValidator { +impl RecordFieldNameValidator for SpecificationValidator { fn validate(&self, field_name: &str) -> AvroResult<()> { let regex = RecordFieldNameValidator::regex(self); if !regex.is_match(field_name) { @@ -170,14 +168,13 @@ impl RecordFieldNameValidator<()> for DefaultValidator { } } -static RECORD_FIELD_NAME_VALIDATOR_ONCE: OnceLock< - Box + Send + Sync>, -> = OnceLock::new(); +static RECORD_FIELD_NAME_VALIDATOR_ONCE: OnceLock> = + OnceLock::new(); #[allow(dead_code)] pub fn set_record_field_name_validator( - validator: Box + Send + Sync>, -) -> Result<(), Box + Send + Sync>> { + validator: Box, +) -> Result<(), Box> { RECORD_FIELD_NAME_VALIDATOR_ONCE.set(validator) } @@ -185,7 +182,7 @@ pub(crate) fn validate_record_field_name(field_name: &str) -> AvroResult<()> { RECORD_FIELD_NAME_VALIDATOR_ONCE .get_or_init(|| { debug!("Going to use the default record field name validator."); - Box::new(DefaultValidator) + Box::new(SpecificationValidator) }) .validate(field_name) } @@ -199,14 +196,12 @@ mod tests { #[test] fn avro_3900_default_name_validator_with_valid_ns() -> TestResult { validate_schema_name("example")?; - Ok(()) } #[test] fn avro_3900_default_name_validator_with_invalid_ns() -> TestResult { assert!(validate_schema_name("com-example").is_err()); - Ok(()) } @@ -215,10 +210,10 @@ mod tests { 
let full_name = "ns.0.record1"; let name = Name::new(full_name); assert!(name.is_err()); - let validator = DefaultValidator; + let validator = SpecificationValidator; let expected = Error::InvalidSchemaName( full_name.to_string(), - NameValidator::regex(&validator).as_str(), + SchemaNameValidator::regex(&validator).as_str(), ) .to_string(); let err = name.map_err(|e| e.to_string()).err().unwrap(); @@ -229,7 +224,7 @@ mod tests { assert!(name.is_err()); let expected = Error::InvalidSchemaName( full_name.to_string(), - NameValidator::regex(&validator).as_str(), + SchemaNameValidator::regex(&validator).as_str(), ) .to_string(); let err = name.map_err(|e| e.to_string()).err().unwrap(); @@ -240,14 +235,36 @@ mod tests { #[test] fn avro_3900_default_namespace_validator_with_valid_ns() -> TestResult { validate_namespace("com.example")?; - Ok(()) } #[test] fn avro_3900_default_namespace_validator_with_invalid_ns() -> TestResult { assert!(validate_namespace("com-example").is_err()); + Ok(()) + } + #[test] + fn avro_3900_default_enum_symbol_validator_with_valid_symbol_name() -> TestResult { + validate_enum_symbol_name("spades")?; + Ok(()) + } + + #[test] + fn avro_3900_default_enum_symbol_validator_with_invalid_symbol_name() -> TestResult { + assert!(validate_enum_symbol_name("com-example").is_err()); + Ok(()) + } + + #[test] + fn avro_3900_default_record_field_validator_with_valid_name() -> TestResult { + validate_record_field_name("test")?; + Ok(()) + } + + #[test] + fn avro_3900_default_record_field_validator_with_invalid_name() -> TestResult { + assert!(validate_record_field_name("com-example").is_err()); Ok(()) } } diff --git a/lang/rust/avro/tests/validators.rs b/lang/rust/avro/tests/validators.rs index 05d42eb2759..3b0c65887d8 100644 --- a/lang/rust/avro/tests/validators.rs +++ b/lang/rust/avro/tests/validators.rs @@ -18,30 +18,25 @@ use apache_avro::{ schema::Namespace, validator::{ - set_enum_symbol_name_validator, set_namespace_validator, 
set_record_field_name_validator, - set_schema_name_validator, EnumSymbolNameValidator, NameValidator, NamespaceValidator, - RecordFieldNameValidator, + set_enum_symbol_name_validator, set_record_field_name_validator, set_schema_name_validator, + set_schema_namespace_validator, EnumSymbolNameValidator, RecordFieldNameValidator, + SchemaNameValidator, SchemaNamespaceValidator, }, AvroResult, }; use apache_avro_test_helper::TestResult; -use regex_lite::Regex; -#[test] -fn avro_3900_custom_name_validator_with_spec_invalid_ns() -> TestResult { - #[derive(Debug)] - struct CustomNameValidator; - impl NameValidator<(String, Namespace)> for CustomNameValidator { - fn regex(&self) -> &'static Regex { - unimplemented!() - } +struct CustomValidator; +#[test] +fn avro_3900_custom_schema_name_validator_with_spec_invalid_name() -> TestResult { + impl SchemaNameValidator for CustomValidator { fn validate(&self, schema_name: &str) -> AvroResult<(String, Namespace)> { Ok((schema_name.to_string(), None)) } } - assert!(set_schema_name_validator(Box::new(CustomNameValidator)).is_ok()); + assert!(set_schema_name_validator(Box::new(CustomValidator)).is_ok()); let schema = r#"{ "name": "com-example", @@ -54,15 +49,13 @@ fn avro_3900_custom_name_validator_with_spec_invalid_ns() -> TestResult { #[test] fn avro_3900_custom_namespace_validator_with_spec_invalid_ns() -> TestResult { - #[derive(Debug)] - struct CustomNamespaceValidator; - impl NamespaceValidator for CustomNamespaceValidator { + impl SchemaNamespaceValidator for CustomValidator { fn validate(&self, _ns: &str) -> AvroResult<()> { Ok(()) } } - assert!(set_namespace_validator(Box::new(CustomNamespaceValidator)).is_ok()); + assert!(set_schema_namespace_validator(Box::new(CustomValidator)).is_ok()); let schema = r#"{ "name": "name", @@ -76,9 +69,7 @@ fn avro_3900_custom_namespace_validator_with_spec_invalid_ns() -> TestResult { #[test] fn avro_3900_custom_enum_symbol_validator_with_spec_invalid_enum_symbol_names() -> TestResult { - 
#[derive(Debug)] - struct CustomValidator; - impl EnumSymbolNameValidator<()> for CustomValidator { + impl EnumSymbolNameValidator for CustomValidator { fn validate(&self, _ns: &str) -> AvroResult<()> { Ok(()) } @@ -98,9 +89,7 @@ fn avro_3900_custom_enum_symbol_validator_with_spec_invalid_enum_symbol_names() #[test] fn avro_3900_custom_record_field_validator_with_spec_invalid_field_name() -> TestResult { - #[derive(Debug)] - struct CustomValidator; - impl RecordFieldNameValidator<()> for CustomValidator { + impl RecordFieldNameValidator for CustomValidator { fn validate(&self, _ns: &str) -> AvroResult<()> { Ok(()) }