Skip to content

Commit

Permalink
AVRO-4004: [Rust] Ignore logicalType fields when creating the canonic…
Browse files Browse the repository at this point in the history
…al form (#2976)

* AVRO-4004: [Rust] Ignore logicalType fields when creating the canonical form

Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>

* AVRO-4004: [Rust] Ignore the namespace for non-named schemas

When creating the canonical parsing form of a Schema ignore the
namespace for any non-named Schemas, i.e. anything but Record, Enum,
Fixed and Ref

Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>

* AVRO-4004 Remove the test for round trip after canonical form

Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>

---------

Signed-off-by: Martin Tzvetanov Grigorov <[email protected]>
  • Loading branch information
martin-g authored Jul 12, 2024
1 parent 69cd998 commit 728b807
Show file tree
Hide file tree
Showing 4 changed files with 686 additions and 652 deletions.
50 changes: 45 additions & 5 deletions lang/rust/avro/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2156,6 +2156,7 @@ fn parsing_canonical_form(schema: &Value) -> String {
fn pcf_map(schema: &Map<String, Value>) -> String {
// Look for the namespace variant up front.
let ns = schema.get("namespace").and_then(|v| v.as_str());
let typ = schema.get("type").and_then(|v| v.as_str());
let mut fields = Vec::new();
for (k, v) in schema {
// Reduce primitive types to their simple form. ([PRIMITIVE] rule)
Expand All @@ -2167,7 +2168,12 @@ fn pcf_map(schema: &Map<String, Value>) -> String {
}

// Strip out unused fields ([STRIP] rule)
if field_ordering_position(k).is_none() || k == "default" || k == "doc" || k == "aliases" {
if field_ordering_position(k).is_none()
|| k == "default"
|| k == "doc"
|| k == "aliases"
|| k == "logicalType"
{
continue;
}

Expand All @@ -2176,7 +2182,9 @@ fn pcf_map(schema: &Map<String, Value>) -> String {
// Invariant: Only valid schemas. Must be a string.
let name = v.as_str().unwrap();
let n = match ns {
Some(namespace) if !name.contains('.') => Cow::Owned(format!("{namespace}.{name}")),
Some(namespace) if is_named_type(typ) && !name.contains('.') => {
Cow::Owned(format!("{namespace}.{name}"))
}
_ => Cow::Borrowed(name),
};

Expand Down Expand Up @@ -2211,6 +2219,13 @@ fn pcf_map(schema: &Map<String, Value>) -> String {
format!("{{{inter}}}")
}

fn is_named_type(typ: Option<&str>) -> bool {
matches!(
typ,
Some("record") | Some("enum") | Some("fixed") | Some("ref")
)
}

fn pcf_array(arr: &[Value]) -> String {
let inter = arr
.iter()
Expand Down Expand Up @@ -2443,6 +2458,7 @@ pub mod derive {
#[cfg(test)]
mod tests {
use super::*;
use crate::rabin::Rabin;
use apache_avro_test_helper::{
logger::{assert_logged, assert_not_logged},
TestResult,
Expand Down Expand Up @@ -3415,16 +3431,16 @@ mod tests {

let schema = Schema::parse_str(raw_schema)?;
assert_eq!(
"abf662f831715ff78f88545a05a9262af75d6406b54e1a8a174ff1d2b75affc4",
"7eb3b28d73dfc99bdd9af1848298b40804a2f8ad5d2642be2ecc2ad34842b987",
format!("{}", schema.fingerprint::<Sha256>())
);

assert_eq!(
"6e21c350f71b1a34e9efe90970f1bc69",
"cb11615e412ee5d872620d8df78ff6ae",
format!("{}", schema.fingerprint::<Md5>())
);
assert_eq!(
"28cf0a67d9937bb3",
"92f2ccef718c6754",
format!("{}", schema.fingerprint::<Rabin>())
);

Expand Down Expand Up @@ -6764,4 +6780,28 @@ mod tests {

Ok(())
}

#[test]
fn avro_4004_canonical_form_strip_logical_types() -> TestResult {
let schema_str = r#"
{
"type": "record",
"name": "test",
"fields": [
{"name": "a", "type": "long", "default": 42, "doc": "The field a"},
{"name": "b", "type": "string", "namespace": "test.a"},
{"name": "c", "type": "long", "logicalType": "timestamp-micros"}
]
}"#;

let schema = Schema::parse_str(schema_str)?;
let canonical_form = schema.canonical_form();
let fp_rabin = schema.fingerprint::<Rabin>();
assert_eq!(
r#"{"name":"test","type":"record","fields":[{"name":"a","type":"long"},{"name":"b","type":"string"},{"name":"c","type":{"type":"long"}}]}"#,
canonical_form
);
assert_eq!("92f2ccef718c6754", fp_rabin.to_string());
Ok(())
}
}
Loading

0 comments on commit 728b807

Please sign in to comment.