AVRO-4024: [Rust] support nan/inf/-inf as float/double default (#3051)

Signed-off-by: xxchan <[email protected]> Co-authored-by: Xiangjin <[email protected]>
apache · Aug 7, 2024 · 7701526 · 7701526
1 parent e91fd0e
commit 7701526
Show file tree

Hide file tree

Showing 2 changed files with 62 additions and 5 deletions.
diff --git a/lang/rust/avro/src/types.rs b/lang/rust/avro/src/types.rs
@@ -908,6 +908,10 @@ impl Value {
             Value::Long(n) => Ok(Value::Float(n as f32)),
             Value::Float(x) => Ok(Value::Float(x)),
             Value::Double(x) => Ok(Value::Float(x as f32)),
+            Value::String(x) => match Self::parse_special_float(&x) {
+                Some(f) => Ok(Value::Float(f)),
+                None => Err(Error::GetFloat(ValueKind::String)),
+            },
             other => Err(Error::GetFloat(other.into())),
         }
     }
@@ -918,10 +922,25 @@ impl Value {
             Value::Long(n) => Ok(Value::Double(n as f64)),
             Value::Float(x) => Ok(Value::Double(f64::from(x))),
             Value::Double(x) => Ok(Value::Double(x)),
+            Value::String(x) => match Self::parse_special_float(&x) {
+                Some(f) => Ok(Value::Double(f.into())),
+                None => Err(Error::GetDouble(ValueKind::String)),
+            },
             other => Err(Error::GetDouble(other.into())),
         }
     }
 
+    /// Parses the string spelling of NaN or an infinity into an `f32`; `None` for any other text.
+    /// IEEE 754 NaN and infinities are not valid JSON numbers, so JSON carries them as strings.
+    fn parse_special_float(s: &str) -> Option<f32> {
+        match s.trim().to_ascii_lowercase().as_str() { // ignore outer whitespace and ASCII case
+            "nan" | "+nan" | "-nan" => Some(f32::NAN), // every NaN spelling yields the same f32::NAN
+            "inf" | "+inf" | "infinity" | "+infinity" => Some(f32::INFINITY),
+            "-inf" | "-infinity" => Some(f32::NEG_INFINITY),
+            _ => None, // not a recognised special-float spelling
+        }
+    }
+
     fn resolve_bytes(self) -> Result<Self, Error> {
         match self {
             Value::Bytes(bytes) => Ok(Value::Bytes(bytes)),

diff --git a/lang/rust/avro/tests/io.rs b/lang/rust/avro/tests/io.rs
@@ -107,6 +107,14 @@ fn default_value_examples() -> &'static Vec<(&'static str, &'static str, Value)>
             (r#""long""#, "5", Value::Long(5)),
             (r#""float""#, "1.1", Value::Float(1.1)),
             (r#""double""#, "1.1", Value::Double(1.1)),
+            (r#""float""#, r#""  +inf ""#, Value::Float(f32::INFINITY)),
+            (
+                r#""double""#,
+                r#""-Infinity""#,
+                Value::Double(f64::NEG_INFINITY),
+            ),
+            (r#""float""#, r#""-NAN""#, Value::Float(f32::NAN)),
+            (r#""double""#, r#""-NAN""#, Value::Double(f64::NAN)),
             (
                 r#"{"type": "fixed", "name": "F", "size": 2}"#,
                 r#""a""#,
@@ -312,11 +320,41 @@ fn test_default_value() -> TestResult {
             &mut Cursor::new(encoded),
             Some(&reader_schema),
         )?;
-        assert_eq!(
-            datum_read, datum_to_read,
-            "{} -> {}",
-            *field_type, *default_json
-        );
+
+        match default_datum {
+            // For float/double, NaN != NaN, so we check specially here.
+            Value::Double(f) if f.is_nan() => {
+                let Value::Record(fields) = datum_read else {
+                    unreachable!("the test always constructs top level as record")
+                };
+                let Value::Double(f) = fields[0].1 else {
+                    panic!("double expected")
+                };
+                assert!(
+                    f.is_nan(),
+                    "{field_type} -> {default_json} is parsed as {f} rather than NaN"
+                );
+            }
+            Value::Float(f) if f.is_nan() => { // NaN != NaN, so check is_nan() instead of equality
+                let Value::Record(fields) = datum_read else {
+                    unreachable!("the test always constructs top level as record")
+                };
+                let Value::Float(f) = fields[0].1 else {
+                    panic!("float expected") // this arm destructures Value::Float, not Value::Double
+                };
+                assert!(
+                    f.is_nan(),
+                    "{field_type} -> {default_json} is parsed as {f} rather than NaN"
+                );
+            }
+            _ => {
+                assert_eq!(
+                    datum_read, datum_to_read,
+                    "{} -> {}",
+                    *field_type, *default_json
+                );
+            }
+        }
     }
 
     Ok(())