From d3c30cef1ea9bfb30687bf55fa35edf4f77b5a74 Mon Sep 17 00:00:00 2001
From: chielP
Date: Thu, 24 Aug 2023 11:15:05 +0200
Subject: [PATCH] add check for non-numeric characters

---
 crates/polars-arrow/src/compute/decimal.rs | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/crates/polars-arrow/src/compute/decimal.rs b/crates/polars-arrow/src/compute/decimal.rs
index cb265d43d75e..2af5037a2a86 100644
--- a/crates/polars-arrow/src/compute/decimal.rs
+++ b/crates/polars-arrow/src/compute/decimal.rs
@@ -15,6 +15,12 @@ fn split_decimal_bytes(bytes: &[u8]) -> (Option<&[u8]>, Option<&[u8]>) {
     (lhs, rhs)
 }
 
+fn is_numeric(bytes: &[u8]) -> bool {
+    bytes
+        .iter()
+        .all(|b| matches!(*b, b'0'..=b'9' | b'.' | b'+' | b'-'))
+}
+
 pub fn infer_scale(bytes: &[u8]) -> Option<u8> {
     let (_lhs, rhs) = split_decimal_bytes(bytes);
     rhs.map(significant_digits)
@@ -26,6 +32,9 @@ pub fn infer_scale(bytes: &[u8]) -> Option<u8> {
 pub(super) fn deserialize_decimal(bytes: &[u8], precision: Option<u8>, scale: u8) -> Option<i128> {
     let (lhs, rhs) = split_decimal_bytes(bytes);
     let precision = precision.unwrap_or(u8::MAX);
+    if !is_numeric(bytes) {
+        return None;
+    }
     match (lhs, rhs) {
         (Some(lhs), Some(rhs)) => atoi::<i128>(lhs).and_then(|x| {
             atoi::<i128>(rhs)
@@ -127,5 +136,14 @@ mod test {
             deserialize_decimal(val.as_bytes(), precision, scale),
             Some(1000000000000000000)
         );
+        let scale = 5;
+        let val = "12ABC.34";
+        assert_eq!(deserialize_decimal(val.as_bytes(), precision, scale), None);
+
+        let val = "1ABC2.34";
+        assert_eq!(deserialize_decimal(val.as_bytes(), precision, scale), None);
+
+        let val = "12.3ABC4";
+        assert_eq!(deserialize_decimal(val.as_bytes(), precision, scale), None);
     }
 }