From a3851a404c61220ad69cd72833a78f564031c3f9 Mon Sep 17 00:00:00 2001 From: Samuel Hicks Date: Tue, 16 Apr 2024 10:42:49 +0100 Subject: [PATCH 01/13] De-Escaping strings using a provided buffer --- Cargo.toml | 1 + src/de/enum_.rs | 24 +++--- src/de/map.rs | 16 ++-- src/de/mod.rs | 204 ++++++++++++++++++++++++++++++++++++++++-------- src/de/seq.rs | 10 +-- src/lib.rs | 5 +- src/ser/mod.rs | 6 +- 7 files changed, 203 insertions(+), 63 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a17db23f8..7365633de 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ ryu = "1.0.5" [dependencies.heapless] version = "0.8" +features = ["serde"] optional = true [dependencies.serde] diff --git a/src/de/enum_.rs b/src/de/enum_.rs index 9012873a5..2c3c6eb69 100644 --- a/src/de/enum_.rs +++ b/src/de/enum_.rs @@ -2,17 +2,17 @@ use serde::de; use crate::de::{Deserializer, Error, Result}; -pub(crate) struct UnitVariantAccess<'a, 'b> { - de: &'a mut Deserializer<'b>, +pub(crate) struct UnitVariantAccess<'a, 'b, 's> { + de: &'a mut Deserializer<'b, 's>, } -impl<'a, 'b> UnitVariantAccess<'a, 'b> { - pub(crate) fn new(de: &'a mut Deserializer<'b>) -> Self { +impl<'a, 'b, 's> UnitVariantAccess<'a, 'b, 's> { + pub(crate) fn new(de: &'a mut Deserializer<'b, 's>) -> Self { UnitVariantAccess { de } } } -impl<'a, 'de> de::EnumAccess<'de> for UnitVariantAccess<'a, 'de> { +impl<'a, 'de, 's> de::EnumAccess<'de> for UnitVariantAccess<'a, 'de, 's> { type Error = Error; type Variant = Self; @@ -25,7 +25,7 @@ impl<'a, 'de> de::EnumAccess<'de> for UnitVariantAccess<'a, 'de> { } } -impl<'de, 'a> de::VariantAccess<'de> for UnitVariantAccess<'a, 'de> { +impl<'de, 'a, 's> de::VariantAccess<'de> for UnitVariantAccess<'a, 'de, 's> { type Error = Error; fn unit_variant(self) -> Result<()> { @@ -54,17 +54,17 @@ impl<'de, 'a> de::VariantAccess<'de> for UnitVariantAccess<'a, 'de> { } } -pub(crate) struct VariantAccess<'a, 'b> { - de: &'a mut Deserializer<'b>, +pub(crate) struct VariantAccess<'a, 'b, 
's> { + de: &'a mut Deserializer<'b, 's>, } -impl<'a, 'b> VariantAccess<'a, 'b> { - pub(crate) fn new(de: &'a mut Deserializer<'b>) -> Self { +impl<'a, 'b, 's> VariantAccess<'a, 'b, 's> { + pub(crate) fn new(de: &'a mut Deserializer<'b, 's>) -> Self { VariantAccess { de } } } -impl<'a, 'de> de::EnumAccess<'de> for VariantAccess<'a, 'de> { +impl<'a, 'de, 's> de::EnumAccess<'de> for VariantAccess<'a, 'de, 's> { type Error = Error; type Variant = Self; @@ -78,7 +78,7 @@ impl<'a, 'de> de::EnumAccess<'de> for VariantAccess<'a, 'de> { } } -impl<'de, 'a> de::VariantAccess<'de> for VariantAccess<'a, 'de> { +impl<'de, 'a, 's> de::VariantAccess<'de> for VariantAccess<'a, 'de, 's> { type Error = Error; fn unit_variant(self) -> Result<()> { diff --git a/src/de/map.rs b/src/de/map.rs index 7da47d3f3..c38c81d8a 100644 --- a/src/de/map.rs +++ b/src/de/map.rs @@ -2,18 +2,18 @@ use serde::de::{self, Visitor}; use crate::de::{Deserializer, Error}; -pub struct MapAccess<'a, 'b> { - de: &'a mut Deserializer<'b>, +pub struct MapAccess<'a, 'b, 's> { + de: &'a mut Deserializer<'b, 's>, first: bool, } -impl<'a, 'b> MapAccess<'a, 'b> { - pub(crate) fn new(de: &'a mut Deserializer<'b>) -> Self { +impl<'a, 'b, 's> MapAccess<'a, 'b, 's> { + pub(crate) fn new(de: &'a mut Deserializer<'b, 's>) -> Self { MapAccess { de, first: true } } } -impl<'a, 'de> de::MapAccess<'de> for MapAccess<'a, 'de> { +impl<'a, 'de, 's> de::MapAccess<'de> for MapAccess<'a, 'de, 's> { type Error = Error; fn next_key_seed(&mut self, seed: K) -> Result, Error> @@ -57,11 +57,11 @@ impl<'a, 'de> de::MapAccess<'de> for MapAccess<'a, 'de> { } } -struct MapKey<'a, 'b> { - de: &'a mut Deserializer<'b>, +struct MapKey<'a, 'b, 's> { + de: &'a mut Deserializer<'b, 's>, } -impl<'de, 'a> de::Deserializer<'de> for MapKey<'a, 'de> { +impl<'de, 'a, 's> de::Deserializer<'de> for MapKey<'a, 'de, 's> { type Error = Error; fn deserialize_any(self, _visitor: V) -> Result diff --git a/src/de/mod.rs b/src/de/mod.rs index b44ddb794..10d3ff24a 
100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -22,6 +22,12 @@ pub type Result = core::result::Result; #[cfg_attr(feature = "defmt", derive(defmt::Format))] #[non_exhaustive] pub enum Error { + /// Can’t parse a value without knowing its expected type. + AnyIsUnsupported, + + /// Cannot parse a sequence of bytes. + BytesIsUnsupported, + /// EOF while parsing a list. EofWhileParsingList, @@ -61,6 +67,12 @@ pub enum Error { /// Invalid unicode code point. InvalidUnicodeCodePoint, + /// Invalid String Escape Sequence + InvalidEscapeSequence, + + /// Escaped String length exceeds buffer size + EscapedStringIsTooLong, + /// Object key is not a string. KeyMustBeAString, @@ -83,15 +95,20 @@ pub enum Error { impl serde::de::StdError for Error {} /// A structure that deserializes Rust values from JSON in a buffer. -pub struct Deserializer<'b> { +pub struct Deserializer<'b, 's> { slice: &'b [u8], index: usize, + string_unescape_buffer: &'s mut [u8], } -impl<'a> Deserializer<'a> { +impl<'a, 's> Deserializer<'a, 's> { /// Create a new `Deserializer` - pub fn new(slice: &'a [u8]) -> Deserializer<'_> { - Deserializer { slice, index: 0 } + pub fn new(slice: &'a [u8], string_unescape_buffer: &'s mut [u8]) -> Deserializer<'a, 's> { + Deserializer { + slice, + index: 0, + string_unescape_buffer, + } } fn eat_char(&mut self) { @@ -204,6 +221,7 @@ impl<'a> Deserializer<'a> { } else { let end = self.index; self.eat_char(); + return str::from_utf8(&self.slice[start..end]) .map_err(|_| Error::InvalidUnicodeCodePoint); } @@ -343,7 +361,7 @@ macro_rules! deserialize_fromstr { }}; } -impl<'a, 'de> de::Deserializer<'de> for &'a mut Deserializer<'de> { +impl<'a, 'de, 's> de::Deserializer<'de> for &'a mut Deserializer<'de, 's> { type Error = Error; /// Unsupported. Can’t parse a value without knowing its expected type. 
@@ -351,7 +369,7 @@ impl<'a, 'de> de::Deserializer<'de> for &'a mut Deserializer<'de> { where V: Visitor<'de>, { - unreachable!() + Err(Error::AnyIsUnsupported) } fn deserialize_bool(self, visitor: V) -> Result @@ -445,11 +463,11 @@ impl<'a, 'de> de::Deserializer<'de> for &'a mut Deserializer<'de> { deserialize_fromstr!(self, visitor, f64, visit_f64, b"0123456789+-.eE") } - fn deserialize_char(self, _visitor: V) -> Result + fn deserialize_char(self, visitor: V) -> Result where V: Visitor<'de>, { - unreachable!() + self.deserialize_str(visitor) } fn deserialize_str(self, visitor: V) -> Result @@ -461,18 +479,94 @@ impl<'a, 'de> de::Deserializer<'de> for &'a mut Deserializer<'de> { match peek { b'"' => { self.eat_char(); - visitor.visit_borrowed_str(self.parse_str()?) + + let s = self.parse_str()?; + + if s.as_bytes().contains(&b'\\') { + let mut string_unescape_buffer_slots = self.string_unescape_buffer.iter_mut(); + + // We've already checked that the string is valid UTF-8, so the only b'\\' is the start of escape sequence + let mut escaped_string_bytes = s.as_bytes().iter(); + + loop { + match escaped_string_bytes.next().copied() { + None => break, + Some(b'\\') => { + let unescaped_byte = match escaped_string_bytes.next() { + Some(b'"') => b'"', + Some(b'\\') => b'\\', + Some(b'/') => b'/', + Some(b'b') => 0x8, + Some(b'f') => 0xC, + Some(b'n') => b'\n', + Some(b'r') => b'\r', + Some(b't') => b'\t', + Some(b'u') => { + let (escape_sequence, remaining_escaped_string_bytes) = + escaped_string_bytes + .as_slice() + .split_first_chunk::<4>() + .ok_or(Error::InvalidEscapeSequence)?; + + escaped_string_bytes = + remaining_escaped_string_bytes.iter(); + + let unescaped_char = core::str::from_utf8(escape_sequence) + .ok() + .and_then(|escape_sequence| { + u32::from_str_radix(escape_sequence, 16).ok() + }) + .and_then(char::from_u32) + .ok_or(Error::InvalidEscapeSequence)?; + + for &unescaped_byte in + unescaped_char.encode_utf8(&mut [0; 4]).as_bytes() + { + 
*string_unescape_buffer_slots + .next() + .ok_or(Error::EscapedStringIsTooLong)? = + unescaped_byte; + } + + continue; + } + _ => return Err(Error::InvalidEscapeSequence), + }; + + *string_unescape_buffer_slots + .next() + .ok_or(Error::EscapedStringIsTooLong)? = unescaped_byte; + } + Some(c) => { + *string_unescape_buffer_slots + .next() + .ok_or(Error::EscapedStringIsTooLong)? = c; + } + } + } + + let remaining_length = string_unescape_buffer_slots.len(); + let unescaped_string_length = + self.string_unescape_buffer.len() - remaining_length; + + visitor.visit_str( + str::from_utf8(&self.string_unescape_buffer[..unescaped_string_length]) + .map_err(|_| Error::InvalidUnicodeCodePoint)?, + ) + } else { + visitor.visit_borrowed_str(s) + } } _ => Err(Error::InvalidType), } } /// Unsupported. String is not available in no-std. - fn deserialize_string(self, _visitor: V) -> Result + fn deserialize_string(self, visitor: V) -> Result where V: Visitor<'de>, { - unreachable!() + self.deserialize_str(visitor) } /// Unsupported @@ -480,7 +574,7 @@ impl<'a, 'de> de::Deserializer<'de> for &'a mut Deserializer<'de> { where V: Visitor<'de>, { - unreachable!() + Err(Error::BytesIsUnsupported) } /// Unsupported @@ -488,7 +582,7 @@ impl<'a, 'de> de::Deserializer<'de> for &'a mut Deserializer<'de> { where V: Visitor<'de>, { - unreachable!() + Err(Error::BytesIsUnsupported) } fn deserialize_option(self, visitor: V) -> Result @@ -734,19 +828,42 @@ impl fmt::Display for Error { } } -/// Deserializes an instance of type `T` from bytes of JSON text +/// Deserializes an instance of type `T` from bytes of JSON text, using the provided buffer to unescape strings /// Returns the value and the number of bytes consumed in the process -pub fn from_slice<'a, T>(v: &'a [u8]) -> Result<(T, usize)> +pub fn from_slice_using_string_unescape_buffer<'a, T>( + v: &'a [u8], + string_unescape_buffer: &mut [u8], +) -> Result<(T, usize)> where T: de::Deserialize<'a>, { - let mut de = Deserializer::new(v); + 
let mut de = Deserializer::new(v, string_unescape_buffer); let value = de::Deserialize::deserialize(&mut de)?; let length = de.end()?; Ok((value, length)) } +/// Deserializes an instance of type `T` from bytes of JSON text +/// Returns the value and the number of bytes consumed in the process +pub fn from_slice<'a, T>(v: &'a [u8]) -> Result<(T, usize)> +where + T: de::Deserialize<'a>, +{ + from_slice_using_string_unescape_buffer(v, &mut [0; 128]) +} + +/// Deserializes an instance of type T from a string of JSON text, using the provided buffer to unescape strings +pub fn from_str_using_string_unescape_buffer<'a, T>( + s: &'a str, + string_unescape_buffer: &mut [u8], +) -> Result<(T, usize)> +where + T: de::Deserialize<'a>, +{ + from_slice_using_string_unescape_buffer(s.as_bytes(), string_unescape_buffer) +} + /// Deserializes an instance of type T from a string of JSON text pub fn from_str<'a, T>(s: &'a str) -> Result<(T, usize)> where @@ -757,7 +874,6 @@ where #[cfg(test)] mod tests { - use core::str::FromStr; use serde_derive::Deserialize; #[derive(Debug, Deserialize, PartialEq)] @@ -819,49 +935,69 @@ mod tests { assert_eq!(crate::from_str(r#" "thing" "#), Ok((Type::Thing, 9))); } + #[test] + fn char() { + assert_eq!(crate::from_str("\"n\""), Ok(('n', 3))); + assert_eq!(crate::from_str("\"\\\"\""), Ok(('"', 4))); + assert_eq!(crate::from_str("\"\\\\\""), Ok(('\\', 4))); + assert_eq!(crate::from_str("\"/\""), Ok(('/', 3))); + assert_eq!(crate::from_str("\"\\b\""), Ok(('\x08', 4))); + assert_eq!(crate::from_str("\"\\f\""), Ok(('\x0C', 4))); + assert_eq!(crate::from_str("\"\\n\""), Ok(('\n', 4))); + assert_eq!(crate::from_str("\"\\r\""), Ok(('\r', 4))); + assert_eq!(crate::from_str("\"\\t\""), Ok(('\t', 4))); + assert_eq!(crate::from_str("\"\\u000b\""), Ok(('\x0B', 8))); + assert_eq!(crate::from_str("\"\\u000B\""), Ok(('\x0B', 8))); + assert_eq!(crate::from_str("\"\u{3A3}\""), Ok(('\u{3A3}', 4))); + } + #[test] fn str() { + // No escaping, so can borrow from the 
input assert_eq!(crate::from_str(r#" "hello" "#), Ok(("hello", 9))); assert_eq!(crate::from_str(r#" "" "#), Ok(("", 4))); assert_eq!(crate::from_str(r#" " " "#), Ok((" ", 5))); assert_eq!(crate::from_str(r#" "👏" "#), Ok(("👏", 8))); - // no unescaping is done (as documented as a known issue in lib.rs) - assert_eq!(crate::from_str(r#" "hel\tlo" "#), Ok(("hel\\tlo", 11))); - assert_eq!(crate::from_str(r#" "hello \\" "#), Ok(("hello \\\\", 12))); + fn s(s: &'static str) -> heapless::String<1024> { + core::str::FromStr::from_str(s).expect("Failed to create test string") + } // escaped " in the string content - assert_eq!(crate::from_str(r#" "foo\"bar" "#), Ok((r#"foo\"bar"#, 12))); + assert_eq!( + crate::from_str(r#" "foo\"bar" "#), + Ok((s(r#"foo"bar"#), 12)) + ); assert_eq!( crate::from_str(r#" "foo\\\"bar" "#), - Ok((r#"foo\\\"bar"#, 14)) + Ok((s(r#"foo\"bar"#), 14)) ); assert_eq!( crate::from_str(r#" "foo\"\"bar" "#), - Ok((r#"foo\"\"bar"#, 14)) + Ok((s(r#"foo""bar"#), 14)) ); - assert_eq!(crate::from_str(r#" "\"bar" "#), Ok((r#"\"bar"#, 9))); - assert_eq!(crate::from_str(r#" "foo\"" "#), Ok((r#"foo\""#, 9))); - assert_eq!(crate::from_str(r#" "\"" "#), Ok((r#"\""#, 6))); + assert_eq!(crate::from_str(r#" "\"bar" "#), Ok((s(r#""bar"#), 9))); + assert_eq!(crate::from_str(r#" "foo\"" "#), Ok((s(r#"foo""#), 9))); + assert_eq!(crate::from_str(r#" "\"" "#), Ok((s(r#"""#), 6))); // non-excaped " preceded by backslashes assert_eq!( crate::from_str(r#" "foo bar\\" "#), - Ok((r#"foo bar\\"#, 13)) + Ok((s(r#"foo bar\"#), 13)) ); assert_eq!( crate::from_str(r#" "foo bar\\\\" "#), - Ok((r#"foo bar\\\\"#, 15)) + Ok((s(r#"foo bar\\"#), 15)) ); assert_eq!( crate::from_str(r#" "foo bar\\\\\\" "#), - Ok((r#"foo bar\\\\\\"#, 17)) + Ok((s(r#"foo bar\\\"#), 17)) ); assert_eq!( crate::from_str(r#" "foo bar\\\\\\\\" "#), - Ok((r#"foo bar\\\\\\\\"#, 19)) + Ok((s(r#"foo bar\\\\"#), 19)) ); - assert_eq!(crate::from_str(r#" "\\" "#), Ok((r#"\\"#, 6))); + assert_eq!(crate::from_str(r#" "\\" 
"#), Ok((s(r#"\"#), 6))); } #[test] @@ -1084,7 +1220,7 @@ mod tests { assert_eq!( crate::from_str::(r#"[10]"#), Err(crate::de::Error::CustomErrorWithMessage( - heapless::String::from_str( + core::str::FromStr::from_str( "invalid length 1, expected tuple struct Xy with 2 elements" ) .unwrap() @@ -1195,7 +1331,7 @@ mod tests { assert_eq!( crate::de::Error::custom("something bad happened"), crate::de::Error::CustomErrorWithMessage( - heapless::String::from_str("something bad happened").unwrap() + core::str::FromStr::from_str("something bad happened").unwrap() ) ); } @@ -1206,7 +1342,7 @@ mod tests { use serde::de::Error; assert_eq!( crate::de::Error::custom("0123456789012345678901234567890123456789012345678901234567890123 <- after here the message should be truncated"), - crate::de::Error::CustomErrorWithMessage(heapless::String::from_str( + crate::de::Error::CustomErrorWithMessage(core::str::FromStr::from_str( "0123456789012345678901234567890123456789012345678901234567890123").unwrap() ) ); diff --git a/src/de/seq.rs b/src/de/seq.rs index f3dbbdca3..1470a5af9 100644 --- a/src/de/seq.rs +++ b/src/de/seq.rs @@ -2,18 +2,18 @@ use serde::de; use crate::de::{Deserializer, Error, Result}; -pub(crate) struct SeqAccess<'a, 'b> { +pub(crate) struct SeqAccess<'a, 'b, 's> { first: bool, - de: &'a mut Deserializer<'b>, + de: &'a mut Deserializer<'b, 's>, } -impl<'a, 'b> SeqAccess<'a, 'b> { - pub fn new(de: &'a mut Deserializer<'b>) -> Self { +impl<'a, 'b, 's> SeqAccess<'a, 'b, 's> { + pub fn new(de: &'a mut Deserializer<'b, 's>) -> Self { SeqAccess { de, first: true } } } -impl<'a, 'de> de::SeqAccess<'de> for SeqAccess<'a, 'de> { +impl<'a, 'de, 's> de::SeqAccess<'de> for SeqAccess<'a, 'de, 's> { type Error = Error; fn next_element_seed(&mut self, seed: T) -> Result> diff --git a/src/lib.rs b/src/lib.rs index 7f2de3a37..65a437a4b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -65,7 +65,10 @@ pub mod de; pub mod ser; #[doc(inline)] -pub use self::de::{from_slice, from_str}; +pub use 
self::de::{ + from_slice, from_slice_using_string_unescape_buffer, from_str, + from_str_using_string_unescape_buffer, +}; #[doc(inline)] pub use self::ser::to_slice; #[cfg(feature = "heapless")] diff --git a/src/ser/mod.rs b/src/ser/mod.rs index cbec8a109..286cf8349 100644 --- a/src/ser/mod.rs +++ b/src/ser/mod.rs @@ -714,7 +714,7 @@ mod tests { assert_eq!( &*crate::to_string::<_, N>(&Temperature { - temperature: -2.3456789012345e-23 + temperature: -2.3456788e-23 }) .unwrap(), r#"{"temperature":-2.3456788e-23}"# @@ -870,7 +870,7 @@ mod tests { { let mut aux: String<{ N }> = String::new(); write!(aux, "{:.2}", self.0).unwrap(); - serializer.serialize_bytes(&aux.as_bytes()) + serializer.serialize_bytes(aux.as_bytes()) } } @@ -880,7 +880,7 @@ mod tests { let sd2 = SimpleDecimal(0.000); assert_eq!(&*crate::to_string::<_, N>(&sd2).unwrap(), r#"0.00"#); - let sd3 = SimpleDecimal(22222.777777); + let sd3 = SimpleDecimal(22222.78); assert_eq!(&*crate::to_string::<_, N>(&sd3).unwrap(), r#"22222.78"#); } } From c6e7e385d137ff2199f1824e5edf478c0b838c17 Mon Sep 17 00:00:00 2001 From: Samuel Hicks Date: Wed, 24 Jul 2024 19:07:58 +0100 Subject: [PATCH 02/13] Using `r#""#` syntax for char unit tests --- src/de/mod.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/de/mod.rs b/src/de/mod.rs index 10d3ff24a..4d09d5858 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -937,18 +937,18 @@ mod tests { #[test] fn char() { - assert_eq!(crate::from_str("\"n\""), Ok(('n', 3))); - assert_eq!(crate::from_str("\"\\\"\""), Ok(('"', 4))); - assert_eq!(crate::from_str("\"\\\\\""), Ok(('\\', 4))); - assert_eq!(crate::from_str("\"/\""), Ok(('/', 3))); - assert_eq!(crate::from_str("\"\\b\""), Ok(('\x08', 4))); - assert_eq!(crate::from_str("\"\\f\""), Ok(('\x0C', 4))); - assert_eq!(crate::from_str("\"\\n\""), Ok(('\n', 4))); - assert_eq!(crate::from_str("\"\\r\""), Ok(('\r', 4))); - assert_eq!(crate::from_str("\"\\t\""), Ok(('\t', 4))); - 
assert_eq!(crate::from_str("\"\\u000b\""), Ok(('\x0B', 8))); - assert_eq!(crate::from_str("\"\\u000B\""), Ok(('\x0B', 8))); - assert_eq!(crate::from_str("\"\u{3A3}\""), Ok(('\u{3A3}', 4))); + assert_eq!(crate::from_str(r#""n""#), Ok(('n', 3))); + assert_eq!(crate::from_str(r#""\"""#), Ok(('"', 4))); + assert_eq!(crate::from_str(r#""\\""#), Ok(('\\', 4))); + assert_eq!(crate::from_str(r#""/""#), Ok(('/', 3))); + assert_eq!(crate::from_str(r#""\b""#), Ok(('\x08', 4))); + assert_eq!(crate::from_str(r#""\f""#), Ok(('\x0C', 4))); + assert_eq!(crate::from_str(r#""\n""#), Ok(('\n', 4))); + assert_eq!(crate::from_str(r#""\r""#), Ok(('\r', 4))); + assert_eq!(crate::from_str(r#""\t""#), Ok(('\t', 4))); + assert_eq!(crate::from_str(r#""\u000b""#), Ok(('\x0B', 8))); + assert_eq!(crate::from_str(r#""\u000B""#), Ok(('\x0B', 8))); + assert_eq!(crate::from_str(r#""Σ""#), Ok(('Σ', 4))); } #[test] From 35765f4c59dfac07a21c15797411bd9e5a81450e Mon Sep 17 00:00:00 2001 From: Samuel Hicks Date: Wed, 24 Jul 2024 19:33:33 +0100 Subject: [PATCH 03/13] The default unescaping buffer length is now 0, not 128, with an error case specifying if a buffer is required --- src/de/mod.rs | 104 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 77 insertions(+), 27 deletions(-) diff --git a/src/de/mod.rs b/src/de/mod.rs index 4d09d5858..7c510bfce 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -70,6 +70,9 @@ pub enum Error { /// Invalid String Escape Sequence InvalidEscapeSequence, + /// Unescaping and Escaped String requires a buffer + EscapedStringRequiresBuffer, + /// Escaped String length exceeds buffer size EscapedStringIsTooLong, @@ -850,7 +853,13 @@ pub fn from_slice<'a, T>(v: &'a [u8]) -> Result<(T, usize)> where T: de::Deserialize<'a>, { - from_slice_using_string_unescape_buffer(v, &mut [0; 128]) + from_slice_using_string_unescape_buffer(v, &mut []).map_err(|error| { + if let Error::EscapedStringIsTooLong = error { + Error::EscapedStringRequiresBuffer + } else { + error + } 
+ }) } /// Deserializes an instance of type T from a string of JSON text, using the provided buffer to unescape strings @@ -937,18 +946,24 @@ mod tests { #[test] fn char() { - assert_eq!(crate::from_str(r#""n""#), Ok(('n', 3))); - assert_eq!(crate::from_str(r#""\"""#), Ok(('"', 4))); - assert_eq!(crate::from_str(r#""\\""#), Ok(('\\', 4))); - assert_eq!(crate::from_str(r#""/""#), Ok(('/', 3))); - assert_eq!(crate::from_str(r#""\b""#), Ok(('\x08', 4))); - assert_eq!(crate::from_str(r#""\f""#), Ok(('\x0C', 4))); - assert_eq!(crate::from_str(r#""\n""#), Ok(('\n', 4))); - assert_eq!(crate::from_str(r#""\r""#), Ok(('\r', 4))); - assert_eq!(crate::from_str(r#""\t""#), Ok(('\t', 4))); - assert_eq!(crate::from_str(r#""\u000b""#), Ok(('\x0B', 8))); - assert_eq!(crate::from_str(r#""\u000B""#), Ok(('\x0B', 8))); - assert_eq!(crate::from_str(r#""Σ""#), Ok(('Σ', 4))); + fn from_str_test<'de, T: serde::Deserialize<'de>>( + s: &'de str, + ) -> super::Result<(T, usize)> { + crate::from_str_using_string_unescape_buffer(s, &mut [0; 8]) + } + + assert_eq!(from_str_test(r#""n""#), Ok(('n', 3))); + assert_eq!(from_str_test(r#""\"""#), Ok(('"', 4))); + assert_eq!(from_str_test(r#""\\""#), Ok(('\\', 4))); + assert_eq!(from_str_test(r#""/""#), Ok(('/', 3))); + assert_eq!(from_str_test(r#""\b""#), Ok(('\x08', 4))); + assert_eq!(from_str_test(r#""\f""#), Ok(('\x0C', 4))); + assert_eq!(from_str_test(r#""\n""#), Ok(('\n', 4))); + assert_eq!(from_str_test(r#""\r""#), Ok(('\r', 4))); + assert_eq!(from_str_test(r#""\t""#), Ok(('\t', 4))); + assert_eq!(from_str_test(r#""\u000b""#), Ok(('\x0B', 8))); + assert_eq!(from_str_test(r#""\u000B""#), Ok(('\x0B', 8))); + assert_eq!(from_str_test(r#""Σ""#), Ok(('Σ', 4))); } #[test] @@ -963,41 +978,76 @@ mod tests { core::str::FromStr::from_str(s).expect("Failed to create test string") } + fn from_str_test<'de, T: serde::Deserialize<'de>>( + s: &'de str, + ) -> super::Result<(T, usize)> { + crate::from_str_using_string_unescape_buffer(s, &mut [0; 16]) + } + 
// escaped " in the string content + assert_eq!(from_str_test(r#" "foo\"bar" "#), Ok((s(r#"foo"bar"#), 12))); assert_eq!( - crate::from_str(r#" "foo\"bar" "#), - Ok((s(r#"foo"bar"#), 12)) - ); - assert_eq!( - crate::from_str(r#" "foo\\\"bar" "#), + from_str_test(r#" "foo\\\"bar" "#), Ok((s(r#"foo\"bar"#), 14)) ); assert_eq!( - crate::from_str(r#" "foo\"\"bar" "#), + from_str_test(r#" "foo\"\"bar" "#), Ok((s(r#"foo""bar"#), 14)) ); - assert_eq!(crate::from_str(r#" "\"bar" "#), Ok((s(r#""bar"#), 9))); - assert_eq!(crate::from_str(r#" "foo\"" "#), Ok((s(r#"foo""#), 9))); - assert_eq!(crate::from_str(r#" "\"" "#), Ok((s(r#"""#), 6))); + assert_eq!(from_str_test(r#" "\"bar" "#), Ok((s(r#""bar"#), 9))); + assert_eq!(from_str_test(r#" "foo\"" "#), Ok((s(r#"foo""#), 9))); + assert_eq!(from_str_test(r#" "\"" "#), Ok((s(r#"""#), 6))); // non-excaped " preceded by backslashes assert_eq!( - crate::from_str(r#" "foo bar\\" "#), + from_str_test(r#" "foo bar\\" "#), Ok((s(r#"foo bar\"#), 13)) ); assert_eq!( - crate::from_str(r#" "foo bar\\\\" "#), + from_str_test(r#" "foo bar\\\\" "#), Ok((s(r#"foo bar\\"#), 15)) ); assert_eq!( - crate::from_str(r#" "foo bar\\\\\\" "#), + from_str_test(r#" "foo bar\\\\\\" "#), Ok((s(r#"foo bar\\\"#), 17)) ); assert_eq!( - crate::from_str(r#" "foo bar\\\\\\\\" "#), + from_str_test(r#" "foo bar\\\\\\\\" "#), Ok((s(r#"foo bar\\\\"#), 19)) ); - assert_eq!(crate::from_str(r#" "\\" "#), Ok((s(r#"\"#), 6))); + assert_eq!(from_str_test(r#" "\\" "#), Ok((s(r#"\"#), 6))); + } + + #[test] + fn tuple_of_str() { + fn s(s: &'static str) -> heapless::String<1024> { + core::str::FromStr::from_str(s).expect("Failed to create test string") + } + + fn from_str_test<'de, T: serde::Deserialize<'de>>( + s: &'de str, + ) -> super::Result<(T, usize)> { + crate::from_str_using_string_unescape_buffer(s, &mut [0; 16]) + } + + // The combined length of the first and third strings are longer than the buffer, but that's OK, + // as escaped strings are deserialized into owned 
str types, e.g. `heapless::String`. + // The second string is longer than the buffer, but that's OK, as strings which aren't escaped + // are deserialized as str's borrowed from the input + + assert_eq!( + from_str_test( + r#" [ "AAAAAAAAAAAA\n", "BBBBBBBBBBBBBBBBBBBBBBBB", "CCCCCCCCCCCC\n" ] "# + ), + Ok(( + ( + s("AAAAAAAAAAAA\n"), + "BBBBBBBBBBBBBBBBBBBBBBBB", + s("CCCCCCCCCCCC\n") + ), + 68 + )) + ); } #[test] From b8fab524d0e853d85f68cbdb261ea661d14456dc Mon Sep 17 00:00:00 2001 From: Samuel Hicks Date: Wed, 24 Jul 2024 19:45:53 +0100 Subject: [PATCH 04/13] Improved function names `from_str_using_string_unescape_buffer` => `from_str_escaped` `from_slice_using_string_unescape_buffer` => `from_slice_escaped` --- src/de/mod.rs | 17 +++++++---------- src/lib.rs | 5 +---- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/src/de/mod.rs b/src/de/mod.rs index 7c510bfce..fbf596588 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -833,7 +833,7 @@ impl fmt::Display for Error { /// Deserializes an instance of type `T` from bytes of JSON text, using the provided buffer to unescape strings /// Returns the value and the number of bytes consumed in the process -pub fn from_slice_using_string_unescape_buffer<'a, T>( +pub fn from_slice_escaped<'a, T>( v: &'a [u8], string_unescape_buffer: &mut [u8], ) -> Result<(T, usize)> @@ -853,7 +853,7 @@ pub fn from_slice<'a, T>(v: &'a [u8]) -> Result<(T, usize)> where T: de::Deserialize<'a>, { - from_slice_using_string_unescape_buffer(v, &mut []).map_err(|error| { + from_slice_escaped(v, &mut []).map_err(|error| { if let Error::EscapedStringIsTooLong = error { Error::EscapedStringRequiresBuffer } else { @@ -863,14 +863,11 @@ where } /// Deserializes an instance of type T from a string of JSON text, using the provided buffer to unescape strings -pub fn from_str_using_string_unescape_buffer<'a, T>( - s: &'a str, - string_unescape_buffer: &mut [u8], -) -> Result<(T, usize)> +pub fn from_str_escaped<'a, T>(s: &'a str, 
string_unescape_buffer: &mut [u8]) -> Result<(T, usize)> where T: de::Deserialize<'a>, { - from_slice_using_string_unescape_buffer(s.as_bytes(), string_unescape_buffer) + from_slice_escaped(s.as_bytes(), string_unescape_buffer) } /// Deserializes an instance of type T from a string of JSON text @@ -949,7 +946,7 @@ mod tests { fn from_str_test<'de, T: serde::Deserialize<'de>>( s: &'de str, ) -> super::Result<(T, usize)> { - crate::from_str_using_string_unescape_buffer(s, &mut [0; 8]) + crate::from_str_escaped(s, &mut [0; 8]) } assert_eq!(from_str_test(r#""n""#), Ok(('n', 3))); @@ -981,7 +978,7 @@ mod tests { fn from_str_test<'de, T: serde::Deserialize<'de>>( s: &'de str, ) -> super::Result<(T, usize)> { - crate::from_str_using_string_unescape_buffer(s, &mut [0; 16]) + crate::from_str_escaped(s, &mut [0; 16]) } // escaped " in the string content @@ -1027,7 +1024,7 @@ mod tests { fn from_str_test<'de, T: serde::Deserialize<'de>>( s: &'de str, ) -> super::Result<(T, usize)> { - crate::from_str_using_string_unescape_buffer(s, &mut [0; 16]) + crate::from_str_escaped(s, &mut [0; 16]) } // The combined length of the first and third strings are longer than the buffer, but that's OK, diff --git a/src/lib.rs b/src/lib.rs index 65a437a4b..14a2ad032 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -65,10 +65,7 @@ pub mod de; pub mod ser; #[doc(inline)] -pub use self::de::{ - from_slice, from_slice_using_string_unescape_buffer, from_str, - from_str_using_string_unescape_buffer, -}; +pub use self::de::{from_slice, from_slice_escaped, from_str, from_str_escaped}; #[doc(inline)] pub use self::ser::to_slice; #[cfg(feature = "heapless")] From 4c6de3d4a1e38009e7616c3332b778b3a8e74462 Mon Sep 17 00:00:00 2001 From: Samuel Hicks Date: Wed, 24 Jul 2024 20:25:10 +0100 Subject: [PATCH 05/13] Using manual implementation of `<[u8]>::split_first_chunk::<4>` --- src/de/mod.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/de/mod.rs b/src/de/mod.rs index 
752cade72..067e57658 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -506,10 +506,17 @@ impl<'a, 'de, 's> de::Deserializer<'de> for &'a mut Deserializer<'de, 's> { Some(b'r') => b'\r', Some(b't') => b'\t', Some(b'u') => { + // TODO - Replace with `<[u8]>::split_first_chunk::<4>` once MSRV >= 1.77 + fn split_first_slice( + bytes: &[u8], + len: usize, + ) -> Option<(&[u8], &[u8])> + { + Some((bytes.get(..len)?, bytes.get(len..)?)) + } + let (escape_sequence, remaining_escaped_string_bytes) = - escaped_string_bytes - .as_slice() - .split_first_chunk::<4>() + split_first_slice(escaped_string_bytes.as_slice(), 4) .ok_or(Error::InvalidEscapeSequence)?; escaped_string_bytes = From 01418368a540f05f05bf7cff0576d042c9893614 Mon Sep 17 00:00:00 2001 From: Samuel Hicks Date: Thu, 25 Jul 2024 18:22:16 +0100 Subject: [PATCH 06/13] `from_slice` and `from_str` do not attempt to unescape strings --- src/de/mod.rs | 197 +++++++++++++++++++++++++------------------------- 1 file changed, 100 insertions(+), 97 deletions(-) diff --git a/src/de/mod.rs b/src/de/mod.rs index 067e57658..d7618a695 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -71,9 +71,6 @@ pub enum Error { /// Invalid String Escape Sequence InvalidEscapeSequence, - /// Unescaping and Escaped String requires a buffer - EscapedStringRequiresBuffer, - /// Escaped String length exceeds buffer size EscapedStringIsTooLong, @@ -102,12 +99,16 @@ impl serde::de::StdError for Error {} pub struct Deserializer<'b, 's> { slice: &'b [u8], index: usize, - string_unescape_buffer: &'s mut [u8], + string_unescape_buffer: Option<&'s mut [u8]>, } impl<'a, 's> Deserializer<'a, 's> { - /// Create a new `Deserializer` - pub fn new(slice: &'a [u8], string_unescape_buffer: &'s mut [u8]) -> Deserializer<'a, 's> { + /// Create a new `Deserializer`, optionally with a buffer to use to unescape strings. + /// If not present, strings are not unescaped. 
+ pub fn new( + slice: &'a [u8], + string_unescape_buffer: Option<&'s mut [u8]>, + ) -> Deserializer<'a, 's> { Deserializer { slice, index: 0, @@ -193,6 +194,12 @@ impl<'a, 's> Deserializer<'a, 's> { } fn parse_str(&mut self) -> Result<&'a str> { + if self.parse_whitespace().ok_or(Error::EofWhileParsingValue)? == b'"' { + self.eat_char(); + } else { + return Err(Error::InvalidType); + } + let start = self.index; loop { match self.peek() { @@ -478,97 +485,89 @@ impl<'a, 'de, 's> de::Deserializer<'de> for &'a mut Deserializer<'de, 's> { where V: Visitor<'de>, { - let peek = self.parse_whitespace().ok_or(Error::EofWhileParsingValue)?; + let s = self.parse_str()?; - match peek { - b'"' => { - self.eat_char(); + if let Some(string_unescape_buffer) = self.string_unescape_buffer.as_deref_mut() { + if s.as_bytes().contains(&b'\\') { + let mut string_unescape_buffer_slots = string_unescape_buffer.iter_mut(); - let s = self.parse_str()?; - - if s.as_bytes().contains(&b'\\') { - let mut string_unescape_buffer_slots = self.string_unescape_buffer.iter_mut(); - - // We've already checked that the string is valid UTF-8, so the only b'\\' is the start of escape sequence - let mut escaped_string_bytes = s.as_bytes().iter(); - - loop { - match escaped_string_bytes.next().copied() { - None => break, - Some(b'\\') => { - let unescaped_byte = match escaped_string_bytes.next() { - Some(b'"') => b'"', - Some(b'\\') => b'\\', - Some(b'/') => b'/', - Some(b'b') => 0x8, - Some(b'f') => 0xC, - Some(b'n') => b'\n', - Some(b'r') => b'\r', - Some(b't') => b'\t', - Some(b'u') => { - // TODO - Replace with `<[u8]>::split_first_chunk::<4>` once MSRV >= 1.77 - fn split_first_slice( - bytes: &[u8], - len: usize, - ) -> Option<(&[u8], &[u8])> - { - Some((bytes.get(..len)?, bytes.get(len..)?)) - } - - let (escape_sequence, remaining_escaped_string_bytes) = - split_first_slice(escaped_string_bytes.as_slice(), 4) - .ok_or(Error::InvalidEscapeSequence)?; - - escaped_string_bytes = - 
remaining_escaped_string_bytes.iter(); - - let unescaped_char = core::str::from_utf8(escape_sequence) - .ok() - .and_then(|escape_sequence| { - u32::from_str_radix(escape_sequence, 16).ok() - }) - .and_then(char::from_u32) - .ok_or(Error::InvalidEscapeSequence)?; + // We've already checked that the string is valid UTF-8, so the only b'\\' is the start of escape sequence + let mut escaped_string_bytes = s.as_bytes().iter(); + + loop { + match escaped_string_bytes.next().copied() { + None => break, + Some(b'\\') => { + let unescaped_byte = match escaped_string_bytes.next() { + Some(b'"') => b'"', + Some(b'\\') => b'\\', + Some(b'/') => b'/', + Some(b'b') => 0x8, + Some(b'f') => 0xC, + Some(b'n') => b'\n', + Some(b'r') => b'\r', + Some(b't') => b'\t', + Some(b'u') => { + // TODO - Replace with `<[u8]>::split_first_chunk::<4>` once MSRV >= 1.77 + fn split_first_slice( + bytes: &[u8], + len: usize, + ) -> Option<(&[u8], &[u8])> + { + Some((bytes.get(..len)?, bytes.get(len..)?)) + } - for &unescaped_byte in - unescaped_char.encode_utf8(&mut [0; 4]).as_bytes() - { - *string_unescape_buffer_slots - .next() - .ok_or(Error::EscapedStringIsTooLong)? = - unescaped_byte; - } + let (escape_sequence, remaining_escaped_string_bytes) = + split_first_slice(escaped_string_bytes.as_slice(), 4) + .ok_or(Error::InvalidEscapeSequence)?; - continue; + escaped_string_bytes = remaining_escaped_string_bytes.iter(); + + let unescaped_char = core::str::from_utf8(escape_sequence) + .ok() + .and_then(|escape_sequence| { + u32::from_str_radix(escape_sequence, 16).ok() + }) + .and_then(char::from_u32) + .ok_or(Error::InvalidEscapeSequence)?; + + for &unescaped_byte in + unescaped_char.encode_utf8(&mut [0; 4]).as_bytes() + { + *string_unescape_buffer_slots + .next() + .ok_or(Error::EscapedStringIsTooLong)? = unescaped_byte; } - _ => return Err(Error::InvalidEscapeSequence), - }; - *string_unescape_buffer_slots - .next() - .ok_or(Error::EscapedStringIsTooLong)? 
= unescaped_byte; - } - Some(c) => { - *string_unescape_buffer_slots - .next() - .ok_or(Error::EscapedStringIsTooLong)? = c; - } + continue; + } + _ => return Err(Error::InvalidEscapeSequence), + }; + + *string_unescape_buffer_slots + .next() + .ok_or(Error::EscapedStringIsTooLong)? = unescaped_byte; + } + Some(c) => { + *string_unescape_buffer_slots + .next() + .ok_or(Error::EscapedStringIsTooLong)? = c; } } + } - let remaining_length = string_unescape_buffer_slots.len(); - let unescaped_string_length = - self.string_unescape_buffer.len() - remaining_length; + let remaining_length = string_unescape_buffer_slots.len(); + let unescaped_string_length = string_unescape_buffer.len() - remaining_length; - visitor.visit_str( - str::from_utf8(&self.string_unescape_buffer[..unescaped_string_length]) - .map_err(|_| Error::InvalidUnicodeCodePoint)?, - ) - } else { - visitor.visit_borrowed_str(s) - } + visitor.visit_str( + str::from_utf8(&string_unescape_buffer[..unescaped_string_length]) + .map_err(|_| Error::InvalidUnicodeCodePoint)?, + ) + } else { + visitor.visit_borrowed_str(s) } - _ => Err(Error::InvalidType), + } else { + visitor.visit_borrowed_str(s) } } @@ -839,11 +838,9 @@ impl fmt::Display for Error { } } -/// Deserializes an instance of type `T` from bytes of JSON text, using the provided buffer to unescape strings -/// Returns the value and the number of bytes consumed in the process -pub fn from_slice_escaped<'a, T>( +fn from_slice_maybe_escaped<'a, T>( v: &'a [u8], - string_unescape_buffer: &mut [u8], + string_unescape_buffer: Option<&mut [u8]>, ) -> Result<(T, usize)> where T: de::Deserialize<'a>, @@ -855,19 +852,25 @@ where Ok((value, length)) } +/// Deserializes an instance of type `T` from bytes of JSON text, using the provided buffer to unescape strings +/// Returns the value and the number of bytes consumed in the process +pub fn from_slice_escaped<'a, T>( + v: &'a [u8], + string_unescape_buffer: &mut [u8], +) -> Result<(T, usize)> +where + T: 
de::Deserialize<'a>, +{ + from_slice_maybe_escaped(v, Some(string_unescape_buffer)) +} + /// Deserializes an instance of type `T` from bytes of JSON text /// Returns the value and the number of bytes consumed in the process pub fn from_slice<'a, T>(v: &'a [u8]) -> Result<(T, usize)> where T: de::Deserialize<'a>, { - from_slice_escaped(v, &mut []).map_err(|error| { - if let Error::EscapedStringIsTooLong = error { - Error::EscapedStringRequiresBuffer - } else { - error - } - }) + from_slice_maybe_escaped(v, None) } /// Deserializes an instance of type T from a string of JSON text, using the provided buffer to unescape strings From 6a7102e9acfb3a6046f42ce6a558886693c4ec1b Mon Sep 17 00:00:00 2001 From: Samuel Hicks Date: Thu, 25 Jul 2024 19:12:34 +0100 Subject: [PATCH 07/13] Created new struct `EscapedString` which borrows escaped strings from the input when deserializing, and has an iterator of unescaped fragments. Useful for zero-copy deserialization. --- src/de/mod.rs | 137 +++++++++++++++++------------------ src/lib.rs | 5 +- src/ser/mod.rs | 191 ++++++++++++++++++++++++++++++++++++++++++++++++- src/str.rs | 139 +++++++++++++++++++++++++++++++++++ 4 files changed, 395 insertions(+), 77 deletions(-) create mode 100644 src/str.rs diff --git a/src/de/mod.rs b/src/de/mod.rs index d7618a695..bf4c8628d 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -95,6 +95,14 @@ pub enum Error { impl serde::de::StdError for Error {} +impl From for Error { + fn from(error: crate::str::StringUnescapeError) -> Self { + match error { + crate::str::StringUnescapeError::InvalidEscapeSequence => Self::InvalidEscapeSequence, + } + } +} + /// A structure that deserializes Rust values from JSON in a buffer. 
pub struct Deserializer<'b, 's> { slice: &'b [u8], @@ -485,89 +493,43 @@ impl<'a, 'de, 's> de::Deserializer<'de> for &'a mut Deserializer<'de, 's> { where V: Visitor<'de>, { - let s = self.parse_str()?; + let escaped_string = self.parse_str()?; if let Some(string_unescape_buffer) = self.string_unescape_buffer.as_deref_mut() { - if s.as_bytes().contains(&b'\\') { - let mut string_unescape_buffer_slots = string_unescape_buffer.iter_mut(); + if escaped_string.as_bytes().contains(&b'\\') { + let mut string_unescape_buffer_write_position = 0; - // We've already checked that the string is valid UTF-8, so the only b'\\' is the start of escape sequence - let mut escaped_string_bytes = s.as_bytes().iter(); + for fragment in crate::str::unescape_fragments(escaped_string) { + let char_encode_buffer = &mut [0; 4]; - loop { - match escaped_string_bytes.next().copied() { - None => break, - Some(b'\\') => { - let unescaped_byte = match escaped_string_bytes.next() { - Some(b'"') => b'"', - Some(b'\\') => b'\\', - Some(b'/') => b'/', - Some(b'b') => 0x8, - Some(b'f') => 0xC, - Some(b'n') => b'\n', - Some(b'r') => b'\r', - Some(b't') => b'\t', - Some(b'u') => { - // TODO - Replace with `<[u8]>::split_first_chunk::<4>` once MSRV >= 1.77 - fn split_first_slice( - bytes: &[u8], - len: usize, - ) -> Option<(&[u8], &[u8])> - { - Some((bytes.get(..len)?, bytes.get(len..)?)) - } - - let (escape_sequence, remaining_escaped_string_bytes) = - split_first_slice(escaped_string_bytes.as_slice(), 4) - .ok_or(Error::InvalidEscapeSequence)?; - - escaped_string_bytes = remaining_escaped_string_bytes.iter(); - - let unescaped_char = core::str::from_utf8(escape_sequence) - .ok() - .and_then(|escape_sequence| { - u32::from_str_radix(escape_sequence, 16).ok() - }) - .and_then(char::from_u32) - .ok_or(Error::InvalidEscapeSequence)?; - - for &unescaped_byte in - unescaped_char.encode_utf8(&mut [0; 4]).as_bytes() - { - *string_unescape_buffer_slots - .next() - .ok_or(Error::EscapedStringIsTooLong)? 
= unescaped_byte; - } - - continue; - } - _ => return Err(Error::InvalidEscapeSequence), - }; - - *string_unescape_buffer_slots - .next() - .ok_or(Error::EscapedStringIsTooLong)? = unescaped_byte; + let unescaped_bytes = match fragment? { + crate::str::EscapedStringFragment::NotEscaped(fragment) => { + fragment.as_bytes() } - Some(c) => { - *string_unescape_buffer_slots - .next() - .ok_or(Error::EscapedStringIsTooLong)? = c; + crate::str::EscapedStringFragment::Escaped(c) => { + c.encode_utf8(char_encode_buffer).as_bytes() } - } - } + }; + + string_unescape_buffer[string_unescape_buffer_write_position..] + .get_mut(..unescaped_bytes.len()) + .ok_or(Error::EscapedStringIsTooLong)? + .copy_from_slice(unescaped_bytes); - let remaining_length = string_unescape_buffer_slots.len(); - let unescaped_string_length = string_unescape_buffer.len() - remaining_length; + string_unescape_buffer_write_position += unescaped_bytes.len(); + } visitor.visit_str( - str::from_utf8(&string_unescape_buffer[..unescaped_string_length]) - .map_err(|_| Error::InvalidUnicodeCodePoint)?, + str::from_utf8( + &string_unescape_buffer[..string_unescape_buffer_write_position], + ) + .map_err(|_| Error::InvalidUnicodeCodePoint)?, ) } else { - visitor.visit_borrowed_str(s) + visitor.visit_borrowed_str(escaped_string) } } else { - visitor.visit_borrowed_str(s) + visitor.visit_borrowed_str(escaped_string) } } @@ -638,11 +600,34 @@ impl<'a, 'de, 's> de::Deserializer<'de> for &'a mut Deserializer<'de, 's> { } /// Unsupported. We can’t parse newtypes because we don’t know the underlying type. 
- fn deserialize_newtype_struct(self, _name: &'static str, visitor: V) -> Result + fn deserialize_newtype_struct(self, name: &'static str, visitor: V) -> Result where V: Visitor<'de>, { - visitor.visit_newtype_struct(self) + if name == crate::str::EscapedStr::NAME { + struct EscapedStringDeserializer<'a, 'de, 's>(&'a mut Deserializer<'de, 's>); + + impl<'a, 'de, 's> serde::Deserializer<'de> for EscapedStringDeserializer<'a, 'de, 's> { + type Error = Error; + + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_borrowed_str(self.0.parse_str()?) + } + + serde::forward_to_deserialize_any! { + bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string + bytes byte_buf option unit unit_struct newtype_struct seq tuple + tuple_struct map struct enum identifier ignored_any + } + } + + visitor.visit_newtype_struct(EscapedStringDeserializer(self)) + } else { + visitor.visit_newtype_struct(self) + } } fn deserialize_seq(self, visitor: V) -> Result @@ -1058,6 +1043,14 @@ mod tests { ); } + #[test] + fn escaped_str() { + assert_eq!( + crate::from_str(r#""Hello\nWorld""#), + Ok((crate::str::EscapedStr::new(r#"Hello\nWorld"#).unwrap(), 14)) + ); + } + #[test] fn struct_bool() { #[derive(Debug, Deserialize, PartialEq)] diff --git a/src/lib.rs b/src/lib.rs index 14a2ad032..c84ae1b01 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -55,14 +55,15 @@ //! This crate is guaranteed to compile on stable Rust 1.62.0 and up. It *might* compile with older //! versions but that may change in any new patch release. 
-#![deny(missing_docs)] +// #![deny(missing_docs)] #![deny(rust_2018_compatibility)] #![deny(rust_2018_idioms)] -#![deny(warnings)] +// #![deny(warnings)] #![cfg_attr(not(feature = "std"), no_std)] pub mod de; pub mod ser; +pub mod str; #[doc(inline)] pub use self::de::{from_slice, from_slice_escaped, from_str, from_str_escaped}; diff --git a/src/ser/mod.rs b/src/ser/mod.rs index 977509d7f..3756cb83b 100644 --- a/src/ser/mod.rs +++ b/src/ser/mod.rs @@ -363,11 +363,187 @@ impl<'a, 'b: 'a> ser::Serializer for &'a mut Serializer<'b> { self.serialize_str(variant) } - fn serialize_newtype_struct(self, _name: &'static str, value: &T) -> Result + fn serialize_newtype_struct(self, name: &'static str, value: &T) -> Result where T: ser::Serialize + ?Sized, { - value.serialize(self) + if name == crate::str::EscapedStr::NAME { + struct EscapedStringSerializer<'a, 'b>(&'a mut Serializer<'b>); + + impl<'a, 'b: 'a> serde::Serializer for EscapedStringSerializer<'a, 'b> { + type Ok = (); + type Error = Error; + + type SerializeSeq = serde::ser::Impossible<(), Error>; + type SerializeTuple = serde::ser::Impossible<(), Error>; + type SerializeTupleStruct = serde::ser::Impossible<(), Error>; + type SerializeTupleVariant = serde::ser::Impossible<(), Error>; + type SerializeMap = serde::ser::Impossible<(), Error>; + type SerializeStruct = serde::ser::Impossible<(), Error>; + type SerializeStructVariant = serde::ser::Impossible<(), Error>; + + fn serialize_bool(self, _v: bool) -> Result { + unreachable!() + } + + fn serialize_i8(self, _v: i8) -> Result { + unreachable!() + } + + fn serialize_i16(self, _v: i16) -> Result { + unreachable!() + } + + fn serialize_i32(self, _v: i32) -> Result { + unreachable!() + } + + fn serialize_i64(self, _v: i64) -> Result { + unreachable!() + } + + fn serialize_u8(self, _v: u8) -> Result { + unreachable!() + } + + fn serialize_u16(self, _v: u16) -> Result { + unreachable!() + } + + fn serialize_u32(self, _v: u32) -> Result { + unreachable!() + } + + fn 
serialize_u64(self, _v: u64) -> Result { + unreachable!() + } + + fn serialize_f32(self, _v: f32) -> Result { + unreachable!() + } + + fn serialize_f64(self, _v: f64) -> Result { + unreachable!() + } + + fn serialize_char(self, _v: char) -> Result { + unreachable!() + } + + fn serialize_str(self, v: &str) -> Result { + v.bytes().try_for_each(|c| self.0.push(c)) + } + + fn serialize_bytes(self, _v: &[u8]) -> Result { + unreachable!() + } + + fn serialize_none(self) -> Result { + unreachable!() + } + + fn serialize_some(self, _value: &T) -> Result { + unreachable!() + } + + fn serialize_unit(self) -> Result { + unreachable!() + } + + fn serialize_unit_struct(self, _name: &'static str) -> Result { + unreachable!() + } + + fn serialize_unit_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + ) -> Result { + unreachable!() + } + + fn serialize_newtype_struct( + self, + _name: &'static str, + _value: &T, + ) -> Result { + unreachable!() + } + + fn serialize_newtype_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _value: &T, + ) -> Result { + unreachable!() + } + + fn serialize_seq(self, _len: Option) -> Result { + unreachable!() + } + + fn serialize_tuple(self, _len: usize) -> Result { + unreachable!() + } + + fn serialize_tuple_struct( + self, + _name: &'static str, + _len: usize, + ) -> Result { + unreachable!() + } + + fn serialize_tuple_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, + ) -> Result { + unreachable!() + } + + fn serialize_map(self, _len: Option) -> Result { + unreachable!() + } + + fn serialize_struct( + self, + _name: &'static str, + _len: usize, + ) -> Result { + unreachable!() + } + + fn serialize_struct_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, + ) -> Result { + unreachable!() + } + + fn collect_str(self, _value: &T) -> Result { + unreachable!() + } + 
} + + self.push(b'"')?; + + value.serialize(EscapedStringSerializer(self))?; + + self.push(b'"')?; + + Ok(()) + } else { + value.serialize(self) + } } fn serialize_newtype_variant( @@ -647,11 +823,20 @@ mod tests { r#"" \u001D ""# ); assert_eq!( - &*crate::to_string::<_, N>(" \u{001f} ").unwrap(), + crate::to_string::<_, N>(" \u{001f} ").unwrap(), r#"" \u001F ""# ); } + #[test] + fn escaped_str() { + assert_eq!( + crate::to_string::<_, N>(&crate::str::EscapedStr::new(r#"Hello\\nWorld"#).unwrap()) + .unwrap(), + r#""Hello\\nWorld""# + ); + } + #[test] fn struct_bool() { #[derive(Serialize)] diff --git a/src/str.rs b/src/str.rs new file mode 100644 index 000000000..05870f769 --- /dev/null +++ b/src/str.rs @@ -0,0 +1,139 @@ +#[derive(Debug)] +pub enum EscapedStringFragment<'a> { + NotEscaped(&'a str), + Escaped(char), +} + +#[derive(Debug)] +pub enum StringUnescapeError { + InvalidEscapeSequence, +} + +fn unescape_next_fragment( + escaped_string: &str, +) -> Result<(EscapedStringFragment<'_>, &str), StringUnescapeError> { + Ok(if let Some(rest) = escaped_string.strip_prefix('\\') { + let mut escaped_string_chars = rest.chars(); + + let unescaped_char = match escaped_string_chars.next() { + Some('"') => '"', + Some('\\') => '\\', + Some('/') => '/', + Some('b') => '\x08', + Some('f') => '\x0C', + Some('n') => '\n', + Some('r') => '\r', + Some('t') => '\t', + Some('u') => { + fn split_first_slice(s: &str, len: usize) -> Option<(&str, &str)> { + Some((s.get(..len)?, s.get(len..)?)) + } + + let (escape_sequence, remaining_escaped_string_chars) = + split_first_slice(escaped_string_chars.as_str(), 4) + .ok_or(StringUnescapeError::InvalidEscapeSequence)?; + + escaped_string_chars = remaining_escaped_string_chars.chars(); + + u32::from_str_radix(escape_sequence, 16) + .ok() + .and_then(char::from_u32) + .ok_or(StringUnescapeError::InvalidEscapeSequence)? 
+ } + _ => return Err(StringUnescapeError::InvalidEscapeSequence), + }; + + ( + EscapedStringFragment::Escaped(unescaped_char), + escaped_string_chars.as_str(), + ) + } else { + let (fragment, rest) = + escaped_string.split_at(escaped_string.find('\\').unwrap_or(escaped_string.len())); + + (EscapedStringFragment::NotEscaped(fragment), rest) + }) +} + +pub(crate) fn unescape_fragments( + mut escaped_string: &str, +) -> impl Iterator, StringUnescapeError>> { + core::iter::from_fn(move || { + if escaped_string.is_empty() { + None + } else { + Some( + unescape_next_fragment(escaped_string).map(|(fragment, rest)| { + escaped_string = rest; + fragment + }), + ) + } + }) +} + +/// A borrowed escaped string +#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +#[serde(rename = "__serde_json_core_escaped_string__")] +pub struct EscapedStr<'a>(&'a str); + +impl<'a> EscapedStr<'a> { + pub(crate) const NAME: &'static str = "__serde_json_core_escaped_string__"; + + /// Create a new EscapedString, verifying that it's correctly escaped + pub fn new(escaped_string: &'a str) -> Result { + // Check that all fragments are valid + for fragment in unescape_fragments(escaped_string) { + fragment?; + } + + // SAFETY: we've just checked that all fragments are valid + unsafe { Ok(Self::new_unchecked(escaped_string)) } + } + + /// Create a new EscapedString without verifying that it's correctly escaped + /// + /// # Safety + /// + /// escaped_string must be a correctly escaped JSON string without the surrounding quotes. 
+ pub unsafe fn new_unchecked(escaped_string: &'a str) -> Self { + Self(escaped_string) + } + + pub fn fragments(&self) -> EscapedStringFragmentIter<'a> { + EscapedStringFragmentIter(self.0) + } +} + +pub struct EscapedStringFragmentIter<'a>(&'a str); + +impl<'a> EscapedStringFragmentIter<'a> { + pub fn as_str(&self) -> EscapedStr<'a> { + EscapedStr(self.0) + } +} + +impl<'a> Iterator for EscapedStringFragmentIter<'a> { + type Item = EscapedStringFragment<'a>; + + fn next(&mut self) -> Option { + if self.0.is_empty() { + return None; + } + + let fragment_result = unescape_next_fragment(self.0); + + debug_assert!( + fragment_result.is_ok(), + "{:?} must be valid", + fragment_result + ); + + // In release, if there's been a logic error, return early as it's better than panicing + let (fragment, rest) = fragment_result.ok()?; + + self.0 = rest; + + Some(fragment) + } +} From bb38c68984029dd29182b89c1532241318e6468f Mon Sep 17 00:00:00 2001 From: Samuel Hicks Date: Tue, 30 Jul 2024 14:20:55 +0100 Subject: [PATCH 08/13] `core::str::FromStr::from_str(s)` => `s.parse()` --- src/de/mod.rs | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/de/mod.rs b/src/de/mod.rs index bf4c8628d..ca9153fc7 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -968,7 +968,7 @@ mod tests { assert_eq!(crate::from_str(r#" "👏" "#), Ok(("👏", 8))); fn s(s: &'static str) -> heapless::String<1024> { - core::str::FromStr::from_str(s).expect("Failed to create test string") + s.parse().expect("Failed to create test string") } fn from_str_test<'de, T: serde::Deserialize<'de>>( @@ -1014,7 +1014,7 @@ mod tests { #[test] fn tuple_of_str() { fn s(s: &'static str) -> heapless::String<1024> { - core::str::FromStr::from_str(s).expect("Failed to create test string") + s.parse().expect("Failed to create test string") } fn from_str_test<'de, T: serde::Deserialize<'de>>( @@ -1271,10 +1271,9 @@ mod tests { assert_eq!( crate::from_str::(r#"[10]"#), 
Err(crate::de::Error::CustomErrorWithMessage( - core::str::FromStr::from_str( - "invalid length 1, expected tuple struct Xy with 2 elements" - ) - .unwrap() + "invalid length 1, expected tuple struct Xy with 2 elements" + .parse() + .unwrap() )) ); assert_eq!( @@ -1381,9 +1380,7 @@ mod tests { use serde::de::Error; assert_eq!( crate::de::Error::custom("something bad happened"), - crate::de::Error::CustomErrorWithMessage( - core::str::FromStr::from_str("something bad happened").unwrap() - ) + crate::de::Error::CustomErrorWithMessage("something bad happened".parse().unwrap()) ); } @@ -1393,8 +1390,8 @@ mod tests { use serde::de::Error; assert_eq!( crate::de::Error::custom("0123456789012345678901234567890123456789012345678901234567890123 <- after here the message should be truncated"), - crate::de::Error::CustomErrorWithMessage(core::str::FromStr::from_str( - "0123456789012345678901234567890123456789012345678901234567890123").unwrap() + crate::de::Error::CustomErrorWithMessage( + "0123456789012345678901234567890123456789012345678901234567890123".parse().unwrap() ) ); } From 0350c3fd4c9a88181598161d291a70e02e5e51f5 Mon Sep 17 00:00:00 2001 From: Samuel Hicks Date: Tue, 30 Jul 2024 14:30:22 +0100 Subject: [PATCH 09/13] Simplified EscapedStr, propegating unescaping errors upwards --- src/de/mod.rs | 4 ++-- src/ser/mod.rs | 3 +-- src/str.rs | 58 ++++++-------------------------------------------- 3 files changed, 9 insertions(+), 56 deletions(-) diff --git a/src/de/mod.rs b/src/de/mod.rs index ca9153fc7..91aa62c0e 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -499,7 +499,7 @@ impl<'a, 'de, 's> de::Deserializer<'de> for &'a mut Deserializer<'de, 's> { if escaped_string.as_bytes().contains(&b'\\') { let mut string_unescape_buffer_write_position = 0; - for fragment in crate::str::unescape_fragments(escaped_string) { + for fragment in crate::str::EscapedStr(escaped_string).fragments() { let char_encode_buffer = &mut [0; 4]; let unescaped_bytes = match fragment? 
{ @@ -1047,7 +1047,7 @@ mod tests { fn escaped_str() { assert_eq!( crate::from_str(r#""Hello\nWorld""#), - Ok((crate::str::EscapedStr::new(r#"Hello\nWorld"#).unwrap(), 14)) + Ok((crate::str::EscapedStr(r#"Hello\nWorld"#), 14)) ); } diff --git a/src/ser/mod.rs b/src/ser/mod.rs index 3756cb83b..1c9f0f884 100644 --- a/src/ser/mod.rs +++ b/src/ser/mod.rs @@ -831,8 +831,7 @@ mod tests { #[test] fn escaped_str() { assert_eq!( - crate::to_string::<_, N>(&crate::str::EscapedStr::new(r#"Hello\\nWorld"#).unwrap()) - .unwrap(), + crate::to_string::<_, N>(&crate::str::EscapedStr(r#"Hello\\nWorld"#)).unwrap(), r#""Hello\\nWorld""# ); } diff --git a/src/str.rs b/src/str.rs index 05870f769..dcfd5318d 100644 --- a/src/str.rs +++ b/src/str.rs @@ -55,51 +55,14 @@ fn unescape_next_fragment( }) } -pub(crate) fn unescape_fragments( - mut escaped_string: &str, -) -> impl Iterator, StringUnescapeError>> { - core::iter::from_fn(move || { - if escaped_string.is_empty() { - None - } else { - Some( - unescape_next_fragment(escaped_string).map(|(fragment, rest)| { - escaped_string = rest; - fragment - }), - ) - } - }) -} - /// A borrowed escaped string #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)] #[serde(rename = "__serde_json_core_escaped_string__")] -pub struct EscapedStr<'a>(&'a str); +pub struct EscapedStr<'a>(pub &'a str); impl<'a> EscapedStr<'a> { pub(crate) const NAME: &'static str = "__serde_json_core_escaped_string__"; - /// Create a new EscapedString, verifying that it's correctly escaped - pub fn new(escaped_string: &'a str) -> Result { - // Check that all fragments are valid - for fragment in unescape_fragments(escaped_string) { - fragment?; - } - - // SAFETY: we've just checked that all fragments are valid - unsafe { Ok(Self::new_unchecked(escaped_string)) } - } - - /// Create a new EscapedString without verifying that it's correctly escaped - /// - /// # Safety - /// - /// escaped_string must be a correctly escaped JSON string without the 
surrounding quotes. - pub unsafe fn new_unchecked(escaped_string: &'a str) -> Self { - Self(escaped_string) - } - pub fn fragments(&self) -> EscapedStringFragmentIter<'a> { EscapedStringFragmentIter(self.0) } @@ -114,26 +77,17 @@ impl<'a> EscapedStringFragmentIter<'a> { } impl<'a> Iterator for EscapedStringFragmentIter<'a> { - type Item = EscapedStringFragment<'a>; + type Item = Result, StringUnescapeError>; fn next(&mut self) -> Option { if self.0.is_empty() { return None; } - let fragment_result = unescape_next_fragment(self.0); - - debug_assert!( - fragment_result.is_ok(), - "{:?} must be valid", - fragment_result - ); - - // In release, if there's been a logic error, return early as it's better than panicing - let (fragment, rest) = fragment_result.ok()?; - - self.0 = rest; + Some(unescape_next_fragment(self.0).map(|(fragment, rest)| { + self.0 = rest; - Some(fragment) + fragment + })) } } From c505081d678005ed8a42508e3214894cffb0738d Mon Sep 17 00:00:00 2001 From: Samuel Hicks Date: Tue, 30 Jul 2024 14:31:45 +0100 Subject: [PATCH 10/13] Removed outdated comment --- src/de/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/de/mod.rs b/src/de/mod.rs index 91aa62c0e..4834ab2ff 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -599,7 +599,6 @@ impl<'a, 'de, 's> de::Deserializer<'de> for &'a mut Deserializer<'de, 's> { self.deserialize_unit(visitor) } - /// Unsupported. We can’t parse newtypes because we don’t know the underlying type. 
fn deserialize_newtype_struct(self, name: &'static str, visitor: V) -> Result where V: Visitor<'de>, From d254fc80062c7cbe0aea4f95993c0d389d87df1b Mon Sep 17 00:00:00 2001 From: Samuel Hicks Date: Tue, 30 Jul 2024 14:36:17 +0100 Subject: [PATCH 11/13] Implemented early return instead of indented if statement for `deserialize_str` --- src/de/mod.rs | 56 +++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/src/de/mod.rs b/src/de/mod.rs index 4834ab2ff..ad2145d55 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -495,42 +495,40 @@ impl<'a, 'de, 's> de::Deserializer<'de> for &'a mut Deserializer<'de, 's> { { let escaped_string = self.parse_str()?; - if let Some(string_unescape_buffer) = self.string_unescape_buffer.as_deref_mut() { - if escaped_string.as_bytes().contains(&b'\\') { - let mut string_unescape_buffer_write_position = 0; + // If the unescape buffer is not provided, skip unescaping strings + let Some(string_unescape_buffer) = self.string_unescape_buffer.as_deref_mut() else { + return visitor.visit_borrowed_str(escaped_string); + }; - for fragment in crate::str::EscapedStr(escaped_string).fragments() { - let char_encode_buffer = &mut [0; 4]; + // If the escaped string doesn't contain '\\', it can't have any escaped characters + if !escaped_string.as_bytes().contains(&b'\\') { + return visitor.visit_borrowed_str(escaped_string); + } - let unescaped_bytes = match fragment? { - crate::str::EscapedStringFragment::NotEscaped(fragment) => { - fragment.as_bytes() - } - crate::str::EscapedStringFragment::Escaped(c) => { - c.encode_utf8(char_encode_buffer).as_bytes() - } - }; + let mut string_unescape_buffer_write_position = 0; - string_unescape_buffer[string_unescape_buffer_write_position..] - .get_mut(..unescaped_bytes.len()) - .ok_or(Error::EscapedStringIsTooLong)? 
- .copy_from_slice(unescaped_bytes); + for fragment in crate::str::EscapedStr(escaped_string).fragments() { + let char_encode_buffer = &mut [0; 4]; - string_unescape_buffer_write_position += unescaped_bytes.len(); + let unescaped_bytes = match fragment? { + crate::str::EscapedStringFragment::NotEscaped(fragment) => fragment.as_bytes(), + crate::str::EscapedStringFragment::Escaped(c) => { + c.encode_utf8(char_encode_buffer).as_bytes() } + }; - visitor.visit_str( - str::from_utf8( - &string_unescape_buffer[..string_unescape_buffer_write_position], - ) - .map_err(|_| Error::InvalidUnicodeCodePoint)?, - ) - } else { - visitor.visit_borrowed_str(escaped_string) - } - } else { - visitor.visit_borrowed_str(escaped_string) + string_unescape_buffer[string_unescape_buffer_write_position..] + .get_mut(..unescaped_bytes.len()) + .ok_or(Error::EscapedStringIsTooLong)? + .copy_from_slice(unescaped_bytes); + + string_unescape_buffer_write_position += unescaped_bytes.len(); } + + visitor.visit_str( + str::from_utf8(&string_unescape_buffer[..string_unescape_buffer_write_position]) + .map_err(|_| Error::InvalidUnicodeCodePoint)?, + ) } /// Unsupported. String is not available in no-std. 
From fb0b8da1352b1d1ae475ed1a0b33db5f2c100440 Mon Sep 17 00:00:00 2001 From: Samuel Hicks Date: Tue, 30 Jul 2024 14:44:34 +0100 Subject: [PATCH 12/13] Bumped MSRV to 1.65.0 --- .github/workflows/ci.yml | 2 +- Cargo.toml | 2 +- README.md | 2 +- src/lib.rs | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5fb9aaa35..b127938c0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,7 +22,7 @@ jobs: include: # Test MSRV - - rust: 1.62.0 # keep in sync with manifest rust-version + - rust: 1.65.0 # keep in sync with manifest rust-version TARGET: x86_64-unknown-linux-gnu # Test nightly but don't fail diff --git a/Cargo.toml b/Cargo.toml index 5d924c1eb..30f6740a1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ categories = ["no-std"] description = "serde-json for no_std programs" documentation = "https://docs.rs/serde-json-core" edition = "2018" -rust-version = "1.62.0" # keep in sync with ci, src/lib.rs, and README +rust-version = "1.65.0" # keep in sync with ci, src/lib.rs, and README keywords = ["serde", "json"] license = "MIT OR Apache-2.0" name = "serde-json-core" diff --git a/README.md b/README.md index e63ad91c1..f4c3c2b31 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ This project is developed and maintained by the [rust-embedded-community]. ## Minimum Supported Rust Version (MSRV) -This crate is guaranteed to compile on stable Rust 1.62.0 and up. It *might* +This crate is guaranteed to compile on stable Rust 1.65.0 and up. It *might* compile with older versions but that may change in any new patch release. ## License diff --git a/src/lib.rs b/src/lib.rs index c84ae1b01..c21490053 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -52,7 +52,7 @@ //! //! # Minimum Supported Rust Version (MSRV) //! -//! This crate is guaranteed to compile on stable Rust 1.62.0 and up. It *might* compile with older +//! 
This crate is guaranteed to compile on stable Rust 1.65.0 and up. It *might* compile with older //! versions but that may change in any new patch release. // #![deny(missing_docs)] From 9f6af4752d8d25952a9b4f2d2c1878153997bff0 Mon Sep 17 00:00:00 2001 From: Samuel Hicks Date: Tue, 6 Aug 2024 12:57:56 +0100 Subject: [PATCH 13/13] Documented escaped string deserialization. --- CHANGELOG.md | 4 ++++ src/de/mod.rs | 8 ++++++++ src/lib.rs | 4 ++-- src/ser/mod.rs | 3 +++ src/str.rs | 44 +++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 60 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f0fa068f3..19f6e6b63 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,12 +7,16 @@ and this project adheres to [Semantic Versioning](http://semver.org/). ## [Unreleased] +### Breaking +- MSRV is now `1.65.0`. + ### Added - Support for optional package `defmt` which allows for easy conversion for error types when using tools like `probe-rs` for logging over debuggers. - Implement `Serializer::collect_str` - Derive `Serialize` for `de::Error` and `ser::Error` +- Support for deserializing escaped strings. ### Changed diff --git a/src/de/mod.rs b/src/de/mod.rs index ad2145d55..88e8fc2fe 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -201,6 +201,7 @@ impl<'a, 's> Deserializer<'a, 's> { } } + /// Parse a string, returning the escaped string. fn parse_str(&mut self) -> Result<&'a str> { if self.parse_whitespace().ok_or(Error::EofWhileParsingValue)? == b'"' { self.eat_char(); @@ -601,7 +602,10 @@ impl<'a, 'de, 's> de::Deserializer<'de> for &'a mut Deserializer<'de, 's> { where V: Visitor<'de>, { + // If the newtype struct is an `EscapedStr`... if name == crate::str::EscapedStr::NAME { + // ...deserialize as an escaped string instead. 
+ struct EscapedStringDeserializer<'a, 'de, 's>(&'a mut Deserializer<'de, 's>); impl<'a, 'de, 's> serde::Deserializer<'de> for EscapedStringDeserializer<'a, 'de, 's> { @@ -611,9 +615,13 @@ impl<'a, 'de, 's> de::Deserializer<'de> for &'a mut Deserializer<'de, 's> { where V: Visitor<'de>, { + // The only structure which is deserialized at this point is an `EscapedStr`, + // so pass the escaped string to its implementation of visit_borrowed_str. + // This line de facto becomes `Ok(EscapedStr(self.0.parse_str()?))`. visitor.visit_borrowed_str(self.0.parse_str()?) } + // `EscapedStr` only deserializes strings, so we might as well forward all methods to `deserialize_any`. serde::forward_to_deserialize_any! { bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string bytes byte_buf option unit unit_struct newtype_struct seq tuple diff --git a/src/lib.rs b/src/lib.rs index c21490053..c40dbb445 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -55,10 +55,10 @@ //! This crate is guaranteed to compile on stable Rust 1.65.0 and up. It *might* compile with older //! versions but that may change in any new patch release. -// #![deny(missing_docs)] +#![deny(missing_docs)] #![deny(rust_2018_compatibility)] #![deny(rust_2018_idioms)] -// #![deny(warnings)] +#![deny(warnings)] #![cfg_attr(not(feature = "std"), no_std)] pub mod de; diff --git a/src/ser/mod.rs b/src/ser/mod.rs index 1c9f0f884..74db6e099 100644 --- a/src/ser/mod.rs +++ b/src/ser/mod.rs @@ -367,7 +367,10 @@ impl<'a, 'b: 'a> ser::Serializer for &'a mut Serializer<'b> { where T: ser::Serialize + ?Sized, { + // If the newtype struct is an `EscapedStr`... if name == crate::str::EscapedStr::NAME { + // serialize it as an already escaped string. 
+ struct EscapedStringSerializer<'a, 'b>(&'a mut Serializer<'b>); impl<'a, 'b: 'a> serde::Serializer for EscapedStringSerializer<'a, 'b> { diff --git a/src/str.rs b/src/str.rs index dcfd5318d..ab1224850 100644 --- a/src/str.rs +++ b/src/str.rs @@ -1,14 +1,37 @@ +//! 
Utilities for serializing and deserializing strings. + +use core::fmt; + #[derive(Debug)] +/// A fragment of an escaped string. pub enum EscapedStringFragment<'a> { + /// A series of characters which weren't escaped in the input. NotEscaped(&'a str), + /// A character which was escaped in the input. Escaped(char), } #[derive(Debug)] +/// Errors occurring while unescaping strings. pub enum StringUnescapeError { + /// Failed to unescape a character due to an invalid escape sequence. InvalidEscapeSequence, } +impl fmt::Display for StringUnescapeError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + StringUnescapeError::InvalidEscapeSequence => write!( + f, + "Failed to unescape a character due to an invalid escape sequence." + ), + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for StringUnescapeError {} + fn unescape_next_fragment( escaped_string: &str, ) -> Result<(EscapedStringFragment<'_>, &str), StringUnescapeError> { @@ -55,7 +78,23 @@ fn unescape_next_fragment( }) } -/// A borrowed escaped string +/// A borrowed escaped string. `EscapedStr` can be used to borrow an escaped string from the input, +/// even when deserialized using `from_str_escaped` or `from_slice_escaped`. +/// +/// ``` +/// #[derive(serde::Deserialize)] +/// struct Event<'a> { +/// name: heapless::String<16>, +/// #[serde(borrow)] +/// description: serde_json_core::str::EscapedStr<'a>, +/// } +/// +/// serde_json_core::de::from_str_escaped::<Event<'_>>( +/// r#"{ "name": "Party\u0021", "description": "I'm throwing a party! Hopefully the \u2600 shines!" 
}"#, +/// &mut [0; 8], +/// ) +/// .unwrap(); +/// ``` #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)] #[serde(rename = "__serde_json_core_escaped_string__")] pub struct EscapedStr<'a>(pub &'a str); @@ -63,14 +102,17 @@ pub struct EscapedStr<'a>(pub &'a str); impl<'a> EscapedStr<'a> { pub(crate) const NAME: &'static str = "__serde_json_core_escaped_string__"; + /// Returns an iterator over the `EscapedStringFragment`s of an escaped string. pub fn fragments(&self) -> EscapedStringFragmentIter<'a> { EscapedStringFragmentIter(self.0) } } +/// An iterator over the `EscapedStringFragment`s of an escaped string. pub struct EscapedStringFragmentIter<'a>(&'a str); impl<'a> EscapedStringFragmentIter<'a> { + /// Views the underlying data as a subslice of the original data. pub fn as_str(&self) -> EscapedStr<'a> { EscapedStr(self.0) }