Merge pull request #428 from Mingun/encoding

Change all event and Attributes constructors to accept strings
tafia · Jul 24, 2022 · e738b68 · e738b68
2 parents e7f30d2 + 5467b6c
commit e738b68
Show file tree

Hide file tree

Showing 20 changed files with 405 additions and 437 deletions.
diff --git a/Changelog.md b/Changelog.md
@@ -129,13 +129,12 @@
 
 - [#415]: Changed custom entity unescaping API to accept closures rather than a mapping of entity to
   replacement text. This avoids needing to allocate a map and provides the user with more flexibility.
-- [#415]: Renamed many functions following the pattern `unescape_and_decode*` to `decode_and_unescape*`
-  to better communicate their function. Renamed functions following the pattern `*_with_custom_entities`
-  to `decode_and_unescape_with` to be more consistent across the API.
-- [#415]: `BytesText::escaped()` renamed to `BytesText::escape()`, `BytesText::unescaped()` renamed to
-  `BytesText::unescape()`, `BytesText::unescaped_with()` renamed to `BytesText::unescape_with()`,
-  `Attribute::escaped_value()` renamed to `Attribute::escape_value()`, and `Attribute::escaped_value_with()`
-  renamed to `Attribute::escape_value_with()` for consistency across the API.
+- [#415]: Renamed functions for consistency across the API:
+  |Old Name                |New Name
+  |------------------------|-------------------------------------------
+  |`*_with_custom_entities`|`*_with`
+  |`BytesText::unescaped()`|`BytesText::unescape()`
+  |`Attribute::unescaped_*`|`Attribute::unescape_*`
 
 - [#416]: `BytesStart::to_borrowed` renamed to `BytesStart::borrow`, the same method
   added to all events
@@ -150,6 +149,16 @@
 - [#423]: Removed `BytesText::from_plain` because it internally did escaping of a byte array,
   but since now escaping works on strings. Use `BytesText::from_plain_str` instead
 
+- [#428]: Removed `BytesText::escaped()`. Use `.as_ref()` provided by `Deref` impl instead.
+- [#428]: Removed `BytesText::from_escaped()`. Use constructors from strings instead,
+  because writer anyway works in UTF-8 only
+- [#428]: Removed `BytesCData::new()`. Use constructors from strings instead,
+  because writer anyway works in UTF-8 only
+- [#428]: Changed the event and `Attributes` constructors to accept a `&str` slices instead of `&[u8]` slices.
+  Handmade events has always been assumed to store their content UTF-8 encoded.
+- [#428]: Removed `Decoder` parameter from `_and_decode` versions of functions for
+  `BytesText` (remember, that those functions was renamed in #415).
+
 ### New Tests
 
 - [#9]: Added tests for incorrect nested tags in input
@@ -180,6 +189,7 @@
 [#418]: https://github.com/tafia/quick-xml/pull/418
 [#421]: https://github.com/tafia/quick-xml/pull/421
 [#423]: https://github.com/tafia/quick-xml/pull/423
+[#428]: https://github.com/tafia/quick-xml/pull/428
 [#434]: https://github.com/tafia/quick-xml/pull/434
 [#437]: https://github.com/tafia/quick-xml/pull/437
 

diff --git a/README.md b/README.md
@@ -53,7 +53,7 @@ loop {
                 _ => (),
             }
         }
-        Ok(Event::Text(e)) => txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()),
+        Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),
 
         // There are several other `Event`s we do not consider here
         _ => (),
@@ -80,7 +80,7 @@ loop {
 
             // crates a new element ... alternatively we could reuse `e` by calling
             // `e.into_owned()`
-            let mut elem = BytesStart::owned_name(b"my_elem".to_vec());
+            let mut elem = BytesStart::owned_name("my_elem");
 
             // collect existing attributes
             elem.extend_attributes(e.attributes().map(|attr| attr.unwrap()));
@@ -92,7 +92,7 @@ loop {
             assert!(writer.write_event(Event::Start(elem)).is_ok());
         },
         Ok(Event::End(e)) if e.name().as_ref() == b"this_tag" => {
-            assert!(writer.write_event(Event::End(BytesEnd::borrowed(b"my_elem"))).is_ok());
+            assert!(writer.write_event(Event::End(BytesEnd::borrowed("my_elem"))).is_ok());
         },
         Ok(Event::Eof) => break,
         // we can either move or borrow the event to write, depending on your use-case

diff --git a/benches/macrobenches.rs b/benches/macrobenches.rs
@@ -28,7 +28,7 @@ fn parse_document(doc: &[u8]) -> XmlResult<()> {
                 }
             }
             Event::Text(e) => {
-                criterion::black_box(e.decode_and_unescape(&r)?);
+                criterion::black_box(e.unescape()?);
             }
             Event::CData(e) => {
                 criterion::black_box(e.into_inner());

diff --git a/benches/microbenches.rs b/benches/microbenches.rs
@@ -174,7 +174,7 @@ fn one_event(c: &mut Criterion) {
                 .check_comments(false)
                 .trim_text(true);
             match r.read_event_into(&mut buf) {
-                Ok(Event::Comment(e)) => nbtxt += e.decode_and_unescape(&r).unwrap().len(),
+                Ok(Event::Comment(e)) => nbtxt += e.unescape().unwrap().len(),
                 something_else => panic!("Did not expect {:?}", something_else),
             };
 

diff --git a/examples/custom_entities.rs b/examples/custom_entities.rs
@@ -60,9 +60,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
             Ok(Event::Text(ref e)) => {
                 println!(
                     "text value: {}",
-                    e.decode_and_unescape_with(&reader, |ent| custom_entities
-                        .get(ent)
-                        .map(|s| s.as_str()))
+                    e.unescape_with(|ent| custom_entities.get(ent).map(|s| s.as_str()))
                         .unwrap()
                 );
             }

diff --git a/src/de/mod.rs b/src/de/mod.rs
@@ -612,16 +612,15 @@ where
         unescape: bool,
         allow_start: bool,
     ) -> Result<Cow<'de, str>, DeError> {
-        let decoder = self.reader.decoder();
         match self.next()? {
-            DeEvent::Text(e) => Ok(e.decode(decoder, unescape)?),
-            DeEvent::CData(e) => Ok(e.decode(decoder)?),
+            DeEvent::Text(e) => Ok(e.decode(unescape)?),
+            DeEvent::CData(e) => Ok(e.decode()?),
             DeEvent::Start(e) if allow_start => {
                 // allow one nested level
                 let inner = self.next()?;
                 let t = match inner {
-                    DeEvent::Text(t) => t.decode(decoder, unescape)?,
-                    DeEvent::CData(t) => t.decode(decoder)?,
+                    DeEvent::Text(t) => t.decode(unescape)?,
+                    DeEvent::CData(t) => t.decode()?,
                     DeEvent::Start(s) => {
                         return Err(DeError::UnexpectedStart(s.name().as_ref().to_owned()))
                     }
@@ -1042,13 +1041,10 @@ mod tests {
             assert_eq!(de.read, vec![]);
             assert_eq!(de.write, vec![]);
 
-            assert_eq!(
-                de.next().unwrap(),
-                Start(BytesStart::borrowed_name(b"root"))
-            );
+            assert_eq!(de.next().unwrap(), Start(BytesStart::borrowed_name("root")));
             assert_eq!(
                 de.peek().unwrap(),
-                &Start(BytesStart::borrowed_name(b"inner"))
+                &Start(BytesStart::borrowed_name("inner"))
             );
 
             // Should skip first <inner> tree
@@ -1057,11 +1053,11 @@ mod tests {
             assert_eq!(
                 de.write,
                 vec![
-                    Start(BytesStart::borrowed_name(b"inner")),
+                    Start(BytesStart::borrowed_name("inner")),
                     Text(BytesText::from_escaped_str("text")),
-                    Start(BytesStart::borrowed_name(b"inner")),
-                    End(BytesEnd::borrowed(b"inner")),
-                    End(BytesEnd::borrowed(b"inner")),
+                    Start(BytesStart::borrowed_name("inner")),
+                    End(BytesEnd::borrowed("inner")),
+                    End(BytesEnd::borrowed("inner")),
                 ]
             );
 
@@ -1073,11 +1069,8 @@ mod tests {
             //   </inner>
             //   <target/>
             // </root>
-            assert_eq!(
-                de.next().unwrap(),
-                Start(BytesStart::borrowed_name(b"next"))
-            );
-            assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"next")));
+            assert_eq!(de.next().unwrap(), Start(BytesStart::borrowed_name("next")));
+            assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("next")));
 
             // We finish writing. Next call to `next()` should start replay that messages:
             //
@@ -1094,27 +1087,27 @@ mod tests {
             assert_eq!(
                 de.read,
                 vec![
-                    Start(BytesStart::borrowed_name(b"inner")),
+                    Start(BytesStart::borrowed_name("inner")),
                     Text(BytesText::from_escaped_str("text")),
-                    Start(BytesStart::borrowed_name(b"inner")),
-                    End(BytesEnd::borrowed(b"inner")),
-                    End(BytesEnd::borrowed(b"inner")),
+                    Start(BytesStart::borrowed_name("inner")),
+                    End(BytesEnd::borrowed("inner")),
+                    End(BytesEnd::borrowed("inner")),
                 ]
             );
             assert_eq!(de.write, vec![]);
             assert_eq!(
                 de.next().unwrap(),
-                Start(BytesStart::borrowed_name(b"inner"))
+                Start(BytesStart::borrowed_name("inner"))
             );
 
             // Skip `#text` node and consume <inner/> after it
             de.skip().unwrap();
             assert_eq!(
                 de.read,
                 vec![
-                    Start(BytesStart::borrowed_name(b"inner")),
-                    End(BytesEnd::borrowed(b"inner")),
-                    End(BytesEnd::borrowed(b"inner")),
+                    Start(BytesStart::borrowed_name("inner")),
+                    End(BytesEnd::borrowed("inner")),
+                    End(BytesEnd::borrowed("inner")),
                 ]
             );
             assert_eq!(
@@ -1128,9 +1121,9 @@ mod tests {
 
             assert_eq!(
                 de.next().unwrap(),
-                Start(BytesStart::borrowed_name(b"inner"))
+                Start(BytesStart::borrowed_name("inner"))
             );
-            assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"inner")));
+            assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("inner")));
 
             // We finish writing. Next call to `next()` should start replay messages:
             //
@@ -1146,21 +1139,21 @@ mod tests {
                 de.read,
                 vec![
                     Text(BytesText::from_escaped_str("text")),
-                    End(BytesEnd::borrowed(b"inner")),
+                    End(BytesEnd::borrowed("inner")),
                 ]
             );
             assert_eq!(de.write, vec![]);
             assert_eq!(
                 de.next().unwrap(),
                 Text(BytesText::from_escaped_str("text"))
             );
-            assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"inner")));
+            assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("inner")));
             assert_eq!(
                 de.next().unwrap(),
-                Start(BytesStart::borrowed_name(b"target"))
+                Start(BytesStart::borrowed_name("target"))
             );
-            assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"target")));
-            assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"root")));
+            assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("target")));
+            assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("root")));
         }
 
         /// Checks that `read_to_end()` behaves correctly after `skip()`
@@ -1184,22 +1177,19 @@ mod tests {
             assert_eq!(de.read, vec![]);
             assert_eq!(de.write, vec![]);
 
-            assert_eq!(
-                de.next().unwrap(),
-                Start(BytesStart::borrowed_name(b"root"))
-            );
+            assert_eq!(de.next().unwrap(), Start(BytesStart::borrowed_name("root")));
 
             // Skip the <skip> tree
             de.skip().unwrap();
             assert_eq!(de.read, vec![]);
             assert_eq!(
                 de.write,
                 vec![
-                    Start(BytesStart::borrowed_name(b"skip")),
+                    Start(BytesStart::borrowed_name("skip")),
                     Text(BytesText::from_escaped_str("text")),
-                    Start(BytesStart::borrowed_name(b"skip")),
-                    End(BytesEnd::borrowed(b"skip")),
-                    End(BytesEnd::borrowed(b"skip")),
+                    Start(BytesStart::borrowed_name("skip")),
+                    End(BytesEnd::borrowed("skip")),
+                    End(BytesEnd::borrowed("skip")),
                 ]
             );
 
@@ -1212,18 +1202,18 @@ mod tests {
             // </root>
             assert_eq!(
                 de.next().unwrap(),
-                Start(BytesStart::borrowed_name(b"target"))
+                Start(BytesStart::borrowed_name("target"))
             );
             de.read_to_end(QName(b"target")).unwrap();
             assert_eq!(de.read, vec![]);
             assert_eq!(
                 de.write,
                 vec![
-                    Start(BytesStart::borrowed_name(b"skip")),
+                    Start(BytesStart::borrowed_name("skip")),
                     Text(BytesText::from_escaped_str("text")),
-                    Start(BytesStart::borrowed_name(b"skip")),
-                    End(BytesEnd::borrowed(b"skip")),
-                    End(BytesEnd::borrowed(b"skip")),
+                    Start(BytesStart::borrowed_name("skip")),
+                    End(BytesEnd::borrowed("skip")),
+                    End(BytesEnd::borrowed("skip")),
                 ]
             );
 
@@ -1241,22 +1231,19 @@ mod tests {
             assert_eq!(
                 de.read,
                 vec![
-                    Start(BytesStart::borrowed_name(b"skip")),
+                    Start(BytesStart::borrowed_name("skip")),
                     Text(BytesText::from_escaped_str("text")),
-                    Start(BytesStart::borrowed_name(b"skip")),
-                    End(BytesEnd::borrowed(b"skip")),
-                    End(BytesEnd::borrowed(b"skip")),
+                    Start(BytesStart::borrowed_name("skip")),
+                    End(BytesEnd::borrowed("skip")),
+                    End(BytesEnd::borrowed("skip")),
                 ]
             );
             assert_eq!(de.write, vec![]);
 
-            assert_eq!(
-                de.next().unwrap(),
-                Start(BytesStart::borrowed_name(b"skip"))
-            );
+            assert_eq!(de.next().unwrap(), Start(BytesStart::borrowed_name("skip")));
             de.read_to_end(QName(b"skip")).unwrap();
 
-            assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"root")));
+            assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("root")));
         }
 
         /// Checks that limiting buffer size works correctly
@@ -1306,34 +1293,31 @@ mod tests {
             "#,
         );
 
-        assert_eq!(
-            de.next().unwrap(),
-            Start(BytesStart::borrowed_name(b"root"))
-        );
+        assert_eq!(de.next().unwrap(), Start(BytesStart::borrowed_name("root")));
 
         assert_eq!(
             de.next().unwrap(),
-            Start(BytesStart::borrowed(br#"tag a="1""#, 3))
+            Start(BytesStart::borrowed(r#"tag a="1""#, 3))
         );
         assert_eq!(de.read_to_end(QName(b"tag")).unwrap(), ());
 
         assert_eq!(
             de.next().unwrap(),
-            Start(BytesStart::borrowed(br#"tag a="2""#, 3))
+            Start(BytesStart::borrowed(r#"tag a="2""#, 3))
         );
         assert_eq!(
             de.next().unwrap(),
             CData(BytesCData::from_str("cdata content"))
         );
-        assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"tag")));
+        assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("tag")));
 
         assert_eq!(
             de.next().unwrap(),
-            Start(BytesStart::borrowed(b"self-closed", 11))
+            Start(BytesStart::borrowed_name("self-closed"))
         );
         assert_eq!(de.read_to_end(QName(b"self-closed")).unwrap(), ());
 
-        assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"root")));
+        assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("root")));
         assert_eq!(de.next().unwrap(), Eof);
     }
 
@@ -1402,17 +1386,17 @@ mod tests {
             events,
             vec![
                 Start(BytesStart::borrowed(
-                    br#"item name="hello" source="world.rs""#,
+                    r#"item name="hello" source="world.rs""#,
                     4
                 )),
                 Text(BytesText::from_escaped_str("Some text")),
-                End(BytesEnd::borrowed(b"item")),
-                Start(BytesStart::borrowed(b"item2", 5)),
-                End(BytesEnd::borrowed(b"item2")),
-                Start(BytesStart::borrowed(b"item3", 5)),
-                End(BytesEnd::borrowed(b"item3")),
-                Start(BytesStart::borrowed(br#"item4 value="world" "#, 5)),
-                End(BytesEnd::borrowed(b"item4")),
+                End(BytesEnd::borrowed("item")),
+                Start(BytesStart::borrowed("item2", 5)),
+                End(BytesEnd::borrowed("item2")),
+                Start(BytesStart::borrowed("item3", 5)),
+                End(BytesEnd::borrowed("item3")),
+                Start(BytesStart::borrowed(r#"item4 value="world" "#, 5)),
+                End(BytesEnd::borrowed("item4")),
             ]
         )
     }
@@ -1432,7 +1416,7 @@ mod tests {
 
         assert_eq!(
             reader.next().unwrap(),
-            DeEvent::Start(BytesStart::borrowed(b"item ", 4))
+            DeEvent::Start(BytesStart::borrowed("item ", 4))
         );
         reader.read_to_end(QName(b"item")).unwrap();
         assert_eq!(reader.next().unwrap(), DeEvent::Eof);