diff --git a/Changelog.md b/Changelog.md index e9a74ecb..341cb892 100644 --- a/Changelog.md +++ b/Changelog.md @@ -129,13 +129,12 @@ - [#415]: Changed custom entity unescaping API to accept closures rather than a mapping of entity to replacement text. This avoids needing to allocate a map and provides the user with more flexibility. -- [#415]: Renamed many functions following the pattern `unescape_and_decode*` to `decode_and_unescape*` - to better communicate their function. Renamed functions following the pattern `*_with_custom_entities` - to `decode_and_unescape_with` to be more consistent across the API. -- [#415]: `BytesText::escaped()` renamed to `BytesText::escape()`, `BytesText::unescaped()` renamed to - `BytesText::unescape()`, `BytesText::unescaped_with()` renamed to `BytesText::unescape_with()`, - `Attribute::escaped_value()` renamed to `Attribute::escape_value()`, and `Attribute::escaped_value_with()` - renamed to `Attribute::escape_value_with()` for consistency across the API. +- [#415]: Renamed functions for consistency across the API: + |Old Name |New Name + |------------------------|------------------------------------------- + |`*_with_custom_entities`|`*_with` + |`BytesText::unescaped()`|`BytesText::unescape()` + |`Attribute::unescaped_*`|`Attribute::unescape_*` - [#416]: `BytesStart::to_borrowed` renamed to `BytesStart::borrow`, the same method added to all events @@ -148,7 +147,33 @@ - [#423]: All escaping functions now accepts and returns strings instead of byte slices - [#423]: Removed `BytesText::from_plain` because it internally did escaping of a byte array, - but since now escaping works on strings. Use `BytesText::from_plain_str` instead + but since now escaping works on strings. Use `BytesText::new` instead + +- [#428]: Removed `BytesText::escaped()`. Use `.as_ref()` provided by `Deref` impl instead. +- [#428]: Removed `BytesText::from_escaped()`. 
Use constructors from strings instead, + because the writer works only in UTF-8 anyway +- [#428]: Removed `BytesCData::new()`. Use constructors from strings instead, + because the writer works only in UTF-8 anyway +- [#428]: Changed the event and `Attributes` constructors to accept `&str` slices instead of `&[u8]` slices. + Handmade events have always been assumed to store their content UTF-8 encoded. +- [#428]: Removed `Decoder` parameter from `_and_decode` versions of functions for + `BytesText` (remember that those functions were renamed in #415). + +- [#431]: Changed event constructors: + |Old names |New name + |--------------------------------------------------|---------------------------------------------- + |`BytesStart::owned_name(impl Into<Vec<u8>>)` |`BytesStart::new(impl Into<Cow<str>>)` + |`BytesStart::borrowed_name(&[u8])` |_(as above)_ + |`BytesStart::owned(impl Into<Vec<u8>>, usize)` |`BytesStart::from_content(impl Into<Cow<str>>, usize)` + |`BytesStart::borrowed(&[u8], usize)` |_(as above)_ + |`BytesEnd::owned(Vec<u8>)` |`BytesEnd::new(impl Into<Cow<str>>)` + |`BytesEnd::borrowed(&[u8])` |_(as above)_ + |`BytesText::from_escaped(impl Into<Cow<[u8]>>)` |`BytesText::from_escaped(impl Into<Cow<str>>)` + |`BytesText::from_escaped_str(impl Into<Cow<str>>)`|_(as above)_ + |`BytesText::from_plain(&[u8])` |`BytesText::new(&str)` + |`BytesText::from_plain_str(&str)` |_(as above)_ + |`BytesCData::new(impl Into<Cow<[u8]>>)` |`BytesCData::new(impl Into<Cow<str>>)` + |`BytesCData::from_str(&str)` |_(as above)_ ### New Tests @@ -180,6 +205,8 @@ [#418]: https://github.com/tafia/quick-xml/pull/418 [#421]: https://github.com/tafia/quick-xml/pull/421 [#423]: https://github.com/tafia/quick-xml/pull/423 +[#428]: https://github.com/tafia/quick-xml/pull/428 +[#431]: https://github.com/tafia/quick-xml/pull/431 [#434]: https://github.com/tafia/quick-xml/pull/434 [#437]: https://github.com/tafia/quick-xml/pull/437 diff --git a/README.md b/README.md index 6fa273ee..26499d58 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ loop { _ => (), } } - Ok(Event::Text(e)) => 
txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()), + Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()), // There are several other `Event`s we do not consider here _ => (), @@ -80,7 +80,7 @@ loop { // crates a new element ... alternatively we could reuse `e` by calling // `e.into_owned()` - let mut elem = BytesStart::owned_name(b"my_elem".to_vec()); + let mut elem = BytesStart::new("my_elem"); // collect existing attributes elem.extend_attributes(e.attributes().map(|attr| attr.unwrap())); @@ -92,7 +92,7 @@ loop { assert!(writer.write_event(Event::Start(elem)).is_ok()); }, Ok(Event::End(e)) if e.name().as_ref() == b"this_tag" => { - assert!(writer.write_event(Event::End(BytesEnd::borrowed(b"my_elem"))).is_ok()); + assert!(writer.write_event(Event::End(BytesEnd::new("my_elem"))).is_ok()); }, Ok(Event::Eof) => break, // we can either move or borrow the event to write, depending on your use-case diff --git a/benches/macrobenches.rs b/benches/macrobenches.rs index 3358f3a4..72a1c0b5 100644 --- a/benches/macrobenches.rs +++ b/benches/macrobenches.rs @@ -28,7 +28,7 @@ fn parse_document(doc: &[u8]) -> XmlResult<()> { } } Event::Text(e) => { - criterion::black_box(e.decode_and_unescape(&r)?); + criterion::black_box(e.unescape()?); } Event::CData(e) => { criterion::black_box(e.into_inner()); diff --git a/benches/microbenches.rs b/benches/microbenches.rs index 1e7cc232..95568224 100644 --- a/benches/microbenches.rs +++ b/benches/microbenches.rs @@ -174,7 +174,7 @@ fn one_event(c: &mut Criterion) { .check_comments(false) .trim_text(true); match r.read_event_into(&mut buf) { - Ok(Event::Comment(e)) => nbtxt += e.decode_and_unescape(&r).unwrap().len(), + Ok(Event::Comment(e)) => nbtxt += e.unescape().unwrap().len(), something_else => panic!("Did not expect {:?}", something_else), }; diff --git a/examples/custom_entities.rs b/examples/custom_entities.rs index 3c31d4d1..16f03482 100644 --- a/examples/custom_entities.rs +++ 
b/examples/custom_entities.rs @@ -60,9 +60,7 @@ fn main() -> Result<(), Box> { Ok(Event::Text(ref e)) => { println!( "text value: {}", - e.decode_and_unescape_with(&reader, |ent| custom_entities - .get(ent) - .map(|s| s.as_str())) + e.unescape_with(|ent| custom_entities.get(ent).map(|s| s.as_str())) .unwrap() ); } diff --git a/src/de/mod.rs b/src/de/mod.rs index e564e041..caabbdf8 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -612,16 +612,15 @@ where unescape: bool, allow_start: bool, ) -> Result, DeError> { - let decoder = self.reader.decoder(); match self.next()? { - DeEvent::Text(e) => Ok(e.decode(decoder, unescape)?), - DeEvent::CData(e) => Ok(e.decode(decoder)?), + DeEvent::Text(e) => Ok(e.decode(unescape)?), + DeEvent::CData(e) => Ok(e.decode()?), DeEvent::Start(e) if allow_start => { // allow one nested level let inner = self.next()?; let t = match inner { - DeEvent::Text(t) => t.decode(decoder, unescape)?, - DeEvent::CData(t) => t.decode(decoder)?, + DeEvent::Text(t) => t.decode(unescape)?, + DeEvent::CData(t) => t.decode()?, DeEvent::Start(s) => { return Err(DeError::UnexpectedStart(s.name().as_ref().to_owned())) } @@ -1042,14 +1041,8 @@ mod tests { assert_eq!(de.read, vec![]); assert_eq!(de.write, vec![]); - assert_eq!( - de.next().unwrap(), - Start(BytesStart::borrowed_name(b"root")) - ); - assert_eq!( - de.peek().unwrap(), - &Start(BytesStart::borrowed_name(b"inner")) - ); + assert_eq!(de.next().unwrap(), Start(BytesStart::new("root"))); + assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("inner"))); // Should skip first tree de.skip().unwrap(); @@ -1057,11 +1050,11 @@ mod tests { assert_eq!( de.write, vec![ - Start(BytesStart::borrowed_name(b"inner")), - Text(BytesText::from_escaped_str("text")), - Start(BytesStart::borrowed_name(b"inner")), - End(BytesEnd::borrowed(b"inner")), - End(BytesEnd::borrowed(b"inner")), + Start(BytesStart::new("inner")), + Text(BytesText::from_escaped("text")), + Start(BytesStart::new("inner")), + 
End(BytesEnd::new("inner")), + End(BytesEnd::new("inner")), ] ); @@ -1073,11 +1066,8 @@ mod tests { // // // - assert_eq!( - de.next().unwrap(), - Start(BytesStart::borrowed_name(b"next")) - ); - assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"next"))); + assert_eq!(de.next().unwrap(), Start(BytesStart::new("next"))); + assert_eq!(de.next().unwrap(), End(BytesEnd::new("next"))); // We finish writing. Next call to `next()` should start replay that messages: // @@ -1094,27 +1084,24 @@ mod tests { assert_eq!( de.read, vec![ - Start(BytesStart::borrowed_name(b"inner")), - Text(BytesText::from_escaped_str("text")), - Start(BytesStart::borrowed_name(b"inner")), - End(BytesEnd::borrowed(b"inner")), - End(BytesEnd::borrowed(b"inner")), + Start(BytesStart::new("inner")), + Text(BytesText::from_escaped("text")), + Start(BytesStart::new("inner")), + End(BytesEnd::new("inner")), + End(BytesEnd::new("inner")), ] ); assert_eq!(de.write, vec![]); - assert_eq!( - de.next().unwrap(), - Start(BytesStart::borrowed_name(b"inner")) - ); + assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner"))); // Skip `#text` node and consume after it de.skip().unwrap(); assert_eq!( de.read, vec![ - Start(BytesStart::borrowed_name(b"inner")), - End(BytesEnd::borrowed(b"inner")), - End(BytesEnd::borrowed(b"inner")), + Start(BytesStart::new("inner")), + End(BytesEnd::new("inner")), + End(BytesEnd::new("inner")), ] ); assert_eq!( @@ -1122,15 +1109,12 @@ mod tests { vec![ // This comment here to keep the same formatting of both arrays // otherwise rustfmt suggest one-line it - Text(BytesText::from_escaped_str("text")), + Text(BytesText::from_escaped("text")), ] ); - assert_eq!( - de.next().unwrap(), - Start(BytesStart::borrowed_name(b"inner")) - ); - assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"inner"))); + assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner"))); + assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner"))); // We finish writing. 
Next call to `next()` should start replay messages: // @@ -1145,22 +1129,16 @@ mod tests { assert_eq!( de.read, vec![ - Text(BytesText::from_escaped_str("text")), - End(BytesEnd::borrowed(b"inner")), + Text(BytesText::from_escaped("text")), + End(BytesEnd::new("inner")), ] ); assert_eq!(de.write, vec![]); - assert_eq!( - de.next().unwrap(), - Text(BytesText::from_escaped_str("text")) - ); - assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"inner"))); - assert_eq!( - de.next().unwrap(), - Start(BytesStart::borrowed_name(b"target")) - ); - assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"target"))); - assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"root"))); + assert_eq!(de.next().unwrap(), Text(BytesText::from_escaped("text"))); + assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner"))); + assert_eq!(de.next().unwrap(), Start(BytesStart::new("target"))); + assert_eq!(de.next().unwrap(), End(BytesEnd::new("target"))); + assert_eq!(de.next().unwrap(), End(BytesEnd::new("root"))); } /// Checks that `read_to_end()` behaves correctly after `skip()` @@ -1184,10 +1162,7 @@ mod tests { assert_eq!(de.read, vec![]); assert_eq!(de.write, vec![]); - assert_eq!( - de.next().unwrap(), - Start(BytesStart::borrowed_name(b"root")) - ); + assert_eq!(de.next().unwrap(), Start(BytesStart::new("root"))); // Skip the tree de.skip().unwrap(); @@ -1195,11 +1170,11 @@ mod tests { assert_eq!( de.write, vec![ - Start(BytesStart::borrowed_name(b"skip")), - Text(BytesText::from_escaped_str("text")), - Start(BytesStart::borrowed_name(b"skip")), - End(BytesEnd::borrowed(b"skip")), - End(BytesEnd::borrowed(b"skip")), + Start(BytesStart::new("skip")), + Text(BytesText::from_escaped("text")), + Start(BytesStart::new("skip")), + End(BytesEnd::new("skip")), + End(BytesEnd::new("skip")), ] ); @@ -1210,20 +1185,17 @@ mod tests { // // // - assert_eq!( - de.next().unwrap(), - Start(BytesStart::borrowed_name(b"target")) - ); + assert_eq!(de.next().unwrap(), 
Start(BytesStart::new("target"))); de.read_to_end(QName(b"target")).unwrap(); assert_eq!(de.read, vec![]); assert_eq!( de.write, vec![ - Start(BytesStart::borrowed_name(b"skip")), - Text(BytesText::from_escaped_str("text")), - Start(BytesStart::borrowed_name(b"skip")), - End(BytesEnd::borrowed(b"skip")), - End(BytesEnd::borrowed(b"skip")), + Start(BytesStart::new("skip")), + Text(BytesText::from_escaped("text")), + Start(BytesStart::new("skip")), + End(BytesEnd::new("skip")), + End(BytesEnd::new("skip")), ] ); @@ -1241,22 +1213,19 @@ mod tests { assert_eq!( de.read, vec![ - Start(BytesStart::borrowed_name(b"skip")), - Text(BytesText::from_escaped_str("text")), - Start(BytesStart::borrowed_name(b"skip")), - End(BytesEnd::borrowed(b"skip")), - End(BytesEnd::borrowed(b"skip")), + Start(BytesStart::new("skip")), + Text(BytesText::from_escaped("text")), + Start(BytesStart::new("skip")), + End(BytesEnd::new("skip")), + End(BytesEnd::new("skip")), ] ); assert_eq!(de.write, vec![]); - assert_eq!( - de.next().unwrap(), - Start(BytesStart::borrowed_name(b"skip")) - ); + assert_eq!(de.next().unwrap(), Start(BytesStart::new("skip"))); de.read_to_end(QName(b"skip")).unwrap(); - assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"root"))); + assert_eq!(de.next().unwrap(), End(BytesEnd::new("root"))); } /// Checks that limiting buffer size works correctly @@ -1306,34 +1275,25 @@ mod tests { "#, ); - assert_eq!( - de.next().unwrap(), - Start(BytesStart::borrowed_name(b"root")) - ); + assert_eq!(de.next().unwrap(), Start(BytesStart::new("root"))); assert_eq!( de.next().unwrap(), - Start(BytesStart::borrowed(br#"tag a="1""#, 3)) + Start(BytesStart::from_content(r#"tag a="1""#, 3)) ); assert_eq!(de.read_to_end(QName(b"tag")).unwrap(), ()); assert_eq!( de.next().unwrap(), - Start(BytesStart::borrowed(br#"tag a="2""#, 3)) - ); - assert_eq!( - de.next().unwrap(), - CData(BytesCData::from_str("cdata content")) + Start(BytesStart::from_content(r#"tag a="2""#, 3)) ); - 
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"tag"))); + assert_eq!(de.next().unwrap(), CData(BytesCData::new("cdata content"))); + assert_eq!(de.next().unwrap(), End(BytesEnd::new("tag"))); - assert_eq!( - de.next().unwrap(), - Start(BytesStart::borrowed(b"self-closed", 11)) - ); + assert_eq!(de.next().unwrap(), Start(BytesStart::new("self-closed"))); assert_eq!(de.read_to_end(QName(b"self-closed")).unwrap(), ()); - assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"root"))); + assert_eq!(de.next().unwrap(), End(BytesEnd::new("root"))); assert_eq!(de.next().unwrap(), Eof); } @@ -1401,18 +1361,18 @@ mod tests { assert_eq!( events, vec![ - Start(BytesStart::borrowed( - br#"item name="hello" source="world.rs""#, + Start(BytesStart::from_content( + r#"item name="hello" source="world.rs""#, 4 )), - Text(BytesText::from_escaped_str("Some text")), - End(BytesEnd::borrowed(b"item")), - Start(BytesStart::borrowed(b"item2", 5)), - End(BytesEnd::borrowed(b"item2")), - Start(BytesStart::borrowed(b"item3", 5)), - End(BytesEnd::borrowed(b"item3")), - Start(BytesStart::borrowed(br#"item4 value="world" "#, 5)), - End(BytesEnd::borrowed(b"item4")), + Text(BytesText::from_escaped("Some text")), + End(BytesEnd::new("item")), + Start(BytesStart::from_content("item2", 5)), + End(BytesEnd::new("item2")), + Start(BytesStart::from_content("item3", 5)), + End(BytesEnd::new("item3")), + Start(BytesStart::from_content(r#"item4 value="world" "#, 5)), + End(BytesEnd::new("item4")), ] ) } @@ -1432,7 +1392,7 @@ mod tests { assert_eq!( reader.next().unwrap(), - DeEvent::Start(BytesStart::borrowed(b"item ", 4)) + DeEvent::Start(BytesStart::from_content("item ", 4)) ); reader.read_to_end(QName(b"item")).unwrap(); assert_eq!(reader.next().unwrap(), DeEvent::Eof); diff --git a/src/de/seq.rs b/src/de/seq.rs index fe4559bd..8dc9a462 100644 --- a/src/de/seq.rs +++ b/src/de/seq.rs @@ -134,7 +134,7 @@ where #[test] fn test_not_in() { - let tag = BytesStart::borrowed_name(b"tag"); + let tag 
= BytesStart::new("tag"); assert_eq!(not_in(&[], &tag, Decoder::utf8()).unwrap(), true); assert_eq!( diff --git a/src/events/attributes.rs b/src/events/attributes.rs index 51168d5e..ad938f9b 100644 --- a/src/events/attributes.rs +++ b/src/events/attributes.rs @@ -171,27 +171,30 @@ impl<'a> From> for Attribute<'a> { /// [`with_checks(false)`]: Self::with_checks #[derive(Clone, Debug)] pub struct Attributes<'a> { - /// slice of `Element` corresponding to attributes + /// Slice of `BytesStart` corresponding to attributes bytes: &'a [u8], /// Iterator state, independent from the actual source of bytes state: IterState, } impl<'a> Attributes<'a> { - /// Creates a new attribute iterator from a buffer. - pub fn new(buf: &'a [u8], pos: usize) -> Self { + /// Internal constructor, used by `BytesStart`. Supplies data in reader's encoding + #[inline] + pub(crate) fn wrap(buf: &'a [u8], pos: usize, html: bool) -> Self { Self { bytes: buf, - state: IterState::new(pos, false), + state: IterState::new(pos, html), } } + /// Creates a new attribute iterator from a buffer. + pub fn new(buf: &'a str, pos: usize) -> Self { + Self::wrap(buf.as_bytes(), pos, false) + } + /// Creates a new attribute iterator from a buffer, allowing HTML attribute syntax. - pub fn html(buf: &'a [u8], pos: usize) -> Self { - Self { - bytes: buf, - state: IterState::new(pos, true), - } + pub fn html(buf: &'a str, pos: usize) -> Self { + Self::wrap(buf.as_bytes(), pos, true) } /// Changes whether attributes should be checked for uniqueness. 
@@ -785,7 +788,7 @@ mod xml { /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { - let mut iter = Attributes::new(br#"tag key='value'"#, 3); + let mut iter = Attributes::new(r#"tag key='value'"#, 3); assert_eq!( iter.next(), @@ -801,7 +804,7 @@ mod xml { /// Attribute have a value enclosed in double quotes #[test] fn double_quoted() { - let mut iter = Attributes::new(br#"tag key="value""#, 3); + let mut iter = Attributes::new(r#"tag key="value""#, 3); assert_eq!( iter.next(), @@ -817,8 +820,8 @@ mod xml { /// Attribute have a value, not enclosed in quotes #[test] fn unquoted() { - let mut iter = Attributes::new(br#"tag key=value"#, 3); - // 0 ^ = 8 + let mut iter = Attributes::new(r#"tag key=value"#, 3); + // 0 ^ = 8 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(8)))); assert_eq!(iter.next(), None); @@ -828,8 +831,8 @@ mod xml { /// Only attribute key is present #[test] fn key_only() { - let mut iter = Attributes::new(br#"tag key"#, 3); - // 0 ^ = 7 + let mut iter = Attributes::new(r#"tag key"#, 3); + // 0 ^ = 7 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(7)))); assert_eq!(iter.next(), None); @@ -841,7 +844,7 @@ mod xml { /// that invalid attribute will be returned #[test] fn key_start_invalid() { - let mut iter = Attributes::new(br#"tag 'key'='value'"#, 3); + let mut iter = Attributes::new(r#"tag 'key'='value'"#, 3); assert_eq!( iter.next(), @@ -859,7 +862,7 @@ mod xml { /// that invalid attribute will be returned #[test] fn key_contains_invalid() { - let mut iter = Attributes::new(br#"tag key&jey='value'"#, 3); + let mut iter = Attributes::new(r#"tag key&jey='value'"#, 3); assert_eq!( iter.next(), @@ -875,8 +878,8 @@ mod xml { /// Attribute value is missing after `=` #[test] fn missed_value() { - let mut iter = Attributes::new(br#"tag key="#, 3); - // 0 ^ = 8 + let mut iter = Attributes::new(r#"tag key="#, 3); + // 0 ^ = 8 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(8)))); assert_eq!(iter.next(), 
None); @@ -892,7 +895,7 @@ mod xml { /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { - let mut iter = Attributes::new(br#"tag key='value' regular='attribute'"#, 3); + let mut iter = Attributes::new(r#"tag key='value' regular='attribute'"#, 3); assert_eq!( iter.next(), @@ -915,7 +918,7 @@ mod xml { /// Attribute have a value enclosed in double quotes #[test] fn double_quoted() { - let mut iter = Attributes::new(br#"tag key="value" regular='attribute'"#, 3); + let mut iter = Attributes::new(r#"tag key="value" regular='attribute'"#, 3); assert_eq!( iter.next(), @@ -938,8 +941,8 @@ mod xml { /// Attribute have a value, not enclosed in quotes #[test] fn unquoted() { - let mut iter = Attributes::new(br#"tag key=value regular='attribute'"#, 3); - // 0 ^ = 8 + let mut iter = Attributes::new(r#"tag key=value regular='attribute'"#, 3); + // 0 ^ = 8 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(8)))); // check error recovery @@ -957,8 +960,8 @@ mod xml { /// Only attribute key is present #[test] fn key_only() { - let mut iter = Attributes::new(br#"tag key regular='attribute'"#, 3); - // 0 ^ = 8 + let mut iter = Attributes::new(r#"tag key regular='attribute'"#, 3); + // 0 ^ = 8 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(8)))); // check error recovery @@ -978,7 +981,7 @@ mod xml { /// that invalid attribute will be returned #[test] fn key_start_invalid() { - let mut iter = Attributes::new(br#"tag 'key'='value' regular='attribute'"#, 3); + let mut iter = Attributes::new(r#"tag 'key'='value' regular='attribute'"#, 3); assert_eq!( iter.next(), @@ -1003,7 +1006,7 @@ mod xml { /// that invalid attribute will be returned #[test] fn key_contains_invalid() { - let mut iter = Attributes::new(br#"tag key&jey='value' regular='attribute'"#, 3); + let mut iter = Attributes::new(r#"tag key&jey='value' regular='attribute'"#, 3); assert_eq!( iter.next(), @@ -1026,8 +1029,8 @@ mod xml { /// Attribute value is missing after `=`. 
#[test] fn missed_value() { - let mut iter = Attributes::new(br#"tag key= regular='attribute'"#, 3); - // 0 ^ = 9 + let mut iter = Attributes::new(r#"tag key= regular='attribute'"#, 3); + // 0 ^ = 9 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9)))); // Because we do not check validity of keys and values during parsing, @@ -1038,8 +1041,8 @@ mod xml { //////////////////////////////////////////////////////////////////// - let mut iter = Attributes::new(br#"tag key= regular= 'attribute'"#, 3); - // 0 ^ = 9 ^ = 29 + let mut iter = Attributes::new(r#"tag key= regular= 'attribute'"#, 3); + // 0 ^ = 9 ^ = 29 // In that case "regular=" considered as unquoted value assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9)))); @@ -1051,8 +1054,8 @@ mod xml { //////////////////////////////////////////////////////////////////// - let mut iter = Attributes::new(br#"tag key= regular ='attribute'"#, 3); - // 0 ^ = 9 ^ = 29 + let mut iter = Attributes::new(r#"tag key= regular ='attribute'"#, 3); + // 0 ^ = 9 ^ = 29 // In that case "regular" considered as unquoted value assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9)))); @@ -1064,8 +1067,8 @@ mod xml { //////////////////////////////////////////////////////////////////// - let mut iter = Attributes::new(br#"tag key= regular = 'attribute'"#, 3); - // 0 ^ = 9 ^ = 19 ^ = 30 + let mut iter = Attributes::new(r#"tag key= regular = 'attribute'"#, 3); + // 0 ^ = 9 ^ = 19 ^ = 30 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9)))); // In that case second "=" considered as a key, because we do not check @@ -1087,7 +1090,7 @@ mod xml { /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { - let mut iter = Attributes::new(br#"tag key = 'value' "#, 3); + let mut iter = Attributes::new(r#"tag key = 'value' "#, 3); assert_eq!( iter.next(), @@ -1103,7 +1106,7 @@ mod xml { /// Attribute have a value enclosed in double quotes #[test] fn double_quoted() { - let mut iter = 
Attributes::new(br#"tag key = "value" "#, 3); + let mut iter = Attributes::new(r#"tag key = "value" "#, 3); assert_eq!( iter.next(), @@ -1119,8 +1122,8 @@ mod xml { /// Attribute have a value, not enclosed in quotes #[test] fn unquoted() { - let mut iter = Attributes::new(br#"tag key = value "#, 3); - // 0 ^ = 10 + let mut iter = Attributes::new(r#"tag key = value "#, 3); + // 0 ^ = 10 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(10)))); assert_eq!(iter.next(), None); @@ -1130,8 +1133,8 @@ mod xml { /// Only attribute key is present #[test] fn key_only() { - let mut iter = Attributes::new(br#"tag key "#, 3); - // 0 ^ = 8 + let mut iter = Attributes::new(r#"tag key "#, 3); + // 0 ^ = 8 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(8)))); assert_eq!(iter.next(), None); @@ -1143,7 +1146,7 @@ mod xml { /// that invalid attribute will be returned #[test] fn key_start_invalid() { - let mut iter = Attributes::new(br#"tag 'key' = 'value' "#, 3); + let mut iter = Attributes::new(r#"tag 'key' = 'value' "#, 3); assert_eq!( iter.next(), @@ -1161,7 +1164,7 @@ mod xml { /// that invalid attribute will be returned #[test] fn key_contains_invalid() { - let mut iter = Attributes::new(br#"tag key&jey = 'value' "#, 3); + let mut iter = Attributes::new(r#"tag key&jey = 'value' "#, 3); assert_eq!( iter.next(), @@ -1177,8 +1180,8 @@ mod xml { /// Attribute value is missing after `=` #[test] fn missed_value() { - let mut iter = Attributes::new(br#"tag key = "#, 3); - // 0 ^ = 10 + let mut iter = Attributes::new(r#"tag key = "#, 3); + // 0 ^ = 10 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(10)))); assert_eq!(iter.next(), None); @@ -1198,8 +1201,8 @@ mod xml { /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { - let mut iter = Attributes::new(br#"tag key='value' key='dup' another=''"#, 3); - // 0 ^ = 4 ^ = 16 + let mut iter = Attributes::new(r#"tag key='value' key='dup' another=''"#, 3); + // 0 ^ = 4 ^ = 16 assert_eq!( 
iter.next(), @@ -1223,8 +1226,8 @@ mod xml { /// Attribute have a value enclosed in double quotes #[test] fn double_quoted() { - let mut iter = Attributes::new(br#"tag key='value' key="dup" another=''"#, 3); - // 0 ^ = 4 ^ = 16 + let mut iter = Attributes::new(r#"tag key='value' key="dup" another=''"#, 3); + // 0 ^ = 4 ^ = 16 assert_eq!( iter.next(), @@ -1248,8 +1251,8 @@ mod xml { /// Attribute have a value, not enclosed in quotes #[test] fn unquoted() { - let mut iter = Attributes::new(br#"tag key='value' key=dup another=''"#, 3); - // 0 ^ = 4 ^ = 16 + let mut iter = Attributes::new(r#"tag key='value' key=dup another=''"#, 3); + // 0 ^ = 4 ^ = 16 assert_eq!( iter.next(), @@ -1273,8 +1276,8 @@ mod xml { /// Only attribute key is present #[test] fn key_only() { - let mut iter = Attributes::new(br#"tag key='value' key another=''"#, 3); - // 0 ^ = 20 + let mut iter = Attributes::new(r#"tag key='value' key another=''"#, 3); + // 0 ^ = 20 assert_eq!( iter.next(), @@ -1304,7 +1307,7 @@ mod xml { /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { - let mut iter = Attributes::new(br#"tag key='value' key='dup' another=''"#, 3); + let mut iter = Attributes::new(r#"tag key='value' key='dup' another=''"#, 3); iter.with_checks(false); assert_eq!( @@ -1335,7 +1338,7 @@ mod xml { /// Attribute have a value enclosed in double quotes #[test] fn double_quoted() { - let mut iter = Attributes::new(br#"tag key='value' key="dup" another=''"#, 3); + let mut iter = Attributes::new(r#"tag key='value' key="dup" another=''"#, 3); iter.with_checks(false); assert_eq!( @@ -1366,8 +1369,8 @@ mod xml { /// Attribute have a value, not enclosed in quotes #[test] fn unquoted() { - let mut iter = Attributes::new(br#"tag key='value' key=dup another=''"#, 3); - // 0 ^ = 20 + let mut iter = Attributes::new(r#"tag key='value' key=dup another=''"#, 3); + // 0 ^ = 20 iter.with_checks(false); assert_eq!( @@ -1392,8 +1395,8 @@ mod xml { /// Only attribute key is present 
#[test] fn key_only() { - let mut iter = Attributes::new(br#"tag key='value' key another=''"#, 3); - // 0 ^ = 20 + let mut iter = Attributes::new(r#"tag key='value' key another=''"#, 3); + // 0 ^ = 20 iter.with_checks(false); assert_eq!( @@ -1419,7 +1422,7 @@ mod xml { #[test] fn mixed_quote() { - let mut iter = Attributes::new(br#"tag a='a' b = "b" c='cc"cc' d="dd'dd""#, 3); + let mut iter = Attributes::new(r#"tag a='a' b = "b" c='cc"cc' d="dd'dd""#, 3); assert_eq!( iter.next(), @@ -1472,7 +1475,7 @@ mod html { /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { - let mut iter = Attributes::html(br#"tag key='value'"#, 3); + let mut iter = Attributes::html(r#"tag key='value'"#, 3); assert_eq!( iter.next(), @@ -1488,7 +1491,7 @@ mod html { /// Attribute have a value enclosed in double quotes #[test] fn double_quoted() { - let mut iter = Attributes::html(br#"tag key="value""#, 3); + let mut iter = Attributes::html(r#"tag key="value""#, 3); assert_eq!( iter.next(), @@ -1504,7 +1507,7 @@ mod html { /// Attribute have a value, not enclosed in quotes #[test] fn unquoted() { - let mut iter = Attributes::html(br#"tag key=value"#, 3); + let mut iter = Attributes::html(r#"tag key=value"#, 3); assert_eq!( iter.next(), @@ -1520,7 +1523,7 @@ mod html { /// Only attribute key is present #[test] fn key_only() { - let mut iter = Attributes::html(br#"tag key"#, 3); + let mut iter = Attributes::html(r#"tag key"#, 3); assert_eq!( iter.next(), @@ -1538,7 +1541,7 @@ mod html { /// that invalid attribute will be returned #[test] fn key_start_invalid() { - let mut iter = Attributes::html(br#"tag 'key'='value'"#, 3); + let mut iter = Attributes::html(r#"tag 'key'='value'"#, 3); assert_eq!( iter.next(), @@ -1556,7 +1559,7 @@ mod html { /// that invalid attribute will be returned #[test] fn key_contains_invalid() { - let mut iter = Attributes::html(br#"tag key&jey='value'"#, 3); + let mut iter = Attributes::html(r#"tag key&jey='value'"#, 3); assert_eq!( 
iter.next(), @@ -1572,8 +1575,8 @@ mod html { /// Attribute value is missing after `=` #[test] fn missed_value() { - let mut iter = Attributes::html(br#"tag key="#, 3); - // 0 ^ = 8 + let mut iter = Attributes::html(r#"tag key="#, 3); + // 0 ^ = 8 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(8)))); assert_eq!(iter.next(), None); @@ -1589,7 +1592,7 @@ mod html { /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { - let mut iter = Attributes::html(br#"tag key='value' regular='attribute'"#, 3); + let mut iter = Attributes::html(r#"tag key='value' regular='attribute'"#, 3); assert_eq!( iter.next(), @@ -1612,7 +1615,7 @@ mod html { /// Attribute have a value enclosed in double quotes #[test] fn double_quoted() { - let mut iter = Attributes::html(br#"tag key="value" regular='attribute'"#, 3); + let mut iter = Attributes::html(r#"tag key="value" regular='attribute'"#, 3); assert_eq!( iter.next(), @@ -1635,7 +1638,7 @@ mod html { /// Attribute have a value, not enclosed in quotes #[test] fn unquoted() { - let mut iter = Attributes::html(br#"tag key=value regular='attribute'"#, 3); + let mut iter = Attributes::html(r#"tag key=value regular='attribute'"#, 3); assert_eq!( iter.next(), @@ -1658,7 +1661,7 @@ mod html { /// Only attribute key is present #[test] fn key_only() { - let mut iter = Attributes::html(br#"tag key regular='attribute'"#, 3); + let mut iter = Attributes::html(r#"tag key regular='attribute'"#, 3); assert_eq!( iter.next(), @@ -1683,7 +1686,7 @@ mod html { /// that invalid attribute will be returned #[test] fn key_start_invalid() { - let mut iter = Attributes::html(br#"tag 'key'='value' regular='attribute'"#, 3); + let mut iter = Attributes::html(r#"tag 'key'='value' regular='attribute'"#, 3); assert_eq!( iter.next(), @@ -1708,7 +1711,7 @@ mod html { /// that invalid attribute will be returned #[test] fn key_contains_invalid() { - let mut iter = Attributes::html(br#"tag key&jey='value' regular='attribute'"#, 3); + 
let mut iter = Attributes::html(r#"tag key&jey='value' regular='attribute'"#, 3); assert_eq!( iter.next(), @@ -1731,7 +1734,7 @@ mod html { /// Attribute value is missing after `=` #[test] fn missed_value() { - let mut iter = Attributes::html(br#"tag key= regular='attribute'"#, 3); + let mut iter = Attributes::html(r#"tag key= regular='attribute'"#, 3); // Because we do not check validity of keys and values during parsing, // "regular='attribute'" is considered as unquoted attribute value @@ -1747,7 +1750,7 @@ mod html { //////////////////////////////////////////////////////////////////// - let mut iter = Attributes::html(br#"tag key= regular= 'attribute'"#, 3); + let mut iter = Attributes::html(r#"tag key= regular= 'attribute'"#, 3); // Because we do not check validity of keys and values during parsing, // "regular=" is considered as unquoted attribute value @@ -1772,7 +1775,7 @@ mod html { //////////////////////////////////////////////////////////////////// - let mut iter = Attributes::html(br#"tag key= regular ='attribute'"#, 3); + let mut iter = Attributes::html(r#"tag key= regular ='attribute'"#, 3); // Because we do not check validity of keys and values during parsing, // "regular" is considered as unquoted attribute value @@ -1797,8 +1800,8 @@ mod html { //////////////////////////////////////////////////////////////////// - let mut iter = Attributes::html(br#"tag key= regular = 'attribute'"#, 3); - // 0 ^ = 9 ^ = 19 ^ = 30 + let mut iter = Attributes::html(r#"tag key= regular = 'attribute'"#, 3); + // 0 ^ = 9 ^ = 19 ^ = 30 // Because we do not check validity of keys and values during parsing, // "regular" is considered as unquoted attribute value @@ -1840,7 +1843,7 @@ mod html { /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { - let mut iter = Attributes::html(br#"tag key = 'value' "#, 3); + let mut iter = Attributes::html(r#"tag key = 'value' "#, 3); assert_eq!( iter.next(), @@ -1856,7 +1859,7 @@ mod html { /// Attribute 
have a value enclosed in double quotes #[test] fn double_quoted() { - let mut iter = Attributes::html(br#"tag key = "value" "#, 3); + let mut iter = Attributes::html(r#"tag key = "value" "#, 3); assert_eq!( iter.next(), @@ -1872,7 +1875,7 @@ mod html { /// Attribute have a value, not enclosed in quotes #[test] fn unquoted() { - let mut iter = Attributes::html(br#"tag key = value "#, 3); + let mut iter = Attributes::html(r#"tag key = value "#, 3); assert_eq!( iter.next(), @@ -1888,7 +1891,7 @@ mod html { /// Only attribute key is present #[test] fn key_only() { - let mut iter = Attributes::html(br#"tag key "#, 3); + let mut iter = Attributes::html(r#"tag key "#, 3); assert_eq!( iter.next(), @@ -1906,7 +1909,7 @@ mod html { /// that invalid attribute will be returned #[test] fn key_start_invalid() { - let mut iter = Attributes::html(br#"tag 'key' = 'value' "#, 3); + let mut iter = Attributes::html(r#"tag 'key' = 'value' "#, 3); assert_eq!( iter.next(), @@ -1924,7 +1927,7 @@ mod html { /// that invalid attribute will be returned #[test] fn key_contains_invalid() { - let mut iter = Attributes::html(br#"tag key&jey = 'value' "#, 3); + let mut iter = Attributes::html(r#"tag key&jey = 'value' "#, 3); assert_eq!( iter.next(), @@ -1940,8 +1943,8 @@ mod html { /// Attribute value is missing after `=` #[test] fn missed_value() { - let mut iter = Attributes::html(br#"tag key = "#, 3); - // 0 ^ = 10 + let mut iter = Attributes::html(r#"tag key = "#, 3); + // 0 ^ = 10 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(10)))); assert_eq!(iter.next(), None); @@ -1961,8 +1964,8 @@ mod html { /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { - let mut iter = Attributes::html(br#"tag key='value' key='dup' another=''"#, 3); - // 0 ^ = 4 ^ = 16 + let mut iter = Attributes::html(r#"tag key='value' key='dup' another=''"#, 3); + // 0 ^ = 4 ^ = 16 assert_eq!( iter.next(), @@ -1986,8 +1989,8 @@ mod html { /// Attribute have a value enclosed in double 
quotes #[test] fn double_quoted() { - let mut iter = Attributes::html(br#"tag key='value' key="dup" another=''"#, 3); - // 0 ^ = 4 ^ = 16 + let mut iter = Attributes::html(r#"tag key='value' key="dup" another=''"#, 3); + // 0 ^ = 4 ^ = 16 assert_eq!( iter.next(), @@ -2011,8 +2014,8 @@ mod html { /// Attribute have a value, not enclosed in quotes #[test] fn unquoted() { - let mut iter = Attributes::html(br#"tag key='value' key=dup another=''"#, 3); - // 0 ^ = 4 ^ = 16 + let mut iter = Attributes::html(r#"tag key='value' key=dup another=''"#, 3); + // 0 ^ = 4 ^ = 16 assert_eq!( iter.next(), @@ -2036,8 +2039,8 @@ mod html { /// Only attribute key is present #[test] fn key_only() { - let mut iter = Attributes::html(br#"tag key='value' key another=''"#, 3); - // 0 ^ = 4 ^ = 16 + let mut iter = Attributes::html(r#"tag key='value' key another=''"#, 3); + // 0 ^ = 4 ^ = 16 assert_eq!( iter.next(), @@ -2067,7 +2070,7 @@ mod html { /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { - let mut iter = Attributes::html(br#"tag key='value' key='dup' another=''"#, 3); + let mut iter = Attributes::html(r#"tag key='value' key='dup' another=''"#, 3); iter.with_checks(false); assert_eq!( @@ -2098,7 +2101,7 @@ mod html { /// Attribute have a value enclosed in double quotes #[test] fn double_quoted() { - let mut iter = Attributes::html(br#"tag key='value' key="dup" another=''"#, 3); + let mut iter = Attributes::html(r#"tag key='value' key="dup" another=''"#, 3); iter.with_checks(false); assert_eq!( @@ -2129,7 +2132,7 @@ mod html { /// Attribute have a value, not enclosed in quotes #[test] fn unquoted() { - let mut iter = Attributes::html(br#"tag key='value' key=dup another=''"#, 3); + let mut iter = Attributes::html(r#"tag key='value' key=dup another=''"#, 3); iter.with_checks(false); assert_eq!( @@ -2160,7 +2163,7 @@ mod html { /// Only attribute key is present #[test] fn key_only() { - let mut iter = Attributes::html(br#"tag key='value' key another=''"#, 
3); + let mut iter = Attributes::html(r#"tag key='value' key another=''"#, 3); iter.with_checks(false); assert_eq!( @@ -2192,7 +2195,7 @@ mod html { #[test] fn mixed_quote() { - let mut iter = Attributes::html(br#"tag a='a' b = "b" c='cc"cc' d="dd'dd""#, 3); + let mut iter = Attributes::html(r#"tag a='a' b = "b" c='cc"cc' d="dd'dd""#, 3); assert_eq!( iter.next(), diff --git a/src/events/mod.rs b/src/events/mod.rs index b2672edf..6181a40b 100644 --- a/src/events/mod.rs +++ b/src/events/mod.rs @@ -29,6 +29,8 @@ //! //! See [`Writer`] for further information. //! +//! [`Reader::read_event_into`]: crate::reader::Reader::read_event_into +//! [`Reader`]: crate::reader::Reader //! [`Writer`]: crate::writer::Writer //! [`Event`]: crate::events::Event @@ -44,7 +46,7 @@ use std::str::from_utf8; use crate::errors::{Error, Result}; use crate::escape::{escape, partial_escape, unescape_with}; use crate::name::{LocalName, QName}; -use crate::reader::{Decoder, Reader}; +use crate::reader::Decoder; use crate::utils::write_cow_string; use attributes::{Attribute, Attributes}; @@ -52,6 +54,9 @@ use attributes::{Attribute, Attributes}; /// /// In well-formed XML it could contain a Byte-Order-Mark (BOM). If this event /// contains something else except BOM, the XML should be considered ill-formed. +/// +/// This is a reader-only event. If you need to write a text before the first tag, +/// use the [`BytesText`] event. #[derive(Debug, Clone, Eq, PartialEq)] pub struct BytesStartText<'a> { content: BytesText<'a>, @@ -84,9 +89,9 @@ impl<'a> BytesStartText<'a> { /// /// This method does not unescapes content, because no escape sequences can /// appeared in the BOM or in the text before the first tag. 
- pub fn decode_with_bom_removal(&self, decoder: Decoder) -> Result { + pub fn decode_with_bom_removal(&self) -> Result { //TODO: Fix lifetime issue - it should be possible to borrow string - let decoded = decoder.decode_with_bom_removal(&*self)?; + let decoded = self.content.decoder.decode_with_bom_removal(&*self)?; Ok(decoded.to_string()) } @@ -127,13 +132,9 @@ pub struct BytesStart<'a> { } impl<'a> BytesStart<'a> { - /// Creates a new `BytesStart` from the given content (name + attributes). - /// - /// # Warning - /// - /// `&content[..name_len]` is not checked to be a valid name + /// Internal constructor, used by `Reader`. Supplies data in reader's encoding #[inline] - pub fn borrowed(content: &'a [u8], name_len: usize) -> Self { + pub(crate) fn wrap(content: &'a [u8], name_len: usize) -> Self { BytesStart { buf: Cow::Borrowed(content), name_len, @@ -144,43 +145,45 @@ impl<'a> BytesStart<'a> { /// /// # Warning /// - /// `&content` is not checked to be a valid name - #[inline] - pub fn borrowed_name(name: &'a [u8]) -> BytesStart<'a> { - Self::borrowed(name, name.len()) - } - - /// Creates a new `BytesStart` from the given content (name + attributes) - /// - /// Owns its contents. + /// `name` must be a valid name. #[inline] - pub fn owned>>(content: C, name_len: usize) -> BytesStart<'static> { + pub fn new>>(name: C) -> Self { + let buf = str_cow_to_bytes(name); BytesStart { - buf: Cow::Owned(content.into()), - name_len, + name_len: buf.len(), + buf, } } - /// Creates a new `BytesStart` from the given name + /// Creates a new `BytesStart` from the given content (name + attributes). + /// + /// # Warning /// - /// Owns its contents. + /// `&content[..name_len]` must be a valid name, and the remainder of `content` + /// must be correctly-formed attributes. Neither are checked, it is possible + /// to generate invalid XML if `content` or `name_len` are incorrect. 
#[inline] - pub fn owned_name>>(name: C) -> BytesStart<'static> { - let content = name.into(); + pub fn from_content>>(content: C, name_len: usize) -> Self { BytesStart { - name_len: content.len(), - buf: Cow::Owned(content), + buf: str_cow_to_bytes(content), + name_len, } } /// Converts the event into an owned event. pub fn into_owned(self) -> BytesStart<'static> { - Self::owned(self.buf.into_owned(), self.name_len) + BytesStart { + buf: Cow::Owned(self.buf.into_owned()), + name_len: self.name_len, + } } /// Converts the event into an owned event without taking ownership of Event pub fn to_owned(&self) -> BytesStart<'static> { - Self::owned(self.buf.to_owned(), self.name_len) + BytesStart { + buf: Cow::Owned(self.buf.to_owned().into()), + name_len: self.name_len, + } } /// Converts the event into a borrowed event. Most useful when paired with [`to_end`]. @@ -208,12 +211,15 @@ impl<'a> BytesStart<'a> { /// /// [`to_end`]: Self::to_end pub fn borrow(&self) -> BytesStart { - BytesStart::borrowed(&self.buf, self.name_len) + BytesStart { + buf: Cow::Borrowed(&self.buf), + name_len: self.name_len, + } } /// Creates new paired close tag pub fn to_end(&self) -> BytesEnd { - BytesEnd::borrowed(self.name().into_inner()) + BytesEnd::wrap(self.name().into_inner().into()) } /// Gets the undecoded raw tag name, as present in the input stream. @@ -235,7 +241,7 @@ impl<'a> BytesStart<'a> { /// /// # Warning /// - /// `name` is not checked to be a valid name + /// `name` must be a valid name. pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> { let bytes = self.buf.to_mut(); bytes.splice(..self.name_len, name.iter().cloned()); @@ -294,12 +300,12 @@ impl<'a> BytesStart<'a> { /// Returns an iterator over the attributes of this tag. pub fn attributes(&self) -> Attributes { - Attributes::new(&self.buf, self.name_len) + Attributes::wrap(&self.buf, self.name_len, false) } /// Returns an iterator over the HTML-like attributes of this tag (no mandatory quotes or `=`). 
pub fn html_attributes(&self) -> Attributes { - Attributes::html(self, self.name_len) + Attributes::wrap(&self.buf, self.name_len, true) } /// Gets the undecoded raw string with the attributes of this tag as a `&[u8]`, @@ -351,6 +357,53 @@ pub struct BytesDecl<'a> { } impl<'a> BytesDecl<'a> { + /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`), + /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`) + /// attribute. + /// + /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values. + /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since + /// the double quote character is not allowed in any of the attribute values. + pub fn new( + version: &str, + encoding: Option<&str>, + standalone: Option<&str>, + ) -> BytesDecl<'static> { + // Compute length of the buffer based on supplied attributes + // ' encoding=""' => 12 + let encoding_attr_len = if let Some(xs) = encoding { + 12 + xs.len() + } else { + 0 + }; + // ' standalone=""' => 14 + let standalone_attr_len = if let Some(xs) = standalone { + 14 + xs.len() + } else { + 0 + }; + // 'xml version=""' => 14 + let mut buf = String::with_capacity(14 + encoding_attr_len + standalone_attr_len); + + buf.push_str("xml version=\""); + buf.push_str(version); + + if let Some(encoding_val) = encoding { + buf.push_str("\" encoding=\""); + buf.push_str(encoding_val); + } + + if let Some(standalone_val) = standalone { + buf.push_str("\" standalone=\""); + buf.push_str(standalone_val); + } + buf.push('"'); + + BytesDecl { + content: BytesStart::from_content(buf, 3), + } + } + /// Creates a `BytesDecl` from a `BytesStart` pub fn from_start(start: BytesStart<'a>) -> Self { Self { content: start } @@ -373,35 +426,35 @@ impl<'a> BytesDecl<'a> { /// use quick_xml::events::{BytesDecl, BytesStart}; /// /// // - /// let decl = BytesDecl::from_start(BytesStart::borrowed(b" version='1.1'", 0)); 
+ /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0)); /// assert_eq!( /// decl.version().unwrap(), /// Cow::Borrowed(b"1.1".as_ref()) /// ); /// /// // - /// let decl = BytesDecl::from_start(BytesStart::borrowed(b" version='1.0' version='1.1'", 0)); + /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.0' version='1.1'", 0)); /// assert_eq!( /// decl.version().unwrap(), /// Cow::Borrowed(b"1.0".as_ref()) /// ); /// /// // - /// let decl = BytesDecl::from_start(BytesStart::borrowed(b" encoding='utf-8'", 0)); + /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0)); /// match decl.version() { /// Err(Error::XmlDeclWithoutVersion(Some(key))) => assert_eq!(key, "encoding".to_string()), /// _ => assert!(false), /// } /// /// // - /// let decl = BytesDecl::from_start(BytesStart::borrowed(b" encoding='utf-8' version='1.1'", 0)); + /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0)); /// match decl.version() { /// Err(Error::XmlDeclWithoutVersion(Some(key))) => assert_eq!(key, "encoding".to_string()), /// _ => assert!(false), /// } /// /// // - /// let decl = BytesDecl::from_start(BytesStart::borrowed(b"", 0)); + /// let decl = BytesDecl::from_start(BytesStart::from_content("", 0)); /// match decl.version() { /// Err(Error::XmlDeclWithoutVersion(None)) => {}, /// _ => assert!(false), @@ -441,18 +494,18 @@ impl<'a> BytesDecl<'a> { /// use quick_xml::events::{BytesDecl, BytesStart}; /// /// // - /// let decl = BytesDecl::from_start(BytesStart::borrowed(b" version='1.1'", 0)); + /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0)); /// assert!(decl.encoding().is_none()); /// /// // - /// let decl = BytesDecl::from_start(BytesStart::borrowed(b" encoding='utf-8'", 0)); + /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0)); /// match decl.encoding() { /// 
Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"utf-8"), /// _ => assert!(false), /// } /// /// // - /// let decl = BytesDecl::from_start(BytesStart::borrowed(b" encoding='something_WRONG' encoding='utf-8'", 0)); + /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='something_WRONG' encoding='utf-8'", 0)); /// match decl.encoding() { /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"something_WRONG"), /// _ => assert!(false), @@ -483,18 +536,18 @@ impl<'a> BytesDecl<'a> { /// use quick_xml::events::{BytesDecl, BytesStart}; /// /// // - /// let decl = BytesDecl::from_start(BytesStart::borrowed(b" version='1.1'", 0)); + /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0)); /// assert!(decl.standalone().is_none()); /// /// // - /// let decl = BytesDecl::from_start(BytesStart::borrowed(b" standalone='yes'", 0)); + /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='yes'", 0)); /// match decl.standalone() { /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"yes"), /// _ => assert!(false), /// } /// /// // - /// let decl = BytesDecl::from_start(BytesStart::borrowed(b" standalone='something_WRONG' encoding='utf-8'", 0)); + /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='something_WRONG' encoding='utf-8'", 0)); /// match decl.standalone() { /// Some(Ok(Cow::Borrowed(flag))) => assert_eq!(flag, b"something_WRONG"), /// _ => assert!(false), @@ -509,53 +562,6 @@ impl<'a> BytesDecl<'a> { .transpose() } - /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`), - /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`) - /// attribute. - /// - /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values. - /// The caller is responsible for escaping attribute values. 
Shouldn't usually be relevant since - /// the double quote character is not allowed in any of the attribute values. - pub fn new( - version: &[u8], - encoding: Option<&[u8]>, - standalone: Option<&[u8]>, - ) -> BytesDecl<'static> { - // Compute length of the buffer based on supplied attributes - // ' encoding=""' => 12 - let encoding_attr_len = if let Some(xs) = encoding { - 12 + xs.len() - } else { - 0 - }; - // ' standalone=""' => 14 - let standalone_attr_len = if let Some(xs) = standalone { - 14 + xs.len() - } else { - 0 - }; - // 'xml version=""' => 14 - let mut buf = Vec::with_capacity(14 + encoding_attr_len + standalone_attr_len); - - buf.extend_from_slice(b"xml version=\""); - buf.extend_from_slice(version); - - if let Some(encoding_val) = encoding { - buf.extend_from_slice(b"\" encoding=\""); - buf.extend_from_slice(encoding_val); - } - - if let Some(standalone_val) = standalone { - buf.extend_from_slice(b"\" standalone=\""); - buf.extend_from_slice(standalone_val); - } - buf.push(b'"'); - - BytesDecl { - content: BytesStart::owned(buf, 3), - } - } - /// Gets the decoder struct #[cfg(feature = "encoding")] pub fn encoder(&self) -> Option<&'static Encoding> { @@ -597,20 +603,20 @@ pub struct BytesEnd<'a> { } impl<'a> BytesEnd<'a> { - /// Creates a new `BytesEnd` borrowing a slice + /// Internal constructor, used by `Reader`. Supplies data in reader's encoding #[inline] - pub fn borrowed(name: &'a [u8]) -> BytesEnd<'a> { - BytesEnd { - name: Cow::Borrowed(name), - } + pub(crate) fn wrap(name: Cow<'a, [u8]>) -> Self { + BytesEnd { name } } - /// Creates a new `BytesEnd` owning its name + /// Creates a new `BytesEnd` borrowing a slice. + /// + /// # Warning + /// + /// `name` must be a valid name. #[inline] - pub fn owned(name: Vec) -> BytesEnd<'static> { - BytesEnd { - name: Cow::Owned(name), - } + pub fn new>>(name: C) -> Self { + Self::wrap(str_cow_to_bytes(name)) } /// Converts the event into an owned event. 
@@ -670,36 +676,31 @@ pub struct BytesText<'a> { /// document encoding when event comes from the reader and should be in the /// document encoding when event passed to the writer content: Cow<'a, [u8]>, + /// Encoding in which the `content` is stored inside the event + decoder: Decoder, } impl<'a> BytesText<'a> { - /// Creates a new `BytesText` from an escaped byte sequence. + /// Creates a new `BytesText` from an escaped byte sequence in the specified encoding. #[inline] - pub fn from_escaped>>(content: C) -> Self { + pub(crate) fn wrap>>(content: C, decoder: Decoder) -> Self { Self { content: content.into(), + decoder, } } /// Creates a new `BytesText` from an escaped string. #[inline] - pub fn from_escaped_str>>(content: C) -> Self { - Self::from_escaped(match content.into() { - Cow::Owned(o) => Cow::Owned(o.into_bytes()), - Cow::Borrowed(b) => Cow::Borrowed(b.as_bytes()), - }) + pub fn from_escaped>>(content: C) -> Self { + Self::wrap(str_cow_to_bytes(content), Decoder::utf8()) } /// Creates a new `BytesText` from a string. The string is expected not to /// be escaped. #[inline] - pub fn from_plain_str(content: &'a str) -> Self { - Self { - content: match escape(content) { - Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()), - Cow::Owned(s) => Cow::Owned(s.into_bytes()), - }, - } + pub fn new(content: &'a str) -> Self { + Self::from_escaped(escape(content)) } /// Ensures that all data is owned to extend the object's lifetime if @@ -708,6 +709,7 @@ impl<'a> BytesText<'a> { pub fn into_owned(self) -> BytesText<'static> { BytesText { content: self.content.into_owned().into(), + decoder: self.decoder, } } @@ -722,62 +724,27 @@ impl<'a> BytesText<'a> { pub fn borrow(&self) -> BytesText { BytesText { content: Cow::Borrowed(&self.content), + decoder: self.decoder, } } - /// Decodes using UTF-8 then unescapes the content of the event. 
- /// - /// Searches for '&' into content and try to escape the coded character if possible - /// returns Malformed error with index within element if '&' is not followed by ';' - /// - /// See also [`unescape_with()`](Self::unescape_with) - /// - /// This method is available only if `encoding` feature is **not** enabled. - #[cfg(any(doc, not(feature = "encoding")))] - pub fn unescape(&self) -> Result> { - self.unescape_with(|_| None) - } - - /// Decodes using UTF-8 then unescapes the content of the event with custom entities. - /// - /// Searches for '&' into content and try to escape the coded character if possible - /// returns Malformed error with index within element if '&' is not followed by ';' - /// A fallback resolver for additional custom entities can be provided via `resolve_entity`. - /// - /// See also [`unescape()`](Self::unescape) - /// - /// This method is available only if `encoding` feature is **not** enabled. - #[cfg(any(doc, not(feature = "encoding")))] - pub fn unescape_with<'entity>( - &self, - resolve_entity: impl Fn(&str) -> Option<&'entity str>, - ) -> Result> { - // from_utf8 should never fail because content is always UTF-8 encoded - Ok(unescape_with(from_utf8(&self.content)?, resolve_entity)?) - } - /// Decodes then unescapes the content of the event. /// /// This will allocate if the value contains any escape sequences or in /// non-UTF-8 encoding. - pub fn decode_and_unescape(&self, reader: &Reader) -> Result> { - self.decode_and_unescape_with(reader, |_| None) + pub fn unescape(&self) -> Result> { + self.unescape_with(|_| None) } /// Decodes then unescapes the content of the event with custom entities. /// /// This will allocate if the value contains any escape sequences or in /// non-UTF-8 encoding. - /// - /// # Pre-condition - /// - /// The implementation of `resolve_entity` is expected to operate over UTF-8 inputs. 
- pub fn decode_and_unescape_with<'entity, B>( + pub fn unescape_with<'entity>( &self, - reader: &Reader, resolve_entity: impl Fn(&str) -> Option<&'entity str>, ) -> Result> { - let decoded = reader.decoder().decode(&*self)?; + let decoded = self.decoder.decode(&*self)?; match unescape_with(&decoded, resolve_entity)? { // Because result is borrowed, no replacements was done and we can use original string @@ -786,21 +753,16 @@ impl<'a> BytesText<'a> { } } - /// Gets escaped content. - pub fn escape(&self) -> &[u8] { - self.content.as_ref() - } - /// Gets content of this text buffer in the specified encoding and optionally - /// unescapes it. Unlike [`Self::decode_and_unescape`] & Co., the lifetime + /// unescapes it. Unlike [`Self::unescape`] & Co., the lifetime /// of the returned `Cow` is bound to the original buffer / input #[cfg(feature = "serialize")] - pub(crate) fn decode(&self, decoder: Decoder, unescape: bool) -> Result> { + pub(crate) fn decode(&self, unescape: bool) -> Result> { //TODO: too many copies, can be optimized let text = match &self.content { - Cow::Borrowed(bytes) => decoder.decode(bytes)?, + Cow::Borrowed(bytes) => self.decoder.decode(bytes)?, // Convert to owned, because otherwise Cow will be bound with wrong lifetime - Cow::Owned(bytes) => decoder.decode(bytes)?.into_owned().into(), + Cow::Owned(bytes) => self.decoder.decode(bytes)?.into_owned().into(), }; let text = if unescape { //FIXME: need to take into account entities defined in the document @@ -845,21 +807,28 @@ impl<'a> From> for BytesText<'a> { #[derive(Clone, Eq, PartialEq)] pub struct BytesCData<'a> { content: Cow<'a, [u8]>, + /// Encoding in which the `content` is stored inside the event + decoder: Decoder, } impl<'a> BytesCData<'a> { - /// Creates a new `BytesCData` from a byte sequence. + /// Creates a new `BytesCData` from a byte sequence in the specified encoding. 
#[inline] - pub fn new>>(content: C) -> Self { + pub(crate) fn wrap>>(content: C, decoder: Decoder) -> Self { Self { content: content.into(), + decoder, } } - /// Creates a new `BytesCData` from a string + /// Creates a new `BytesCData` from a string. + /// + /// # Warning + /// + /// `content` must not contain the `]]>` sequence. #[inline] - pub fn from_str(content: &'a str) -> Self { - Self::new(content.as_bytes()) + pub fn new>>(content: C) -> Self { + Self::wrap(str_cow_to_bytes(content), Decoder::utf8()) } /// Ensures that all data is owned to extend the object's lifetime if @@ -868,6 +837,7 @@ impl<'a> BytesCData<'a> { pub fn into_owned(self) -> BytesCData<'static> { BytesCData { content: self.content.into_owned().into(), + decoder: self.decoder, } } @@ -882,6 +852,7 @@ impl<'a> BytesCData<'a> { pub fn borrow(&self) -> BytesCData { BytesCData { content: Cow::Borrowed(&self.content), + decoder: self.decoder, } } @@ -897,13 +868,16 @@ impl<'a> BytesCData<'a> { /// | `&` | `&` /// | `'` | `'` /// | `"` | `"` - pub fn escape(self, decoder: Decoder) -> Result> { - let decoded = self.decode(decoder)?; - Ok(BytesText::from_escaped(match escape(&decoded) { - // Because result is borrowed, no replacements was done and we can use original content - Cow::Borrowed(_) => self.content, - Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()), - })) + pub fn escape(self) -> Result> { + let decoded = self.decode()?; + Ok(BytesText::wrap( + match escape(&decoded) { + // Because result is borrowed, no replacements was done and we can use original content + Cow::Borrowed(_) => self.content, + Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()), + }, + Decoder::utf8(), + )) } /// Converts this CDATA content to an escaped version, that can be written @@ -919,21 +893,24 @@ impl<'a> BytesCData<'a> { /// | `<` | `<` /// | `>` | `>` /// | `&` | `&` - pub fn partial_escape(self, decoder: Decoder) -> Result> { - let decoded = self.decode(decoder)?; - 
Ok(BytesText::from_escaped(match partial_escape(&decoded) { - // Because result is borrowed, no replacements was done and we can use original content - Cow::Borrowed(_) => self.content, - Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()), - })) + pub fn partial_escape(self) -> Result> { + let decoded = self.decode()?; + Ok(BytesText::wrap( + match partial_escape(&decoded) { + // Because result is borrowed, no replacements was done and we can use original content + Cow::Borrowed(_) => self.content, + Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()), + }, + Decoder::utf8(), + )) } /// Gets content of this text buffer in the specified encoding - pub(crate) fn decode(&self, decoder: Decoder) -> Result> { + pub(crate) fn decode(&self) -> Result> { Ok(match &self.content { - Cow::Borrowed(bytes) => decoder.decode(bytes)?, + Cow::Borrowed(bytes) => self.decoder.decode(bytes)?, // Convert to owned, because otherwise Cow will be bound with wrong lifetime - Cow::Owned(bytes) => decoder.decode(bytes)?.into_owned().into(), + Cow::Owned(bytes) => self.decoder.decode(bytes)?.into_owned().into(), }) } } @@ -957,6 +934,8 @@ impl<'a> Deref for BytesCData<'a> { //////////////////////////////////////////////////////////////////////////////////////////////////// /// Event emitted by [`Reader::read_event_into`]. +/// +/// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into #[derive(Clone, Debug, Eq, PartialEq)] pub enum Event<'a> { /// Text that appeared before the first opening tag or an [XML declaration]. @@ -1005,6 +984,7 @@ pub enum Event<'a> { /// /// [XML declaration]: Event::Decl /// [std]: https://www.w3.org/TR/xml11/#NT-document + /// [`Reader`]: crate::reader::Reader /// [`Writer`]: crate::writer::Writer StartText(BytesStartText<'a>), /// Start tag (with attributes) ``. 
@@ -1094,6 +1074,14 @@ impl<'a> AsRef> for Event<'a> { //////////////////////////////////////////////////////////////////////////////////////////////////// +#[inline] +fn str_cow_to_bytes<'a, C: Into>>(content: C) -> Cow<'a, [u8]> { + match content.into() { + Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()), + Cow::Owned(s) => Cow::Owned(s.into_bytes()), + } +} + #[cfg(test)] mod test { use super::*; @@ -1101,14 +1089,14 @@ mod test { #[test] fn bytestart_create() { - let b = BytesStart::owned_name("test"); + let b = BytesStart::new("test"); assert_eq!(b.len(), 4); assert_eq!(b.name(), QName(b"test")); } #[test] fn bytestart_set_name() { - let mut b = BytesStart::owned_name("test"); + let mut b = BytesStart::new("test"); assert_eq!(b.len(), 4); assert_eq!(b.name(), QName(b"test")); assert_eq!(b.attributes_raw(), b""); @@ -1122,7 +1110,7 @@ mod test { #[test] fn bytestart_clear_attributes() { - let mut b = BytesStart::owned_name("test"); + let mut b = BytesStart::new("test"); b.push_attribute(("x", "y\"z")); b.push_attribute(("x", "y\"z")); b.clear_attributes(); diff --git a/src/name.rs b/src/name.rs index 64891d70..ea304e02 100644 --- a/src/name.rs +++ b/src/name.rs @@ -574,11 +574,14 @@ mod namespaces { let mut resolver = NamespaceResolver::default(); let mut buffer = Vec::new(); - resolver.push(&BytesStart::borrowed(b" xmlns='default'", 0), &mut buffer); + resolver.push( + &BytesStart::from_content(" xmlns='default'", 0), + &mut buffer, + ); assert_eq!(buffer, b"default"); // Check that tags without namespaces does not change result - resolver.push(&BytesStart::borrowed(b"", 0), &mut buffer); + resolver.push(&BytesStart::from_content("", 0), &mut buffer); assert_eq!(buffer, b"default"); resolver.pop(&mut buffer); @@ -604,8 +607,8 @@ mod namespaces { let mut resolver = NamespaceResolver::default(); let mut buffer = Vec::new(); - resolver.push(&BytesStart::borrowed(b" xmlns='old'", 0), &mut buffer); - resolver.push(&BytesStart::borrowed(b" xmlns='new'", 0), &mut 
buffer); + resolver.push(&BytesStart::from_content(" xmlns='old'", 0), &mut buffer); + resolver.push(&BytesStart::from_content(" xmlns='new'", 0), &mut buffer); assert_eq!(buffer, b"oldnew"); assert_eq!( @@ -643,8 +646,8 @@ mod namespaces { let mut resolver = NamespaceResolver::default(); let mut buffer = Vec::new(); - resolver.push(&BytesStart::borrowed(b" xmlns='old'", 0), &mut buffer); - resolver.push(&BytesStart::borrowed(b" xmlns=''", 0), &mut buffer); + resolver.push(&BytesStart::from_content(" xmlns='old'", 0), &mut buffer); + resolver.push(&BytesStart::from_content(" xmlns=''", 0), &mut buffer); assert_eq!(buffer, b"old"); assert_eq!( @@ -684,11 +687,14 @@ mod namespaces { let mut resolver = NamespaceResolver::default(); let mut buffer = Vec::new(); - resolver.push(&BytesStart::borrowed(b" xmlns:p='default'", 0), &mut buffer); + resolver.push( + &BytesStart::from_content(" xmlns:p='default'", 0), + &mut buffer, + ); assert_eq!(buffer, b"pdefault"); // Check that tags without namespaces does not change result - resolver.push(&BytesStart::borrowed(b"", 0), &mut buffer); + resolver.push(&BytesStart::from_content("", 0), &mut buffer); assert_eq!(buffer, b"pdefault"); resolver.pop(&mut buffer); @@ -714,8 +720,8 @@ mod namespaces { let mut resolver = NamespaceResolver::default(); let mut buffer = Vec::new(); - resolver.push(&BytesStart::borrowed(b" xmlns:p='old'", 0), &mut buffer); - resolver.push(&BytesStart::borrowed(b" xmlns:p='new'", 0), &mut buffer); + resolver.push(&BytesStart::from_content(" xmlns:p='old'", 0), &mut buffer); + resolver.push(&BytesStart::from_content(" xmlns:p='new'", 0), &mut buffer); assert_eq!(buffer, b"poldpnew"); assert_eq!( @@ -753,8 +759,8 @@ mod namespaces { let mut resolver = NamespaceResolver::default(); let mut buffer = Vec::new(); - resolver.push(&BytesStart::borrowed(b" xmlns:p='old'", 0), &mut buffer); - resolver.push(&BytesStart::borrowed(b" xmlns:p=''", 0), &mut buffer); + resolver.push(&BytesStart::from_content(" 
xmlns:p='old'", 0), &mut buffer); + resolver.push(&BytesStart::from_content(" xmlns:p=''", 0), &mut buffer); assert_eq!(buffer, b"poldp"); assert_eq!( diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs index 2b22c6a9..c86f5ace 100644 --- a/src/reader/buffered_reader.rs +++ b/src/reader/buffered_reader.rs @@ -48,7 +48,7 @@ impl Reader { /// loop { /// match reader.read_event_into(&mut buf) { /// Ok(Event::Start(ref e)) => count += 1, - /// Ok(Event::Text(e)) => txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()), + /// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()), /// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e), /// Ok(Event::Eof) => break, /// _ => (), @@ -124,7 +124,7 @@ impl Reader { /// reader.trim_text(true); /// let mut buf = Vec::new(); /// - /// let start = BytesStart::borrowed_name(b"outer"); + /// let start = BytesStart::new("outer"); /// let end = start.to_end().into_owned(); /// /// // First, we read a start event... 
@@ -207,7 +207,7 @@ impl Reader { let s = match self.read_event_into(buf) { Err(e) => return Err(e), - Ok(Event::Text(e)) => e.decode_and_unescape(self)?.into_owned(), + Ok(Event::Text(e)) => e.unescape()?.into_owned(), Ok(Event::End(e)) if e.name() == end => return Ok("".to_string()), Ok(Event::Eof) => return Err(Error::UnexpectedEof("Text".to_string())), _ => return Err(Error::TextNotFound), diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 0ad1d90a..e4c8f342 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -269,7 +269,7 @@ impl EncodingRef { /// _ => (), /// } /// } -/// Ok(Event::Text(e)) => txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()), +/// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()), /// /// // There are several other `Event`s we do not consider here /// _ => (), @@ -514,9 +514,9 @@ impl Reader { }; Ok(if first { - Event::StartText(BytesText::from_escaped(content).into()) + Event::StartText(BytesText::wrap(content, self.decoder()).into()) } else { - Event::Text(BytesText::from_escaped(content)) + Event::Text(BytesText::wrap(content, self.decoder())) }) } Ok(None) => Ok(Event::Eof), @@ -587,10 +587,13 @@ impl Reader { return Err(Error::UnexpectedToken("--".to_string())); } } - Ok(Event::Comment(BytesText::from_escaped(&buf[3..len - 2]))) + Ok(Event::Comment(BytesText::wrap( + &buf[3..len - 2], + self.decoder(), + ))) } BangType::CData if uncased_starts_with(buf, b"![CDATA[") => { - Ok(Event::CData(BytesCData::new(&buf[8..]))) + Ok(Event::CData(BytesCData::wrap(&buf[8..], self.decoder()))) } BangType::DocType if uncased_starts_with(buf, b"!DOCTYPE") => { let start = buf[8..] 
@@ -598,7 +601,10 @@ impl Reader { .position(|b| !is_whitespace(*b)) .unwrap_or_else(|| len - 8); debug_assert!(start < len - 8, "DocType must have a name"); - Ok(Event::DocType(BytesText::from_escaped(&buf[8 + start..]))) + Ok(Event::DocType(BytesText::wrap( + &buf[8 + start..], + self.decoder(), + ))) } _ => Err(bang_type.to_err()), } @@ -635,13 +641,13 @@ impl Reader { mismatch_err(expected, name, &mut self.buf_position) } else { self.opened_buffer.truncate(start); - Ok(Event::End(BytesEnd::borrowed(name))) + Ok(Event::End(BytesEnd::wrap(name.into()))) } } None => mismatch_err(b"", &buf[1..], &mut self.buf_position), } } else { - Ok(Event::End(BytesEnd::borrowed(name))) + Ok(Event::End(BytesEnd::wrap(name.into()))) } } @@ -651,7 +657,7 @@ impl Reader { let len = buf.len(); if len > 2 && buf[len - 1] == b'?' { if len > 5 && &buf[1..4] == b"xml" && is_whitespace(buf[4]) { - let event = BytesDecl::from_start(BytesStart::borrowed(&buf[1..len - 1], 3)); + let event = BytesDecl::from_start(BytesStart::wrap(&buf[1..len - 1], 3)); // Try getting encoding from the declaration event #[cfg(feature = "encoding")] @@ -663,7 +669,7 @@ impl Reader { Ok(Event::Decl(event)) } else { - Ok(Event::PI(BytesText::from_escaped(&buf[1..len - 1]))) + Ok(Event::PI(BytesText::wrap(&buf[1..len - 1], self.decoder()))) } } else { self.buf_position -= len; @@ -677,7 +683,7 @@ impl Reader { let name = self .opened_buffer .split_off(self.opened_starts.pop().unwrap()); - Ok(Event::End(BytesEnd::owned(name))) + Ok(Event::End(BytesEnd::wrap(name.into()))) } /// reads `BytesElement` starting with any character except `/`, `!` or ``?` @@ -692,16 +698,16 @@ impl Reader { self.tag_state = TagState::Empty; self.opened_starts.push(self.opened_buffer.len()); self.opened_buffer.extend(&buf[..end]); - Ok(Event::Start(BytesStart::borrowed(&buf[..len - 1], end))) + Ok(Event::Start(BytesStart::wrap(&buf[..len - 1], end))) } else { - Ok(Event::Empty(BytesStart::borrowed(&buf[..len - 1], end))) + 
Ok(Event::Empty(BytesStart::wrap(&buf[..len - 1], end))) } } else { if self.check_end_names { self.opened_starts.push(self.opened_buffer.len()); self.opened_buffer.extend(&buf[..name_end]); } - Ok(Event::Start(BytesStart::borrowed(buf, name_end))) + Ok(Event::Start(BytesStart::wrap(buf, name_end))) } } } @@ -935,7 +941,7 @@ pub(crate) fn is_whitespace(b: u8) -> bool { /// any XML declarations are ignored. /// /// [utf16]: https://github.com/tafia/quick-xml/issues/158 -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct Decoder { #[cfg(feature = "encoding")] encoding: &'static Encoding, @@ -1025,9 +1031,6 @@ impl Decoder { } } -/// This implementation is required for tests of other parts of the library -#[cfg(test)] -#[cfg(feature = "serialize")] impl Decoder { pub(crate) fn utf8() -> Self { Decoder { @@ -1036,7 +1039,7 @@ impl Decoder { } } - #[cfg(feature = "encoding")] + #[cfg(all(test, feature = "encoding", feature = "serialize"))] pub(crate) fn utf16() -> Self { Decoder { encoding: UTF_16LE } } @@ -1841,7 +1844,7 @@ mod test { assert_eq!( reader.read_event_impl($buf).unwrap(), - Event::StartText(BytesText::from_escaped_str("bom").into()) + Event::StartText(BytesText::from_escaped("bom").into()) ); } @@ -1851,7 +1854,7 @@ mod test { assert_eq!( reader.read_event_impl($buf).unwrap(), - Event::Decl(BytesDecl::from_start(BytesStart::borrowed(b"xml ", 3))) + Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3))) ); } @@ -1861,7 +1864,7 @@ mod test { assert_eq!( reader.read_event_impl($buf).unwrap(), - Event::DocType(BytesText::from_escaped_str("x")) + Event::DocType(BytesText::from_escaped("x")) ); } @@ -1871,7 +1874,7 @@ mod test { assert_eq!( reader.read_event_impl($buf).unwrap(), - Event::PI(BytesText::from_escaped_str("xml-stylesheet")) + Event::PI(BytesText::from_escaped("xml-stylesheet")) ); } @@ -1881,7 +1884,7 @@ mod test { assert_eq!( reader.read_event_impl($buf).unwrap(), - 
Event::Start(BytesStart::borrowed_name(b"tag")) + Event::Start(BytesStart::new("tag")) ); } @@ -1894,7 +1897,7 @@ mod test { assert_eq!( reader.read_event_impl($buf).unwrap(), - Event::End(BytesEnd::borrowed(b"tag")) + Event::End(BytesEnd::new("tag")) ); } @@ -1904,7 +1907,7 @@ mod test { assert_eq!( reader.read_event_impl($buf).unwrap(), - Event::Empty(BytesStart::borrowed_name(b"tag")) + Event::Empty(BytesStart::new("tag")) ); } @@ -1915,12 +1918,12 @@ mod test { assert_eq!( reader.read_event_impl($buf).unwrap(), - Event::Empty(BytesStart::borrowed_name(b"tag")) + Event::Empty(BytesStart::new("tag")) ); assert_eq!( reader.read_event_impl($buf).unwrap(), - Event::Text(BytesText::from_escaped_str("text")) + Event::Text(BytesText::from_escaped("text")) ); } @@ -1930,7 +1933,7 @@ mod test { assert_eq!( reader.read_event_impl($buf).unwrap(), - Event::CData(BytesCData::from_str("")) + Event::CData(BytesCData::new("")) ); } @@ -1940,7 +1943,7 @@ mod test { assert_eq!( reader.read_event_impl($buf).unwrap(), - Event::Comment(BytesText::from_escaped_str("")) + Event::Comment(BytesText::from_escaped("")) ); } diff --git a/src/reader/ns_reader.rs b/src/reader/ns_reader.rs index 0ecb4eea..868e88f4 100644 --- a/src/reader/ns_reader.rs +++ b/src/reader/ns_reader.rs @@ -329,7 +329,7 @@ impl NsReader { /// } /// } /// Event::Text(e) => { - /// txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()) + /// txt.push(e.unescape().unwrap().into_owned()) /// } /// Event::Eof => break, /// _ => (), @@ -388,7 +388,7 @@ impl NsReader { /// (_, Event::Start(_)) => unreachable!(), /// /// (_, Event::Text(e)) => { - /// txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()) + /// txt.push(e.unescape().unwrap().into_owned()) /// } /// (_, Event::Eof) => break, /// _ => (), @@ -472,7 +472,7 @@ impl NsReader { /// let mut buf = Vec::new(); /// /// let ns = Namespace(b"namespace 1"); - /// let start = BytesStart::borrowed(br#"outer xmlns="namespace 1""#, 5); + /// let start = 
BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5); /// let end = start.to_end().into_owned(); /// /// // First, we read a start event... @@ -566,7 +566,7 @@ impl<'i> NsReader<&'i [u8]> { /// } /// } /// Event::Text(e) => { - /// txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()) + /// txt.push(e.unescape().unwrap().into_owned()) /// } /// Event::Eof => break, /// _ => (), @@ -624,7 +624,7 @@ impl<'i> NsReader<&'i [u8]> { /// (_, Event::Start(_)) => unreachable!(), /// /// (_, Event::Text(e)) => { - /// txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()) + /// txt.push(e.unescape().unwrap().into_owned()) /// } /// (_, Event::Eof) => break, /// _ => (), @@ -693,7 +693,7 @@ impl<'i> NsReader<&'i [u8]> { /// reader.trim_text(true); /// /// let ns = Namespace(b"namespace 1"); - /// let start = BytesStart::borrowed(br#"outer xmlns="namespace 1""#, 5); + /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5); /// let end = start.to_end().into_owned(); /// /// // First, we read a start event... diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs index abbb04f6..0a71f050 100644 --- a/src/reader/slice_reader.rs +++ b/src/reader/slice_reader.rs @@ -93,7 +93,7 @@ impl<'a> Reader<&'a [u8]> { /// "#); /// reader.trim_text(true); /// - /// let start = BytesStart::borrowed_name(b"outer"); + /// let start = BytesStart::new("outer"); /// let end = start.to_end().into_owned(); /// /// // First, we read a start event... 
diff --git a/src/se/mod.rs b/src/se/mod.rs index 896f0aac..b05c9881 100644 --- a/src/se/mod.rs +++ b/src/se/mod.rs @@ -102,9 +102,9 @@ impl<'r, W: Write> Serializer<'r, W> { ) -> Result<(), DeError> { let value = value.to_string(); let event = if escaped { - BytesText::from_escaped_str(&value) + BytesText::from_escaped(&value) } else { - BytesText::from_plain_str(&value) + BytesText::new(&value) }; self.writer.write_event(Event::Text(event))?; Ok(()) @@ -113,7 +113,7 @@ impl<'r, W: Write> Serializer<'r, W> { /// Writes self-closed tag `` into inner writer fn write_self_closed(&mut self, tag_name: &str) -> Result<(), DeError> { self.writer - .write_event(Event::Empty(BytesStart::borrowed_name(tag_name.as_bytes())))?; + .write_event(Event::Empty(BytesStart::new(tag_name)))?; Ok(()) } @@ -124,10 +124,10 @@ impl<'r, W: Write> Serializer<'r, W> { value: &T, ) -> Result<(), DeError> { self.writer - .write_event(Event::Start(BytesStart::borrowed_name(tag_name.as_bytes())))?; + .write_event(Event::Start(BytesStart::new(tag_name)))?; value.serialize(&mut *self)?; self.writer - .write_event(Event::End(BytesEnd::borrowed(tag_name.as_bytes())))?; + .write_event(Event::End(BytesEnd::new(tag_name)))?; Ok(()) } } @@ -306,7 +306,7 @@ impl<'r, 'w, W: Write> ser::Serializer for &'w mut Serializer<'r, W> { if let Some(tag) = self.root_tag { // TODO: Write self-closed tag if map is empty self.writer - .write_event(Event::Start(BytesStart::borrowed_name(tag.as_bytes())))?; + .write_event(Event::Start(BytesStart::new(tag)))?; } Ok(Map::new(self)) } diff --git a/src/se/var.rs b/src/se/var.rs index 4ce0edeb..c8e062eb 100644 --- a/src/se/var.rs +++ b/src/se/var.rs @@ -54,7 +54,7 @@ where if let Some(tag) = self.parent.root_tag { self.parent .writer - .write_event(Event::End(BytesEnd::borrowed(tag.as_bytes())))?; + .write_event(Event::End(BytesEnd::new(tag)))?; } Ok(()) } @@ -100,10 +100,9 @@ where { /// Create a new `Struct` pub fn new(parent: &'w mut Serializer<'r, W>, name: &'r str) -> 
Self { - let name = name.as_bytes(); Struct { parent, - attrs: BytesStart::borrowed_name(name), + attrs: BytesStart::new(name), children: Vec::new(), buffer: Vec::new(), } diff --git a/src/writer.rs b/src/writer.rs index f353ef7b..e42c231d 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -26,7 +26,7 @@ use std::io::Write; /// /// // crates a new element ... alternatively we could reuse `e` by calling /// // `e.into_owned()` -/// let mut elem = BytesStart::owned_name(b"my_elem".to_vec()); +/// let mut elem = BytesStart::new("my_elem"); /// /// // collect existing attributes /// elem.extend_attributes(e.attributes().map(|attr| attr.unwrap())); @@ -38,7 +38,7 @@ use std::io::Write; /// assert!(writer.write_event(Event::Start(elem)).is_ok()); /// }, /// Ok(Event::End(e)) if e.name().as_ref() == b"this_tag" => { -/// assert!(writer.write_event(Event::End(BytesEnd::borrowed(b"my_elem"))).is_ok()); +/// assert!(writer.write_event(Event::End(BytesEnd::new("my_elem"))).is_ok()); /// }, /// Ok(Event::Eof) => break, /// // we can either move or borrow the event to write, depending on your use-case @@ -106,7 +106,7 @@ impl Writer { Event::Empty(ref e) => self.write_wrapped(b"<", e, b"/>"), Event::Text(ref e) => { next_should_line_break = false; - self.write(&e.escape()) + self.write(&e) } Event::Comment(ref e) => self.write_wrapped(b""), Event::CData(ref e) => { @@ -193,7 +193,7 @@ impl Writer { /// // writes with some text inside /// writer.create_element("tag") /// .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()) // or add attributes from an iterator - /// .write_text_content(BytesText::from_plain_str("with some text inside"))?; + /// .write_text_content(BytesText::new("with some text inside"))?; /// /// // writes appleorange /// writer.create_element("tag") @@ -203,7 +203,7 @@ impl Writer { /// writer /// .create_element("fruit") /// .with_attribute(("quantity", quant.to_string().as_str())) - /// 
.write_text_content(BytesText::from_plain_str(item))?; + /// .write_text_content(BytesText::new(item))?; /// } /// Ok(()) /// })?; @@ -213,11 +213,11 @@ impl Writer { #[must_use] pub fn create_element<'a, N>(&'a mut self, name: &'a N) -> ElementWriter where - N: 'a + AsRef<[u8]> + ?Sized, + N: 'a + AsRef + ?Sized, { ElementWriter { writer: self, - start_tag: BytesStart::borrowed_name(name.as_ref()), + start_tag: BytesStart::new(name.as_ref()), } } } @@ -347,7 +347,7 @@ mod indentation { let mut buffer = Vec::new(); let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); - let tag = BytesStart::borrowed_name(b"self-closed") + let tag = BytesStart::new("self-closed") .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); writer .write_event(Event::Empty(tag)) @@ -364,12 +364,11 @@ mod indentation { let mut buffer = Vec::new(); let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); - let name = b"paired"; - let start = BytesStart::borrowed_name(name) + let start = BytesStart::new("paired") .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); - let end = BytesEnd::borrowed(name); + let end = start.to_end(); writer - .write_event(Event::Start(start)) + .write_event(Event::Start(start.clone())) .expect("write start tag failed"); writer .write_event(Event::End(end)) @@ -387,14 +386,13 @@ mod indentation { let mut buffer = Vec::new(); let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); - let name = b"paired"; - let start = BytesStart::borrowed_name(name) + let start = BytesStart::new("paired") .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); - let end = BytesEnd::borrowed(name); - let inner = BytesStart::borrowed_name(b"inner"); + let end = start.to_end(); + let inner = BytesStart::new("inner"); writer - .write_event(Event::Start(start)) + .write_event(Event::Start(start.clone())) .expect("write start tag failed"); writer .write_event(Event::Empty(inner)) @@ -416,14 
+414,13 @@ mod indentation { let mut buffer = Vec::new(); let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); - let name = b"paired"; - let start = BytesStart::borrowed_name(name) + let start = BytesStart::new("paired") .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); - let end = BytesEnd::borrowed(name); - let text = BytesText::from_plain_str("text"); + let end = start.to_end(); + let text = BytesText::new("text"); writer - .write_event(Event::Start(start)) + .write_event(Event::Start(start.clone())) .expect("write start tag failed"); writer .write_event(Event::Text(text)) @@ -443,15 +440,14 @@ mod indentation { let mut buffer = Vec::new(); let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); - let name = b"paired"; - let start = BytesStart::borrowed_name(name) + let start = BytesStart::new("paired") .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); - let end = BytesEnd::borrowed(name); - let text = BytesText::from_plain_str("text"); - let inner = BytesStart::borrowed_name(b"inner"); + let end = start.to_end(); + let text = BytesText::new("text"); + let inner = BytesStart::new("inner"); writer - .write_event(Event::Start(start)) + .write_event(Event::Start(start.clone())) .expect("write start tag failed"); writer .write_event(Event::Text(text)) @@ -475,17 +471,16 @@ mod indentation { let mut buffer = Vec::new(); let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); - let name = b"paired"; - let start = BytesStart::borrowed_name(name) + let start = BytesStart::new("paired") .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); - let end = BytesEnd::borrowed(name); - let inner = BytesStart::borrowed_name(b"inner"); + let end = start.to_end(); + let inner = BytesStart::new("inner"); writer .write_event(Event::Start(start.clone())) .expect("write start 1 tag failed"); writer - .write_event(Event::Start(start)) + .write_event(Event::Start(start.clone())) 
.expect("write start 2 tag failed"); writer .write_event(Event::Empty(inner)) @@ -512,7 +507,7 @@ mod indentation { let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); writer - .create_element(b"empty") + .create_element("empty") .with_attribute(("attr1", "value1")) .with_attribute(("attr2", "value2")) .write_empty() @@ -533,7 +528,7 @@ mod indentation { .create_element("paired") .with_attribute(("attr1", "value1")) .with_attribute(("attr2", "value2")) - .write_text_content(BytesText::from_plain_str("text")) + .write_text_content(BytesText::new("text")) .expect("failure"); assert_eq!( @@ -557,7 +552,7 @@ mod indentation { writer .create_element("fruit") .with_attribute(("quantity", quant.to_string().as_str())) - .write_text_content(BytesText::from_plain_str(item))?; + .write_text_content(BytesText::new(item))?; } writer .create_element("inner") diff --git a/tests/test.rs b/tests/test.rs index 5ac9dae8..819f8ab6 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -98,7 +98,7 @@ fn test_koi8_r_encoding() { loop { match r.read_event() { Ok(Text(e)) => { - e.decode_and_unescape(&r).unwrap(); + e.unescape().unwrap(); } Ok(Eof) => break, _ => (), @@ -157,7 +157,7 @@ fn fuzz_101() { } } Ok(Text(e)) => { - if e.decode_and_unescape(&reader).is_err() { + if e.unescape().is_err() { break; } } diff --git a/tests/unit_tests.rs b/tests/unit_tests.rs index bb32a602..80407736 100644 --- a/tests/unit_tests.rs +++ b/tests/unit_tests.rs @@ -319,12 +319,12 @@ fn test_write_attrs() -> Result<()> { Start(elem) => { let mut attrs = elem.attributes().collect::>>()?; attrs.extend_from_slice(&[("a", "b").into(), ("c", "d").into()]); - let mut elem = BytesStart::owned(b"copy".to_vec(), 4); + let mut elem = BytesStart::new("copy"); elem.extend_attributes(attrs); elem.push_attribute(("x", "y\"z")); Start(elem) } - End(_) => End(BytesEnd::borrowed(b"copy")), + End(_) => End(BytesEnd::new("copy")), e => e, }; assert!(writer.write_event(event).is_ok()); @@ -340,7 +340,7 @@ fn 
test_write_attrs() -> Result<()> { fn test_new_xml_decl_full() { let mut writer = Writer::new(Vec::new()); writer - .write_event(Decl(BytesDecl::new(b"1.2", Some(b"utf-X"), Some(b"yo")))) + .write_event(Decl(BytesDecl::new("1.2", Some("utf-X"), Some("yo")))) .expect("writing xml decl should succeed"); let result = writer.into_inner(); @@ -355,7 +355,7 @@ fn test_new_xml_decl_full() { fn test_new_xml_decl_standalone() { let mut writer = Writer::new(Vec::new()); writer - .write_event(Decl(BytesDecl::new(b"1.2", None, Some(b"yo")))) + .write_event(Decl(BytesDecl::new("1.2", None, Some("yo")))) .expect("writing xml decl should succeed"); let result = writer.into_inner(); @@ -370,7 +370,7 @@ fn test_new_xml_decl_standalone() { fn test_new_xml_decl_encoding() { let mut writer = Writer::new(Vec::new()); writer - .write_event(Decl(BytesDecl::new(b"1.2", Some(b"utf-X"), None))) + .write_event(Decl(BytesDecl::new("1.2", Some("utf-X"), None))) .expect("writing xml decl should succeed"); let result = writer.into_inner(); @@ -385,7 +385,7 @@ fn test_new_xml_decl_encoding() { fn test_new_xml_decl_version() { let mut writer = Writer::new(Vec::new()); writer - .write_event(Decl(BytesDecl::new(b"1.2", None, None))) + .write_event(Decl(BytesDecl::new("1.2", None, None))) .expect("writing xml decl should succeed"); let result = writer.into_inner(); @@ -403,7 +403,7 @@ fn test_new_xml_decl_empty() { // An empty version should arguably be an error, but we don't expect anyone to actually supply // an empty version. 
writer - .write_event(Decl(BytesDecl::new(b"", Some(b""), Some(b"")))) + .write_event(Decl(BytesDecl::new("", Some(""), Some("")))) .expect("writing xml decl should succeed"); let result = writer.into_inner(); @@ -506,7 +506,7 @@ fn test_escaped_content() { "content unexpected: expecting '<test>', got '{:?}'", from_utf8(&*e) ); - match e.decode_and_unescape(&r) { + match e.unescape() { Ok(c) => assert_eq!(c, ""), Err(e) => panic!( "cannot escape content at position {}: {:?}", @@ -577,38 +577,6 @@ fn test_read_write_roundtrip() -> Result<()> { Ok(()) } -#[test] -fn test_read_write_roundtrip_escape() -> Result<()> { - let input = r#" - -
-
-
-
data <escaped>
-
- "#; - - let mut reader = Reader::from_str(input); - reader.trim_text(false).expand_empty_elements(false); - let mut writer = Writer::new(Cursor::new(Vec::new())); - loop { - match reader.read_event()? { - Eof => break, - Text(e) => { - let t = e.escape(); - assert!(writer - .write_event(Text(BytesText::from_escaped(t.to_vec()))) - .is_ok()); - } - e => assert!(writer.write_event(e).is_ok()), - } - } - - let result = writer.into_inner().into_inner(); - assert_eq!(String::from_utf8(result).unwrap(), input.to_string()); - Ok(()) -} - #[test] fn test_read_write_roundtrip_escape_text() -> Result<()> { let input = r#" @@ -627,10 +595,8 @@ fn test_read_write_roundtrip_escape_text() -> Result<()> { match reader.read_event()? { Eof => break, Text(e) => { - let t = e.decode_and_unescape(&reader).unwrap(); - assert!(writer - .write_event(Text(BytesText::from_plain_str(&t))) - .is_ok()); + let t = e.unescape().unwrap(); + assert!(writer.write_event(Text(BytesText::new(&t))).is_ok()); } e => assert!(writer.write_event(e).is_ok()), } @@ -769,7 +735,7 @@ mod decode_with_bom_removal { loop { match reader.read_event() { - Ok(StartText(e)) => txt.push(e.decode_with_bom_removal(reader.decoder()).unwrap()), + Ok(StartText(e)) => txt.push(e.decode_with_bom_removal().unwrap()), Ok(Eof) => break, _ => (), } @@ -792,7 +758,7 @@ mod decode_with_bom_removal { loop { match reader.read_event() { - Ok(StartText(e)) => txt.push(e.decode_with_bom_removal(reader.decoder()).unwrap()), + Ok(StartText(e)) => txt.push(e.decode_with_bom_removal().unwrap()), Ok(Eof) => break, _ => (), } @@ -810,7 +776,7 @@ mod decode_with_bom_removal { loop { match reader.read_event() { - Ok(StartText(e)) => txt.push(e.decode_with_bom_removal(reader.decoder()).unwrap()), + Ok(StartText(e)) => txt.push(e.decode_with_bom_removal().unwrap()), Ok(Eof) => break, _ => (), } @@ -830,7 +796,7 @@ mod decode_with_bom_removal { loop { match reader.read_event() { - Ok(StartText(e)) => 
txt.push(e.decode_with_bom_removal(reader.decoder()).unwrap()), + Ok(StartText(e)) => txt.push(e.decode_with_bom_removal().unwrap()), Ok(Eof) => break, _ => (), } diff --git a/tests/xmlrs_reader_tests.rs b/tests/xmlrs_reader_tests.rs index c6d4c18f..14024bb8 100644 --- a/tests/xmlrs_reader_tests.rs +++ b/tests/xmlrs_reader_tests.rs @@ -416,7 +416,7 @@ fn test_bytes(input: &[u8], output: &[u8], trim: bool) { Ok((_, Event::CData(e))) => format!("CData({})", decoder.decode(&e).unwrap()), Ok((_, Event::Text(e))) => match unescape(&decoder.decode(&e).unwrap()) { Ok(c) => format!("Characters({})", &c), - Err(err) => format!("FailedUnescape({:?}; {})", e.escape(), err), + Err(err) => format!("FailedUnescape({:?}; {})", e.as_ref(), err), }, Ok((_, Event::Eof)) => format!("EndDocument"), Err(e) => format!("Error: {}", e),