From 1c26671cfc7d30e2e38355b32913ce53eed744df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Frank=20Pr=C3=B6=C3=9Fdorf?= Date: Wed, 23 Sep 2020 11:06:37 +0300 Subject: [PATCH 1/8] Allow using tokio's AsyncBufRead --- Cargo.toml | 4 + README.md | 5 +- examples/custom_entities.rs | 13 +- examples/issue68.rs | 16 +- examples/nested_readers.rs | 32 +- examples/read_texts.rs | 29 +- src/de/mod.rs | 1 + src/errors.rs | 6 +- src/escape.rs | 4 + src/escapei.rs | 3 +- src/events/attributes.rs | 57 +- src/events/mod.rs | 101 +-- src/lib.rs | 36 +- src/reader/asynchronous.rs | 1036 +++++++++++++++++++++++++++++ src/reader/mod.rs | 323 +++++++++ src/{reader.rs => reader/sync.rs} | 328 +-------- src/writer.rs | 9 +- tests/test.rs | 264 +++++++- tests/unit_tests.rs | 474 +++++++++++-- tests/xmlrs_reader_tests.rs | 308 +++++---- 20 files changed, 2401 insertions(+), 648 deletions(-) create mode 100644 src/escape.rs create mode 100644 src/reader/asynchronous.rs create mode 100644 src/reader/mod.rs rename src/{reader.rs => reader/sync.rs} (81%) diff --git a/Cargo.toml b/Cargo.toml index 57e1924a..eda5f256 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,9 @@ license = "MIT" travis-ci = { repository = "tafia/quick-xml" } [dependencies] +async-recursion = { version = "0.3.2", optional = true } encoding_rs = { version = "0.8.26", optional = true } +tokio = { version = "0.2.22", features = ["fs", "io-util"], optional = true } serde = { version = "1.0", optional = true } memchr = "2.3.4" @@ -24,6 +26,7 @@ memchr = "2.3.4" serde = { version = "1.0", features = ["derive"] } serde-value = "0.7" regex = "1" +tokio = { version = "0.2.22", features = ["macros", "rt-threaded"] } [lib] bench = false @@ -33,6 +36,7 @@ default = [] encoding = ["encoding_rs"] serialize = ["serde"] escape-html = [] +asynchronous = ["tokio", "async-recursion"] [package.metadata.docs.rs] features = ["serialize"] diff --git a/README.md b/README.md index 35247bfa..acee1af6 100644 --- a/README.md +++ b/README.md @@ 
-213,8 +213,8 @@ fn crates_io() -> Result { ### Credits -This has largely been inspired by [serde-xml-rs](https://github.com/RReverser/serde-xml-rs). -quick-xml follows its convention for deserialization, including the +This has largely been inspired by [serde-xml-rs](https://github.com/RReverser/serde-xml-rs). +quick-xml follows its convention for deserialization, including the [`$value`](https://github.com/RReverser/serde-xml-rs#parsing-the-value-of-a-tag) special name. ### Parsing the "value" of a tag @@ -251,6 +251,7 @@ Note that despite not focusing on performance (there are several unecessary copi - `encoding`: support non utf8 xmls - `serialize`: support serde `Serialize`/`Deserialize` +- `asynchronous`: support for `AsyncRead`s in `tokio` ## Performance diff --git a/examples/custom_entities.rs b/examples/custom_entities.rs index b6280945..42986126 100644 --- a/examples/custom_entities.rs +++ b/examples/custom_entities.rs @@ -14,6 +14,8 @@ use quick_xml::events::Event; use quick_xml::Reader; use regex::bytes::Regex; use std::collections::HashMap; +#[cfg(feature = "asynchronous")] +use tokio::runtime::Runtime; const DATA: &str = r#" @@ -33,8 +35,17 @@ fn main() -> Result<(), Box> { let mut custom_entities = HashMap::new(); let entity_re = Regex::new(r#""#)?; + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match reader.read_event(&mut buf) { + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + match event { Ok(Event::DocType(ref e)) => { for cap in entity_re.captures_iter(&e) { custom_entities.insert(cap[1].to_vec(), cap[2].to_vec()); diff --git a/examples/issue68.rs b/examples/issue68.rs index a6ba1d7f..d738ed84 100644 --- a/examples/issue68.rs +++ b/examples/issue68.rs @@ -1,10 +1,10 @@ #![allow(unused)] -extern crate quick_xml; - use 
quick_xml::events::Event; use quick_xml::Reader; use std::io::Read; +#[cfg(feature = "asynchronous")] +use tokio::runtime::Runtime; struct Resource { etag: String, @@ -81,8 +81,18 @@ fn parse_report(xml_data: &str) -> Vec { let mut depth = 0; let mut state = State::MultiStatus; + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match reader.read_namespaced_event(&mut buf, &mut ns_buffer) { + #[cfg(feature = "asynchronous")] + let event = runtime + .block_on(async { reader.read_namespaced_event(&mut buf, &mut ns_buffer).await }); + + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_namespaced_event(&mut buf, &mut ns_buffer); + + match event { Ok((namespace_value, Event::Start(e))) => { let namespace_value = namespace_value.unwrap_or_default(); match (depth, state, namespace_value, e.local_name()) { diff --git a/examples/nested_readers.rs b/examples/nested_readers.rs index 892fdc92..5dd1dbbc 100644 --- a/examples/nested_readers.rs +++ b/examples/nested_readers.rs @@ -1,6 +1,8 @@ -extern crate quick_xml; use quick_xml::events::Event; use quick_xml::Reader; +#[cfg(feature = "asynchronous")] +use tokio::runtime::Runtime; + // a structure to capture the rows we've extracted // from a ECMA-376 table in document.xml #[derive(Debug, Clone)] @@ -16,10 +18,26 @@ fn main() -> Result<(), quick_xml::Error> { // buffer for nested reader let mut skip_buf = Vec::new(); let mut count = 0; + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + let mut reader = + runtime.block_on(async { Reader::from_file("tests/documents/document.xml").await })?; + + #[cfg(not(feature = "asynchronous"))] let mut reader = Reader::from_file("tests/documents/document.xml")?; + let mut found_tables = Vec::new(); loop { - match reader.read_event(&mut buf)? 
{ + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await })?; + + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf)?; + + match event { Event::Start(element) => match element.name() { b"w:tbl" => { count += 1; @@ -32,7 +50,15 @@ fn main() -> Result<(), quick_xml::Error> { let mut row_index = 0; loop { skip_buf.clear(); - match reader.read_event(&mut skip_buf)? { + + #[cfg(feature = "asynchronous")] + let event = + runtime.block_on(async { reader.read_event(&mut skip_buf).await })?; + + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut skip_buf)?; + + match event { Event::Start(element) => match element.name() { b"w:tr" => { stats.rows.push(vec![]); diff --git a/examples/read_texts.rs b/examples/read_texts.rs index c0bb4778..227911cc 100644 --- a/examples/read_texts.rs +++ b/examples/read_texts.rs @@ -1,4 +1,5 @@ -extern crate quick_xml; +#[cfg(feature = "asynchronous")] +use tokio::runtime::Runtime; fn main() { use quick_xml::events::Event; @@ -13,14 +14,32 @@ fn main() { let mut txt = Vec::new(); let mut buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match reader.read_event(&mut buf) { + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + match event { Ok(Event::Start(ref e)) if e.name() == b"tag2" => { - txt.push( + #[cfg(feature = "asynchronous")] + let text = runtime.block_on(async { reader .read_text(b"tag2", &mut Vec::new()) - .expect("Cannot decode text value"), - ); + .await + .expect("Cannot decode text value") + }); + + #[cfg(not(feature = "asynchronous"))] + let text = reader + .read_text(b"tag2", &mut Vec::new()) + .expect("Cannot decode text value"); + + txt.push(text); println!("{:?}", txt); } Ok(Event::Eof) 
=> break, // exits the loop when reaching end of file diff --git a/src/de/mod.rs b/src/de/mod.rs index f689463b..242c14a8 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -116,6 +116,7 @@ pub use crate::errors::serialize::DeError; use crate::{ errors::Error, events::{BytesStart, BytesText, Event}, + reader::Decode, reader::Decoder, Reader, }; diff --git a/src/errors.rs b/src/errors.rs index ef7bacba..eafc58ec 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -33,7 +33,7 @@ pub enum Error { /// Duplicate attribute DuplicatedAttribute(usize, usize), /// Escape error - EscapeError(crate::escape::EscapeError), + Escape(crate::escape::EscapeError), } impl From<::std::io::Error> for Error { @@ -109,7 +109,7 @@ impl std::fmt::Display for Error { Duplicate attribute at position {1} and {0}", pos1, pos2 ), - Error::EscapeError(e) => write!(f, "{}", e), + Error::Escape(e) => write!(f, "{}", e), } } } @@ -119,7 +119,7 @@ impl std::error::Error for Error { match self { Error::Io(e) => Some(e), Error::Utf8(e) => Some(e), - Error::EscapeError(e) => Some(e), + Error::Escape(e) => Some(e), _ => None, } } diff --git a/src/escape.rs b/src/escape.rs new file mode 100644 index 00000000..295b1dec --- /dev/null +++ b/src/escape.rs @@ -0,0 +1,4 @@ +//! Pub escape module. + +pub(crate) use crate::escapei::{do_unescape, EscapeError}; +pub use crate::escapei::{escape, unescape, unescape_with}; diff --git a/src/escapei.rs b/src/escapei.rs index 30236d6f..f31675f7 100644 --- a/src/escapei.rs +++ b/src/escapei.rs @@ -1,6 +1,5 @@ //! 
Manage xml character escapes -use memchr; use std::borrow::Cow; use std::collections::HashMap; use std::ops::Range; @@ -1673,7 +1672,7 @@ const fn named_entity(name: &[u8]) -> Option<&str> { } fn push_utf8(out: &mut Vec, code: char) { - let mut buf = [0u8; 4]; + let mut buf = [0_u8; 4]; out.extend_from_slice(code.encode_utf8(&mut buf).as_bytes()); } diff --git a/src/events/attributes.rs b/src/events/attributes.rs index 83ebd489..3607ec67 100644 --- a/src/events/attributes.rs +++ b/src/events/attributes.rs @@ -4,8 +4,10 @@ use crate::errors::{Error, Result}; use crate::escape::{do_unescape, escape}; -use crate::reader::{is_whitespace, Reader}; -use std::{borrow::Cow, collections::HashMap, io::BufRead, ops::Range}; +use crate::reader::{is_whitespace, Decode, Reader}; +use std::borrow::Cow; +use std::collections::HashMap; +use std::ops::Range; /// Iterator over XML attributes. /// @@ -120,7 +122,7 @@ impl<'a> Attribute<'a> { &self, custom_entities: Option<&HashMap, Vec>>, ) -> Result> { - do_unescape(&*self.value, custom_entities).map_err(Error::EscapeError) + do_unescape(&*self.value, custom_entities).map_err(Error::Escape) } /// Decode then unescapes the value @@ -133,7 +135,7 @@ impl<'a> Attribute<'a> { /// /// [`unescaped_value()`]: #method.unescaped_value /// [`Reader::decode()`]: ../../reader/struct.Reader.html#method.decode - pub fn unescape_and_decode_value(&self, reader: &Reader) -> Result { + pub fn unescape_and_decode_value(&self, reader: &impl Decode) -> Result { self.do_unescape_and_decode_value(reader, None) } @@ -151,9 +153,9 @@ impl<'a> Attribute<'a> { /// # Pre-condition /// /// The keys and values of `custom_entities`, if any, must be valid UTF-8. 
- pub fn unescape_and_decode_value_with_custom_entities( + pub fn unescape_and_decode_value_with_custom_entities( &self, - reader: &Reader, + reader: &impl Decode, custom_entities: &HashMap, Vec>, ) -> Result { self.do_unescape_and_decode_value(reader, Some(custom_entities)) @@ -161,9 +163,9 @@ impl<'a> Attribute<'a> { /// The keys and values of `custom_entities`, if any, must be valid UTF-8. #[cfg(feature = "encoding")] - fn do_unescape_and_decode_value( + fn do_unescape_and_decode_value( &self, - reader: &Reader, + reader: &impl Decode, custom_entities: Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode(&*self.value); @@ -173,14 +175,13 @@ impl<'a> Attribute<'a> { } #[cfg(not(feature = "encoding"))] - fn do_unescape_and_decode_value( + fn do_unescape_and_decode_value( &self, - reader: &Reader, + reader: &impl Decode, custom_entities: Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode(&*self.value)?; - let unescaped = - do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; + let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::Escape)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) } @@ -192,10 +193,7 @@ impl<'a> Attribute<'a> { /// 1. BytesText::unescaped() /// 2. Reader::decode(...) #[cfg(feature = "encoding")] - pub fn unescape_and_decode_without_bom( - &self, - reader: &mut Reader, - ) -> Result { + pub fn unescape_and_decode_without_bom(&self, reader: &mut impl Decode) -> Result { self.do_unescape_and_decode_without_bom(reader, None) } @@ -207,10 +205,7 @@ impl<'a> Attribute<'a> { /// 1. BytesText::unescaped() /// 2. Reader::decode(...) 
#[cfg(not(feature = "encoding"))] - pub fn unescape_and_decode_without_bom( - &self, - reader: &Reader, - ) -> Result { + pub fn unescape_and_decode_without_bom(&self, reader: &impl Decode) -> Result { self.do_unescape_and_decode_without_bom(reader, None) } @@ -226,9 +221,9 @@ impl<'a> Attribute<'a> { /// /// The keys and values of `custom_entities`, if any, must be valid UTF-8. #[cfg(feature = "encoding")] - pub fn unescape_and_decode_without_bom_with_custom_entities( + pub fn unescape_and_decode_without_bom_with_custom_entities( &self, - reader: &mut Reader, + reader: &mut impl Decode, custom_entities: &HashMap, Vec>, ) -> Result { self.do_unescape_and_decode_without_bom(reader, Some(custom_entities)) @@ -246,18 +241,18 @@ impl<'a> Attribute<'a> { /// /// The keys and values of `custom_entities`, if any, must be valid UTF-8. #[cfg(not(feature = "encoding"))] - pub fn unescape_and_decode_without_bom_with_custom_entities( + pub fn unescape_and_decode_without_bom_with_custom_entities( &self, - reader: &Reader, + reader: &impl Decode, custom_entities: &HashMap, Vec>, ) -> Result { self.do_unescape_and_decode_without_bom(reader, Some(custom_entities)) } #[cfg(feature = "encoding")] - fn do_unescape_and_decode_without_bom( + fn do_unescape_and_decode_without_bom( &self, - reader: &mut Reader, + reader: &mut impl Decode, custom_entities: Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode_without_bom(&*self.value); @@ -267,14 +262,13 @@ impl<'a> Attribute<'a> { } #[cfg(not(feature = "encoding"))] - fn do_unescape_and_decode_without_bom( + fn do_unescape_and_decode_without_bom( &self, - reader: &Reader, + reader: &impl Decode, custom_entities: Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode_without_bom(&*self.value)?; - let unescaped = - do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; + let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::Escape)?; 
String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) } } @@ -348,9 +342,8 @@ impl<'a> Iterator for Attributes<'a> { self.position = len; if self.html { attr!($key, 0..0) - } else { - None } + return None; }}; ($key:expr, $val:expr) => { Some(Ok(Attribute { diff --git a/src/events/mod.rs b/src/events/mod.rs index 97f94a96..3d8d35c0 100644 --- a/src/events/mod.rs +++ b/src/events/mod.rs @@ -38,13 +38,19 @@ pub mod attributes; #[cfg(feature = "encoding_rs")] use encoding_rs::Encoding; -use std::{borrow::Cow, collections::HashMap, io::BufRead, ops::Deref, str::from_utf8}; use crate::escape::{do_unescape, escape}; -use crate::{errors::Error, errors::Result, reader::Reader}; -use attributes::{Attribute, Attributes}; use memchr; +use std::borrow::Cow; +use std::collections::HashMap; +use std::ops::Deref; +use std::str::from_utf8; + +use self::attributes::{Attribute, Attributes}; +use crate::errors::{Error, Result}; +use crate::escapei::{do_unescape, escape}; +use crate::reader::{Decode, Reader}; /// Opening tag data (`Event::Start`), with optional attributes. /// @@ -219,7 +225,7 @@ impl<'a> BytesStart<'a> { &'s self, custom_entities: Option<&HashMap, Vec>>, ) -> Result> { - do_unescape(&*self.buf, custom_entities).map_err(Error::EscapeError) + do_unescape(&*self.buf, custom_entities).map_err(Error::Escape) } /// Returns an iterator over the attributes of this tag. @@ -266,7 +272,7 @@ impl<'a> BytesStart<'a> { /// [`unescaped()`]: #method.unescaped /// [`Reader::decode()`]: ../reader/struct.Reader.html#method.decode #[inline] - pub fn unescape_and_decode(&self, reader: &Reader) -> Result { + pub fn unescape_and_decode(&self, reader: &impl Decode) -> Result { self.do_unescape_and_decode_with_custom_entities(reader, None) } @@ -285,9 +291,9 @@ impl<'a> BytesStart<'a> { /// /// The keys and values of `custom_entities`, if any, must be valid UTF-8. 
#[inline] - pub fn unescape_and_decode_with_custom_entities( + pub fn unescape_and_decode_with_custom_entities( &self, - reader: &Reader, + reader: &impl Decode, custom_entities: &HashMap, Vec>, ) -> Result { self.do_unescape_and_decode_with_custom_entities(reader, Some(custom_entities)) @@ -295,9 +301,9 @@ impl<'a> BytesStart<'a> { #[cfg(feature = "encoding")] #[inline] - fn do_unescape_and_decode_with_custom_entities( + fn do_unescape_and_decode_with_custom_entities( &self, - reader: &Reader, + reader: &impl Decode, custom_entities: Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode(&*self); @@ -308,14 +314,13 @@ impl<'a> BytesStart<'a> { #[cfg(not(feature = "encoding"))] #[inline] - fn do_unescape_and_decode_with_custom_entities( + fn do_unescape_and_decode_with_custom_entities( &self, - reader: &Reader, + reader: &impl Decode, custom_entities: Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode(&*self)?; - let unescaped = - do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; + let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::Escape)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) } @@ -645,7 +650,7 @@ impl<'a> BytesText<'a> { &'s self, custom_entities: Option<&HashMap, Vec>>, ) -> Result> { - do_unescape(self, custom_entities).map_err(Error::EscapeError) + do_unescape(self, custom_entities).map_err(Error::Escape) } #[cfg(feature = "serialize")] @@ -706,10 +711,7 @@ impl<'a> BytesText<'a> { /// 1. BytesText::unescaped() /// 2. Reader::decode(...) #[cfg(feature = "encoding")] - pub fn unescape_and_decode_without_bom( - &self, - reader: &mut Reader, - ) -> Result { + pub fn unescape_and_decode_without_bom(&self, reader: &mut impl Decode) -> Result { self.do_unescape_and_decode_without_bom(reader, None) } @@ -721,10 +723,7 @@ impl<'a> BytesText<'a> { /// 1. BytesText::unescaped() /// 2. Reader::decode(...) 
#[cfg(not(feature = "encoding"))] - pub fn unescape_and_decode_without_bom( - &self, - reader: &Reader, - ) -> Result { + pub fn unescape_and_decode_without_bom(&self, reader: &impl Decode) -> Result { self.do_unescape_and_decode_without_bom(reader, None) } @@ -740,9 +739,9 @@ impl<'a> BytesText<'a> { /// /// The keys and values of `custom_entities`, if any, must be valid UTF-8. #[cfg(feature = "encoding")] - pub fn unescape_and_decode_without_bom_with_custom_entities( + pub fn unescape_and_decode_without_bom_with_custom_entities( &self, - reader: &mut Reader, + reader: &mut impl Decode, custom_entities: &HashMap, Vec>, ) -> Result { self.do_unescape_and_decode_without_bom(reader, Some(custom_entities)) @@ -760,18 +759,18 @@ impl<'a> BytesText<'a> { /// /// The keys and values of `custom_entities`, if any, must be valid UTF-8. #[cfg(not(feature = "encoding"))] - pub fn unescape_and_decode_without_bom_with_custom_entities( + pub fn unescape_and_decode_without_bom_with_custom_entities( &self, - reader: &Reader, + reader: &impl Decode, custom_entities: &HashMap, Vec>, ) -> Result { self.do_unescape_and_decode_without_bom(reader, Some(custom_entities)) } #[cfg(feature = "encoding")] - fn do_unescape_and_decode_without_bom( + fn do_unescape_and_decode_without_bom( &self, - reader: &mut Reader, + reader: &mut impl Decode, custom_entities: Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode_without_bom(&*self); @@ -781,14 +780,13 @@ impl<'a> BytesText<'a> { } #[cfg(not(feature = "encoding"))] - fn do_unescape_and_decode_without_bom( + fn do_unescape_and_decode_without_bom( &self, - reader: &Reader, + reader: &impl Decode, custom_entities: Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode_without_bom(&*self)?; - let unescaped = - do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; + let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::Escape)?; 
String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) } @@ -798,7 +796,7 @@ impl<'a> BytesText<'a> { /// it might be wiser to manually use /// 1. BytesText::unescaped() /// 2. Reader::decode(...) - pub fn unescape_and_decode(&self, reader: &Reader) -> Result { + pub fn unescape_and_decode(&self, reader: &impl Decode) -> Result { self.do_unescape_and_decode_with_custom_entities(reader, None) } @@ -812,18 +810,18 @@ impl<'a> BytesText<'a> { /// # Pre-condition /// /// The keys and values of `custom_entities`, if any, must be valid UTF-8. - pub fn unescape_and_decode_with_custom_entities( + pub fn unescape_and_decode_with_custom_entities( &self, - reader: &Reader, + reader: &impl Decode, custom_entities: &HashMap, Vec>, ) -> Result { self.do_unescape_and_decode_with_custom_entities(reader, Some(custom_entities)) } #[cfg(feature = "encoding")] - fn do_unescape_and_decode_with_custom_entities( + fn do_unescape_and_decode_with_custom_entities( &self, - reader: &Reader, + reader: &impl Decode, custom_entities: Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode(&*self); @@ -833,14 +831,13 @@ impl<'a> BytesText<'a> { } #[cfg(not(feature = "encoding"))] - fn do_unescape_and_decode_with_custom_entities( + fn do_unescape_and_decode_with_custom_entities( &self, - reader: &Reader, + reader: &impl Decode, custom_entities: Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode(&*self)?; - let unescaped = - do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; + let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::Escape)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) } @@ -879,6 +876,7 @@ pub enum Event<'a> { CData(BytesText<'a>), /// XML declaration ``. Decl(BytesDecl<'a>), + #[allow(clippy::upper_case_acronyms)] /// Processing instruction ``. PI(BytesText<'a>), /// Doctype ``. 
@@ -960,6 +958,8 @@ impl<'a> AsRef> for Event<'a> { #[cfg(test)] mod test { use super::*; + #[cfg(feature = "asynchronous")] + use tokio::runtime::Runtime; #[test] fn local_name() { @@ -970,11 +970,25 @@ mod test { <:foo attr='bar'>foobusbar foobusbar "#; - let mut rdr = Reader::from_str(xml); + let mut rdr = crate::Reader::from_str(xml); let mut buf = Vec::new(); let mut parsed_local_names = Vec::new(); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match rdr.read_event(&mut buf).expect("unable to read xml event") { + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { + rdr.read_event(&mut buf) + .await + .expect("unable to read xml event") + }); + + #[cfg(not(feature = "asynchronous"))] + let event = rdr.read_event(&mut buf).expect("unable to read xml event"); + + match event { Event::Start(ref e) => parsed_local_names.push( from_utf8(e.local_name()) .expect("unable to build str from local_name") @@ -989,6 +1003,7 @@ mod test { _ => {} } } + assert_eq!(parsed_local_names[0], "bus".to_string()); assert_eq!(parsed_local_names[1], "bus".to_string()); assert_eq!(parsed_local_names[2], "".to_string()); diff --git a/src/lib.rs b/src/lib.rs index 1eb77875..5d592708 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,7 +24,7 @@ //! //! ### Reader //! -//! ```rust +//! ```ignore //! use quick_xml::Reader; //! use quick_xml::events::Event; //! @@ -39,8 +39,8 @@ //! reader.trim_text(true); //! //! let mut count = 0; -//! let mut txt = Vec::new(); -//! let mut buf = Vec::new(); +//! let mut txt: Vec = Vec::new(); +//! let mut buf: Vec = Vec::new(); //! //! // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s) //! loop { @@ -72,7 +72,7 @@ //! //! ### Writer //! -//! ```rust +//! ```ignore //! use quick_xml::Writer; //! use quick_xml::events::{Event, BytesEnd, BytesStart}; //! use quick_xml::Reader; @@ -83,7 +83,7 @@ //! 
let mut reader = Reader::from_str(xml); //! reader.trim_text(true); //! let mut writer = Writer::new(Cursor::new(Vec::new())); -//! let mut buf = Vec::new(); +//! let mut buf: Vec = Vec::new(); //! loop { //! match reader.read_event(&mut buf) { //! Ok(Event::Start(ref e)) if e.name() == b"this_tag" => { @@ -123,6 +123,7 @@ //! quick-xml supports 2 additional features, non activated by default: //! - `encoding`: support non utf8 XMLs //! - `serialize`: support serde `Serialize`/`Deserialize` +//! - `asynchronous`: support async reading //! //! [StAX]: https://en.wikipedia.org/wiki/StAX //! [Serde]: https://serde.rs/ @@ -130,14 +131,6 @@ #![deny(missing_docs)] #![recursion_limit = "1024"] -#[cfg(feature = "encoding_rs")] -extern crate encoding_rs; -extern crate memchr; -#[cfg(feature = "serialize")] -extern crate serde; -#[cfg(all(test, feature = "serialize"))] -extern crate serde_value; - #[cfg(feature = "serialize")] pub mod de; mod errors; @@ -151,11 +144,22 @@ pub mod events; mod reader; #[cfg(feature = "serialize")] pub mod se; + +mod errors; +mod escapei; mod utils; mod writer; +pub mod escape; +pub mod events; +pub mod reader; + // reexports #[cfg(feature = "serialize")] -pub use crate::errors::serialize::DeError; -pub use crate::errors::{Error, Result}; -pub use crate::{reader::Reader, writer::Writer}; +pub use errors::serialize::DeError; +pub use errors::{Error, Result}; +#[cfg(feature = "asynchronous")] +pub use reader::asynchronous::Reader; +#[cfg(not(feature = "asynchronous"))] +pub use reader::sync::Reader; +pub use writer::Writer; diff --git a/src/reader/asynchronous.rs b/src/reader/asynchronous.rs new file mode 100644 index 00000000..bba8a31f --- /dev/null +++ b/src/reader/asynchronous.rs @@ -0,0 +1,1036 @@ +//! 
A module to handle the async `Reader` + +use async_recursion::async_recursion; +#[cfg(feature = "encoding")] +use encoding_rs::Encoding; +use std::future::Future; +use std::io; +use std::marker::Unpin; +use std::path::Path; +use std::pin::Pin; +use std::str::from_utf8; +use std::task::{Context, Poll}; +use tokio::fs::File; +use tokio::io::{AsyncBufRead, AsyncBufReadExt, BufReader}; + +use crate::errors::{Error, Result}; +use crate::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event}; + +use super::{is_whitespace, Decode, Decoder, NamespaceBufferIndex, TagState}; + +impl Decode for Reader { + #[cfg(feature = "encoding")] + fn read_encoding(&self) -> &'static Encoding { + self.encoding + } + + #[cfg(feature = "encoding")] + fn read_is_encoding_set(&self) -> bool { + self.is_encoding_set + } + + #[cfg(feature = "encoding")] + fn write_encoding(&mut self, val: &'static Encoding) { + self.encoding = val; + } + + #[cfg(feature = "encoding")] + fn write_is_encoding_set(&mut self, val: bool) { + self.is_encoding_set = val; + } +} + +/// A low level encoding-agnostic XML event reader. +/// +/// Consumes a `BufRead` and streams XML `Event`s. 
+/// +/// # Examples +/// +/// ``` +/// use quick_xml::Reader; +/// use quick_xml::events::Event; +/// +/// #[tokio::main] +/// async fn main() { +/// let xml = r#" +/// Test +/// Test 2 +/// "#; +/// let mut reader = Reader::from_str(xml); +/// reader.trim_text(true); +/// let mut count = 0; +/// let mut txt = Vec::new(); +/// let mut buf = Vec::new(); +/// loop { +/// match reader.read_event(&mut buf).await { +/// Ok(Event::Start(ref e)) => { +/// match e.name() { +/// b"tag1" => println!("attributes values: {:?}", +/// e.attributes().map(|a| a.unwrap().value) +/// .collect::>()), +/// b"tag2" => count += 1, +/// _ => (), +/// } +/// }, +/// Ok(Event::Text(e)) => txt.push(e.unescape_and_decode(&reader).unwrap()), +/// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e), +/// Ok(Event::Eof) => break, +/// _ => (), +/// } +/// buf.clear(); +/// } +/// } +/// ``` +pub struct Reader { + /// reader + reader: B, + /// current buffer position, useful for debugging errors + buf_position: usize, + /// current state Open/Close + tag_state: TagState, + /// expand empty element into an opening and closing element + expand_empty_elements: bool, + /// trims Text events, skip the element if text is empty + trim_text: bool, + /// trims trailing whitespaces from markup names in closing tags `` + trim_markup_names_in_closing_tags: bool, + /// check if End nodes match last Start node + check_end_names: bool, + /// check if comments contains `--` (false per default) + check_comments: bool, + /// all currently Started elements which didn't have a matching + /// End element yet + opened_buffer: Vec, + /// opened name start indexes + opened_starts: Vec, + /// a buffer to manage namespaces + ns_buffer: NamespaceBufferIndex, + #[cfg(feature = "encoding")] + /// the encoding specified in the xml, defaults to utf8 + encoding: &'static Encoding, + #[cfg(feature = "encoding")] + /// check if quick-rs could find out the encoding + is_encoding_set: bool, +} + +impl
Reader { + /// Creates a `Reader` that reads from a reader implementing `BufRead`. + pub fn from_reader(reader: B) -> Reader { + Reader { + reader, + opened_buffer: Vec::new(), + opened_starts: Vec::new(), + tag_state: TagState::Closed, + expand_empty_elements: false, + trim_text: false, + trim_markup_names_in_closing_tags: true, + check_end_names: true, + buf_position: 0, + check_comments: false, + ns_buffer: NamespaceBufferIndex::default(), + #[cfg(feature = "encoding")] + encoding: ::encoding_rs::UTF_8, + #[cfg(feature = "encoding")] + is_encoding_set: false, + } + } + + /// Changes whether empty elements should be split into an `Open` and a `Close` event. + /// + /// When set to `true`, all [`Empty`] events produced by a self-closing tag like `` are + /// expanded into a [`Start`] event followed by a [`End`] event. When set to `false` (the + /// default), those tags are represented by an [`Empty`] event instead. + /// + /// (`false` by default) + /// + /// [`Empty`]: events/enum.Event.html#variant.Empty + /// [`Start`]: events/enum.Event.html#variant.Start + /// [`End`]: events/enum.Event.html#variant.End + pub fn expand_empty_elements(&mut self, val: bool) -> &mut Reader { + self.expand_empty_elements = val; + self + } + + /// Changes whether whitespace before and after character data should be removed. + /// + /// When set to `true`, all [`Text`] events are trimmed. If they are empty, no event will be + /// pushed. + /// + /// (`false` by default) + /// + /// [`Text`]: events/enum.Event.html#variant.Text + pub fn trim_text(&mut self, val: bool) -> &mut Reader { + self.trim_text = val; + self + } + + /// Changes whether trailing whitespaces after the markup name are trimmed in closing tags + /// ``. + /// + /// If true the emitted [`End`] event is stripped of trailing whitespace after the markup name.
+ /// + /// Note that if set to `false` and `check_end_names` is true the comparison of markup names is + /// going to fail erroneously if a closing tag contains trailing whitespaces. + /// + /// (`true` by default) + /// + /// [`End`]: events/enum.Event.html#variant.End + pub fn trim_markup_names_in_closing_tags(&mut self, val: bool) -> &mut Reader { + self.trim_markup_names_in_closing_tags = val; + self + } + + /// Changes whether mismatched closing tag names should be detected. + /// + /// When set to `false`, it won't check if a closing tag matches the corresponding opening tag. + /// For example, `` will be permitted. + /// + /// If the XML is known to be sane (already processed, etc.) this saves extra time. + /// + /// Note that the emitted [`End`] event will not be modified if this is disabled, ie. it will + /// contain the data of the mismatched end tag. + /// + /// (`true` by default) + /// + /// [`End`]: events/enum.Event.html#variant.End + pub fn check_end_names(&mut self, val: bool) -> &mut Reader { + self.check_end_names = val; + self + } + + /// Changes whether comments should be validated. + /// + /// When set to `true`, every [`Comment`] event will be checked for not containing `--`, which + /// is not allowed in XML comments. Most of the time we don't want comments at all so we don't + /// really care about comment correctness, thus the default value is `false` to improve + /// performance. + /// + /// (`false` by default) + /// + /// [`Comment`]: events/enum.Event.html#variant.Comment + pub fn check_comments(&mut self, val: bool) -> &mut Reader { + self.check_comments = val; + self + } + + /// Gets the current byte position in the input data. + /// + /// Useful when debugging errors.
+ pub fn buffer_position(&self) -> usize { + // when internal state is Opened, we have actually read until '<', + // which we don't want to show + if let TagState::Opened = self.tag_state { + self.buf_position - 1 + } else { + self.buf_position + } + } + + /// private function to read until '<' is found + /// return a `Text` event + #[async_recursion] + async fn read_until_open<'a, 'b>(&'a mut self, buf: &'b mut Vec) -> Result> { + self.tag_state = TagState::Opened; + let buf_start = buf.len(); + + match read_until(&mut self.reader, b'<', buf, &mut self.buf_position).await { + Ok(0) => Ok(Event::Eof), + Ok(_) => { + let (start, len) = if self.trim_text { + match buf.iter().skip(buf_start).position(|&b| !is_whitespace(b)) { + Some(start) => ( + buf_start + start, + buf.iter() + .rposition(|&b| !is_whitespace(b)) + .map_or_else(|| buf.len(), |p| p + 1), + ), + None => return self.read_event(buf).await, + } + } else { + (buf_start, buf.len()) + }; + Ok(Event::Text(BytesText::from_escaped(&buf[start..len]))) + } + Err(e) => Err(e), + } + } + + /// private function to read until '>' is found + async fn read_until_close<'a, 'b>(&'a mut self, buf: &'b mut Vec) -> Result> { + self.tag_state = TagState::Closed; + + // need to read 1 character to decide whether pay special attention to attribute values + let buf_start = buf.len(); + + let start = match read_one_dont_consume(&mut self.reader).await { + Ok(n) if n.is_none() => return Ok(Event::Eof), + Ok(n) => n.unwrap(), + Err(e) => return Err(Error::Io(e)), + }; + + if start != b'/' && start != b'!' && start != b'?' 
{ + match read_elem_until(&mut self.reader, b'>', buf, &mut self.buf_position).await { + Ok(0) => Ok(Event::Eof), + Ok(_) => { + // we already *know* that we are in this case + self.read_start(&buf[buf_start..]) + } + Err(e) => Err(e), + } + } else { + match read_until(&mut self.reader, b'>', buf, &mut self.buf_position).await { + Ok(0) => Ok(Event::Eof), + Ok(_) => match start { + b'/' => self.read_end(&buf[buf_start..]), + b'!' => self.read_bang(buf_start, buf).await, + b'?' => self.read_question_mark(&buf[buf_start..]), + _ => unreachable!( + "We checked that `start` must be one of [/!?], was {:?} \ + instead.", + start + ), + }, + Err(e) => Err(e), + } + } + } + + /// reads `BytesElement` starting with a `/`, + /// if `self.check_end_names`, checks that element matches last opened element + /// return `End` event + fn read_end<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result> { + // XML standard permits whitespaces after the markup name in closing tags. + // Let's strip them from the buffer before comparing tag names. + let name = if self.trim_markup_names_in_closing_tags { + if let Some(pos_end_name) = buf[1..].iter().rposition(|&b| !b.is_ascii_whitespace()) { + let (name, _) = buf[1..].split_at(pos_end_name + 1); + name + } else { + &buf[1..] + } + } else { + &buf[1..] + }; + if self.check_end_names { + let mismatch_err = |expected: &[u8], found: &[u8], buf_position: &mut usize| { + *buf_position -= buf.len(); + Err(Error::EndEventMismatch { + expected: from_utf8(expected).unwrap_or("").to_owned(), + found: from_utf8(found).unwrap_or("").to_owned(), + }) + }; + match self.opened_starts.pop() { + Some(start) => { + if name != &self.opened_buffer[start..] 
{ + let expected = &self.opened_buffer[start..]; + mismatch_err(expected, name, &mut self.buf_position) + } else { + self.opened_buffer.truncate(start); + Ok(Event::End(BytesEnd::borrowed(name))) + } + } + None => mismatch_err(b"", &buf[1..], &mut self.buf_position), + } + } else { + Ok(Event::End(BytesEnd::borrowed(name))) + } + } + + /// reads `BytesElement` starting with a `!`, + /// return `Comment`, `CData` or `DocType` event + /// + /// Note: depending on the start of the Event, we may need to read more + /// data, thus we need a mutable buffer + async fn read_bang<'a, 'b>( + &'a mut self, + buf_start: usize, + buf: &'b mut Vec, + ) -> Result> { + if buf[buf_start..].starts_with(b"!--") { + while buf.len() < buf_start + 5 || !buf.ends_with(b"--") { + buf.push(b'>'); + match read_until(&mut self.reader, b'>', buf, &mut self.buf_position).await { + Ok(0) => { + // In sync sometimes the last char is included and sometimes it isn't + self.buf_position -= 1; + self.buf_position -= buf.len() - buf_start; + return Err(Error::UnexpectedEof("Comment".to_string())); + } + Ok(_) => (), + Err(e) => return Err(e), + } + } + let len = buf.len(); + if self.check_comments { + // search if '--' not in comments + if let Some(p) = memchr::memchr_iter(b'-', &buf[buf_start + 3..len - 2]) + .position(|p| buf[buf_start + 3 + p + 1] == b'-') + { + self.buf_position -= buf.len() - buf_start + p; + return Err(Error::UnexpectedToken("--".to_string())); + } + } + Ok(Event::Comment(BytesText::from_escaped( + &buf[buf_start + 3..len - 2], + ))) + } else if buf.len() >= buf_start + 8 { + match &buf[buf_start + 1..buf_start + 8] { + b"[CDATA[" => { + while buf.len() < 10 || !buf.ends_with(b"]]") { + buf.push(b'>'); + match read_until(&mut self.reader, b'>', buf, &mut self.buf_position).await + { + Ok(0) => { + self.buf_position -= buf.len() - buf_start; + return Err(Error::UnexpectedEof("CData".to_string())); + } + Ok(_) => (), + Err(e) => return Err(e), + } + } + 
Ok(Event::CData(BytesText::from_plain( + &buf[buf_start + 8..buf.len() - 2], + ))) + } + b"DOCTYPE" => { + let mut count = buf.iter().skip(buf_start).filter(|&&b| b == b'<').count(); + while count > 0 { + buf.push(b'>'); + match read_until(&mut self.reader, b'>', buf, &mut self.buf_position).await + { + Ok(0) => { + self.buf_position -= buf.len() - buf_start; + return Err(Error::UnexpectedEof("DOCTYPE".to_string())); + } + Ok(n) => { + let start = buf.len() - n; + count += buf.iter().skip(start).filter(|&&b| b == b'<').count(); + count -= 1; + } + Err(e) => return Err(e), + } + } + Ok(Event::DocType(BytesText::from_escaped( + &buf[buf_start + 8..buf.len()], + ))) + } + _ => return Err(Error::UnexpectedBang), + } + } else { + self.buf_position -= buf.len() - buf_start; + return Err(Error::UnexpectedBang); + } + } + + /// reads `BytesElement` starting with a `?`, + /// return `Decl` or `PI` event + #[cfg(feature = "encoding")] + fn read_question_mark<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result> { + let len = buf.len(); + if len > 2 && buf[len - 1] == b'?' { + if len > 5 && &buf[1..4] == b"xml" && is_whitespace(buf[4]) { + let event = BytesDecl::from_start(BytesStart::borrowed(&buf[1..len - 1], 3)); + // Try getting encoding from the declaration event + if let Some(enc) = event.encoder() { + self.encoding = enc; + self.is_encoding_set = true; + } + Ok(Event::Decl(event)) + } else { + Ok(Event::PI(BytesText::from_escaped(&buf[1..len - 1]))) + } + } else { + self.buf_position -= len; + Err(Error::UnexpectedEof("XmlDecl".to_string())) + } + } + + /// reads `BytesElement` starting with a `?`, + /// return `Decl` or `PI` event + #[cfg(not(feature = "encoding"))] + fn read_question_mark<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result> { + let len = buf.len(); + if len > 2 && buf[len - 1] == b'?' 
{ + if len > 5 && &buf[1..4] == b"xml" && is_whitespace(buf[4]) { + let event = BytesDecl::from_start(BytesStart::borrowed(&buf[1..len - 1], 3)); + Ok(Event::Decl(event)) + } else { + Ok(Event::PI(BytesText::from_escaped(&buf[1..len - 1]))) + } + } else { + self.buf_position -= len; + Err(Error::UnexpectedEof("XmlDecl".to_string())) + } + } + + #[inline] + fn close_expanded_empty(&mut self) -> Result> { + self.tag_state = TagState::Closed; + let name = self + .opened_buffer + .split_off(self.opened_starts.pop().unwrap()); + Ok(Event::End(BytesEnd::owned(name))) + } + + /// reads `BytesElement` starting with any character except `/`, `!` or ``?` + /// return `Start` or `Empty` event + fn read_start<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result> { + // TODO: do this directly when reading bufreader ... + let len = buf.len(); + let name_end = buf.iter().position(|&b| is_whitespace(b)).unwrap_or(len); + + if let Some(&b'/') = buf.last() { + let end = if name_end < len { name_end } else { len - 1 }; + if self.expand_empty_elements { + self.tag_state = TagState::Empty; + self.opened_starts.push(self.opened_buffer.len()); + self.opened_buffer.extend(&buf[..end]); + Ok(Event::Start(BytesStart::borrowed(&buf[..len - 1], end))) + } else { + Ok(Event::Empty(BytesStart::borrowed(&buf[..len - 1], end))) + } + } else { + if self.check_end_names { + self.opened_starts.push(self.opened_buffer.len()); + self.opened_buffer.extend(&buf[..name_end]); + } + Ok(Event::Start(BytesStart::borrowed(buf, name_end))) + } + } + + /// Reads the next `Event`. + /// + /// This is the main entry point for reading XML `Event`s. + /// + /// `Event`s borrow `buf` and can be converted to own their data if needed (uses `Cow` + /// internally). + /// + /// Having the possibility to control the internal buffers gives you some additional benefits + /// such as: + /// + /// - Reduce the number of allocations by reusing the same buffer. 
For constrained systems, + /// you can call `buf.clear()` once you are done with processing the event (typically at the + /// end of your loop). + /// - Reserve the buffer length if you know the file size (using `Vec::with_capacity`). + /// + /// # Examples + /// + /// ``` + /// use quick_xml::Reader; + /// use quick_xml::events::Event; + /// + /// #[tokio::main] + /// async fn main() { + /// let xml = r#" + /// Test + /// Test 2 + /// "#; + /// let mut reader = Reader::from_str(xml); + /// reader.trim_text(true); + /// let mut count = 0; + /// let mut buf = Vec::new(); + /// let mut txt = Vec::new(); + /// loop { + /// match reader.read_event(&mut buf).await { + /// Ok(Event::Start(ref e)) => count += 1, + /// Ok(Event::Text(e)) => txt.push(e.unescape_and_decode(&reader).expect("Error!")), + /// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e), + /// Ok(Event::Eof) => break, + /// _ => (), + /// } + /// buf.clear(); + /// } + /// println!("Found {} start events", count); + /// println!("Text events: {:?}", txt); + /// } + /// ``` + #[async_recursion] + pub async fn read_event<'a, 'b>(&'a mut self, buf: &'b mut Vec) -> Result> { + let event = match self.tag_state { + TagState::Opened => self.read_until_close(buf).await, + TagState::Closed => self.read_until_open(buf).await, + TagState::Empty => self.close_expanded_empty(), + TagState::Exit => return Ok(Event::Eof), + }; + match event { + Err(_) | Ok(Event::Eof) => self.tag_state = TagState::Exit, + _ => {} + } + event + } + + /// Resolves a potentially qualified **event name** into (namespace name, local name). + /// + /// *Qualified* attribute names have the form `prefix:local-name` where the`prefix` is defined + /// on any containing XML element via `xmlns:prefix="the:namespace:uri"`. The namespace prefix + /// can be defined on the same element as the attribute in question. + /// + /// *Unqualified* event inherits the current *default namespace*. 
+ #[inline] + pub fn event_namespace<'a, 'b, 'c>( + &'a self, + qname: &'b [u8], + namespace_buffer: &'c [u8], + ) -> (Option<&'c [u8]>, &'b [u8]) { + self.ns_buffer + .resolve_namespace(qname, namespace_buffer, true) + } + + /// Resolves a potentially qualified **attribute name** into (namespace name, local name). + /// + /// *Qualified* attribute names have the form `prefix:local-name` where the`prefix` is defined + /// on any containing XML element via `xmlns:prefix="the:namespace:uri"`. The namespace prefix + /// can be defined on the same element as the attribute in question. + /// + /// *Unqualified* attribute names do *not* inherit the current *default namespace*. + #[inline] + pub fn attribute_namespace<'a, 'b, 'c>( + &'a self, + qname: &'b [u8], + namespace_buffer: &'c [u8], + ) -> (Option<&'c [u8]>, &'b [u8]) { + self.ns_buffer + .resolve_namespace(qname, namespace_buffer, false) + } + + /// Reads the next event and resolves its namespace (if applicable). + /// + /// # Examples + /// + /// ``` + /// use std::str::from_utf8; + /// use quick_xml::Reader; + /// use quick_xml::events::Event; + /// + /// #[tokio::main] + /// async fn main() { + /// let xml = r#" + /// Test + /// Test 2 + /// "#; + /// let mut reader = Reader::from_str(xml); + /// reader.trim_text(true); + /// let mut count = 0; + /// let mut buf = Vec::new(); + /// let mut ns_buf = Vec::new(); + /// let mut txt = Vec::new(); + /// loop { + /// match reader.read_namespaced_event(&mut buf, &mut ns_buf).await { + /// Ok((ref ns, Event::Start(ref e))) => { + /// count += 1; + /// match (*ns, e.local_name()) { + /// (Some(b"www.xxxx"), b"tag1") => (), + /// (Some(b"www.yyyy"), b"tag2") => (), + /// (ns, n) => panic!("Namespace and local name mismatch"), + /// } + /// println!("Resolved namespace: {:?}", ns.and_then(|ns| from_utf8(ns).ok())); + /// } + /// Ok((_, Event::Text(e))) => { + /// txt.push(e.unescape_and_decode(&reader).expect("Error!")) + /// }, + /// Err(e) => panic!("Error at position 
{}: {:?}", reader.buffer_position(), e), + /// Ok((_, Event::Eof)) => break, + /// _ => (), + /// } + /// buf.clear(); + /// } + /// println!("Found {} start events", count); + /// println!("Text events: {:?}", txt); + /// } + /// ``` + pub async fn read_namespaced_event<'a, 'b, 'c>( + &'a mut self, + buf: &'b mut Vec, + namespace_buffer: &'c mut Vec, + ) -> Result<(Option<&'c [u8]>, Event<'b>)> { + self.ns_buffer.pop_empty_namespaces(namespace_buffer); + match self.read_event(buf).await { + Ok(Event::Eof) => Ok((None, Event::Eof)), + Ok(Event::Start(e)) => { + self.ns_buffer.push_new_namespaces(&e, namespace_buffer); + Ok(( + self.ns_buffer + .find_namespace_value(e.name(), &**namespace_buffer), + Event::Start(e), + )) + } + Ok(Event::Empty(e)) => { + // For empty elements we need to 'artificially' keep the namespace scope on the + // stack until the next `next()` call occurs. + // Otherwise the caller has no chance to use `resolve` in the context of the + // namespace declarations that are 'in scope' for the empty element alone. + // Ex: + self.ns_buffer.push_new_namespaces(&e, namespace_buffer); + // notify next `read_namespaced_event()` invocation that it needs to pop this + // namespace scope + self.ns_buffer.pending_pop = true; + Ok(( + self.ns_buffer + .find_namespace_value(e.name(), &**namespace_buffer), + Event::Empty(e), + )) + } + Ok(Event::End(e)) => { + // notify next `read_namespaced_event()` invocation that it needs to pop this + // namespace scope + self.ns_buffer.pending_pop = true; + Ok(( + self.ns_buffer + .find_namespace_value(e.name(), &**namespace_buffer), + Event::End(e), + )) + } + Ok(e) => Ok((None, e)), + Err(e) => Err(e), + } + } + + /// Returns the `Reader`s encoding. + /// + /// The used encoding may change after parsing the XML declaration. + /// + /// This encoding will be used by [`decode`]. 
+ /// + /// [`decode`]: #method.decode + #[cfg(feature = "encoding")] + pub fn encoding(&self) -> &'static Encoding { + self.encoding + } + + /// Get utf8 decoder + #[cfg(feature = "encoding")] + pub fn decoder(&self) -> Decoder { + Decoder { + encoding: self.encoding, + } + } + + /// Get utf8 decoder + #[cfg(not(feature = "encoding"))] + pub fn decoder(&self) -> Decoder { + Decoder + } + + /// Reads until end element is found + /// + /// Manages nested cases where parent and child elements have the same name + pub async fn read_to_end>(&mut self, end: K, buf: &mut Vec) -> Result<()> { + let mut depth = 0; + let end = end.as_ref(); + loop { + match self.read_event(buf).await { + Ok(Event::End(ref e)) if e.name() == end => { + if depth == 0 { + return Ok(()); + } + depth -= 1; + } + Ok(Event::Start(ref e)) if e.name() == end => depth += 1, + Err(e) => return Err(e), + Ok(Event::Eof) => { + return Err(Error::UnexpectedEof(format!("", from_utf8(end)))); + } + _ => (), + } + buf.clear(); + } + } + + /// Reads optional text between start and end tags. + /// + /// If the next event is a [`Text`] event, returns the decoded and unescaped content as a + /// `String`. If the next event is an [`End`] event, returns the empty string. In all other + /// cases, returns an error. + /// + /// Any text will be decoded using the XML encoding specified in the XML declaration (or UTF-8 + /// if none is specified). 
+ /// + /// # Examples + /// + /// ``` + /// use quick_xml::Reader; + /// use quick_xml::events::Event; + /// + /// #[tokio::main] + /// async fn main() { + /// let mut xml = Reader::from_reader(b" + /// <b> + /// + /// " as &[u8]); + /// xml.trim_text(true); + /// + /// let expected = ["", ""]; + /// for &content in expected.iter() { + /// match xml.read_event(&mut Vec::new()).await { + /// Ok(Event::Start(ref e)) => { + /// assert_eq!(&xml.read_text(e.name(), &mut Vec::new()).await.unwrap(), content); + /// }, + /// e => panic!("Expecting Start event, found {:?}", e), + /// } + /// } + /// } + /// ``` + /// + /// [`Text`]: events/enum.Event.html#variant.Text + /// [`End`]: events/enum.Event.html#variant.End + pub async fn read_text>(&mut self, end: K, buf: &mut Vec) -> Result { + let s = match self.read_event(buf).await { + Ok(Event::Text(e)) => e.unescape_and_decode(self), + Ok(Event::End(ref e)) if e.name() == end.as_ref() => return Ok("".to_string()), + Err(e) => return Err(e), + Ok(Event::Eof) => return Err(Error::UnexpectedEof("Text".to_string())), + _ => return Err(Error::TextNotFound), + }; + self.read_to_end(end, buf).await?; + s + } + + /// Consumes `Reader` returning the underlying reader + /// + /// Can be used to compute line and column of a parsing error position + /// + /// # Examples + /// + /// ```ignore + /// use std::{str, io::Cursor}; + /// use quick_xml::Reader; + /// use quick_xml::events::Event; + /// + /// fn into_line_and_column(reader: Reader>) -> (usize, usize) { + /// let end_pos = reader.buffer_position(); + /// let mut cursor = reader.into_underlying_reader(); + /// let s = String::from_utf8(cursor.into_inner()[0..end_pos].to_owned()) + /// .expect("can't make a string"); + /// let mut line = 1; + /// let mut column = 0; + /// for c in s.chars() { + /// if c == '\n' { + /// line += 1; + /// column = 0; + /// } else { + /// column += 1; + /// } + /// } + /// (line, column) + /// } + /// + /// #[tokio::main] + /// async fn main() { + 
/// let xml = r#" + /// Test + /// Test 2 + /// "#; + /// let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes())); + /// let mut buf = Vec::new(); + /// + /// loop { + /// match reader.read_event(&mut buf).await { + /// Ok(Event::Start(ref e)) => match e.name() { + /// b"tag1" | b"tag2" => (), + /// tag => { + /// assert_eq!(b"tag3", tag); + /// assert_eq!((3, 22), into_line_and_column(reader)); + /// break; + /// } + /// }, + /// Ok(Event::Eof) => unreachable!(), + /// _ => (), + /// } + /// buf.clear(); + /// } + /// } + /// ``` + pub fn into_underlying_reader(self) -> B { + self.reader + } +} + +impl Reader> { + /// Creates an XML reader from a file path. + pub async fn from_file>(path: P) -> Result>> { + let file = File::open(path).await.map_err(Error::Io)?; + let reader = BufReader::new(file); + Ok(Reader::from_reader(reader)) + } +} + +impl<'a> Reader<&'a [u8]> { + /// Creates an XML reader from a string slice. + pub fn from_str(s: &'a str) -> Reader<&'a [u8]> { + Reader::from_reader(s.as_bytes()) + } +} + +/// Container for a future that reads one byte from a reader +/// but does not consume the byte, so it can be read again. 
+#[derive(Debug)] +#[must_use = "futures do nothing unless you `.await` or poll them"] +pub struct ReadOneDontConsume<'a, R: ?Sized> { + reader: &'a mut R, +} + +/// Creates a future that peeks at the next byte of `reader` without consuming it. +fn read_one_dont_consume<'a, R>(reader: &'a mut R) -> ReadOneDontConsume<'a, R> +where + R: AsyncBufRead + ?Sized + Unpin, +{ + ReadOneDontConsume { reader } +} + +/// Polls the reader's internal buffer and yields its first byte, or `None` when the buffer is +/// empty (end of input). The buffer is never consumed, so the byte can be read again. +fn read_one_dont_consume_internal<R: AsyncBufRead + ?Sized>( + mut reader: Pin<&mut R>, + cx: &mut Context<'_>, +) -> Poll<io::Result<Option<u8>>> { + match reader.as_mut().poll_fill_buf(cx) { + Poll::Ready(t) => Poll::Ready(t.map(|s| if s.is_empty() { None } else { Some(s[0]) })), + Poll::Pending => Poll::Pending, + } +} + +impl<R: AsyncBufRead + ?Sized + Unpin> Future for ReadOneDontConsume<'_, R> { + type Output = io::Result<Option<u8>>; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> { + let Self { reader } = &mut *self; + read_one_dont_consume_internal(Pin::new(reader), cx) + } +} + +/// Reads into `buf` until `byte` is found or the end of the input is reached. +/// +/// The delimiter itself is not kept in `buf`. Returns the number of bytes read (which includes +/// the delimiter when it was found) and advances `buf_position` by the same amount. +/// I/O errors are wrapped in `Error::Io`. +#[inline] +async fn read_until<R: AsyncBufRead + Unpin>( + r: &mut R, + byte: u8, + buf: &mut Vec<u8>, + buf_position: &mut usize, +) -> Result<usize> { + let result = r.read_until(byte, buf).await; + + if let Ok(size) = result { + // drop the trailing delimiter, callers only want the bytes before it + if buf.last() == Some(&byte) { + buf.pop(); + } + *buf_position += size; + } + + result.map_err(Error::Io) +} + +/// Derived from `read_until`, but modified to handle
XML attributes using a minimal state machine. +/// [W3C Extensible Markup Language (XML) 1.1 (2006)](https://www.w3.org/TR/xml11) +/// +/// Attribute values are defined as follows: +/// ```plain +/// AttValue := '"' (([^<&"]) | Reference)* '"' +/// | "'" (([^<&']) | Reference)* "'" +/// ``` +/// (`Reference` is something like `&quot;`, but we don't care about escaped characters at this +/// level) +/// +/// Returns the number of bytes read, including the terminating `end_byte`, which is removed +/// from `buf` again. An `end_byte` inside a quoted attribute value does not terminate the read. +#[inline] +async fn read_elem_until<R: AsyncBufRead + Unpin>( + r: &mut R, + end_byte: u8, + buf: &mut Vec<u8>, + position: &mut usize, +) -> Result<usize> { + #[derive(Clone, Copy)] + enum State { + /// The initial state (inside element, but outside of attribute value) + Elem, + /// Inside a single-quoted attribute value + SingleQ, + /// Inside a double-quoted attribute value + DoubleQ, + } + let mut state = State::Elem; + let mut read = 0; + let mut done = false; + while !done { + let used = { + let available = match r.read_until(end_byte, buf).await { + Ok(0) => { + // NOTE(review): at end of input this discards the last byte that was read, + // not a delimiter - confirm this is intended. `pop` keeps the behaviour but + // cannot panic on an empty buffer. + buf.pop(); + return Ok(read); + } + Ok(n) => { + // only scan the bytes appended by this call + let len = buf.len(); + &buf[len - n..len] + } + Err(ref e) if e.kind() == tokio::io::ErrorKind::Interrupted => continue, + Err(e) => { + *position += read; + return Err(Error::Io(e)); + } + }; + + let mut memiter = memchr::memchr3_iter(end_byte, b'\'', b'"', available); + let used: usize; + loop { + match memiter.next() { + Some(i) => { + state = match (state, available[i]) { + (State::Elem, b) if b == end_byte => { + // only allowed to match `end_byte` while we are in state `Elem` + done = true; + used = i + 1; + break; + } + (State::Elem, b'\'') => State::SingleQ, + (State::Elem, b'\"') => State::DoubleQ, + + // only the matching quote character ends a quoted attribute value + (State::SingleQ, b'\'') | (State::DoubleQ, b'\"') => State::Elem, + + // all other bytes: no state change + _ => state, + }; + } + None => { + used = available.len(); + break; + } + } + } + + used + }; + read += used; + } + + // the loop only finishes after `end_byte` was appended; do not hand it to the caller + buf.pop(); + + *position += read; + Ok(read) +} diff --git
a/src/reader/mod.rs b/src/reader/mod.rs new file mode 100644 index 00000000..db120753 --- /dev/null +++ b/src/reader/mod.rs @@ -0,0 +1,323 @@ +//! A module to handle `Reader` + +use crate::events::{attributes::Attribute, BytesStart}; + +#[cfg(not(feature = "encoding"))] +use crate::errors::{Error, Result}; +#[cfg(not(feature = "encoding"))] +use std::str::from_utf8; + +#[cfg(feature = "encoding")] +use encoding_rs::{Encoding, UTF_16BE, UTF_16LE}; +#[cfg(feature = "encoding")] +use std::borrow::Cow; + +#[cfg(feature = "asynchronous")] +pub mod asynchronous; +#[cfg(not(feature = "asynchronous"))] +pub mod sync; + +/// Trait for decoding, which is shared by the sync and async `Reader` +pub trait Decode { + /// Decodes a slice using the encoding specified in the XML declaration. + /// + /// Decode `bytes` with BOM sniffing and with malformed sequences replaced with the + /// `U+FFFD REPLACEMENT CHARACTER`. + /// + /// If no encoding is specified, defaults to UTF-8. + #[inline] + #[cfg(feature = "encoding")] + fn decode<'b, 'c>(&'b self, bytes: &'c [u8]) -> Cow<'c, str> { + self.read_encoding().decode(bytes).0 + } + + /// Decodes a UTF8 slice regardless of XML declaration. + /// + /// Decode `bytes` with BOM sniffing and with malformed sequences replaced with the + /// `U+FFFD REPLACEMENT CHARACTER`. + /// + /// # Note + /// + /// If you instead want to use XML declared encoding, use the `encoding` feature + #[inline] + #[cfg(not(feature = "encoding"))] + fn decode<'c>(&self, bytes: &'c [u8]) -> Result<&'c str> { + from_utf8(bytes).map_err(Error::Utf8) + } + + /// Decodes a UTF8 slice without BOM (Byte order mark) regardless of XML declaration. + /// + /// Decode `bytes` without BOM and with malformed sequences replaced with the + /// `U+FFFD REPLACEMENT CHARACTER`. 
+ /// + /// # Note + /// + /// If you instead want to use XML declared encoding, use the `encoding` feature + #[inline] + #[cfg(not(feature = "encoding"))] + fn decode_without_bom<'c>(&self, bytes: &'c [u8]) -> Result<&'c str> { + if bytes.starts_with(b"\xEF\xBB\xBF") { + from_utf8(&bytes[3..]).map_err(Error::Utf8) + } else { + from_utf8(bytes).map_err(Error::Utf8) + } + } + + /// Decodes a slice without BOM (Byte order mark) using the encoding specified in the XML declaration. + /// + /// Decode `bytes` without BOM and with malformed sequences replaced with the + /// `U+FFFD REPLACEMENT CHARACTER`. + /// + /// If the encoding has not been set yet, a leading UTF-8, UTF-16LE or UTF-16BE BOM is + /// stripped and, for the UTF-16 variants, used to set the encoding. + /// + /// If no encoding is specified, defaults to UTF-8. + #[inline] + #[cfg(feature = "encoding")] + fn decode_without_bom<'b, 'c>(&'b mut self, mut bytes: &'c [u8]) -> Cow<'c, str> { + if self.read_is_encoding_set() { + return self.read_encoding().decode_with_bom_removal(bytes).0; + } + if bytes.starts_with(b"\xEF\xBB\xBF") { + self.write_is_encoding_set(true); + bytes = &bytes[3..]; + } else if bytes.starts_with(b"\xFF\xFE") { + self.write_is_encoding_set(true); + self.write_encoding(UTF_16LE); + bytes = &bytes[2..]; + } else if bytes.starts_with(b"\xFE\xFF") { + self.write_is_encoding_set(true); + self.write_encoding(UTF_16BE); + // the UTF-16BE BOM is two bytes long, just like the UTF-16LE one + bytes = &bytes[2..]; + }; + self.read_encoding().decode_without_bom_handling(bytes).0 + } + + #[cfg(feature = "encoding")] + /// Returns the encoding specified in the xml, defaults to utf8 + fn read_encoding(&self) -> &'static Encoding; + + #[cfg(feature = "encoding")] + /// Checks whether quick-xml could find out the encoding + fn read_is_encoding_set(&self) -> bool; + + #[cfg(feature = "encoding")] + /// Sets the encoding used for decoding + fn write_encoding(&mut self, val: &'static Encoding); + + #[cfg(feature = "encoding")] + /// Records whether the encoding has been found out + fn write_is_encoding_set(&mut self, val: bool); +} + +#[derive(Clone, Debug)] +enum TagState { + Opened, + Closed, + Empty, + /// Either Eof
or Errored + Exit, +} + +/// A function to check whether the byte is a whitespace (blank, new line, carriage return or tab) +#[inline] +pub(crate) fn is_whitespace(b: u8) -> bool { + match b { + b' ' | b'\r' | b'\n' | b'\t' => true, + _ => false, + } +} + +/// A namespace declaration. Can either bind a namespace to a prefix or define the current default +/// namespace. +#[derive(Clone, Debug)] +struct Namespace { + /// Index of the namespace in the buffer + start: usize, + /// Length of the prefix + /// * if bigger than start, then binds this namespace to the corresponding slice. + /// * else defines the current default namespace. + prefix_len: usize, + /// The namespace name (the URI) of this namespace declaration. + /// + /// The XML standard specifies that an empty namespace value 'removes' a namespace declaration + /// for the extent of its scope. For prefix declarations that's not very interesting, but it is + /// vital for default namespace declarations. With `xmlns=""` you can revert back to the default + /// behaviour of leaving unqualified element names unqualified. + value_len: usize, + /// Level of nesting at which this namespace was declared. The declaring element is included, + /// i.e., a declaration on the document root has `level = 1`. + /// This is used to pop the namespace when the element gets closed. 
+ level: i32, +} + +impl Namespace { + /// Gets the value slice out of namespace buffer + /// + /// Returns `None` if `value_len == 0` + #[inline] + fn opt_value<'a, 'b>(&'a self, ns_buffer: &'b [u8]) -> Option<&'b [u8]> { + if self.value_len == 0 { + None + } else { + let start = self.start + self.prefix_len; + Some(&ns_buffer[start..start + self.value_len]) + } + } + + /// Check if the namespace matches the potentially qualified name + #[inline] + fn is_match(&self, ns_buffer: &[u8], qname: &[u8]) -> bool { + if self.prefix_len == 0 { + !qname.contains(&b':') + } else { + qname.get(self.prefix_len).map_or(false, |n| *n == b':') + && qname.starts_with(&ns_buffer[self.start..self.start + self.prefix_len]) + } + } +} + +/// A namespace management buffer. +/// +/// Holds all internal logic to push/pop namespaces with their levels. +#[derive(Clone, Debug, Default)] +struct NamespaceBufferIndex { + /// a buffer of namespace ranges + slices: Vec, + /// The number of open tags at the moment. We need to keep track of this to know which namespace + /// declarations to remove when we encounter an `End` event. + nesting_level: i32, + /// For `Empty` events keep the 'scope' of the element on the stack artificially. That way, the + /// consumer has a chance to use `resolve` in the context of the empty element. We perform the + /// pop as the first operation in the next `next()` call. 
+ pending_pop: bool, +} + +impl NamespaceBufferIndex { + #[inline] + fn find_namespace_value<'a, 'b, 'c>( + &'a self, + element_name: &'b [u8], + buffer: &'c [u8], + ) -> Option<&'c [u8]> { + self.slices + .iter() + .rfind(|n| n.is_match(buffer, element_name)) + .and_then(|n| n.opt_value(buffer)) + } + + fn pop_empty_namespaces(&mut self, buffer: &mut Vec) { + if !self.pending_pop { + return; + } + self.pending_pop = false; + self.nesting_level -= 1; + let current_level = self.nesting_level; + // from the back (most deeply nested scope), look for the first scope that is still valid + match self.slices.iter().rposition(|n| n.level <= current_level) { + // none of the namespaces are valid, remove all of them + None => { + buffer.clear(); + self.slices.clear(); + } + // drop all namespaces past the last valid namespace + Some(last_valid_pos) => { + if let Some(len) = self.slices.get(last_valid_pos + 1).map(|n| n.start) { + buffer.truncate(len); + self.slices.truncate(last_valid_pos + 1); + } + } + } + } + + fn push_new_namespaces(&mut self, e: &BytesStart, buffer: &mut Vec) { + self.nesting_level += 1; + let level = self.nesting_level; + // adds new namespaces for attributes starting with 'xmlns:' and for the 'xmlns' + // (default namespace) attribute. + for a in e.attributes().with_checks(false) { + if let Ok(Attribute { key: k, value: v }) = a { + if k.starts_with(b"xmlns") { + match k.get(5) { + None => { + let start = buffer.len(); + buffer.extend_from_slice(&*v); + self.slices.push(Namespace { + start, + prefix_len: 0, + value_len: v.len(), + level, + }); + } + Some(&b':') => { + let start = buffer.len(); + buffer.extend_from_slice(&k[6..]); + buffer.extend_from_slice(&*v); + self.slices.push(Namespace { + start, + prefix_len: k.len() - 6, + value_len: v.len(), + level, + }); + } + _ => break, + } + } + } else { + break; + } + } + } + + /// Resolves a potentially qualified **attribute name** into (namespace name, local name). 
+ /// + /// *Qualified* attribute names have the form `prefix:local-name` where the`prefix` is defined + /// on any containing XML element via `xmlns:prefix="the:namespace:uri"`. The namespace prefix + /// can be defined on the same element as the attribute in question. + /// + /// *Unqualified* attribute names do *not* inherit the current *default namespace*. + #[inline] + fn resolve_namespace<'a, 'b, 'c>( + &'a self, + qname: &'b [u8], + buffer: &'c [u8], + use_default: bool, + ) -> (Option<&'c [u8]>, &'b [u8]) { + self.slices + .iter() + .rfind(|n| n.is_match(buffer, qname)) + .map_or((None, qname), |n| { + let len = n.prefix_len; + if len > 0 { + (n.opt_value(buffer), &qname[len + 1..]) + } else if use_default { + (n.opt_value(buffer), qname) + } else { + (None, qname) + } + }) + } +} + +/// Utf8 Decoder +#[cfg(not(feature = "encoding"))] +#[derive(Clone, Copy, Debug)] +pub struct Decoder; + +/// Utf8 Decoder +#[cfg(feature = "encoding")] +#[derive(Clone, Copy, Debug)] +pub struct Decoder { + encoding: &'static Encoding, +} + +impl Decoder { + /// Decode a slice of u8 into a UTF8 str + #[cfg(not(feature = "encoding"))] + pub fn decode<'c>(&self, bytes: &'c [u8]) -> Result<&'c str> { + from_utf8(bytes).map_err(Error::Utf8) + } + + /// Decode a slice of u8 into a Cow str + #[cfg(feature = "encoding")] + pub fn decode<'c>(&self, bytes: &'c [u8]) -> Cow<'c, str> { + self.encoding.decode(bytes).0 + } +} diff --git a/src/reader.rs b/src/reader/sync.rs similarity index 81% rename from src/reader.rs rename to src/reader/sync.rs index 961c9be2..c3cd260d 100644 --- a/src/reader.rs +++ b/src/reader/sync.rs @@ -1,25 +1,39 @@ -//! A module to handle `Reader` +//! 
A module to handle sync `Reader` #[cfg(feature = "encoding")] use std::borrow::Cow; +use std::fs::File; use std::io::{self, BufRead, BufReader}; use std::{fs::File, path::Path, str::from_utf8}; #[cfg(feature = "encoding")] -use encoding_rs::{Encoding, UTF_16BE, UTF_16LE}; +use encoding_rs::Encoding; use crate::errors::{Error, Result}; use crate::events::{attributes::Attribute, BytesDecl, BytesEnd, BytesStart, BytesText, Event}; -use memchr; +use super::{is_whitespace, Decode, Decoder, NamespaceBufferIndex, TagState}; -#[derive(Clone)] -enum TagState { - Opened, - Closed, - Empty, - /// Either Eof or Errored - Exit, +impl Decode for Reader { + #[cfg(feature = "encoding")] + fn read_encoding(&self) -> &'static Encoding { + self.encoding + } + + #[cfg(feature = "encoding")] + fn read_is_encoding_set(&self) -> bool { + self.is_encoding_set + } + + #[cfg(feature = "encoding")] + fn write_encoding(&mut self, val: &'static Encoding) { + self.encoding = val; + } + + #[cfg(feature = "encoding")] + fn write_is_encoding_set(&mut self, val: bool) { + self.is_encoding_set = val; + } } /// A low level encoding-agnostic XML event reader. @@ -60,6 +74,7 @@ enum TagState { /// buf.clear(); /// } /// ``` +#[allow(clippy::struct_excessive_bools)] #[derive(Clone)] pub struct Reader { /// reader @@ -294,8 +309,8 @@ impl Reader { b'/' => self.read_end(bytes), b'?' => self.read_question_mark(bytes), _ => unreachable!( - "We checked that `start` must be one of [/?], was {:?} \ - instead.", + "We checked that `start` must be one of [/!?], was {:?} \ + instead.", start ), }, @@ -330,12 +345,12 @@ impl Reader { }; match self.opened_starts.pop() { Some(start) => { - if name != &self.opened_buffer[start..] { - let expected = &self.opened_buffer[start..]; - mismatch_err(expected, name, &mut self.buf_position) - } else { + if name == &self.opened_buffer[start..] 
{ self.opened_buffer.truncate(start); Ok(Event::End(BytesEnd::borrowed(name))) + } else { + let expected = &self.opened_buffer[start..]; + mismatch_err(expected, name, &mut self.buf_position) } } None => mismatch_err(b"", &buf[1..], &mut self.buf_position), @@ -433,6 +448,7 @@ impl Reader { // TODO: do this directly when reading bufreader ... let len = buf.len(); let name_end = buf.iter().position(|&b| is_whitespace(b)).unwrap_or(len); + if let Some(&b'/') = buf.last() { let end = if name_end < len { name_end } else { len - 1 }; if self.expand_empty_elements { @@ -655,77 +671,6 @@ impl Reader { self.encoding } - /// Decodes a slice using the encoding specified in the XML declaration. - /// - /// Decode `bytes` with BOM sniffing and with malformed sequences replaced with the - /// `U+FFFD REPLACEMENT CHARACTER`. - /// - /// If no encoding is specified, defaults to UTF-8. - #[inline] - #[cfg(feature = "encoding")] - pub fn decode<'b, 'c>(&'b self, bytes: &'c [u8]) -> Cow<'c, str> { - self.encoding.decode(bytes).0 - } - - /// Decodes a UTF8 slice without BOM (Byte order mark) regardless of XML declaration. - /// - /// Decode `bytes` without BOM and with malformed sequences replaced with the - /// `U+FFFD REPLACEMENT CHARACTER`. - /// - /// # Note - /// - /// If you instead want to use XML declared encoding, use the `encoding` feature - #[inline] - #[cfg(not(feature = "encoding"))] - pub fn decode_without_bom<'c>(&self, bytes: &'c [u8]) -> Result<&'c str> { - if bytes.starts_with(b"\xEF\xBB\xBF") { - from_utf8(&bytes[3..]).map_err(Error::Utf8) - } else { - from_utf8(bytes).map_err(Error::Utf8) - } - } - - /// Decodes a slice using without BOM (Byte order mark) the encoding specified in the XML declaration. - /// - /// Decode `bytes` without BOM and with malformed sequences replaced with the - /// `U+FFFD REPLACEMENT CHARACTER`. - /// - /// If no encoding is specified, defaults to UTF-8. 
- #[inline] - #[cfg(feature = "encoding")] - pub fn decode_without_bom<'b, 'c>(&'b mut self, mut bytes: &'c [u8]) -> Cow<'c, str> { - if self.is_encoding_set { - return self.encoding.decode_with_bom_removal(bytes).0; - } - if bytes.starts_with(b"\xEF\xBB\xBF") { - self.is_encoding_set = true; - bytes = &bytes[3..]; - } else if bytes.starts_with(b"\xFF\xFE") { - self.is_encoding_set = true; - self.encoding = UTF_16LE; - bytes = &bytes[2..]; - } else if bytes.starts_with(b"\xFE\xFF") { - self.is_encoding_set = true; - self.encoding = UTF_16BE; - bytes = &bytes[3..]; - }; - self.encoding.decode_without_bom_handling(bytes).0 - } - - /// Decodes a UTF8 slice regardless of XML declaration. - /// - /// Decode `bytes` with BOM sniffing and with malformed sequences replaced with the - /// `U+FFFD REPLACEMENT CHARACTER`. - /// - /// # Note - /// - /// If you instead want to use XML declared encoding, use the `encoding` feature - #[inline] - #[cfg(not(feature = "encoding"))] - pub fn decode<'c>(&self, bytes: &'c [u8]) -> Result<&'c str> { - from_utf8(bytes).map_err(Error::Utf8) - } - /// Get utf8 decoder #[cfg(feature = "encoding")] pub fn decoder(&self) -> Decoder { @@ -1385,212 +1330,3 @@ impl<'a> BufferedInput<'a, 'a, ()> for &'a [u8] { return event; } } - -/// A function to check whether the byte is a whitespace (blank, new line, carriage return or tab) -#[inline] -pub(crate) fn is_whitespace(b: u8) -> bool { - match b { - b' ' | b'\r' | b'\n' | b'\t' => true, - _ => false, - } -} - -/// A namespace declaration. Can either bind a namespace to a prefix or define the current default -/// namespace. -#[derive(Debug, Clone)] -struct Namespace { - /// Index of the namespace in the buffer - start: usize, - /// Length of the prefix - /// * if bigger than start, then binds this namespace to the corresponding slice. - /// * else defines the current default namespace. - prefix_len: usize, - /// The namespace name (the URI) of this namespace declaration. 
- /// - /// The XML standard specifies that an empty namespace value 'removes' a namespace declaration - /// for the extent of its scope. For prefix declarations that's not very interesting, but it is - /// vital for default namespace declarations. With `xmlns=""` you can revert back to the default - /// behaviour of leaving unqualified element names unqualified. - value_len: usize, - /// Level of nesting at which this namespace was declared. The declaring element is included, - /// i.e., a declaration on the document root has `level = 1`. - /// This is used to pop the namespace when the element gets closed. - level: i32, -} - -impl Namespace { - /// Gets the value slice out of namespace buffer - /// - /// Returns `None` if `value_len == 0` - #[inline] - fn opt_value<'a, 'b>(&'a self, ns_buffer: &'b [u8]) -> Option<&'b [u8]> { - if self.value_len == 0 { - None - } else { - let start = self.start + self.prefix_len; - Some(&ns_buffer[start..start + self.value_len]) - } - } - - /// Check if the namespace matches the potentially qualified name - #[inline] - fn is_match(&self, ns_buffer: &[u8], qname: &[u8]) -> bool { - if self.prefix_len == 0 { - !qname.contains(&b':') - } else { - qname.get(self.prefix_len).map_or(false, |n| *n == b':') - && qname.starts_with(&ns_buffer[self.start..self.start + self.prefix_len]) - } - } -} - -/// A namespace management buffer. -/// -/// Holds all internal logic to push/pop namespaces with their levels. -#[derive(Debug, Default, Clone)] -struct NamespaceBufferIndex { - /// a buffer of namespace ranges - slices: Vec, - /// The number of open tags at the moment. We need to keep track of this to know which namespace - /// declarations to remove when we encounter an `End` event. - nesting_level: i32, - /// For `Empty` events keep the 'scope' of the element on the stack artificially. That way, the - /// consumer has a chance to use `resolve` in the context of the empty element. 
We perform the - /// pop as the first operation in the next `next()` call. - pending_pop: bool, -} - -impl NamespaceBufferIndex { - #[inline] - fn find_namespace_value<'a, 'b, 'c>( - &'a self, - element_name: &'b [u8], - buffer: &'c [u8], - ) -> Option<&'c [u8]> { - self.slices - .iter() - .rfind(|n| n.is_match(buffer, element_name)) - .and_then(|n| n.opt_value(buffer)) - } - - fn pop_empty_namespaces(&mut self, buffer: &mut Vec) { - if !self.pending_pop { - return; - } - self.pending_pop = false; - self.nesting_level -= 1; - let current_level = self.nesting_level; - // from the back (most deeply nested scope), look for the first scope that is still valid - match self.slices.iter().rposition(|n| n.level <= current_level) { - // none of the namespaces are valid, remove all of them - None => { - buffer.clear(); - self.slices.clear(); - } - // drop all namespaces past the last valid namespace - Some(last_valid_pos) => { - if let Some(len) = self.slices.get(last_valid_pos + 1).map(|n| n.start) { - buffer.truncate(len); - self.slices.truncate(last_valid_pos + 1); - } - } - } - } - - fn push_new_namespaces(&mut self, e: &BytesStart, buffer: &mut Vec) { - self.nesting_level += 1; - let level = self.nesting_level; - // adds new namespaces for attributes starting with 'xmlns:' and for the 'xmlns' - // (default namespace) attribute. 
- for a in e.attributes().with_checks(false) { - if let Ok(Attribute { key: k, value: v }) = a { - if k.starts_with(b"xmlns") { - match k.get(5) { - None => { - let start = buffer.len(); - buffer.extend_from_slice(&*v); - self.slices.push(Namespace { - start, - prefix_len: 0, - value_len: v.len(), - level, - }); - } - Some(&b':') => { - let start = buffer.len(); - buffer.extend_from_slice(&k[6..]); - buffer.extend_from_slice(&*v); - self.slices.push(Namespace { - start, - prefix_len: k.len() - 6, - value_len: v.len(), - level, - }); - } - _ => break, - } - } - } else { - break; - } - } - } - - /// Resolves a potentially qualified **attribute name** into (namespace name, local name). - /// - /// *Qualified* attribute names have the form `prefix:local-name` where the`prefix` is defined - /// on any containing XML element via `xmlns:prefix="the:namespace:uri"`. The namespace prefix - /// can be defined on the same element as the attribute in question. - /// - /// *Unqualified* attribute names do *not* inherit the current *default namespace*. 
- #[inline] - fn resolve_namespace<'a, 'b, 'c>( - &'a self, - qname: &'b [u8], - buffer: &'c [u8], - use_default: bool, - ) -> (Option<&'c [u8]>, &'b [u8]) { - self.slices - .iter() - .rfind(|n| n.is_match(buffer, qname)) - .map_or((None, qname), |n| { - let len = n.prefix_len; - if len > 0 { - (n.opt_value(buffer), &qname[len + 1..]) - } else if use_default { - (n.opt_value(buffer), qname) - } else { - (None, qname) - } - }) - } -} - -/// Utf8 Decoder -#[cfg(not(feature = "encoding"))] -#[derive(Clone, Copy, Debug)] -pub struct Decoder; - -/// Utf8 Decoder -#[cfg(feature = "encoding")] -#[derive(Clone, Copy, Debug)] -pub struct Decoder { - encoding: &'static Encoding, -} - -impl Decoder { - #[cfg(not(feature = "encoding"))] - pub fn decode<'c>(&self, bytes: &'c [u8]) -> Result<&'c str> { - from_utf8(bytes).map_err(Error::Utf8) - } - - #[cfg(not(feature = "encoding"))] - pub fn decode_owned<'c>(&self, bytes: Vec) -> Result { - String::from_utf8(bytes).map_err(|e| Error::Utf8(e.utf8_error())) - } - - #[cfg(feature = "encoding")] - pub fn decode<'c>(&self, bytes: &'c [u8]) -> Cow<'c, str> { - self.encoding.decode(bytes).0 - } -} diff --git a/src/writer.rs b/src/writer.rs index 04ed699d..02336894 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -10,8 +10,9 @@ use std::io::Write; /// /// # Examples /// -/// ```rust -/// # fn main() { +/// ```ignore +/// extern crate quick_xml; +/// fn main() { /// use quick_xml::{Reader, Writer}; /// use quick_xml::events::{Event, BytesEnd, BytesStart}; /// use std::io::Cursor; @@ -25,7 +26,7 @@ use std::io::Write; /// match reader.read_event(&mut buf) { /// Ok(Event::Start(ref e)) if e.name() == b"this_tag" => { /// -/// // crates a new element ... alternatively we could reuse `e` by calling +/// // creates a new element ... 
alternatively we could reuse `e` by calling /// // `e.into_owned()` /// let mut elem = BytesStart::owned(b"my_elem".to_vec(), "my_elem".len()); /// @@ -52,7 +53,7 @@ use std::io::Write; /// let result = writer.into_inner().into_inner(); /// let expected = r#"text"#; /// assert_eq!(result, expected.as_bytes()); -/// # } +/// } /// ``` #[derive(Clone)] pub struct Writer { diff --git a/tests/test.rs b/tests/test.rs index 09f15a87..76b31683 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -2,11 +2,15 @@ extern crate quick_xml; #[cfg(feature = "serialize")] extern crate serde; -use quick_xml::{events::attributes::Attribute, events::Event::*, Error, Reader}; -use std::{borrow::Cow, io::Cursor}; - +use quick_xml::events::attributes::Attribute; +use quick_xml::events::Event::*; +use quick_xml::Reader; #[cfg(feature = "serialize")] use serde::{Deserialize, Serialize}; +use std::borrow::Cow; +use std::io::Cursor; +#[cfg(feature = "asynchronous")] +use tokio::runtime::Runtime; #[test] fn test_sample() { @@ -14,8 +18,45 @@ fn test_sample() { let mut buf = Vec::new(); let mut r = Reader::from_reader(src); let mut count = 0; + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match r.read_event(&mut buf).unwrap() { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_event(&mut buf).await }); + + match event.unwrap() { + Start(_) => count += 1, + Decl(e) => println!("{:?}", e.version()), + Eof => break, + _ => (), + } + buf.clear(); + } + println!("{}", count); +} + +#[test] +fn test_sample_async() { + let src: &[u8] = include_bytes!("sample_rss.xml"); + let mut buf = Vec::new(); + let mut r = Reader::from_reader(src); + let mut count = 0; + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + loop { + #[cfg(not(feature = "asynchronous"))] + 
let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_event(&mut buf).await }); + + match event.unwrap() { Start(_) => count += 1, Decl(e) => println!("{:?}", e.version()), Eof => break, @@ -32,7 +73,16 @@ fn test_attributes_empty() { let mut r = Reader::from_reader(src as &[u8]); r.trim_text(true).expand_empty_elements(false); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_event(&mut buf).await }); + + match event { Ok(Empty(e)) => { let mut atts = e.attributes(); match atts.next() { @@ -58,13 +108,23 @@ fn test_attributes_empty() { } } +#[cfg(not(feature = "asynchronous"))] #[test] fn test_attribute_equal() { let src = b""; let mut r = Reader::from_reader(src as &[u8]); r.trim_text(true).expand_empty_elements(false); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_event(&mut buf).await }); + + match event { Ok(Empty(e)) => { let mut atts = e.attributes(); match atts.next() { @@ -83,14 +143,24 @@ fn test_attribute_equal() { } } +#[cfg(not(feature = "asynchronous"))] #[test] fn test_comment_starting_with_gt() { let src = b"-->"; let mut r = Reader::from_reader(src as &[u8]); r.trim_text(true).expand_empty_elements(false); let mut buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match r.read_event(&mut buf) { + #[cfg(not(feature = "asynchronous"))] + 
let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_event(&mut buf).await }); + + match event { Ok(Comment(ref e)) if &**e == b">" => break, Ok(Eof) => panic!("Expecting Comment"), _ => (), @@ -109,8 +179,16 @@ fn test_attributes_empty_ns() { r.trim_text(true).expand_empty_elements(false); let mut buf = Vec::new(); let mut ns_buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); - let e = match r.read_namespaced_event(&mut buf, &mut ns_buf) { + let e = match event { Ok((None, Empty(e))) => e, e => panic!("Expecting Empty event, got {:?}", e), }; @@ -146,6 +224,7 @@ fn test_attributes_empty_ns() { /// Single empty element with qualified attributes. /// Empty element expansion: enabled /// The code path for namespace handling is slightly different for `Empty` vs. `Start+End`. 
+#[cfg(not(feature = "asynchronous"))] #[test] fn test_attributes_empty_ns_expanded() { let src = b""; @@ -154,8 +233,18 @@ fn test_attributes_empty_ns_expanded() { r.trim_text(true).expand_empty_elements(true); let mut buf = Vec::new(); let mut ns_buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + { - let e = match r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = + runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + let e = match event { Ok((None, Start(e))) => e, e => panic!("Expecting Empty event, got {:?}", e), }; @@ -188,7 +277,13 @@ fn test_attributes_empty_ns_expanded() { } } - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + match event { Ok((None, End(e))) => assert_eq!(b"a", e.name()), e => panic!("Expecting End event, got {:?}", e), } @@ -202,10 +297,19 @@ fn test_default_ns_shadowing_empty() { r.trim_text(true).expand_empty_elements(false); let mut buf = Vec::new(); let mut ns_buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); // { - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = + runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + match event { Ok((Some(ns), Start(e))) => { assert_eq!(&ns[..], b"urn:example:o"); assert_eq!(e.name(), b"e"); @@ -216,7 +320,14 @@ fn 
test_default_ns_shadowing_empty() { // { - let e = match r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = + runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + let e = match event { Ok((Some(ns), Empty(e))) => { assert_eq!(::std::str::from_utf8(ns).unwrap(), "urn:example:i"); assert_eq!(e.name(), b"e"); @@ -247,7 +358,14 @@ fn test_default_ns_shadowing_empty() { } // - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + match event { Ok((Some(ns), End(e))) => { assert_eq!(&ns[..], b"urn:example:o"); assert_eq!(e.name(), b"e"); @@ -256,6 +374,7 @@ fn test_default_ns_shadowing_empty() { } } +#[cfg(not(feature = "asynchronous"))] #[test] fn test_default_ns_shadowing_expanded() { let src = b""; @@ -264,10 +383,19 @@ fn test_default_ns_shadowing_expanded() { r.trim_text(true).expand_empty_elements(true); let mut buf = Vec::new(); let mut ns_buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); // { - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = + runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + match event { Ok((Some(ns), Start(e))) => { assert_eq!(&ns[..], b"urn:example:o"); assert_eq!(e.name(), b"e"); @@ -279,7 +407,14 @@ fn test_default_ns_shadowing_expanded() { // { - let e = match r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(not(feature = "asynchronous"))] + 
let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = + runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + let e = match event { Ok((Some(ns), Start(e))) => { assert_eq!(&ns[..], b"urn:example:i"); assert_eq!(e.name(), b"e"); @@ -309,15 +444,28 @@ fn test_default_ns_shadowing_expanded() { } // virtual - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + match event { Ok((Some(ns), End(e))) => { assert_eq!(&ns[..], b"urn:example:i"); assert_eq!(e.name(), b"e"); } e => panic!("Expected End event (), got {:?}", e), } + // - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + match event { Ok((Some(ns), End(e))) => { assert_eq!(&ns[..], b"urn:example:o"); assert_eq!(e.name(), b"e"); @@ -333,8 +481,17 @@ fn test_koi8_r_encoding() { let mut r = Reader::from_reader(src as &[u8]); r.trim_text(true).expand_empty_elements(false); let mut buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match r.read_event(&mut buf) { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_event(&mut buf).await }); + + match event { Ok(Text(e)) => { e.unescape_and_decode(&r).unwrap(); } @@ -352,14 +509,24 @@ fn fuzz_53() { let cursor = Cursor::new(data); let mut reader = Reader::from_reader(cursor); let mut buf = vec![]; + 
#[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match reader.read_event(&mut buf) { + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { Ok(quick_xml::events::Event::Eof) | Err(..) => break, _ => buf.clear(), } } } +#[cfg(not(feature = "asynchronous"))] #[test] fn test_issue94() { let data = br#" @@ -368,8 +535,17 @@ fn test_issue94() { let mut reader = Reader::from_reader(&data[..]); reader.trim_text(true); let mut buf = vec![]; + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match reader.read_event(&mut buf) { + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { Ok(quick_xml::events::Event::Eof) | Err(..) 
=> break, _ => buf.clear(), } @@ -385,8 +561,17 @@ fn fuzz_101() { let cursor = Cursor::new(data); let mut reader = Reader::from_reader(cursor); let mut buf = vec![]; + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match reader.read_event(&mut buf) { + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { Ok(Start(ref e)) | Ok(Empty(ref e)) => { if e.unescaped().is_err() { break; @@ -417,14 +602,30 @@ fn test_default_namespace() { // let mut buf = Vec::new(); let mut ns_buf = Vec::new(); - if let Ok((None, Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + if let Ok((None, Start(_))) = event { } else { panic!("expecting outer start element with no namespace"); } // { - let event = match r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = + runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + let event = match event { Ok((Some(b"www1"), Start(event))) => event, Ok((Some(_), Start(_))) => panic!("expecting namespace to resolve to 'www1'"), _ => panic!("expecting namespace resolution"), @@ -442,7 +643,13 @@ fn test_default_namespace() { } // - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + 
#[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + match event { Ok((Some(b"www1"), End(_))) => (), Ok((Some(_), End(_))) => panic!("expecting namespace to resolve to 'www1'"), _ => panic!("expecting namespace resolution"), @@ -450,7 +657,14 @@ fn test_default_namespace() { // very important: a should not be in any namespace. The default namespace only applies to // the sub-document it is defined on. - if let Ok((None, End(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + if let Ok((None, End(_))) = event { } else { panic!("expecting outer end element with no namespace"); } diff --git a/tests/unit_tests.rs b/tests/unit_tests.rs index 3515666a..07491972 100644 --- a/tests/unit_tests.rs +++ b/tests/unit_tests.rs @@ -1,13 +1,28 @@ use std::io::Cursor; use std::str::from_utf8; +use quick_xml::events::Event::*; use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event}; -use quick_xml::{events::Event::*, Reader, Result, Writer}; +use quick_xml::{Reader, Result, Writer}; +use std::io::Cursor; +use std::str::from_utf8; +#[cfg(feature = "asynchronous")] +use tokio::runtime::Runtime; macro_rules! 
next_eq_name { ($r:expr, $t:tt, $bytes:expr) => { let mut buf = Vec::new(); - match $r.read_event(&mut buf).unwrap() { + + #[cfg(not(feature = "asynchronous"))] + let event = $r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { $r.read_event(&mut buf).await }); + + match event.unwrap() { $t(ref e) if e.name() == $bytes => (), e => panic!( "expecting {}({:?}), found {:?}", @@ -23,7 +38,17 @@ macro_rules! next_eq_name { macro_rules! next_eq_content { ($r:expr, $t:tt, $bytes:expr) => { let mut buf = Vec::new(); - match $r.read_event(&mut buf).unwrap() { + + #[cfg(not(feature = "asynchronous"))] + let event = $r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { $r.read_event(&mut buf).await }); + + match event.unwrap() { $t(ref e) if &**e == $bytes => (), e => panic!( "expecting {}({:?}), found {:?}", @@ -124,7 +149,17 @@ fn test_xml_decl() { let mut r = Reader::from_str(""); r.trim_text(true); let mut buf = Vec::new(); - match r.read_event(&mut buf).unwrap() { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_event(&mut buf).await }); + + match event.unwrap() { Decl(ref e) => { match e.version() { Ok(v) => assert_eq!( @@ -204,10 +239,21 @@ fn test_writer() -> Result<()> { reader.trim_text(true); let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match reader.read_event(&mut buf)? 
{ - Eof => break, - e => assert!(writer.write_event(e).is_ok()), + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { + Ok(Eof) => break, + Ok(e) => assert!(writer.write_event(e).is_ok()), + Err(e) => panic!("{}", e), } } @@ -223,10 +269,21 @@ fn test_writer_borrow() -> Result<()> { reader.trim_text(true); let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match reader.read_event(&mut buf)? { - Eof => break, - e => assert!(writer.write_event(&e).is_ok()), // either `e` or `&e` + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { + Ok(Eof) => break, + Ok(e) => assert!(writer.write_event(&e).is_ok()), // either `e` or `&e` + Err(e) => panic!("{}", e), } } @@ -246,15 +303,25 @@ fn test_writer_indent() -> Result<()> { reader.trim_text(true); let mut writer = Writer::new_with_indent(Cursor::new(Vec::new()), b' ', 4); let mut buf = Vec::new(); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match reader.read_event(&mut buf)? 
{ - Eof => break, - e => assert!(writer.write_event(e).is_ok()), + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { + Ok(Eof) => break, + Ok(e) => assert!(writer.write_event(e).is_ok()), + Err(e) => panic!("{}", e), } } let result = writer.into_inner().into_inner(); - // println!("{:?}", String::from_utf8_lossy(&result)); #[cfg(windows)] assert!(result.into_iter().eq(txt.bytes().filter(|b| *b != 13))); @@ -272,10 +339,21 @@ fn test_writer_indent_cdata() -> Result<()> { reader.trim_text(true); let mut writer = Writer::new_with_indent(Cursor::new(Vec::new()), b' ', 4); let mut buf = Vec::new(); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match reader.read_event(&mut buf)? { - Eof => break, - e => assert!(writer.write_event(e).is_ok()), + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { + Ok(Eof) => break, + Ok(e) => assert!(writer.write_event(e).is_ok()), + Err(e) => panic!("{}", e), } } @@ -298,10 +376,21 @@ fn test_write_empty_element_attrs() -> Result<()> { reader.expand_empty_elements(false); let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match reader.read_event(&mut buf)? 
{ - Eof => break, - e => assert!(writer.write_event(e).is_ok()), + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { + Ok(Eof) => break, + Ok(e) => assert!(writer.write_event(e).is_ok()), + Err(e) => panic!("{}", e), } } @@ -318,19 +407,30 @@ fn test_write_attrs() -> Result<()> { reader.trim_text(true); let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - let event = match reader.read_event(&mut buf)? { - Eof => break, - Start(elem) => { - let mut attrs = elem.attributes().collect::>>()?; + #[cfg(not(feature = "asynchronous"))] + let ev = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let ev = runtime.block_on(async { reader.read_event(&mut buf).await }); + + let event = match ev { + Ok(Eof) => break, + Ok(Start(elem)) => { + let mut attrs = elem.attributes().collect::>>().unwrap(); attrs.extend_from_slice(&[("a", "b").into(), ("c", "d").into()]); let mut elem = BytesStart::owned(b"copy".to_vec(), 4); elem.extend_attributes(attrs); elem.push_attribute(("x", "y\"z")); Start(elem) } - End(_) => End(BytesEnd::borrowed(b"copy")), - e => e, + Ok(End(_)) => End(BytesEnd::borrowed(b"copy")), + Ok(e) => e, + Err(e) => panic!("{}", e), }; assert!(writer.write_event(event).is_ok()); } @@ -425,7 +525,17 @@ fn test_buf_position_err_end_element() { r.trim_text(true).check_end_names(true); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_event(&mut buf).await }); + + 
match event { Err(_) if r.buffer_position() == 2 => (), // error at char 2: no opening tag Err(e) => panic!( "expecting buf_pos = 2, found {}, err: {:?}", @@ -445,7 +555,17 @@ fn test_buf_position_err_comment() { assert_eq!(r.buffer_position(), 3); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_event(&mut buf).await }); + + match event { Err(_) if r.buffer_position() == 4 => { // error at char 5: no closing --> tag found assert!(true); @@ -464,18 +584,34 @@ fn test_buf_position_err_comment_2_buf() { let mut r = Reader::from_str(" tag found assert!(true); } Err(e) => panic!( - "expecting buf_pos = 5, found {}, err: {:?}", + "expecting buf_pos = 4, found {}, err: {:?}", r.buffer_position(), e ), @@ -492,7 +628,17 @@ fn test_buf_position_err_comment_trim_text() { assert_eq!(r.buffer_position(), 3); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_event(&mut buf).await }); + + match event { Err(_) if r.buffer_position() == 7 => { // error at char 5: no closing --> tag found assert!(true); @@ -513,12 +659,28 @@ fn test_namespace() { let mut buf = Vec::new(); let mut ns_buf = Vec::new(); - if let Ok((None, Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = 
"asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + if let Ok((None, Start(_))) = event { } else { assert!(false, "expecting start element with no namespace"); } - if let Ok((Some(a), Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + if let Ok((Some(a), Start(_))) = event { if &*a == b"www1" { assert!(true); } else { @@ -534,16 +696,32 @@ fn test_default_namespace() { let mut r = Reader::from_str(""); r.trim_text(true); - // let mut buf = Vec::new(); let mut ns_buf = Vec::new(); - if let Ok((None, Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + // + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + if let Ok((None, Start(_))) = event { } else { assert!(false, "expecting outer start element with no namespace"); } // - if let Ok((Some(a), Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + if let Ok((Some(a), Start(_))) = event { if &*a == b"www1" { assert!(true); } else { @@ -554,7 +732,14 @@ fn test_default_namespace() { } // - if let Ok((Some(a), End(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut 
ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + if let Ok((Some(a), End(_))) = event { if &*a == b"www1" { assert!(true); } else { @@ -566,7 +751,14 @@ fn test_default_namespace() { // very important: a should not be in any namespace. The default namespace only applies to // the sub-document it is defined on. - if let Ok((None, End(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + if let Ok((None, End(_))) = event { } else { assert!(false, "expecting outer end element with no namespace"); } @@ -579,7 +771,16 @@ fn test_default_namespace_reset() { let mut buf = Vec::new(); let mut ns_buf = Vec::new(); - if let Ok((Some(a), Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + if let Ok((Some(a), Start(_))) = event { assert_eq!( &a[..], b"www1", @@ -589,16 +790,35 @@ fn test_default_namespace_reset() { panic!("expecting outer start element with to resolve to 'www1'"); } - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + match event { Ok((None, Start(_))) => (), e => panic!("expecting inner start element, got {:?}", e), } - if let 
Ok((None, End(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + if let Ok((None, End(_))) = event { } else { assert!(false, "expecting inner end element"); } - if let Ok((Some(a), End(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + if let Ok((Some(a), End(_))) = event { assert_eq!( &a[..], b"www1", @@ -615,7 +835,17 @@ fn test_escaped_content() { r.trim_text(true); next_eq!(r, Start, b"a"); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_event(&mut buf).await }); + + match event { Ok(Text(e)) => { if &*e != b"<test>" { panic!( @@ -664,10 +894,20 @@ fn test_read_write_roundtrip_results_in_identity() -> Result<()> { reader.trim_text(false).expand_empty_elements(false); let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match reader.read_event(&mut buf)? 
{ - Eof => break, - e => assert!(writer.write_event(e).is_ok()), + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { + Ok(Eof) => break, + Ok(e) => assert!(writer.write_event(e).is_ok()), + Err(e) => panic!("{}", e), } } @@ -691,10 +931,20 @@ fn test_read_write_roundtrip() -> Result<()> { reader.trim_text(false).expand_empty_elements(false); let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match reader.read_event(&mut buf)? { - Eof => break, - e => assert!(writer.write_event(e).is_ok()), + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { + Ok(Eof) => break, + Ok(e) => assert!(writer.write_event(e).is_ok()), + Err(e) => panic!("{}", e), } } @@ -718,16 +968,26 @@ fn test_read_write_roundtrip_escape() -> Result<()> { reader.trim_text(false).expand_empty_elements(false); let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match reader.read_event(&mut buf)? 
{ - Eof => break, - Text(e) => { + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { + Ok(Eof) => break, + Ok(Text(e)) => { let t = e.escaped(); assert!(writer .write_event(Event::Text(BytesText::from_escaped(t.to_vec()))) .is_ok()); } - e => assert!(writer.write_event(e).is_ok()), + Ok(e) => assert!(writer.write_event(e).is_ok()), + Err(e) => panic!("{}", e), } } @@ -751,16 +1011,26 @@ fn test_read_write_roundtrip_escape_text() -> Result<()> { reader.trim_text(false).expand_empty_elements(false); let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match reader.read_event(&mut buf)? { - Eof => break, - Text(e) => { + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { + Ok(Eof) => break, + Ok(Text(e)) => { let t = e.unescape_and_decode(&reader).unwrap(); assert!(writer .write_event(Event::Text(BytesText::from_plain_str(&t))) .is_ok()); } - e => assert!(writer.write_event(e).is_ok()), + Ok(e) => assert!(writer.write_event(e).is_ok()), + Err(e) => panic!("{}", e), } } @@ -774,7 +1044,17 @@ fn test_closing_bracket_in_single_quote_attr() { let mut r = Reader::from_str(""); r.trim_text(true); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_event(&mut buf).await }); + + match event { Ok(Start(e)) => { let mut attrs = 
e.attributes(); match attrs.next() { @@ -797,7 +1077,17 @@ fn test_closing_bracket_in_double_quote_attr() { let mut r = Reader::from_str("\" check=\"2\">"); r.trim_text(true); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_event(&mut buf).await }); + + match event { Ok(Start(e)) => { let mut attrs = e.attributes(); match attrs.next() { @@ -820,7 +1110,17 @@ fn test_closing_bracket_in_double_quote_mixed() { let mut r = Reader::from_str("'\" check=\"'2'\">"); r.trim_text(true); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_event(&mut buf).await }); + + match event { Ok(Start(e)) => { let mut attrs = e.attributes(); match attrs.next() { @@ -843,7 +1143,17 @@ fn test_closing_bracket_in_single_quote_mixed() { let mut r = Reader::from_str(""); r.trim_text(true); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_event(&mut buf).await }); + + match event { Ok(Start(e)) => { let mut attrs = e.attributes(); match attrs.next() { @@ -871,9 +1181,17 @@ fn test_unescape_and_decode_without_bom_removes_utf8_bom() { let mut txt = Vec::new(); let mut buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = 
Runtime::new().expect("Runtime cannot be initialized"); loop { - match reader.read_event(&mut buf) { + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { Ok(Event::Text(e)) => txt.push(e.unescape_and_decode_without_bom(&reader).unwrap()), Ok(Event::Eof) => break, _ => (), @@ -890,9 +1208,17 @@ fn test_unescape_and_decode_without_bom_removes_utf16be_bom() { let mut txt = Vec::new(); let mut buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); loop { - match reader.read_event(&mut buf) { + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { Ok(Event::Text(e)) => txt.push(e.unescape_and_decode_without_bom(&mut reader).unwrap()), Ok(Event::Eof) => break, _ => (), @@ -909,9 +1235,17 @@ fn test_unescape_and_decode_without_bom_removes_utf16le_bom() { let mut txt = Vec::new(); let mut buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); loop { - match reader.read_event(&mut buf) { + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { Ok(Event::Text(e)) => txt.push(e.unescape_and_decode_without_bom(&mut reader).unwrap()), Ok(Event::Eof) => break, _ => (), @@ -930,9 +1264,17 @@ fn test_unescape_and_decode_without_bom_does_nothing_if_no_bom_exists() { let mut txt = Vec::new(); let mut buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); loop { - match reader.read_event(&mut 
buf) { + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { Ok(Event::Text(e)) => txt.push(e.unescape_and_decode_without_bom(&mut reader).unwrap()), Ok(Event::Eof) => break, _ => (), diff --git a/tests/xmlrs_reader_tests.rs b/tests/xmlrs_reader_tests.rs index f7f10ff3..78a24b25 100644 --- a/tests/xmlrs_reader_tests.rs +++ b/tests/xmlrs_reader_tests.rs @@ -1,15 +1,18 @@ extern crate quick_xml; use quick_xml::events::{BytesStart, Event}; +#[cfg(feature = "asynchronous")] +use quick_xml::AsyncReader; use quick_xml::{Reader, Result}; -use std::borrow::Cow; use std::str::from_utf8; +#[cfg(feature = "asynchronous")] +use tokio::runtime::Runtime; #[test] fn sample_1_short() { test( - include_str!("documents/sample_1.xml"), - include_str!("documents/sample_1_short.txt"), + include_bytes!("documents/sample_1.xml"), + include_bytes!("documents/sample_1_short.txt"), true, ); } @@ -17,8 +20,8 @@ fn sample_1_short() { #[test] fn sample_1_full() { test( - include_str!("documents/sample_1.xml"), - include_str!("documents/sample_1_full.txt"), + include_bytes!("documents/sample_1.xml"), + include_bytes!("documents/sample_1_full.txt"), false, ); } @@ -26,8 +29,8 @@ fn sample_1_full() { #[test] fn sample_2_short() { test( - include_str!("documents/sample_2.xml"), - include_str!("documents/sample_2_short.txt"), + include_bytes!("documents/sample_2.xml"), + include_bytes!("documents/sample_2_short.txt"), true, ); } @@ -35,76 +38,37 @@ fn sample_2_short() { #[test] fn sample_2_full() { test( - include_str!("documents/sample_2.xml"), - include_str!("documents/sample_2_full.txt"), + include_bytes!("documents/sample_2.xml"), + include_bytes!("documents/sample_2_full.txt"), false, ); } -#[cfg(feature = "escape-html")] +#[cfg(all(not(windows), feature = "escape-html"))] #[test] fn html5() { test( - 
include_str!("documents/html5.html"), - include_str!("documents/html5.txt"), + include_bytes!("documents/html5.html"), + include_bytes!("documents/html5.txt"), false, ); } +#[cfg(all(windows, feature = "escape-html"))] #[test] -fn escaped_characters() { - test( - r#"'a' < '&'"#, - r#" - |StartElement(e [attr=""Hello""]) - |Characters('a' < '&') - |EndElement(e) - |EndDocument - "#, - true, - ) -} - -#[cfg(feature = "escape-html")] -#[test] -fn escaped_characters_html() { +fn html5() { test( - r#"╔╗╔╗╔╗"#, - r#" - |StartElement(e [attr="ℏÈℓ𝕝⨀"]) - |Characters(╔╗╔╗╔╗) - |EndElement(e) - |EndDocument - "#, - true, - ) -} - -#[cfg(feature = "encoding")] -#[test] -fn encoded_characters() { - test_bytes( - b"\ - \n\ - \x82\xA0\x82\xA2\x82\xA4\ - ", - " - |StartDocument(1.0, Shift_JIS) - |StartElement(a) - |Characters(あいう) - |EndElement(a) - |EndDocument - " - .as_bytes(), - true, - ) + include_bytes!("documents/html5.html"), + include_bytes!("documents/html5-windows.txt"), + false, + ); } // #[test] // fn sample_3_short() { // test( -// include_str!("documents/sample_3.xml"), -// include_str!("documents/sample_3_short.txt"), +// include_bytes!("documents/sample_3.xml"), +// include_bytes!("documents/sample_3_short.txt"), // true // ); // } @@ -112,8 +76,8 @@ fn encoded_characters() { // #[test] // fn sample_3_full() { // test( -// include_str!("documents/sample_3.xml"), -// include_str!("documents/sample_3_full.txt"), +// include_bytes!("documents/sample_3.xml"), +// include_bytes!("documents/sample_3_full.txt"), // false // ); // } @@ -121,8 +85,8 @@ fn encoded_characters() { // #[test] // fn sample_4_short() { // test( -// include_str!("documents/sample_4.xml"), -// include_str!("documents/sample_4_short.txt"), +// include_bytes!("documents/sample_4.xml"), +// include_bytes!("documents/sample_4_short.txt"), // true // ); // } @@ -130,31 +94,18 @@ fn encoded_characters() { // #[test] // fn sample_4_full() { // test( -// include_str!("documents/sample_4.xml"), -// 
include_str!("documents/sample_4_full.txt"), +// include_bytes!("documents/sample_4.xml"), +// include_bytes!("documents/sample_4_full.txt"), // false // ); // // } -#[test] -// FIXME: Trips on the first byte-order-mark byte -// Expected: StartDocument(1.0, utf-16) -// Found: InvalidUtf8([255, 254]; invalid utf-8 sequence of 1 bytes from index 0) -#[ignore] -fn sample_5_short() { - test_bytes( - include_bytes!("documents/sample_5_utf16bom.xml"), - include_bytes!("documents/sample_5_short.txt"), - true, - ); -} - #[test] fn sample_ns_short() { test( - include_str!("documents/sample_ns.xml"), - include_str!("documents/sample_ns_short.txt"), + include_bytes!("documents/sample_ns.xml"), + include_bytes!("documents/sample_ns_short.txt"), true, ); } @@ -162,8 +113,8 @@ fn sample_ns_short() { #[test] fn eof_1() { test( - r#""#, - r#" + br#""#, + br#" |Error: Unexpected token '--' "#, true, ); test( - r#""#, - r#" + br#""#, + br#" |Error: Unexpected token '--' "#, true, @@ -199,8 +150,8 @@ fn dashes_in_comments() { #[test] fn tabs_1() { test( - "\t\t", - r#" + b"\t\t", + br#" StartElement(a) EmptyElement(b) EndElement(a) @@ -215,8 +166,8 @@ fn issue_83_duplicate_attributes() { // Error when parsing attributes won't stop main event reader // as it is a lazy operation => add ending events test( - r#""#, - " + br#""#, + b" |StartElement(hello) |1:30 EmptyElement(some-tag, attr-error: error while parsing \ attribute at position 16: Duplicate attribute at position 9 and 16) @@ -230,13 +181,14 @@ fn issue_83_duplicate_attributes() { #[test] fn issue_93_large_characters_in_entity_references() { test( - r#"&𤶼;"#, + r#"&𤶼;"#.as_bytes(), r#" |StartElement(hello) |1:10 FailedUnescape([38, 240, 164, 182, 188, 59]; Error while escaping character at range 1..5: Unrecognized escape symbol: Ok("𤶼")) |EndElement(hello) |EndDocument - "#, + "# + .as_bytes(), true, ) } @@ -244,8 +196,8 @@ fn issue_93_large_characters_in_entity_references() { #[test] fn 
issue_98_cdata_ending_with_right_bracket() { test( - r#""#, - r#" + br#""#, + br#" |StartElement(hello) |Characters() |CData(Foo [Bar]) @@ -260,8 +212,8 @@ fn issue_98_cdata_ending_with_right_bracket() { #[test] fn issue_105_unexpected_double_dash() { test( - r#"-- "#, - r#" + br#"-- "#, + br#" |StartElement(hello) |Characters(-- ) |EndElement(hello) @@ -271,8 +223,8 @@ fn issue_105_unexpected_double_dash() { ); test( - r#"--"#, - r#" + br#"--"#, + br#" |StartElement(hello) |Characters(--) |EndElement(hello) @@ -282,8 +234,8 @@ fn issue_105_unexpected_double_dash() { ); test( - r#"-->"#, - r#" + br#"-->"#, + br#" |StartElement(hello) |Characters(-->) |EndElement(hello) @@ -293,8 +245,8 @@ fn issue_105_unexpected_double_dash() { ); test( - r#""#, - r#" + br#""#, + br#" |StartElement(hello) |Characters() |CData(--) @@ -311,8 +263,8 @@ fn issue_attributes_have_no_default_namespace() { // At the moment, the 'test' method doesn't render namespaces for attribute names. // This test only checks whether the default namespace got applied to the EmptyElement. test( - r#""#, - r#" + br#""#, + br#" |EmptyElement({urn:foo}hello [x="y"]) |EndDocument "#, @@ -324,8 +276,8 @@ fn issue_attributes_have_no_default_namespace() { fn issue_default_namespace_on_outermost_element() { // Regression test test( - r#""#, - r#" + br#""#, + br#" |EmptyElement({urn:foo}hello) |EndDocument "#, @@ -336,10 +288,10 @@ fn issue_default_namespace_on_outermost_element() { #[test] fn default_namespace_applies_to_end_elem() { test( - r#" + br#" "#, - r#" + br#" |StartElement({urn:foo}hello [x="y"]) |EmptyElement({urn:foo}inner) |EndElement({urn:foo}hello) @@ -349,18 +301,9 @@ fn default_namespace_applies_to_end_elem() { ); } -fn test(input: &str, output: &str, is_short: bool) { - test_bytes(input.as_bytes(), output.as_bytes(), is_short); -} - -fn test_bytes(input: &[u8], output: &[u8], is_short: bool) { - // Normalize newlines on Windows to just \n, which is what the reader and - // writer use. 
- // let input = input.replace("\r\n", "\n"); - // let input = input.as_bytes(); - // let output = output.replace("\r\n", "\n"); - // let output = output.as_bytes(); +fn test_sync(input: &[u8], output: &[u8], is_short: bool) { let mut reader = Reader::from_reader(input); + reader .trim_text(is_short) .check_comments(true) @@ -377,8 +320,75 @@ fn test_bytes(input: &[u8], output: &[u8], is_short: bool) { loop { buf.clear(); + let event = reader.read_namespaced_event(&mut buf, &mut ns_buffer); - let line = xmlrs_display(&event, &reader); + + let line = xmlrs_display(&event); + if let Some((n, spec)) = spec_lines.next() { + if spec.trim() == "EndDocument" { + break; + } + if line.trim() != spec.trim() { + panic!( + "\n-------------------\n\ + Unexpected event at line {}:\n\ + Expected: {}\nFound: {}\n\ + -------------------\n", + n + 1, + spec, + line + ); + } + } else { + if line == "EndDocument" { + break; + } + panic!("Unexpected event: {}", line); + } + + if !is_short && line.starts_with("StartDocument") { + // advance next Characters(empty space) ... 
+ + let mut buf = Vec::new(); + + if let Ok(Event::Text(ref e)) = reader.read_event(&mut buf) { + if e.iter().any(|b| match *b { + b' ' | b'\r' | b'\n' | b'\t' => false, + _ => true, + }) { + panic!("Reader expects empty Text event after a StartDocument"); + } + } else { + panic!("Reader expects empty Text event after a StartDocument"); + } + } + } +} + +#[cfg(feature = "asynchronous")] +async fn test_async(input: &[u8], output: &[u8], is_short: bool) { + let mut reader = AsyncReader::from_reader(input); + + reader + .trim_text(is_short) + .check_comments(true) + .expand_empty_elements(false); + + let mut spec_lines = SpecIter(output).enumerate(); + let mut buf = Vec::new(); + let mut ns_buffer = Vec::new(); + + if !is_short { + // discard first whitespace + reader.read_event(&mut buf).await.unwrap(); + } + + loop { + buf.clear(); + + let event = reader.read_namespaced_event(&mut buf, &mut ns_buffer).await; + + let line = xmlrs_display(&event); if let Some((n, spec)) = spec_lines.next() { if spec.trim() == "EndDocument" { break; @@ -403,7 +413,10 @@ fn test_bytes(input: &[u8], output: &[u8], is_short: bool) { if !is_short && line.starts_with("StartDocument") { // advance next Characters(empty space) ... 
- if let Ok(Event::Text(ref e)) = reader.read_event(&mut Vec::new()) { + + let mut buf = Vec::new(); + + if let Ok(Event::Text(ref e)) = reader.read_event(&mut buf).await { if e.iter().any(|b| match *b { b' ' | b'\r' | b'\n' | b'\t' => false, _ => true, @@ -417,6 +430,16 @@ fn test_bytes(input: &[u8], output: &[u8], is_short: bool) { } } +fn test(input: &[u8], output: &[u8], is_short: bool) { + test_sync(input, output, is_short); + + #[cfg(feature = "asynchronous")] + let runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + runtime.block_on(async { test_async(input, output, is_short).await }); +} + fn namespace_name(n: &Option<&[u8]>, name: &[u8]) -> String { match *n { Some(n) => format!("{{{}}}{}", from_utf8(n).unwrap(), from_utf8(name).unwrap()), @@ -443,21 +466,10 @@ fn make_attrs(e: &BytesStart) -> ::std::result::Result { Ok(atts.join(", ")) } -// FIXME: The public API differs based on the "encoding" feature -fn decode<'a>(text: &'a [u8], reader: &Reader<&[u8]>) -> Cow<'a, str> { - #[cfg(feature = "encoding")] - let decoded = reader.decode(text); - - #[cfg(not(feature = "encoding"))] - let decoded = Cow::Borrowed(reader.decode(text).unwrap()); - - decoded -} - -fn xmlrs_display(opt_event: &Result<(Option<&[u8]>, Event)>, reader: &Reader<&[u8]>) -> String { +fn xmlrs_display(opt_event: &Result<(Option<&[u8]>, Event)>) -> String { match opt_event { Ok((ref n, Event::Start(ref e))) => { - let name = namespace_name(n, decode(e.name(), reader).as_bytes()); + let name = namespace_name(n, e.name()); match make_attrs(e) { Ok(ref attrs) if attrs.is_empty() => format!("StartElement({})", &name), Ok(ref attrs) => format!("StartElement({} [{}])", &name, &attrs), @@ -465,25 +477,26 @@ fn xmlrs_display(opt_event: &Result<(Option<&[u8]>, Event)>, reader: &Reader<&[u } } Ok((ref n, Event::Empty(ref e))) => { - let name = namespace_name(n, decode(e.name(), reader).as_bytes()); + let name = namespace_name(n, e.name()); match 
make_attrs(e) { Ok(ref attrs) if attrs.is_empty() => format!("EmptyElement({})", &name), Ok(ref attrs) => format!("EmptyElement({} [{}])", &name, &attrs), Err(e) => format!("EmptyElement({}, attr-error: {})", &name, &e), } } - Ok((ref n, Event::End(ref e))) => { - let name = namespace_name(n, decode(e.name(), reader).as_bytes()); - format!("EndElement({})", name) - } + Ok((ref n, Event::End(ref e))) => format!("EndElement({})", namespace_name(n, e.name())), Ok((_, Event::Comment(ref e))) => format!("Comment({})", from_utf8(e).unwrap()), Ok((_, Event::CData(ref e))) => format!("CData({})", from_utf8(e).unwrap()), - Ok((_, Event::Text(ref e))) => match e.unescaped() { - Ok(c) => match from_utf8(decode(&*c, reader).as_bytes()) { - Ok(c) => format!("Characters({})", c), - Err(ref err) => format!("InvalidUtf8({:?}; {})", e.escaped(), err), - }, - Err(ref err) => format!("FailedUnescape({:?}; {})", e.escaped(), err), + Ok((_, Event::Text(ref e))) => { + match e.unescaped() { + Ok(c) => { + match from_utf8(&*c) { + Ok(c) => format!("Characters({})", c), + Err(ref err) => format!("InvalidUtf8({:?}; {})", e.escaped(), err), + } + }, + Err(ref err) => format!("FailedUnescape({:?}; {})", e.escaped(), err), + } }, Ok((_, Event::Decl(ref e))) => { let version_cow = e.version().unwrap(); @@ -527,3 +540,4 @@ impl<'a> Iterator for SpecIter<'a> { } } } + From 1d9c11c21b5e16943c8f9d647218d77185a9e65e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Frank=20Pr=C3=B6=C3=9Fdorf?= Date: Sun, 27 Sep 2020 14:46:31 +0300 Subject: [PATCH 2/8] Introduce an AsyncReader instead of overloading the Reader --- Cargo.toml | 4 +- examples/custom_entities.rs | 71 ++- examples/issue68.rs | 91 ++- examples/nested_readers.rs | 111 +++- examples/read_texts.rs | 69 +- src/errors.rs | 6 +- src/events/attributes.rs | 12 +- src/events/mod.rs | 78 ++- src/lib.rs | 13 +- src/reader/asynchronous.rs | 62 +- src/reader/mod.rs | 8 +- src/writer.rs | 2 +- tests/test.rs | 228 +------ tests/test_async.rs | 1187 
+++++++++++++++++++++++++++++++++++ tests/unit_tests.rs | 434 ++----------- tests/unit_tests_async.rs | 1026 ++++++++++++++++++++++++++++++ tests/xmlrs_reader_tests.rs | 34 + 17 files changed, 2705 insertions(+), 731 deletions(-) create mode 100644 tests/test_async.rs create mode 100644 tests/unit_tests_async.rs diff --git a/Cargo.toml b/Cargo.toml index eda5f256..4c535a21 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,7 +18,7 @@ travis-ci = { repository = "tafia/quick-xml" } [dependencies] async-recursion = { version = "0.3.2", optional = true } encoding_rs = { version = "0.8.26", optional = true } -tokio = { version = "0.2.22", features = ["fs", "io-util"], optional = true } +tokio = { version = "1.4.0", features = ["fs", "io-util"], optional = true } serde = { version = "1.0", optional = true } memchr = "2.3.4" @@ -26,7 +26,7 @@ memchr = "2.3.4" serde = { version = "1.0", features = ["derive"] } serde-value = "0.7" regex = "1" -tokio = { version = "0.2.22", features = ["macros", "rt-threaded"] } +tokio = { version = "1.4.0", features = ["macros", "rt-multi-thread"] } [lib] bench = false diff --git a/examples/custom_entities.rs b/examples/custom_entities.rs index 42986126..4fde855b 100644 --- a/examples/custom_entities.rs +++ b/examples/custom_entities.rs @@ -11,6 +11,8 @@ extern crate quick_xml; extern crate regex; use quick_xml::events::Event; +#[cfg(feature = "asynchronous")] +use quick_xml::AsyncReader; use quick_xml::Reader; use regex::bytes::Regex; use std::collections::HashMap; @@ -27,22 +29,15 @@ const DATA: &str = r#" "#; -fn main() -> Result<(), Box> { - let mut reader = Reader::from_str(DATA); +fn custom_entities(data: &str) -> Result<(), Box> { + let mut reader = Reader::from_str(data); reader.trim_text(true); let mut buf = Vec::new(); let mut custom_entities = HashMap::new(); let entity_re = Regex::new(r#""#)?; - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - loop { - #[cfg(feature = 
"asynchronous")] - let event = runtime.block_on(async { reader.read_event(&mut buf).await }); - - #[cfg(not(feature = "asynchronous"))] let event = reader.read_event(&mut buf); match event { @@ -80,3 +75,61 @@ fn main() -> Result<(), Box> { } Ok(()) } + +#[cfg(feature = "asynchronous")] +async fn custom_entities_async(data: &str) -> Result<(), Box> { + let mut reader = AsyncReader::from_str(data); + reader.trim_text(true); + + let mut buf = Vec::new(); + let mut custom_entities = HashMap::new(); + let entity_re = Regex::new(r#""#)?; + + loop { + match reader.read_event(&mut buf).await { + Ok(Event::DocType(ref e)) => { + for cap in entity_re.captures_iter(&e) { + custom_entities.insert(cap[1].to_vec(), cap[2].to_vec()); + } + } + Ok(Event::Start(ref e)) => match e.name() { + b"test" => println!( + "attributes values: {:?}", + e.attributes() + .map(|a| a + .unwrap() + .unescape_and_decode_value_with_custom_entities( + &reader, + &custom_entities + ) + .unwrap()) + .collect::>() + ), + _ => (), + }, + Ok(Event::Text(ref e)) => { + println!( + "text value: {}", + e.unescape_and_decode_with_custom_entities(&reader, &custom_entities) + .unwrap() + ); + } + Ok(Event::Eof) => break, + Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e), + _ => (), + } + } + Ok(()) +} + +fn main() -> Result<(), Box> { + custom_entities(DATA)?; + + #[cfg(feature = "asynchronous")] + let runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + runtime.block_on(async { custom_entities_async(DATA).await })?; + + Ok(()) +} diff --git a/examples/issue68.rs b/examples/issue68.rs index d738ed84..c90ed684 100644 --- a/examples/issue68.rs +++ b/examples/issue68.rs @@ -1,6 +1,8 @@ #![allow(unused)] use quick_xml::events::Event; +#[cfg(feature = "asynchronous")] +use quick_xml::AsyncReader; use quick_xml::Reader; use std::io::Read; #[cfg(feature = "asynchronous")] @@ -55,25 +57,26 @@ impl Response { } } -fn parse_report(xml_data: 
&str) -> Vec { +#[derive(Clone, Copy)] +enum State { + Root, + MultiStatus, + Response, + Success, + Error, +} + +#[cfg(feature = "asynchronous")] +async fn parse_report_async(xml_data: &str) -> Vec { let result = Vec::::new(); - let mut reader = Reader::from_str(xml_data); + let mut reader = AsyncReader::from_str(xml_data); reader.trim_text(true); let mut count = 0; let mut buf = Vec::new(); let mut ns_buffer = Vec::new(); - #[derive(Clone, Copy)] - enum State { - Root, - MultiStatus, - Response, - Success, - Error, - }; - let mut responses = Vec::::new(); let mut current_response = Response::new(); let mut current_prop = Prop::new(); @@ -81,18 +84,60 @@ fn parse_report(xml_data: &str) -> Vec { let mut depth = 0; let mut state = State::MultiStatus; - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - loop { - #[cfg(feature = "asynchronous")] - let event = runtime - .block_on(async { reader.read_namespaced_event(&mut buf, &mut ns_buffer).await }); + match reader.read_namespaced_event(&mut buf, &mut ns_buffer).await { + Ok((namespace_value, Event::Start(e))) => { + let namespace_value = namespace_value.unwrap_or_default(); + match (depth, state, namespace_value, e.local_name()) { + (0, State::Root, b"DAV:", b"multistatus") => state = State::MultiStatus, + (1, State::MultiStatus, b"DAV:", b"response") => { + state = State::Response; + current_response = Response::new(); + } + (2, State::Response, b"DAV:", b"href") => { + current_response.href = e.unescape_and_decode(&reader).unwrap(); + } + _ => {} + } + depth += 1; + } + Ok((namespace_value, Event::End(e))) => { + let namespace_value = namespace_value.unwrap_or_default(); + let local_name = e.local_name(); + match (depth, state, &*namespace_value, local_name) { + (1, State::MultiStatus, b"DAV:", b"multistatus") => state = State::Root, + (2, State::MultiStatus, b"DAV:", b"multistatus") => state = State::MultiStatus, + _ => {} + } + depth -= 1; + } + Ok((_, 
Event::Eof)) => break, + Err(e) => break, + _ => (), + } + } + result +} + +fn parse_report(xml_data: &str) -> Vec { + let result = Vec::::new(); - #[cfg(not(feature = "asynchronous"))] - let event = reader.read_namespaced_event(&mut buf, &mut ns_buffer); + let mut reader = Reader::from_str(xml_data); + reader.trim_text(true); - match event { + let mut count = 0; + let mut buf = Vec::new(); + let mut ns_buffer = Vec::new(); + + let mut responses = Vec::::new(); + let mut current_response = Response::new(); + let mut current_prop = Prop::new(); + + let mut depth = 0; + let mut state = State::MultiStatus; + + loop { + match reader.read_namespaced_event(&mut buf, &mut ns_buffer) { Ok((namespace_value, Event::Start(e))) => { let namespace_value = namespace_value.unwrap_or_default(); match (depth, state, namespace_value, e.local_name()) { @@ -148,4 +193,10 @@ fn main() { "#; parse_report(test_data); + + #[cfg(feature = "asynchronous")] + let runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + runtime.block_on(async { parse_report_async(test_data).await }); } diff --git a/examples/nested_readers.rs b/examples/nested_readers.rs index 5dd1dbbc..6a415e1f 100644 --- a/examples/nested_readers.rs +++ b/examples/nested_readers.rs @@ -1,4 +1,6 @@ use quick_xml::events::Event; +#[cfg(feature = "asynchronous")] +use quick_xml::AsyncReader; use quick_xml::Reader; #[cfg(feature = "asynchronous")] use tokio::runtime::Runtime; @@ -10,34 +12,18 @@ struct TableStat { index: u8, rows: Vec>, } -// demonstrate how to nest readers -// This is useful for when you need to traverse -// a few levels of a document to extract things. 
-fn main() -> Result<(), quick_xml::Error> { + +fn nest_readers() -> Result<(), quick_xml::Error> { let mut buf = Vec::new(); // buffer for nested reader let mut skip_buf = Vec::new(); let mut count = 0; - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - - #[cfg(feature = "asynchronous")] - let mut reader = - runtime.block_on(async { Reader::from_file("tests/documents/document.xml").await })?; - - #[cfg(not(feature = "asynchronous"))] let mut reader = Reader::from_file("tests/documents/document.xml")?; let mut found_tables = Vec::new(); loop { - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { reader.read_event(&mut buf).await })?; - - #[cfg(not(feature = "asynchronous"))] - let event = reader.read_event(&mut buf)?; - - match event { + match reader.read_event(&mut buf)? { Event::Start(element) => match element.name() { b"w:tbl" => { count += 1; @@ -51,14 +37,74 @@ fn main() -> Result<(), quick_xml::Error> { loop { skip_buf.clear(); - #[cfg(feature = "asynchronous")] - let event = - runtime.block_on(async { reader.read_event(&mut skip_buf).await })?; + match reader.read_event(&mut skip_buf)? 
{ + Event::Start(element) => match element.name() { + b"w:tr" => { + stats.rows.push(vec![]); + row_index = stats.rows.len() - 1; + } + b"w:tc" => { + stats.rows[row_index] + .push(String::from_utf8(element.name().to_vec()).unwrap()); + } + _ => {} + }, + Event::End(element) => { + if element.name() == b"w:tbl" { + found_tables.push(stats); + break; + } + } + _ => {} + } + } + } + _ => {} + }, + Event::Eof => break, + _ => {} + } + buf.clear(); + } + assert_eq!(found_tables.len(), 2); + // pretty print the table + println!("{:#?}", found_tables); + assert_eq!(found_tables[0].rows.len(), 2); + assert_eq!(found_tables[0].rows[0].len(), 4); + assert_eq!(found_tables[0].rows[1].len(), 4); - #[cfg(not(feature = "asynchronous"))] - let event = reader.read_event(&mut skip_buf)?; + assert_eq!(found_tables[1].rows.len(), 2); + assert_eq!(found_tables[1].rows[0].len(), 4); + assert_eq!(found_tables[1].rows[1].len(), 4); + Ok(()) +} + +#[cfg(feature = "asynchronous")] +async fn nest_readers_async() -> Result<(), quick_xml::Error> { + let mut buf = Vec::new(); + // buffer for nested reader + let mut skip_buf = Vec::new(); + let mut count = 0; + + let mut reader = AsyncReader::from_file("tests/documents/document.xml").await?; + + let mut found_tables = Vec::new(); + loop { + match reader.read_event(&mut buf).await? { + Event::Start(element) => match element.name() { + b"w:tbl" => { + count += 1; + let mut stats = TableStat { + index: count, + rows: vec![], + }; + // must define stateful variables + // outside the nested loop else they are overwritten + let mut row_index = 0; + loop { + skip_buf.clear(); - match event { + match reader.read_event(&mut skip_buf).await? 
{ Event::Start(element) => match element.name() { b"w:tr" => { stats.rows.push(vec![]); @@ -99,3 +145,18 @@ fn main() -> Result<(), quick_xml::Error> { assert_eq!(found_tables[1].rows[1].len(), 4); Ok(()) } + +// demonstrate how to nest readers +// This is useful for when you need to traverse +// a few levels of a document to extract things. +fn main() -> Result<(), quick_xml::Error> { + #[cfg(feature = "asynchronous")] + let runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + runtime.block_on(async { nest_readers_async().await })?; + + nest_readers()?; + + Ok(()) +} diff --git a/examples/read_texts.rs b/examples/read_texts.rs index 227911cc..a8e2988c 100644 --- a/examples/read_texts.rs +++ b/examples/read_texts.rs @@ -1,40 +1,48 @@ +use quick_xml::events::Event; +#[cfg(feature = "asynchronous")] +use quick_xml::AsyncReader; +use quick_xml::Reader; #[cfg(feature = "asynchronous")] use tokio::runtime::Runtime; -fn main() { - use quick_xml::events::Event; - use quick_xml::Reader; +#[cfg(feature = "asynchronous")] +async fn read_text_async(xml: &str) { + let mut reader = AsyncReader::from_str(xml); + reader.trim_text(true); - let xml = "text1text2\ - text3text4"; + let mut txt = Vec::new(); + let mut buf = Vec::new(); + loop { + match reader.read_event(&mut buf).await { + Ok(Event::Start(ref e)) if e.name() == b"tag2" => { + #[cfg(feature = "asynchronous")] + let text = reader + .read_text(b"tag2", &mut Vec::new()) + .await + .expect("Cannot decode text value"); + + txt.push(text); + println!("{:?}", txt); + } + Ok(Event::Eof) => break, // exits the loop when reaching end of file + Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e), + _ => (), // There are several other `Event`s we do not consider here + } + buf.clear(); + } +} + +fn read_text(xml: &str) { let mut reader = Reader::from_str(xml); reader.trim_text(true); let mut txt = Vec::new(); let mut buf = Vec::new(); - #[cfg(feature = 
"asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - loop { - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { reader.read_event(&mut buf).await }); - - #[cfg(not(feature = "asynchronous"))] - let event = reader.read_event(&mut buf); - - match event { + match reader.read_event(&mut buf) { Ok(Event::Start(ref e)) if e.name() == b"tag2" => { - #[cfg(feature = "asynchronous")] - let text = runtime.block_on(async { - reader - .read_text(b"tag2", &mut Vec::new()) - .await - .expect("Cannot decode text value") - }); - - #[cfg(not(feature = "asynchronous"))] let text = reader .read_text(b"tag2", &mut Vec::new()) .expect("Cannot decode text value"); @@ -49,3 +57,16 @@ fn main() { buf.clear(); } } + +fn main() { + let xml = "text1text2\ + text3text4"; + + read_text(xml); + + #[cfg(feature = "asynchronous")] + let runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + runtime.block_on(async { read_text_async(xml).await }); +} diff --git a/src/errors.rs b/src/errors.rs index eafc58ec..ef7bacba 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -33,7 +33,7 @@ pub enum Error { /// Duplicate attribute DuplicatedAttribute(usize, usize), /// Escape error - Escape(crate::escape::EscapeError), + EscapeError(crate::escape::EscapeError), } impl From<::std::io::Error> for Error { @@ -109,7 +109,7 @@ impl std::fmt::Display for Error { Duplicate attribute at position {1} and {0}", pos1, pos2 ), - Error::Escape(e) => write!(f, "{}", e), + Error::EscapeError(e) => write!(f, "{}", e), } } } @@ -119,7 +119,7 @@ impl std::error::Error for Error { match self { Error::Io(e) => Some(e), Error::Utf8(e) => Some(e), - Error::Escape(e) => Some(e), + Error::EscapeError(e) => Some(e), _ => None, } } diff --git a/src/events/attributes.rs b/src/events/attributes.rs index 3607ec67..2d44435b 100644 --- a/src/events/attributes.rs +++ b/src/events/attributes.rs @@ -122,7 +122,7 @@ impl<'a> 
Attribute<'a> { &self, custom_entities: Option<&HashMap, Vec>>, ) -> Result> { - do_unescape(&*self.value, custom_entities).map_err(Error::Escape) + do_unescape(&*self.value, custom_entities).map_err(Error::EscapeError) } /// Decode then unescapes the value @@ -169,8 +169,7 @@ impl<'a> Attribute<'a> { custom_entities: Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode(&*self.value); - let unescaped = - do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; + let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) } @@ -181,7 +180,7 @@ impl<'a> Attribute<'a> { custom_entities: Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode(&*self.value)?; - let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::Escape)?; + let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) } @@ -256,8 +255,7 @@ impl<'a> Attribute<'a> { custom_entities: Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode_without_bom(&*self.value); - let unescaped = - do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; + let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) } @@ -268,7 +266,7 @@ impl<'a> Attribute<'a> { custom_entities: Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode_without_bom(&*self.value)?; - let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::Escape)?; + let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) } } diff --git a/src/events/mod.rs 
b/src/events/mod.rs index 3d8d35c0..76f7880c 100644 --- a/src/events/mod.rs +++ b/src/events/mod.rs @@ -225,7 +225,7 @@ impl<'a> BytesStart<'a> { &'s self, custom_entities: Option<&HashMap, Vec>>, ) -> Result> { - do_unescape(&*self.buf, custom_entities).map_err(Error::Escape) + do_unescape(&*self.buf, custom_entities).map_err(Error::EscapeError) } /// Returns an iterator over the attributes of this tag. @@ -307,8 +307,7 @@ impl<'a> BytesStart<'a> { custom_entities: Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode(&*self); - let unescaped = - do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; + let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) } @@ -320,7 +319,7 @@ impl<'a> BytesStart<'a> { custom_entities: Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode(&*self)?; - let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::Escape)?; + let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) } @@ -650,7 +649,7 @@ impl<'a> BytesText<'a> { &'s self, custom_entities: Option<&HashMap, Vec>>, ) -> Result> { - do_unescape(self, custom_entities).map_err(Error::Escape) + do_unescape(self, custom_entities).map_err(Error::EscapeError) } #[cfg(feature = "serialize")] @@ -774,8 +773,7 @@ impl<'a> BytesText<'a> { custom_entities: Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode_without_bom(&*self); - let unescaped = - do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; + let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) } @@ -786,7 +784,7 @@ impl<'a> BytesText<'a> { custom_entities: 
Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode_without_bom(&*self)?; - let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::Escape)?; + let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) } @@ -825,8 +823,7 @@ impl<'a> BytesText<'a> { custom_entities: Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode(&*self); - let unescaped = - do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; + let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) } @@ -837,7 +834,7 @@ impl<'a> BytesText<'a> { custom_entities: Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode(&*self)?; - let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::Escape)?; + let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) } @@ -958,8 +955,6 @@ impl<'a> AsRef> for Event<'a> { #[cfg(test)] mod test { use super::*; - #[cfg(feature = "asynchronous")] - use tokio::runtime::Runtime; #[test] fn local_name() { @@ -974,18 +969,7 @@ mod test { let mut buf = Vec::new(); let mut parsed_local_names = Vec::new(); - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - loop { - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { - rdr.read_event(&mut buf) - .await - .expect("unable to read xml event") - }); - - #[cfg(not(feature = "asynchronous"))] let event = rdr.read_event(&mut buf).expect("unable to read xml event"); match event { @@ -1014,6 +998,52 @@ mod test { assert_eq!(parsed_local_names[7], "bus:baz".to_string()); } + #[cfg(feature = 
"asynchronous")] + #[tokio::test] + async fn local_name_async() { + use std::str::from_utf8; + let xml = r#" + foobusbar + foobusbar + <:foo attr='bar'>foobusbar + foobusbar + "#; + let mut rdr = crate::AsyncReader::from_str(xml); + let mut buf = Vec::new(); + let mut parsed_local_names = Vec::new(); + + loop { + let event = rdr + .read_event(&mut buf) + .await + .expect("unable to read xml event"); + + match event { + Event::Start(ref e) => parsed_local_names.push( + from_utf8(e.local_name()) + .expect("unable to build str from local_name") + .to_string(), + ), + Event::End(ref e) => parsed_local_names.push( + from_utf8(e.local_name()) + .expect("unable to build str from local_name") + .to_string(), + ), + Event::Eof => break, + _ => {} + } + } + + assert_eq!(parsed_local_names[0], "bus".to_string()); + assert_eq!(parsed_local_names[1], "bus".to_string()); + assert_eq!(parsed_local_names[2], "".to_string()); + assert_eq!(parsed_local_names[3], "".to_string()); + assert_eq!(parsed_local_names[4], "foo".to_string()); + assert_eq!(parsed_local_names[5], "foo".to_string()); + assert_eq!(parsed_local_names[6], "bus:baz".to_string()); + assert_eq!(parsed_local_names[7], "bus:baz".to_string()); + } + #[test] fn bytestart_create() { let b = BytesStart::owned_name("test"); diff --git a/src/lib.rs b/src/lib.rs index 5d592708..b90ff0f6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,7 +24,7 @@ //! //! ### Reader //! -//! ```ignore +//! ```rust //! use quick_xml::Reader; //! use quick_xml::events::Event; //! @@ -39,8 +39,8 @@ //! reader.trim_text(true); //! //! let mut count = 0; -//! let mut txt: Vec = Vec::new(); -//! let mut buf: Vec = Vec::new(); +//! let mut txt = Vec::new(); +//! let mut buf = Vec::new(); //! //! // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s) //! loop { @@ -72,7 +72,7 @@ //! //! ### Writer //! -//! ```ignore +//! ```rust //! use quick_xml::Writer; //! use quick_xml::events::{Event, BytesEnd, BytesStart}; //! 
use quick_xml::Reader; @@ -83,7 +83,7 @@ //! let mut reader = Reader::from_str(xml); //! reader.trim_text(true); //! let mut writer = Writer::new(Cursor::new(Vec::new())); -//! let mut buf: Vec = Vec::new(); +//! let mut buf = Vec::new(); //! loop { //! match reader.read_event(&mut buf) { //! Ok(Event::Start(ref e)) if e.name() == b"this_tag" => { @@ -159,7 +159,6 @@ pub mod reader; pub use errors::serialize::DeError; pub use errors::{Error, Result}; #[cfg(feature = "asynchronous")] -pub use reader::asynchronous::Reader; -#[cfg(not(feature = "asynchronous"))] +pub use reader::asynchronous::AsyncReader; pub use reader::sync::Reader; pub use writer::Writer; diff --git a/src/reader/asynchronous.rs b/src/reader/asynchronous.rs index bba8a31f..089ab093 100644 --- a/src/reader/asynchronous.rs +++ b/src/reader/asynchronous.rs @@ -1,4 +1,4 @@ -//! A module to handle the async `Reader` +//! A module to handle the `AsyncReader` use async_recursion::async_recursion; #[cfg(feature = "encoding")] @@ -18,7 +18,7 @@ use crate::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event}; use super::{is_whitespace, Decode, Decoder, NamespaceBufferIndex, TagState}; -impl Decode for Reader { +impl Decode for AsyncReader { #[cfg(feature = "encoding")] fn read_encoding(&self) -> &'static Encoding { self.encoding @@ -47,7 +47,7 @@ impl Decode for Reader { /// # Examples /// /// ``` -/// use quick_xml::Reader; +/// use quick_xml::AsyncReader; /// use quick_xml::events::Event; /// /// #[tokio::main] @@ -56,7 +56,7 @@ impl Decode for Reader { /// Test /// Test 2 /// "#; -/// let mut reader = Reader::from_str(xml); +/// let mut reader = AsyncReader::from_str(xml); /// reader.trim_text(true); /// let mut count = 0; /// let mut txt = Vec::new(); @@ -81,7 +81,7 @@ impl Decode for Reader { /// } /// } /// ``` -pub struct Reader { +pub struct AsyncReader { /// reader reader: B, /// current buffer position, useful for debuging errors @@ -113,10 +113,10 @@ pub struct Reader { is_encoding_set: 
bool, } -impl Reader { +impl AsyncReader { /// Creates a `Reader` that reads from a reader implementing `BufRead`. - pub fn from_reader(reader: B) -> Reader { - Reader { + pub fn from_reader(reader: B) -> AsyncReader { + AsyncReader { reader, opened_buffer: Vec::new(), opened_starts: Vec::new(), @@ -146,7 +146,7 @@ impl Reader { /// [`Empty`]: events/enum.Event.html#variant.Empty /// [`Start`]: events/enum.Event.html#variant.Start /// [`End`]: events/enum.Event.html#variant.End - pub fn expand_empty_elements(&mut self, val: bool) -> &mut Reader { + pub fn expand_empty_elements(&mut self, val: bool) -> &mut AsyncReader { self.expand_empty_elements = val; self } @@ -159,7 +159,7 @@ impl Reader { /// (`false` by default) /// /// [`Text`]: events/enum.Event.html#variant.Text - pub fn trim_text(&mut self, val: bool) -> &mut Reader { + pub fn trim_text(&mut self, val: bool) -> &mut AsyncReader { self.trim_text = val; self } @@ -175,7 +175,7 @@ impl Reader { /// (`true` by default) /// /// [`End`]: events/enum.Event.html#variant.End - pub fn trim_markup_names_in_closing_tags(&mut self, val: bool) -> &mut Reader { + pub fn trim_markup_names_in_closing_tags(&mut self, val: bool) -> &mut AsyncReader { self.trim_markup_names_in_closing_tags = val; self } @@ -193,7 +193,7 @@ impl Reader { /// (`true` by default) /// /// [`End`]: events/enum.Event.html#variant.End - pub fn check_end_names(&mut self, val: bool) -> &mut Reader { + pub fn check_end_names(&mut self, val: bool) -> &mut AsyncReader { self.check_end_names = val; self } @@ -208,7 +208,7 @@ impl Reader { /// (`false` by default) /// /// [`Comment`]: events/enum.Event.html#variant.Comment - pub fn check_comments(&mut self, val: bool) -> &mut Reader { + pub fn check_comments(&mut self, val: bool) -> &mut AsyncReader { self.check_comments = val; self } @@ -392,7 +392,7 @@ impl Reader { &buf[buf_start + 8..buf.len() - 2], ))) } - b"DOCTYPE" => { + x if x.eq_ignore_ascii_case(b"DOCTYPE") => { let mut count = 
buf.iter().skip(buf_start).filter(|&&b| b == b'<').count(); while count > 0 { buf.push(b'>'); @@ -516,7 +516,7 @@ impl Reader { /// # Examples /// /// ``` - /// use quick_xml::Reader; + /// use quick_xml::AsyncReader; /// use quick_xml::events::Event; /// /// #[tokio::main] @@ -525,7 +525,7 @@ impl Reader { /// Test /// Test 2 /// "#; - /// let mut reader = Reader::from_str(xml); + /// let mut reader = AsyncReader::from_str(xml); /// reader.trim_text(true); /// let mut count = 0; /// let mut buf = Vec::new(); @@ -599,7 +599,7 @@ impl Reader { /// /// ``` /// use std::str::from_utf8; - /// use quick_xml::Reader; + /// use quick_xml::AsyncReader; /// use quick_xml::events::Event; /// /// #[tokio::main] @@ -608,7 +608,7 @@ impl Reader { /// Test /// Test 2 /// "#; - /// let mut reader = Reader::from_str(xml); + /// let mut reader = AsyncReader::from_str(xml); /// reader.trim_text(true); /// let mut count = 0; /// let mut buf = Vec::new(); @@ -685,7 +685,7 @@ impl Reader { } } - /// Returns the `Reader`s encoding. + /// Returns the `AsyncReader`s encoding. /// /// The used encoding may change after parsing the XML declaration. 
/// @@ -748,12 +748,12 @@ impl Reader { /// # Examples /// /// ``` - /// use quick_xml::Reader; + /// use quick_xml::AsyncReader; /// use quick_xml::events::Event; /// /// #[tokio::main] /// async fn main() { - /// let mut xml = Reader::from_reader(b" + /// let mut xml = AsyncReader::from_reader(b" /// <b> /// /// " as &[u8]); @@ -785,7 +785,7 @@ impl Reader { s } - /// Consumes `Reader` returning the underlying reader + /// Consumes `AsyncReader` returning the underlying reader /// /// Can be used to compute line and column of a parsing error position /// @@ -793,10 +793,10 @@ impl Reader { /// /// ```ignore /// use std::{str, io::Cursor}; - /// use quick_xml::Reader; + /// use quick_xml::AsyncReader; /// use quick_xml::events::Event; /// - /// fn into_line_and_column(reader: Reader>) -> (usize, usize) { + /// fn into_line_and_column(reader: AsyncReader>) -> (usize, usize) { /// let end_pos = reader.buffer_position(); /// let mut cursor = reader.into_underlying_reader(); /// let s = String::from_utf8(cursor.into_inner()[0..end_pos].to_owned()) @@ -820,7 +820,7 @@ impl Reader { /// Test /// Test 2 /// "#; - /// let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes())); + /// let mut reader = AsyncReader::from_reader(Cursor::new(xml.as_bytes())); /// let mut buf = Vec::new(); /// /// loop { @@ -845,19 +845,19 @@ impl Reader { } } -impl Reader> { +impl AsyncReader> { /// Creates an XML reader from a file path. - pub async fn from_file>(path: P) -> Result>> { + pub async fn from_file>(path: P) -> Result>> { let file = File::open(path).await.map_err(Error::Io)?; let reader = BufReader::new(file); - Ok(Reader::from_reader(reader)) + Ok(AsyncReader::from_reader(reader)) } } -impl<'a> Reader<&'a [u8]> { +impl<'a> AsyncReader<&'a [u8]> { /// Creates an XML reader from a string slice. 
- pub fn from_str(s: &'a str) -> Reader<&'a [u8]> { - Reader::from_reader(s.as_bytes()) + pub fn from_str(s: &'a str) -> AsyncReader<&'a [u8]> { + AsyncReader::from_reader(s.as_bytes()) } } diff --git a/src/reader/mod.rs b/src/reader/mod.rs index db120753..225683a4 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -1,9 +1,10 @@ //! A module to handle `Reader` -use crate::events::{attributes::Attribute, BytesStart}; - #[cfg(not(feature = "encoding"))] -use crate::errors::{Error, Result}; +use crate::errors::Error; +#[cfg(not(feature = "encoding"))] +use crate::errors::Result; +use crate::events::{attributes::Attribute, BytesStart}; #[cfg(not(feature = "encoding"))] use std::str::from_utf8; @@ -14,7 +15,6 @@ use std::borrow::Cow; #[cfg(feature = "asynchronous")] pub mod asynchronous; -#[cfg(not(feature = "asynchronous"))] pub mod sync; /// Trait for decoding, which is shared by the sync and async `Reader` diff --git a/src/writer.rs b/src/writer.rs index 02336894..fab33abf 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -10,7 +10,7 @@ use std::io::Write; /// /// # Examples /// -/// ```ignore +/// ```rust /// extern crate quick_xml; /// fn main() { /// use quick_xml::{Reader, Writer}; diff --git a/tests/test.rs b/tests/test.rs index 76b31683..17ea9bed 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -9,8 +9,6 @@ use quick_xml::Reader; use serde::{Deserialize, Serialize}; use std::borrow::Cow; use std::io::Cursor; -#[cfg(feature = "asynchronous")] -use tokio::runtime::Runtime; #[test] fn test_sample() { @@ -19,17 +17,8 @@ fn test_sample() { let mut r = Reader::from_reader(src); let mut count = 0; - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - loop { - #[cfg(not(feature = "asynchronous"))] - let event = r.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_event(&mut buf).await }); - - match event.unwrap() { + match r.read_event(&mut 
buf).unwrap() { Start(_) => count += 1, Decl(e) => println!("{:?}", e.version()), Eof => break, @@ -46,17 +35,9 @@ fn test_sample_async() { let mut buf = Vec::new(); let mut r = Reader::from_reader(src); let mut count = 0; - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); loop { - #[cfg(not(feature = "asynchronous"))] - let event = r.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_event(&mut buf).await }); - - match event.unwrap() { + match r.read_event(&mut buf).unwrap() { Start(_) => count += 1, Decl(e) => println!("{:?}", e.version()), Eof => break, @@ -73,16 +54,8 @@ fn test_attributes_empty() { let mut r = Reader::from_reader(src as &[u8]); r.trim_text(true).expand_empty_elements(false); let mut buf = Vec::new(); - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - - #[cfg(not(feature = "asynchronous"))] - let event = r.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_event(&mut buf).await }); - match event { + match r.read_event(&mut buf) { Ok(Empty(e)) => { let mut atts = e.attributes(); match atts.next() { @@ -108,23 +81,14 @@ fn test_attributes_empty() { } } -#[cfg(not(feature = "asynchronous"))] #[test] fn test_attribute_equal() { let src = b""; let mut r = Reader::from_reader(src as &[u8]); r.trim_text(true).expand_empty_elements(false); let mut buf = Vec::new(); - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - #[cfg(not(feature = "asynchronous"))] - let event = r.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_event(&mut buf).await }); - - match event { + match r.read_event(&mut buf) { Ok(Empty(e)) => { let mut atts = e.attributes(); match atts.next() { @@ -143,24 +107,15 @@ fn test_attribute_equal() { } 
} -#[cfg(not(feature = "asynchronous"))] #[test] fn test_comment_starting_with_gt() { let src = b"-->"; let mut r = Reader::from_reader(src as &[u8]); r.trim_text(true).expand_empty_elements(false); let mut buf = Vec::new(); - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); loop { - #[cfg(not(feature = "asynchronous"))] - let event = r.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_event(&mut buf).await }); - - match event { + match r.read_event(&mut buf) { Ok(Comment(ref e)) if &**e == b">" => break, Ok(Eof) => panic!("Expecting Comment"), _ => (), @@ -179,16 +134,8 @@ fn test_attributes_empty_ns() { r.trim_text(true).expand_empty_elements(false); let mut buf = Vec::new(); let mut ns_buf = Vec::new(); - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - - #[cfg(not(feature = "asynchronous"))] - let event = r.read_namespaced_event(&mut buf, &mut ns_buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); - let e = match event { + let e = match r.read_namespaced_event(&mut buf, &mut ns_buf) { Ok((None, Empty(e))) => e, e => panic!("Expecting Empty event, got {:?}", e), }; @@ -224,7 +171,6 @@ fn test_attributes_empty_ns() { /// Single empty element with qualified attributes. /// Empty element expansion: enabled /// The code path for namespace handling is slightly different for `Empty` vs. `Start+End`. 
-#[cfg(not(feature = "asynchronous"))] #[test] fn test_attributes_empty_ns_expanded() { let src = b""; @@ -233,18 +179,9 @@ fn test_attributes_empty_ns_expanded() { r.trim_text(true).expand_empty_elements(true); let mut buf = Vec::new(); let mut ns_buf = Vec::new(); - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); { - #[cfg(not(feature = "asynchronous"))] - let event = r.read_namespaced_event(&mut buf, &mut ns_buf); - - #[cfg(feature = "asynchronous")] - let event = - runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); - - let e = match event { + let e = match r.read_namespaced_event(&mut buf, &mut ns_buf) { Ok((None, Start(e))) => e, e => panic!("Expecting Empty event, got {:?}", e), }; @@ -277,13 +214,7 @@ fn test_attributes_empty_ns_expanded() { } } - #[cfg(not(feature = "asynchronous"))] - let event = r.read_namespaced_event(&mut buf, &mut ns_buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); - - match event { + match r.read_namespaced_event(&mut buf, &mut ns_buf) { Ok((None, End(e))) => assert_eq!(b"a", e.name()), e => panic!("Expecting End event, got {:?}", e), } @@ -297,19 +228,10 @@ fn test_default_ns_shadowing_empty() { r.trim_text(true).expand_empty_elements(false); let mut buf = Vec::new(); let mut ns_buf = Vec::new(); - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); // { - #[cfg(not(feature = "asynchronous"))] - let event = r.read_namespaced_event(&mut buf, &mut ns_buf); - - #[cfg(feature = "asynchronous")] - let event = - runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); - - match event { + match r.read_namespaced_event(&mut buf, &mut ns_buf) { Ok((Some(ns), Start(e))) => { assert_eq!(&ns[..], b"urn:example:o"); assert_eq!(e.name(), b"e"); @@ -320,14 +242,7 @@ fn 
test_default_ns_shadowing_empty() { // { - #[cfg(not(feature = "asynchronous"))] - let event = r.read_namespaced_event(&mut buf, &mut ns_buf); - - #[cfg(feature = "asynchronous")] - let event = - runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); - - let e = match event { + let e = match r.read_namespaced_event(&mut buf, &mut ns_buf) { Ok((Some(ns), Empty(e))) => { assert_eq!(::std::str::from_utf8(ns).unwrap(), "urn:example:i"); assert_eq!(e.name(), b"e"); @@ -358,14 +273,7 @@ fn test_default_ns_shadowing_empty() { } // - - #[cfg(not(feature = "asynchronous"))] - let event = r.read_namespaced_event(&mut buf, &mut ns_buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); - - match event { + match r.read_namespaced_event(&mut buf, &mut ns_buf) { Ok((Some(ns), End(e))) => { assert_eq!(&ns[..], b"urn:example:o"); assert_eq!(e.name(), b"e"); @@ -374,7 +282,6 @@ fn test_default_ns_shadowing_empty() { } } -#[cfg(not(feature = "asynchronous"))] #[test] fn test_default_ns_shadowing_expanded() { let src = b""; @@ -383,19 +290,10 @@ fn test_default_ns_shadowing_expanded() { r.trim_text(true).expand_empty_elements(true); let mut buf = Vec::new(); let mut ns_buf = Vec::new(); - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); // { - #[cfg(not(feature = "asynchronous"))] - let event = r.read_namespaced_event(&mut buf, &mut ns_buf); - - #[cfg(feature = "asynchronous")] - let event = - runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); - - match event { + match r.read_namespaced_event(&mut buf, &mut ns_buf) { Ok((Some(ns), Start(e))) => { assert_eq!(&ns[..], b"urn:example:o"); assert_eq!(e.name(), b"e"); @@ -407,14 +305,7 @@ fn test_default_ns_shadowing_expanded() { // { - #[cfg(not(feature = "asynchronous"))] - let event = r.read_namespaced_event(&mut buf, &mut ns_buf); - - 
#[cfg(feature = "asynchronous")] - let event = - runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); - - let e = match event { + let e = match r.read_namespaced_event(&mut buf, &mut ns_buf) { Ok((Some(ns), Start(e))) => { assert_eq!(&ns[..], b"urn:example:i"); assert_eq!(e.name(), b"e"); @@ -444,13 +335,7 @@ fn test_default_ns_shadowing_expanded() { } // virtual - #[cfg(not(feature = "asynchronous"))] - let event = r.read_namespaced_event(&mut buf, &mut ns_buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); - - match event { + match r.read_namespaced_event(&mut buf, &mut ns_buf) { Ok((Some(ns), End(e))) => { assert_eq!(&ns[..], b"urn:example:i"); assert_eq!(e.name(), b"e"); @@ -459,13 +344,7 @@ fn test_default_ns_shadowing_expanded() { } // - #[cfg(not(feature = "asynchronous"))] - let event = r.read_namespaced_event(&mut buf, &mut ns_buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); - - match event { + match r.read_namespaced_event(&mut buf, &mut ns_buf) { Ok((Some(ns), End(e))) => { assert_eq!(&ns[..], b"urn:example:o"); assert_eq!(e.name(), b"e"); @@ -481,17 +360,9 @@ fn test_koi8_r_encoding() { let mut r = Reader::from_reader(src as &[u8]); r.trim_text(true).expand_empty_elements(false); let mut buf = Vec::new(); - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); loop { - #[cfg(not(feature = "asynchronous"))] - let event = r.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_event(&mut buf).await }); - - match event { + match r.read_event(&mut buf) { Ok(Text(e)) => { e.unescape_and_decode(&r).unwrap(); } @@ -509,24 +380,15 @@ fn fuzz_53() { let cursor = Cursor::new(data); let mut reader = Reader::from_reader(cursor); let mut buf = vec![]; - 
#[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); loop { - #[cfg(not(feature = "asynchronous"))] - let event = reader.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { reader.read_event(&mut buf).await }); - - match event { + match reader.read_event(&mut buf) { Ok(quick_xml::events::Event::Eof) | Err(..) => break, _ => buf.clear(), } } } -#[cfg(not(feature = "asynchronous"))] #[test] fn test_issue94() { let data = br#" @@ -535,17 +397,9 @@ fn test_issue94() { let mut reader = Reader::from_reader(&data[..]); reader.trim_text(true); let mut buf = vec![]; - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); loop { - #[cfg(not(feature = "asynchronous"))] - let event = reader.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { reader.read_event(&mut buf).await }); - - match event { + match reader.read_event(&mut buf) { Ok(quick_xml::events::Event::Eof) | Err(..) 
=> break, _ => buf.clear(), } @@ -561,17 +415,9 @@ fn fuzz_101() { let cursor = Cursor::new(data); let mut reader = Reader::from_reader(cursor); let mut buf = vec![]; - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); loop { - #[cfg(not(feature = "asynchronous"))] - let event = reader.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { reader.read_event(&mut buf).await }); - - match event { + match reader.read_event(&mut buf) { Ok(Start(ref e)) | Ok(Empty(ref e)) => { if e.unescaped().is_err() { break; @@ -602,30 +448,15 @@ fn test_default_namespace() { // let mut buf = Vec::new(); let mut ns_buf = Vec::new(); - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - #[cfg(not(feature = "asynchronous"))] - let event = r.read_namespaced_event(&mut buf, &mut ns_buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); - - if let Ok((None, Start(_))) = event { + if let Ok((None, Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { } else { panic!("expecting outer start element with no namespace"); } // { - #[cfg(not(feature = "asynchronous"))] - let event = r.read_namespaced_event(&mut buf, &mut ns_buf); - - #[cfg(feature = "asynchronous")] - let event = - runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); - - let event = match event { + let event = match r.read_namespaced_event(&mut buf, &mut ns_buf) { Ok((Some(b"www1"), Start(event))) => event, Ok((Some(_), Start(_))) => panic!("expecting namespace to resolve to 'www1'"), _ => panic!("expecting namespace resolution"), @@ -643,13 +474,7 @@ fn test_default_namespace() { } // - #[cfg(not(feature = "asynchronous"))] - let event = r.read_namespaced_event(&mut buf, &mut ns_buf); - - #[cfg(feature = "asynchronous")] - let event = 
runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); - - match event { + match r.read_namespaced_event(&mut buf, &mut ns_buf) { Ok((Some(b"www1"), End(_))) => (), Ok((Some(_), End(_))) => panic!("expecting namespace to resolve to 'www1'"), _ => panic!("expecting namespace resolution"), @@ -657,14 +482,7 @@ fn test_default_namespace() { // very important: a should not be in any namespace. The default namespace only applies to // the sub-document it is defined on. - - #[cfg(not(feature = "asynchronous"))] - let event = r.read_namespaced_event(&mut buf, &mut ns_buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); - - if let Ok((None, End(_))) = event { + if let Ok((None, End(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { } else { panic!("expecting outer end element with no namespace"); } diff --git a/tests/test_async.rs b/tests/test_async.rs new file mode 100644 index 00000000..c1b6d975 --- /dev/null +++ b/tests/test_async.rs @@ -0,0 +1,1187 @@ +#[cfg(feature = "asynchronous")] +use quick_xml::events::attributes::Attribute; +#[cfg(feature = "asynchronous")] +use quick_xml::events::Event::*; +#[cfg(feature = "asynchronous")] +use quick_xml::AsyncReader; +#[cfg(feature = "asynchronous")] +use std::borrow::Cow; +#[cfg(feature = "asynchronous")] +use std::io::Cursor; + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_sample() { + let src: &[u8] = include_bytes!("sample_rss.xml"); + let mut buf = Vec::new(); + let mut r = AsyncReader::from_reader(src); + let mut count = 0; + + loop { + match r.read_event(&mut buf).await.unwrap() { + Start(_) => count += 1, + Decl(e) => println!("{:?}", e.version()), + Eof => break, + _ => (), + } + buf.clear(); + } + println!("{}", count); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_sample_async() { + let src: &[u8] = include_bytes!("sample_rss.xml"); + let mut buf = 
Vec::new(); + let mut r = AsyncReader::from_reader(src); + let mut count = 0; + + loop { + match r.read_event(&mut buf).await.unwrap() { + Start(_) => count += 1, + Decl(e) => println!("{:?}", e.version()), + Eof => break, + _ => (), + } + buf.clear(); + } + println!("{}", count); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_attributes_empty() { + let src = b""; + let mut r = AsyncReader::from_reader(src as &[u8]); + r.trim_text(true).expand_empty_elements(false); + let mut buf = Vec::new(); + + match r.read_event(&mut buf).await { + Ok(Empty(e)) => { + let mut atts = e.attributes(); + match atts.next() { + Some(Ok(Attribute { + key: b"att1", + value: Cow::Borrowed(b"a"), + })) => (), + e => panic!("Expecting att1='a' attribute, found {:?}", e), + } + match atts.next() { + Some(Ok(Attribute { + key: b"att2", + value: Cow::Borrowed(b"b"), + })) => (), + e => panic!("Expecting att2='b' attribute, found {:?}", e), + } + match atts.next() { + None => (), + e => panic!("Expecting None, found {:?}", e), + } + } + e => panic!("Expecting Empty event, got {:?}", e), + } +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_attribute_equal() { + let src = b""; + let mut r = AsyncReader::from_reader(src as &[u8]); + r.trim_text(true).expand_empty_elements(false); + let mut buf = Vec::new(); + + match r.read_event(&mut buf).await { + Ok(Empty(e)) => { + let mut atts = e.attributes(); + match atts.next() { + Some(Ok(Attribute { + key: b"att1", + value: Cow::Borrowed(b"a=b"), + })) => (), + e => panic!("Expecting att1=\"a=b\" attribute, found {:?}", e), + } + match atts.next() { + None => (), + e => panic!("Expecting None, found {:?}", e), + } + } + e => panic!("Expecting Empty event, got {:?}", e), + } +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_comment_starting_with_gt() { + let src = b"-->"; + let mut r = AsyncReader::from_reader(src as &[u8]); + r.trim_text(true).expand_empty_elements(false); + let mut buf = 
Vec::new(); + + loop { + match r.read_event(&mut buf).await { + Ok(Comment(ref e)) if &**e == b">" => break, + Ok(Eof) => panic!("Expecting Comment"), + _ => (), + } + } +} + +/// Single empty element with qualified attributes. +/// Empty element expansion: disabled +/// The code path for namespace handling is slightly different for `Empty` vs. `Start+End`. +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_attributes_empty_ns() { + let src = b""; + + let mut r = AsyncReader::from_reader(src as &[u8]); + r.trim_text(true).expand_empty_elements(false); + let mut buf = Vec::new(); + let mut ns_buf = Vec::new(); + + let e = match r.read_namespaced_event(&mut buf, &mut ns_buf).await { + Ok((None, Empty(e))) => e, + e => panic!("Expecting Empty event, got {:?}", e), + }; + + let mut atts = e + .attributes() + .map(|ar| ar.expect("Expecting attribute parsing to succeed.")) + // we don't care about xmlns attributes for this test + .filter(|kv| !kv.key.starts_with(b"xmlns")) + .map(|Attribute { key: name, value }| { + let (opt_ns, local_name) = r.attribute_namespace(name, &ns_buf); + (opt_ns, local_name, value) + }); + match atts.next() { + Some((None, b"att1", Cow::Borrowed(b"a"))) => (), + e => panic!("Expecting att1='a' attribute, found {:?}", e), + } + match atts.next() { + Some((Some(ns), b"att2", Cow::Borrowed(b"b"))) => { + assert_eq!(&ns[..], b"urn:example:r"); + } + e => panic!( + "Expecting {{urn:example:r}}att2='b' attribute, found {:?}", + e + ), + } + match atts.next() { + None => (), + e => panic!("Expecting None, found {:?}", e), + } +} + +/// Single empty element with qualified attributes. +/// Empty element expansion: enabled +/// The code path for namespace handling is slightly different for `Empty` vs. `Start+End`. 
+#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_attributes_empty_ns_expanded() { + let src = b""; + + let mut r = AsyncReader::from_reader(src as &[u8]); + r.trim_text(true).expand_empty_elements(true); + let mut buf = Vec::new(); + let mut ns_buf = Vec::new(); + + { + let e = match r.read_namespaced_event(&mut buf, &mut ns_buf).await { + Ok((None, Start(e))) => e, + e => panic!("Expecting Empty event, got {:?}", e), + }; + + let mut atts = e + .attributes() + .map(|ar| ar.expect("Expecting attribute parsing to succeed.")) + // we don't care about xmlns attributes for this test + .filter(|kv| !kv.key.starts_with(b"xmlns")) + .map(|Attribute { key: name, value }| { + let (opt_ns, local_name) = r.attribute_namespace(name, &ns_buf); + (opt_ns, local_name, value) + }); + match atts.next() { + Some((None, b"att1", Cow::Borrowed(b"a"))) => (), + e => panic!("Expecting att1='a' attribute, found {:?}", e), + } + match atts.next() { + Some((Some(ns), b"att2", Cow::Borrowed(b"b"))) => { + assert_eq!(&ns[..], b"urn:example:r"); + } + e => panic!( + "Expecting {{urn:example:r}}att2='b' attribute, found {:?}", + e + ), + } + match atts.next() { + None => (), + e => panic!("Expecting None, found {:?}", e), + } + } + + match r.read_namespaced_event(&mut buf, &mut ns_buf).await { + Ok((None, End(e))) => assert_eq!(b"a", e.name()), + e => panic!("Expecting End event, got {:?}", e), + } +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_default_ns_shadowing_empty() { + let src = b""; + + let mut r = AsyncReader::from_reader(src as &[u8]); + r.trim_text(true).expand_empty_elements(false); + let mut buf = Vec::new(); + let mut ns_buf = Vec::new(); + + // + { + match r.read_namespaced_event(&mut buf, &mut ns_buf).await { + Ok((Some(ns), Start(e))) => { + assert_eq!(&ns[..], b"urn:example:o"); + assert_eq!(e.name(), b"e"); + } + e => panic!("Expected Start event (), got {:?}", e), + } + } + + // + { + let e = match r.read_namespaced_event(&mut buf, 
&mut ns_buf).await { + Ok((Some(ns), Empty(e))) => { + assert_eq!(::std::str::from_utf8(ns).unwrap(), "urn:example:i"); + assert_eq!(e.name(), b"e"); + e + } + e => panic!("Expecting Empty event, got {:?}", e), + }; + + let mut atts = e + .attributes() + .map(|ar| ar.expect("Expecting attribute parsing to succeed.")) + // we don't care about xmlns attributes for this test + .filter(|kv| !kv.key.starts_with(b"xmlns")) + .map(|Attribute { key: name, value }| { + let (opt_ns, local_name) = r.attribute_namespace(name, &ns_buf); + (opt_ns, local_name, value) + }); + // the attribute should _not_ have a namespace name. The default namespace does not + // apply to attributes. + match atts.next() { + Some((None, b"att1", Cow::Borrowed(b"a"))) => (), + e => panic!("Expecting att1='a' attribute, found {:?}", e), + } + match atts.next() { + None => (), + e => panic!("Expecting None, found {:?}", e), + } + } + + // + match r.read_namespaced_event(&mut buf, &mut ns_buf).await { + Ok((Some(ns), End(e))) => { + assert_eq!(&ns[..], b"urn:example:o"); + assert_eq!(e.name(), b"e"); + } + e => panic!("Expected End event (), got {:?}", e), + } +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_default_ns_shadowing_expanded() { + let src = b""; + + let mut r = AsyncReader::from_reader(src as &[u8]); + r.trim_text(true).expand_empty_elements(true); + let mut buf = Vec::new(); + let mut ns_buf = Vec::new(); + + // + { + match r.read_namespaced_event(&mut buf, &mut ns_buf).await { + Ok((Some(ns), Start(e))) => { + assert_eq!(&ns[..], b"urn:example:o"); + assert_eq!(e.name(), b"e"); + } + e => panic!("Expected Start event (), got {:?}", e), + } + } + buf.clear(); + + // + { + let e = match r.read_namespaced_event(&mut buf, &mut ns_buf).await { + Ok((Some(ns), Start(e))) => { + assert_eq!(&ns[..], b"urn:example:i"); + assert_eq!(e.name(), b"e"); + e + } + e => panic!("Expecting Start event (), got {:?}", e), + }; + let mut atts = e + .attributes() + .map(|ar| 
ar.expect("Expecting attribute parsing to succeed.")) + // we don't care about xmlns attributes for this test + .filter(|kv| !kv.key.starts_with(b"xmlns")) + .map(|Attribute { key: name, value }| { + let (opt_ns, local_name) = r.attribute_namespace(name, &ns_buf); + (opt_ns, local_name, value) + }); + // the attribute should _not_ have a namespace name. The default namespace does not + // apply to attributes. + match atts.next() { + Some((None, b"att1", Cow::Borrowed(b"a"))) => (), + e => panic!("Expecting att1='a' attribute, found {:?}", e), + } + match atts.next() { + None => (), + e => panic!("Expecting None, found {:?}", e), + } + } + + // virtual + match r.read_namespaced_event(&mut buf, &mut ns_buf).await { + Ok((Some(ns), End(e))) => { + assert_eq!(&ns[..], b"urn:example:i"); + assert_eq!(e.name(), b"e"); + } + e => panic!("Expected End event (), got {:?}", e), + } + + // + match r.read_namespaced_event(&mut buf, &mut ns_buf).await { + Ok((Some(ns), End(e))) => { + assert_eq!(&ns[..], b"urn:example:o"); + assert_eq!(e.name(), b"e"); + } + e => panic!("Expected End event (), got {:?}", e), + } +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +#[cfg(feature = "encoding_rs")] +async fn test_koi8_r_encoding() { + let src: &[u8] = include_bytes!("documents/opennews_all.rss"); + let mut r = AsyncReader::from_reader(src as &[u8]); + r.trim_text(true).expand_empty_elements(false); + let mut buf = Vec::new(); + + loop { + match r.read_event(&mut buf).await { + Ok(Text(e)) => { + e.unescape_and_decode(&r).unwrap(); + } + Ok(Eof) => break, + _ => (), + } + } +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn fuzz_53() { + let data: &[u8] = b"\xe9\x00\x00\x00\x00\x00\x00\x00\x00\ +\x00\x00\x00\x00\n(\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\ +\x00<>\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00<<\x00\x00\x00"; + let cursor = Cursor::new(data); + let mut reader = AsyncReader::from_reader(cursor); + let mut buf = vec![]; + + loop { + match 
reader.read_event(&mut buf).await { + Ok(quick_xml::events::Event::Eof) | Err(..) => break, + _ => buf.clear(), + } + } +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_issue94() { + let data = br#" + +"#; + let mut reader = AsyncReader::from_reader(&data[..]); + reader.trim_text(true); + let mut buf = vec![]; + + loop { + match reader.read_event(&mut buf).await { + Ok(quick_xml::events::Event::Eof) | Err(..) => break, + _ => buf.clear(), + } + buf.clear(); + } +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn fuzz_101() { + let data: &[u8] = b"\x00\x00<\x00\x00\x0a>�?#\x0a413518\ + #\x0a\x0a\x0a;<:<)(<:\x0a\x0a\x0a\x0a;<:\x0a\x0a\ + <:\x0a\x0a\x0a\x0a\x0a<\x00*\x00\x00\x00\x00"; + let cursor = Cursor::new(data); + let mut reader = AsyncReader::from_reader(cursor); + let mut buf = vec![]; + + loop { + match reader.read_event(&mut buf).await { + Ok(Start(ref e)) | Ok(Empty(ref e)) => { + if e.unescaped().is_err() { + break; + } + for a in e.attributes() { + if a.ok().map_or(true, |a| a.unescaped_value().is_err()) { + break; + } + } + } + Ok(Text(ref e)) => { + if e.unescaped().is_err() { + break; + } + } + Ok(Eof) | Err(..) 
=> break, + _ => (), + } + buf.clear(); + } +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_default_namespace() { + let mut r = AsyncReader::from_str(""); + r.trim_text(true); + + // + let mut buf = Vec::new(); + let mut ns_buf = Vec::new(); + + if let Ok((None, Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf).await { + } else { + panic!("expecting outer start element with no namespace"); + } + + // + { + let event = match r.read_namespaced_event(&mut buf, &mut ns_buf).await { + Ok((Some(b"www1"), Start(event))) => event, + Ok((Some(_), Start(_))) => panic!("expecting namespace to resolve to 'www1'"), + _ => panic!("expecting namespace resolution"), + }; + + //We check if the resolve_namespace method also work properly + match r.event_namespace(event.name(), &mut ns_buf) { + (Some(b"www1"), _) => (), + (Some(_), _) => panic!("expecting namespace to resolve to 'www1'"), + ns => panic!( + "expecting namespace resolution by the resolve_nemespace method {:?}", + ns + ), + } + } + + // + match r.read_namespaced_event(&mut buf, &mut ns_buf).await { + Ok((Some(b"www1"), End(_))) => (), + Ok((Some(_), End(_))) => panic!("expecting namespace to resolve to 'www1'"), + _ => panic!("expecting namespace resolution"), + } + + // very important: a should not be in any namespace. The default namespace only applies to + // the sub-document it is defined on. 
+ + if let Ok((None, End(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf).await { + } else { + panic!("expecting outer end element with no namespace"); + } +} + +// #[cfg(feature = "asynchronous")] +// #[cfg(feature = "serialize")] +// #[tokio::test] +// async fn line_score() { +// #[derive(Debug, PartialEq, Deserialize)] +// struct LineScoreData { +// game_pk: u32, +// game_type: char, +// venue: String, +// venue_w_chan_loc: String, +// venue_id: u32, +// time: String, +// time_zone: String, +// ampm: String, +// home_team_id: u32, +// home_team_city: String, +// home_team_name: String, +// home_league_id: u32, +// away_team_id: u32, +// away_team_city: String, +// away_team_name: String, +// away_league_id: u32, +// #[serde(rename = "linescore", skip_serializing)] +// innings: Vec, +// } +// #[derive(Debug, PartialEq, Deserialize)] +// struct LineScore { +// #[serde(rename = "away_inning_runs")] +// away_runs: u32, +// #[serde(rename = "home_inning_runs")] +// //needs to be an Option, since home team doesn't always bat. 
+// home_runs: Option, +// // Keeping the inning as a string, since we'll need it to construct URLs later +// inning: String, +// } + +// let res: LineScoreData = quick_xml::de::from_str(include_str!("linescore.xml")).unwrap(); + +// let expected = LineScoreData { +// game_pk: 239575, +// game_type: 'R', +// venue: "Generic".to_owned(), +// venue_w_chan_loc: "USNY0996".to_owned(), +// venue_id: 401, +// time: "Gm 2".to_owned(), +// time_zone: "ET".to_owned(), +// ampm: "AM".to_owned(), +// home_team_id: 611, +// home_team_city: "DSL Dodgers".to_owned(), +// home_team_name: "DSL Dodgers".to_owned(), +// home_league_id: 130, +// away_team_id: 604, +// away_team_city: "DSL Blue Jays1".to_owned(), +// away_team_name: "DSL Blue Jays1".to_owned(), +// away_league_id: 130, +// innings: vec![ +// LineScore { +// away_runs: 1, +// home_runs: Some(0), +// inning: "1".to_owned(), +// }, +// LineScore { +// away_runs: 0, +// home_runs: Some(0), +// inning: "2".to_owned(), +// }, +// LineScore { +// away_runs: 1, +// home_runs: Some(1), +// inning: "3".to_owned(), +// }, +// LineScore { +// away_runs: 2, +// home_runs: Some(0), +// inning: "4".to_owned(), +// }, +// LineScore { +// away_runs: 0, +// home_runs: Some(0), +// inning: "5".to_owned(), +// }, +// LineScore { +// away_runs: 0, +// home_runs: Some(0), +// inning: "6".to_owned(), +// }, +// LineScore { +// away_runs: 0, +// home_runs: Some(0), +// inning: "7".to_owned(), +// }, +// ], +// }; +// assert_eq!(res, expected); +// } + +// #[cfg(feature = "serialize")] +// #[test] +// fn players() { +// #[derive(PartialEq, Deserialize, Serialize, Debug)] +// struct Game { +// #[serde(rename = "team")] +// teams: Vec, +// //umpires: Umpires +// } + +// #[derive(PartialEq, Deserialize, Serialize, Debug)] +// struct Team { +// #[serde(rename = "type")] +// home_away: HomeAway, +// id: String, +// name: String, +// #[serde(rename = "player")] +// players: Vec, +// #[serde(rename = "coach")] +// coaches: Vec, +// } + +// 
#[derive(PartialEq, Deserialize, Serialize, Debug)] +// enum HomeAway { +// #[serde(rename = "home")] +// Home, +// #[serde(rename = "away")] +// Away, +// } + +// #[derive(PartialEq, Deserialize, Serialize, Debug, Clone)] +// struct Player { +// id: u32, +// #[serde(rename = "first")] +// name_first: String, +// #[serde(rename = "last")] +// name_last: String, +// game_position: Option, +// bat_order: Option, +// position: String, +// } + +// #[derive(PartialEq, Deserialize, Serialize, Debug)] +// struct Coach { +// position: String, +// #[serde(rename = "first")] +// name_first: String, +// #[serde(rename = "last")] +// name_last: String, +// id: u32, +// } + +// let res: Game = quick_xml::de::from_str(include_str!("players.xml")).unwrap(); + +// let expected = Game { +// teams: vec![ +// Team { +// home_away: HomeAway::Away, +// id: "CIN".to_owned(), +// name: "Cincinnati Reds".to_owned(), +// players: vec![ +// Player { +// id: 115135, +// name_first: "Ken".to_owned(), +// name_last: "Griffey".to_owned(), +// game_position: Some("RF".to_owned()), +// bat_order: Some(3), +// position: "RF".to_owned(), +// }, +// Player { +// id: 115608, +// name_first: "Scott".to_owned(), +// name_last: "Hatteberg".to_owned(), +// game_position: None, +// bat_order: None, +// position: "1B".to_owned(), +// }, +// Player { +// id: 118967, +// name_first: "Kent".to_owned(), +// name_last: "Mercker".to_owned(), +// game_position: None, +// bat_order: None, +// position: "P".to_owned(), +// }, +// Player { +// id: 136460, +// name_first: "Alex".to_owned(), +// name_last: "Gonzalez".to_owned(), +// game_position: None, +// bat_order: None, +// position: "SS".to_owned(), +// }, +// Player { +// id: 150020, +// name_first: "Jerry".to_owned(), +// name_last: "Hairston".to_owned(), +// game_position: None, +// bat_order: None, +// position: "SS".to_owned(), +// }, +// Player { +// id: 150188, +// name_first: "Francisco".to_owned(), +// name_last: "Cordero".to_owned(), +// game_position: 
None, +// bat_order: None, +// position: "P".to_owned(), +// }, +// Player { +// id: 150221, +// name_first: "Mike".to_owned(), +// name_last: "Lincoln".to_owned(), +// game_position: None, +// bat_order: None, +// position: "P".to_owned(), +// }, +// Player { +// id: 150319, +// name_first: "Josh".to_owned(), +// name_last: "Fogg".to_owned(), +// game_position: None, +// bat_order: None, +// position: "P".to_owned(), +// }, +// Player { +// id: 150472, +// name_first: "Ryan".to_owned(), +// name_last: "Freel".to_owned(), +// game_position: Some("LF".to_owned()), +// bat_order: Some(2), +// position: "CF".to_owned(), +// }, +// Player { +// id: 276520, +// name_first: "Bronson".to_owned(), +// name_last: "Arroyo".to_owned(), +// game_position: None, +// bat_order: None, +// position: "P".to_owned(), +// }, +// Player { +// id: 279571, +// name_first: "Matt".to_owned(), +// name_last: "Belisle".to_owned(), +// game_position: Some("P".to_owned()), +// bat_order: Some(9), +// position: "P".to_owned(), +// }, +// Player { +// id: 279913, +// name_first: "Corey".to_owned(), +// name_last: "Patterson".to_owned(), +// game_position: Some("CF".to_owned()), +// bat_order: Some(1), +// position: "CF".to_owned(), +// }, +// Player { +// id: 346793, +// name_first: "Jeremy".to_owned(), +// name_last: "Affeldt".to_owned(), +// game_position: None, +// bat_order: None, +// position: "P".to_owned(), +// }, +// Player { +// id: 408252, +// name_first: "Brandon".to_owned(), +// name_last: "Phillips".to_owned(), +// game_position: Some("2B".to_owned()), +// bat_order: Some(4), +// position: "2B".to_owned(), +// }, +// Player { +// id: 421685, +// name_first: "Aaron".to_owned(), +// name_last: "Harang".to_owned(), +// game_position: None, +// bat_order: None, +// position: "P".to_owned(), +// }, +// Player { +// id: 424325, +// name_first: "David".to_owned(), +// name_last: "Ross".to_owned(), +// game_position: Some("C".to_owned()), +// bat_order: Some(8), +// position: 
"C".to_owned(), +// }, +// Player { +// id: 429665, +// name_first: "Edwin".to_owned(), +// name_last: "Encarnacion".to_owned(), +// game_position: Some("3B".to_owned()), +// bat_order: Some(6), +// position: "3B".to_owned(), +// }, +// Player { +// id: 433898, +// name_first: "Jeff".to_owned(), +// name_last: "Keppinger".to_owned(), +// game_position: Some("SS".to_owned()), +// bat_order: Some(7), +// position: "SS".to_owned(), +// }, +// Player { +// id: 435538, +// name_first: "Bill".to_owned(), +// name_last: "Bray".to_owned(), +// game_position: None, +// bat_order: None, +// position: "P".to_owned(), +// }, +// Player { +// id: 440361, +// name_first: "Norris".to_owned(), +// name_last: "Hopper".to_owned(), +// game_position: None, +// bat_order: None, +// position: "O".to_owned(), +// }, +// Player { +// id: 450172, +// name_first: "Edinson".to_owned(), +// name_last: "Volquez".to_owned(), +// game_position: None, +// bat_order: None, +// position: "P".to_owned(), +// }, +// Player { +// id: 454537, +// name_first: "Jared".to_owned(), +// name_last: "Burton".to_owned(), +// game_position: None, +// bat_order: None, +// position: "P".to_owned(), +// }, +// Player { +// id: 455751, +// name_first: "Bobby".to_owned(), +// name_last: "Livingston".to_owned(), +// game_position: None, +// bat_order: None, +// position: "P".to_owned(), +// }, +// Player { +// id: 456501, +// name_first: "Johnny".to_owned(), +// name_last: "Cueto".to_owned(), +// game_position: None, +// bat_order: None, +// position: "P".to_owned(), +// }, +// Player { +// id: 458015, +// name_first: "Joey".to_owned(), +// name_last: "Votto".to_owned(), +// game_position: Some("1B".to_owned()), +// bat_order: Some(5), +// position: "1B".to_owned(), +// }, +// ], +// coaches: vec![ +// Coach { +// position: "manager".to_owned(), +// name_first: "Dusty".to_owned(), +// name_last: "Baker".to_owned(), +// id: 110481, +// }, +// Coach { +// position: "batting_coach".to_owned(), +// name_first: 
"Brook".to_owned(), +// name_last: "Jacoby".to_owned(), +// id: 116461, +// }, +// Coach { +// position: "pitching_coach".to_owned(), +// name_first: "Dick".to_owned(), +// name_last: "Pole".to_owned(), +// id: 120649, +// }, +// Coach { +// position: "first_base_coach".to_owned(), +// name_first: "Billy".to_owned(), +// name_last: "Hatcher".to_owned(), +// id: 115602, +// }, +// Coach { +// position: "third_base_coach".to_owned(), +// name_first: "Mark".to_owned(), +// name_last: "Berry".to_owned(), +// id: 427028, +// }, +// Coach { +// position: "bench_coach".to_owned(), +// name_first: "Chris".to_owned(), +// name_last: "Speier".to_owned(), +// id: 122573, +// }, +// Coach { +// position: "bullpen_coach".to_owned(), +// name_first: "Juan".to_owned(), +// name_last: "Lopez".to_owned(), +// id: 427306, +// }, +// Coach { +// position: "bullpen_catcher".to_owned(), +// name_first: "Mike".to_owned(), +// name_last: "Stefanski".to_owned(), +// id: 150464, +// }, +// ], +// }, +// Team { +// home_away: HomeAway::Home, +// id: "NYM".to_owned(), +// name: "New York Mets".to_owned(), +// players: vec![ +// Player { +// id: 110189, +// name_first: "Moises".to_owned(), +// name_last: "Alou".to_owned(), +// game_position: Some("LF".to_owned()), +// bat_order: Some(6), +// position: "LF".to_owned(), +// }, +// Player { +// id: 112116, +// name_first: "Luis".to_owned(), +// name_last: "Castillo".to_owned(), +// game_position: Some("2B".to_owned()), +// bat_order: Some(2), +// position: "2B".to_owned(), +// }, +// Player { +// id: 113232, +// name_first: "Carlos".to_owned(), +// name_last: "Delgado".to_owned(), +// game_position: Some("1B".to_owned()), +// bat_order: Some(7), +// position: "1B".to_owned(), +// }, +// Player { +// id: 113702, +// name_first: "Damion".to_owned(), +// name_last: "Easley".to_owned(), +// game_position: None, +// bat_order: None, +// position: "2B".to_owned(), +// }, +// Player { +// id: 118377, +// name_first: "Pedro".to_owned(), +// name_last: 
"Martinez".to_owned(), +// game_position: None, +// bat_order: None, +// position: "P".to_owned(), +// }, +// Player { +// id: 123790, +// name_first: "Billy".to_owned(), +// name_last: "Wagner".to_owned(), +// game_position: None, +// bat_order: None, +// position: "P".to_owned(), +// }, +// Player { +// id: 133340, +// name_first: "Orlando".to_owned(), +// name_last: "Hernandez".to_owned(), +// game_position: None, +// bat_order: None, +// position: "P".to_owned(), +// }, +// Player { +// id: 135783, +// name_first: "Ramon".to_owned(), +// name_last: "Castro".to_owned(), +// game_position: None, +// bat_order: None, +// position: "C".to_owned(), +// }, +// Player { +// id: 136724, +// name_first: "Marlon".to_owned(), +// name_last: "Anderson".to_owned(), +// game_position: None, +// bat_order: None, +// position: "LF".to_owned(), +// }, +// Player { +// id: 136860, +// name_first: "Carlos".to_owned(), +// name_last: "Beltran".to_owned(), +// game_position: Some("CF".to_owned()), +// bat_order: Some(4), +// position: "CF".to_owned(), +// }, +// Player { +// id: 150411, +// name_first: "Brian".to_owned(), +// name_last: "Schneider".to_owned(), +// game_position: Some("C".to_owned()), +// bat_order: Some(8), +// position: "C".to_owned(), +// }, +// Player { +// id: 276371, +// name_first: "Johan".to_owned(), +// name_last: "Santana".to_owned(), +// game_position: Some("P".to_owned()), +// bat_order: Some(9), +// position: "P".to_owned(), +// }, +// Player { +// id: 277184, +// name_first: "Matt".to_owned(), +// name_last: "Wise".to_owned(), +// game_position: None, +// bat_order: None, +// position: "P".to_owned(), +// }, +// Player { +// id: 346795, +// name_first: "Endy".to_owned(), +// name_last: "Chavez".to_owned(), +// game_position: None, +// bat_order: None, +// position: "RF".to_owned(), +// }, +// Player { +// id: 407901, +// name_first: "Jorge".to_owned(), +// name_last: "Sosa".to_owned(), +// game_position: None, +// bat_order: None, +// position: 
"P".to_owned(), +// }, +// Player { +// id: 408230, +// name_first: "Pedro".to_owned(), +// name_last: "Feliciano".to_owned(), +// game_position: None, +// bat_order: None, +// position: "P".to_owned(), +// }, +// Player { +// id: 408310, +// name_first: "Aaron".to_owned(), +// name_last: "Heilman".to_owned(), +// game_position: None, +// bat_order: None, +// position: "P".to_owned(), +// }, +// Player { +// id: 408314, +// name_first: "Jose".to_owned(), +// name_last: "Reyes".to_owned(), +// game_position: Some("SS".to_owned()), +// bat_order: Some(1), +// position: "SS".to_owned(), +// }, +// Player { +// id: 425508, +// name_first: "Ryan".to_owned(), +// name_last: "Church".to_owned(), +// game_position: Some("RF".to_owned()), +// bat_order: Some(5), +// position: "RF".to_owned(), +// }, +// Player { +// id: 429720, +// name_first: "John".to_owned(), +// name_last: "Maine".to_owned(), +// game_position: None, +// bat_order: None, +// position: "P".to_owned(), +// }, +// Player { +// id: 431151, +// name_first: "David".to_owned(), +// name_last: "Wright".to_owned(), +// game_position: Some("3B".to_owned()), +// bat_order: Some(3), +// position: "3B".to_owned(), +// }, +// Player { +// id: 434586, +// name_first: "Ambiorix".to_owned(), +// name_last: "Burgos".to_owned(), +// game_position: None, +// bat_order: None, +// position: "P".to_owned(), +// }, +// Player { +// id: 434636, +// name_first: "Angel".to_owned(), +// name_last: "Pagan".to_owned(), +// game_position: None, +// bat_order: None, +// position: "LF".to_owned(), +// }, +// Player { +// id: 450306, +// name_first: "Jason".to_owned(), +// name_last: "Vargas".to_owned(), +// game_position: None, +// bat_order: None, +// position: "P".to_owned(), +// }, +// Player { +// id: 460059, +// name_first: "Mike".to_owned(), +// name_last: "Pelfrey".to_owned(), +// game_position: None, +// bat_order: None, +// position: "P".to_owned(), +// }, +// ], +// coaches: vec![ +// Coach { +// position: 
"manager".to_owned(), +// name_first: "Willie".to_owned(), +// name_last: "Randolph".to_owned(), +// id: 120927, +// }, +// Coach { +// position: "batting_coach".to_owned(), +// name_first: "Howard".to_owned(), +// name_last: "Johnson".to_owned(), +// id: 116593, +// }, +// Coach { +// position: "pitching_coach".to_owned(), +// name_first: "Rick".to_owned(), +// name_last: "Peterson".to_owned(), +// id: 427395, +// }, +// Coach { +// position: "first_base_coach".to_owned(), +// name_first: "Tom".to_owned(), +// name_last: "Nieto".to_owned(), +// id: 119796, +// }, +// Coach { +// position: "third_base_coach".to_owned(), +// name_first: "Sandy".to_owned(), +// name_last: "Alomar".to_owned(), +// id: 110185, +// }, +// Coach { +// position: "bench_coach".to_owned(), +// name_first: "Jerry".to_owned(), +// name_last: "Manuel".to_owned(), +// id: 118262, +// }, +// Coach { +// position: "bullpen_coach".to_owned(), +// name_first: "Guy".to_owned(), +// name_last: "Conti".to_owned(), +// id: 434699, +// }, +// Coach { +// position: "bullpen_catcher".to_owned(), +// name_first: "Dave".to_owned(), +// name_last: "Racaniello".to_owned(), +// id: 534948, +// }, +// Coach { +// position: "coach".to_owned(), +// name_first: "Sandy".to_owned(), +// name_last: "Alomar".to_owned(), +// id: 110184, +// }, +// Coach { +// position: "coach".to_owned(), +// name_first: "Juan".to_owned(), +// name_last: "Lopez".to_owned(), +// id: 495390, +// }, +// ], +// }, +// ], +// }; + +// assert_eq!(res, expected); +// } diff --git a/tests/unit_tests.rs b/tests/unit_tests.rs index 07491972..9be94f69 100644 --- a/tests/unit_tests.rs +++ b/tests/unit_tests.rs @@ -6,23 +6,12 @@ use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event}; use quick_xml::{Reader, Result, Writer}; use std::io::Cursor; use std::str::from_utf8; -#[cfg(feature = "asynchronous")] -use tokio::runtime::Runtime; macro_rules! 
next_eq_name { ($r:expr, $t:tt, $bytes:expr) => { let mut buf = Vec::new(); - #[cfg(not(feature = "asynchronous"))] - let event = $r.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { $r.read_event(&mut buf).await }); - - match event.unwrap() { + match $r.read_event(&mut buf).unwrap() { $t(ref e) if e.name() == $bytes => (), e => panic!( "expecting {}({:?}), found {:?}", @@ -39,16 +28,7 @@ macro_rules! next_eq_content { ($r:expr, $t:tt, $bytes:expr) => { let mut buf = Vec::new(); - #[cfg(not(feature = "asynchronous"))] - let event = $r.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { $r.read_event(&mut buf).await }); - - match event.unwrap() { + match $r.read_event(&mut buf).unwrap() { $t(ref e) if &**e == $bytes => (), e => panic!( "expecting {}({:?}), found {:?}", @@ -150,16 +130,7 @@ fn test_xml_decl() { r.trim_text(true); let mut buf = Vec::new(); - #[cfg(not(feature = "asynchronous"))] - let event = r.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_event(&mut buf).await }); - - match event.unwrap() { + match r.read_event(&mut buf).unwrap() { Decl(ref e) => { match e.version() { Ok(v) => assert_eq!( @@ -240,20 +211,10 @@ fn test_writer() -> Result<()> { let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - loop { - #[cfg(not(feature = "asynchronous"))] - let event = reader.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let 
event = runtime.block_on(async { reader.read_event(&mut buf).await }); - - match event { - Ok(Eof) => break, - Ok(e) => assert!(writer.write_event(e).is_ok()), - Err(e) => panic!("{}", e), + match reader.read_event(&mut buf)? { + Eof => break, + e => assert!(writer.write_event(e).is_ok()), } } @@ -270,20 +231,11 @@ fn test_writer_borrow() -> Result<()> { let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - loop { - #[cfg(not(feature = "asynchronous"))] - let event = reader.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { reader.read_event(&mut buf).await }); - + let event = reader.read_event(&mut buf)?; match event { - Ok(Eof) => break, - Ok(e) => assert!(writer.write_event(&e).is_ok()), // either `e` or `&e` - Err(e) => panic!("{}", e), + Eof => break, + e => assert!(writer.write_event(&e).is_ok()), // either `e` or `&e` } } @@ -304,20 +256,10 @@ fn test_writer_indent() -> Result<()> { let mut writer = Writer::new_with_indent(Cursor::new(Vec::new()), b' ', 4); let mut buf = Vec::new(); - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - loop { - #[cfg(not(feature = "asynchronous"))] - let event = reader.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { reader.read_event(&mut buf).await }); - - match event { - Ok(Eof) => break, - Ok(e) => assert!(writer.write_event(e).is_ok()), - Err(e) => panic!("{}", e), + match reader.read_event(&mut buf)? 
{ + Eof => break, + e => assert!(writer.write_event(e).is_ok()), } } @@ -340,20 +282,12 @@ fn test_writer_indent_cdata() -> Result<()> { let mut writer = Writer::new_with_indent(Cursor::new(Vec::new()), b' ', 4); let mut buf = Vec::new(); - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - loop { - #[cfg(not(feature = "asynchronous"))] - let event = reader.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + let event = reader.read_event(&mut buf)?; match event { - Ok(Eof) => break, - Ok(e) => assert!(writer.write_event(e).is_ok()), - Err(e) => panic!("{}", e), + Eof => break, + e => assert!(writer.write_event(e).is_ok()), } } @@ -377,25 +311,16 @@ fn test_write_empty_element_attrs() -> Result<()> { let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - loop { - #[cfg(not(feature = "asynchronous"))] - let event = reader.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { reader.read_event(&mut buf).await }); - - match event { - Ok(Eof) => break, - Ok(e) => assert!(writer.write_event(e).is_ok()), - Err(e) => panic!("{}", e), + match reader.read_event(&mut buf)? 
{ + Eof => break, + e => assert!(writer.write_event(e).is_ok()), } } let result = writer.into_inner().into_inner(); assert_eq!(String::from_utf8(result).unwrap(), expected); + Ok(()) } @@ -408,19 +333,10 @@ fn test_write_attrs() -> Result<()> { let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - loop { - #[cfg(not(feature = "asynchronous"))] - let ev = reader.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let ev = runtime.block_on(async { reader.read_event(&mut buf).await }); - - let event = match ev { - Ok(Eof) => break, - Ok(Start(elem)) => { + let event = match reader.read_event(&mut buf)? { + Eof => break, + Start(elem) => { let mut attrs = elem.attributes().collect::>>().unwrap(); attrs.extend_from_slice(&[("a", "b").into(), ("c", "d").into()]); let mut elem = BytesStart::owned(b"copy".to_vec(), 4); @@ -428,9 +344,8 @@ fn test_write_attrs() -> Result<()> { elem.push_attribute(("x", "y\"z")); Start(elem) } - Ok(End(_)) => End(BytesEnd::borrowed(b"copy")), - Ok(e) => e, - Err(e) => panic!("{}", e), + End(_) => End(BytesEnd::borrowed(b"copy")), + e => e, }; assert!(writer.write_event(event).is_ok()); } @@ -526,16 +441,7 @@ fn test_buf_position_err_end_element() { let mut buf = Vec::new(); - #[cfg(not(feature = "asynchronous"))] - let event = r.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_event(&mut buf).await }); - - match event { + match r.read_event(&mut buf) { Err(_) if r.buffer_position() == 2 => (), // error at char 2: no opening tag Err(e) => panic!( "expecting buf_pos = 2, found {}, err: {:?}", @@ -556,16 +462,7 @@ fn test_buf_position_err_comment() { let mut buf = Vec::new(); - #[cfg(not(feature = "asynchronous"))] - let event = 
r.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_event(&mut buf).await }); - - match event { + match r.read_event(&mut buf) { Err(_) if r.buffer_position() == 4 => { // error at char 5: no closing --> tag found assert!(true); @@ -584,28 +481,15 @@ fn test_buf_position_err_comment_2_buf() { let mut r = Reader::from_str(" tag found assert!(true); @@ -629,16 +513,7 @@ fn test_buf_position_err_comment_trim_text() { let mut buf = Vec::new(); - #[cfg(not(feature = "asynchronous"))] - let event = r.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_event(&mut buf).await }); - - match event { + match r.read_event(&mut buf) { Err(_) if r.buffer_position() == 7 => { // error at char 5: no closing --> tag found assert!(true); @@ -660,27 +535,12 @@ fn test_namespace() { let mut buf = Vec::new(); let mut ns_buf = Vec::new(); - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - - #[cfg(not(feature = "asynchronous"))] - let event = r.read_namespaced_event(&mut buf, &mut ns_buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); - - if let Ok((None, Start(_))) = event { + if let Ok((None, Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { } else { assert!(false, "expecting start element with no namespace"); } - #[cfg(not(feature = "asynchronous"))] - let event = r.read_namespaced_event(&mut buf, &mut ns_buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); - - if let Ok((Some(a), Start(_))) = event { + if let 
Ok((Some(a), Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { if &*a == b"www1" { assert!(true); } else { @@ -698,30 +558,15 @@ fn test_default_namespace() { let mut buf = Vec::new(); let mut ns_buf = Vec::new(); - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); // - #[cfg(not(feature = "asynchronous"))] - let event = r.read_namespaced_event(&mut buf, &mut ns_buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); - - if let Ok((None, Start(_))) = event { + if let Ok((None, Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { } else { assert!(false, "expecting outer start element with no namespace"); } // - - #[cfg(not(feature = "asynchronous"))] - let event = r.read_namespaced_event(&mut buf, &mut ns_buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); - - if let Ok((Some(a), Start(_))) = event { + if let Ok((Some(a), Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { if &*a == b"www1" { assert!(true); } else { @@ -732,14 +577,7 @@ fn test_default_namespace() { } // - - #[cfg(not(feature = "asynchronous"))] - let event = r.read_namespaced_event(&mut buf, &mut ns_buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); - - if let Ok((Some(a), End(_))) = event { + if let Ok((Some(a), End(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { if &*a == b"www1" { assert!(true); } else { @@ -751,14 +589,7 @@ fn test_default_namespace() { // very important: a should not be in any namespace. The default namespace only applies to // the sub-document it is defined on. 
- - #[cfg(not(feature = "asynchronous"))] - let event = r.read_namespaced_event(&mut buf, &mut ns_buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); - - if let Ok((None, End(_))) = event { + if let Ok((None, End(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { } else { assert!(false, "expecting outer end element with no namespace"); } @@ -771,16 +602,8 @@ fn test_default_namespace_reset() { let mut buf = Vec::new(); let mut ns_buf = Vec::new(); - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - #[cfg(not(feature = "asynchronous"))] - let event = r.read_namespaced_event(&mut buf, &mut ns_buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); - - if let Ok((Some(a), Start(_))) = event { + if let Ok((Some(a), Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { assert_eq!( &a[..], b"www1", @@ -790,35 +613,17 @@ fn test_default_namespace_reset() { panic!("expecting outer start element with to resolve to 'www1'"); } - #[cfg(not(feature = "asynchronous"))] - let event = r.read_namespaced_event(&mut buf, &mut ns_buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); - - match event { + match r.read_namespaced_event(&mut buf, &mut ns_buf) { Ok((None, Start(_))) => (), e => panic!("expecting inner start element, got {:?}", e), } - #[cfg(not(feature = "asynchronous"))] - let event = r.read_namespaced_event(&mut buf, &mut ns_buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); - - if let Ok((None, End(_))) = event { + if let Ok((None, End(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { } else { assert!(false, "expecting inner end element"); } - 
#[cfg(not(feature = "asynchronous"))] - let event = r.read_namespaced_event(&mut buf, &mut ns_buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); - - if let Ok((Some(a), End(_))) = event { + if let Ok((Some(a), End(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { assert_eq!( &a[..], b"www1", @@ -836,16 +641,7 @@ fn test_escaped_content() { next_eq!(r, Start, b"a"); let mut buf = Vec::new(); - #[cfg(not(feature = "asynchronous"))] - let event = r.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_event(&mut buf).await }); - - match event { + match r.read_event(&mut buf) { Ok(Text(e)) => { if &*e != b"<test>" { panic!( @@ -894,25 +690,17 @@ fn test_read_write_roundtrip_results_in_identity() -> Result<()> { reader.trim_text(false).expand_empty_elements(false); let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); loop { - #[cfg(not(feature = "asynchronous"))] - let event = reader.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { reader.read_event(&mut buf).await }); - - match event { - Ok(Eof) => break, - Ok(e) => assert!(writer.write_event(e).is_ok()), - Err(e) => panic!("{}", e), + match reader.read_event(&mut buf)? 
{ + Eof => break, + e => assert!(writer.write_event(e).is_ok()), } } let result = writer.into_inner().into_inner(); assert_eq!(result, input.as_bytes()); + Ok(()) } @@ -931,25 +719,17 @@ fn test_read_write_roundtrip() -> Result<()> { reader.trim_text(false).expand_empty_elements(false); let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); loop { - #[cfg(not(feature = "asynchronous"))] - let event = reader.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { reader.read_event(&mut buf).await }); - - match event { - Ok(Eof) => break, - Ok(e) => assert!(writer.write_event(e).is_ok()), - Err(e) => panic!("{}", e), + match reader.read_event(&mut buf)? { + Eof => break, + e => assert!(writer.write_event(e).is_ok()), } } let result = writer.into_inner().into_inner(); assert_eq!(String::from_utf8(result).unwrap(), input.to_string()); + Ok(()) } @@ -968,31 +748,23 @@ fn test_read_write_roundtrip_escape() -> Result<()> { reader.trim_text(false).expand_empty_elements(false); let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); loop { - #[cfg(not(feature = "asynchronous"))] - let event = reader.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { reader.read_event(&mut buf).await }); - - match event { - Ok(Eof) => break, - Ok(Text(e)) => { + match reader.read_event(&mut buf)? 
{ + Eof => break, + Text(e) => { let t = e.escaped(); assert!(writer .write_event(Event::Text(BytesText::from_escaped(t.to_vec()))) .is_ok()); } - Ok(e) => assert!(writer.write_event(e).is_ok()), - Err(e) => panic!("{}", e), + e => assert!(writer.write_event(e).is_ok()), } } let result = writer.into_inner().into_inner(); assert_eq!(String::from_utf8(result).unwrap(), input.to_string()); + Ok(()) } @@ -1011,31 +783,23 @@ fn test_read_write_roundtrip_escape_text() -> Result<()> { reader.trim_text(false).expand_empty_elements(false); let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); loop { - #[cfg(not(feature = "asynchronous"))] - let event = reader.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { reader.read_event(&mut buf).await }); - - match event { - Ok(Eof) => break, - Ok(Text(e)) => { + match reader.read_event(&mut buf)? 
{ + Eof => break, + Text(e) => { let t = e.unescape_and_decode(&reader).unwrap(); assert!(writer .write_event(Event::Text(BytesText::from_plain_str(&t))) .is_ok()); } - Ok(e) => assert!(writer.write_event(e).is_ok()), - Err(e) => panic!("{}", e), + e => assert!(writer.write_event(e).is_ok()), } } let result = writer.into_inner().into_inner(); assert_eq!(String::from_utf8(result).unwrap(), input.to_string()); + Ok(()) } @@ -1045,16 +809,7 @@ fn test_closing_bracket_in_single_quote_attr() { r.trim_text(true); let mut buf = Vec::new(); - #[cfg(not(feature = "asynchronous"))] - let event = r.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_event(&mut buf).await }); - - match event { + match r.read_event(&mut buf) { Ok(Start(e)) => { let mut attrs = e.attributes(); match attrs.next() { @@ -1078,16 +833,7 @@ fn test_closing_bracket_in_double_quote_attr() { r.trim_text(true); let mut buf = Vec::new(); - #[cfg(not(feature = "asynchronous"))] - let event = r.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_event(&mut buf).await }); - - match event { + match r.read_event(&mut buf) { Ok(Start(e)) => { let mut attrs = e.attributes(); match attrs.next() { @@ -1111,16 +857,7 @@ fn test_closing_bracket_in_double_quote_mixed() { r.trim_text(true); let mut buf = Vec::new(); - #[cfg(not(feature = "asynchronous"))] - let event = r.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_event(&mut buf).await }); - - match event { + match r.read_event(&mut buf) { Ok(Start(e)) => { let mut attrs 
= e.attributes(); match attrs.next() { @@ -1144,16 +881,7 @@ fn test_closing_bracket_in_single_quote_mixed() { r.trim_text(true); let mut buf = Vec::new(); - #[cfg(not(feature = "asynchronous"))] - let event = r.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { r.read_event(&mut buf).await }); - - match event { + match r.read_event(&mut buf) { Ok(Start(e)) => { let mut attrs = e.attributes(); match attrs.next() { @@ -1181,17 +909,9 @@ fn test_unescape_and_decode_without_bom_removes_utf8_bom() { let mut txt = Vec::new(); let mut buf = Vec::new(); - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); loop { - #[cfg(not(feature = "asynchronous"))] - let event = reader.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { reader.read_event(&mut buf).await }); - - match event { + match reader.read_event(&mut buf) { Ok(Event::Text(e)) => txt.push(e.unescape_and_decode_without_bom(&reader).unwrap()), Ok(Event::Eof) => break, _ => (), @@ -1208,17 +928,9 @@ fn test_unescape_and_decode_without_bom_removes_utf16be_bom() { let mut txt = Vec::new(); let mut buf = Vec::new(); - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); loop { - #[cfg(not(feature = "asynchronous"))] - let event = reader.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { reader.read_event(&mut buf).await }); - - match event { + match reader.read_event(&mut buf) { Ok(Event::Text(e)) => txt.push(e.unescape_and_decode_without_bom(&mut reader).unwrap()), Ok(Event::Eof) => break, _ => (), @@ -1235,17 +947,9 @@ fn test_unescape_and_decode_without_bom_removes_utf16le_bom() { let mut txt = Vec::new(); let mut buf = Vec::new(); - #[cfg(feature = 
"asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); loop { - #[cfg(not(feature = "asynchronous"))] - let event = reader.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { reader.read_event(&mut buf).await }); - - match event { + match reader.read_event(&mut buf) { Ok(Event::Text(e)) => txt.push(e.unescape_and_decode_without_bom(&mut reader).unwrap()), Ok(Event::Eof) => break, _ => (), @@ -1264,17 +968,9 @@ fn test_unescape_and_decode_without_bom_does_nothing_if_no_bom_exists() { let mut txt = Vec::new(); let mut buf = Vec::new(); - #[cfg(feature = "asynchronous")] - let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); loop { - #[cfg(not(feature = "asynchronous"))] - let event = reader.read_event(&mut buf); - - #[cfg(feature = "asynchronous")] - let event = runtime.block_on(async { reader.read_event(&mut buf).await }); - - match event { + match reader.read_event(&mut buf) { Ok(Event::Text(e)) => txt.push(e.unescape_and_decode_without_bom(&mut reader).unwrap()), Ok(Event::Eof) => break, _ => (), diff --git a/tests/unit_tests_async.rs b/tests/unit_tests_async.rs new file mode 100644 index 00000000..f875c0d1 --- /dev/null +++ b/tests/unit_tests_async.rs @@ -0,0 +1,1026 @@ +#[cfg(feature = "asynchronous")] +use quick_xml::events::Event::*; +#[cfg(feature = "asynchronous")] +use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event}; +#[cfg(feature = "asynchronous")] +use quick_xml::{AsyncReader, Result, Writer}; +#[cfg(feature = "asynchronous")] +use std::io::Cursor; +#[cfg(feature = "asynchronous")] +use std::str::from_utf8; + +#[cfg(feature = "asynchronous")] +macro_rules! 
next_eq_name { + ($r:expr, $t:tt, $bytes:expr) => { + let mut buf = Vec::new(); + + match $r.read_event(&mut buf).await.unwrap() { + $t(ref e) if e.name() == $bytes => (), + e => panic!( + "expecting {}({:?}), found {:?}", + stringify!($t), + from_utf8($bytes), + e + ), + } + buf.clear(); + }; +} + +#[cfg(feature = "asynchronous")] +macro_rules! next_eq_content { + ($r:expr, $t:tt, $bytes:expr) => { + let mut buf = Vec::new(); + + match $r.read_event(&mut buf).await.unwrap() { + $t(ref e) if &**e == $bytes => (), + e => panic!( + "expecting {}({:?}), found {:?}", + stringify!($t), + from_utf8($bytes), + e + ), + } + buf.clear(); + }; +} + +#[cfg(feature = "asynchronous")] +macro_rules! next_eq { + ($r:expr, Start, $bytes:expr) => (next_eq_name!($r, Start, $bytes);); + ($r:expr, End, $bytes:expr) => (next_eq_name!($r, End, $bytes);); + ($r:expr, Empty, $bytes:expr) => (next_eq_name!($r, Empty, $bytes);); + ($r:expr, Comment, $bytes:expr) => (next_eq_content!($r, Comment, $bytes);); + ($r:expr, Text, $bytes:expr) => (next_eq_content!($r, Text, $bytes);); + ($r:expr, CData, $bytes:expr) => (next_eq_content!($r, CData, $bytes);); + ($r:expr, $t0:tt, $b0:expr, $($t:tt, $bytes:expr),*) => { + next_eq!($r, $t0, $b0); + next_eq!($r, $($t, $bytes),*); + }; +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_start() { + let mut r = AsyncReader::from_str(""); + r.trim_text(true); + next_eq!(r, Start, b"a"); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_start_end() { + let mut r = AsyncReader::from_str(""); + r.trim_text(true); + next_eq!(r, Start, b"a", End, b"a"); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_start_end_with_ws() { + let mut r = AsyncReader::from_str(""); + r.trim_text(true); + next_eq!(r, Start, b"a", End, b"a"); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_start_end_attr() { + let mut r = AsyncReader::from_str(""); + r.trim_text(true); + next_eq!(r, Start, b"a", End, 
b"a"); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_empty() { + let mut r = AsyncReader::from_str(""); + r.trim_text(true).expand_empty_elements(false); + next_eq!(r, Empty, b"a"); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_empty_can_be_expanded() { + let mut r = AsyncReader::from_str(""); + r.trim_text(true).expand_empty_elements(true); + next_eq!(r, Start, b"a", End, b"a"); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_empty_attr() { + let mut r = AsyncReader::from_str(""); + r.trim_text(true).expand_empty_elements(false); + next_eq!(r, Empty, b"a"); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_start_end_comment() { + let mut r = AsyncReader::from_str(" "); + r.trim_text(true).expand_empty_elements(false); + next_eq!(r, Start, b"b", Empty, b"a", Empty, b"a", Comment, b"t", End, b"b"); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_start_txt_end() { + let mut r = AsyncReader::from_str("test"); + r.trim_text(true); + next_eq!(r, Start, b"a", Text, b"test", End, b"a"); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_comment() { + let mut r = AsyncReader::from_str(""); + r.trim_text(true); + next_eq!(r, Comment, b"test"); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_xml_decl() { + let mut r = AsyncReader::from_str(""); + r.trim_text(true); + let mut buf = Vec::new(); + + match r.read_event(&mut buf).await.unwrap() { + Decl(ref e) => { + match e.version() { + Ok(v) => assert_eq!( + &*v, + b"1.0", + "expecting version '1.0', got '{:?}", + from_utf8(&*v) + ), + Err(e) => assert!(false, "{:?}", e), + } + match e.encoding() { + Some(Ok(v)) => assert_eq!( + &*v, + b"utf-8", + "expecting encoding 'utf-8', got '{:?}", + from_utf8(&*v) + ), + Some(Err(e)) => panic!("{:?}", e), + None => panic!("cannot find encoding"), + } + match e.standalone() { + None => (), + e => panic!("doesn't expect standalone, got 
{:?}", e), + } + } + _ => panic!("unable to parse XmlDecl"), + } +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_trim_test() { + let txt = " "; + let mut r = AsyncReader::from_str(txt); + r.trim_text(true); + next_eq!(r, Start, b"a", Start, b"b", End, b"b", End, b"a"); + + let mut r = AsyncReader::from_str(txt); + r.trim_text(false); + next_eq!( + r, Text, b"", Start, b"a", Text, b"", Start, b"b", Text, b" ", End, b"b", Text, b"", End, + b"a" + ); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_cdata() { + let mut r = AsyncReader::from_str(""); + r.trim_text(true); + next_eq!(r, CData, b"test"); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_cdata_open_close() { + let mut r = AsyncReader::from_str(" test]]>"); + r.trim_text(true); + next_eq!(r, CData, b"test <> test"); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_start_attr() { + let mut r = AsyncReader::from_str(""); + r.trim_text(true); + next_eq!(r, Start, b"a"); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_nested() { + let mut r = AsyncReader::from_str("test"); + r.trim_text(true).expand_empty_elements(false); + next_eq!(r, Start, b"a", Start, b"b", Text, b"test", End, b"b", Empty, b"c", End, b"a"); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_writer() -> Result<()> { + let txt = include_str!("../tests/documents/test_writer.xml").trim(); + let mut reader = AsyncReader::from_str(txt); + reader.trim_text(true); + let mut writer = Writer::new(Cursor::new(Vec::new())); + let mut buf = Vec::new(); + + loop { + match reader.read_event(&mut buf).await? 
{ + Eof => break, + e => assert!(writer.write_event(e).is_ok()), + } + } + + let result = writer.into_inner().into_inner(); + assert_eq!(result, txt.as_bytes()); + + Ok(()) +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_writer_borrow() -> Result<()> { + let txt = include_str!("../tests/documents/test_writer.xml").trim(); + let mut reader = AsyncReader::from_str(txt); + reader.trim_text(true); + let mut writer = Writer::new(Cursor::new(Vec::new())); + let mut buf = Vec::new(); + + loop { + match reader.read_event(&mut buf).await? { + Eof => break, + e => assert!(writer.write_event(&e).is_ok()), // either `e` or `&e` + } + } + + let result = writer.into_inner().into_inner(); + assert_eq!(result, txt.as_bytes()); + + Ok(()) +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_writer_indent() -> Result<()> { + let txt = include_str!("../tests/documents/test_writer_indent.xml"); + let mut reader = AsyncReader::from_str(txt); + reader.trim_text(true); + let mut writer = Writer::new_with_indent(Cursor::new(Vec::new()), b' ', 4); + let mut buf = Vec::new(); + + loop { + match reader.read_event(&mut buf).await? { + Eof => break, + e => assert!(writer.write_event(e).is_ok()), + } + } + + let result = writer.into_inner().into_inner(); + assert_eq!(result, txt.as_bytes()); + + Ok(()) +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_writer_indent_cdata() -> Result<()> { + let txt = include_str!("../tests/documents/test_writer_indent_cdata.xml"); + let mut reader = AsyncReader::from_str(txt); + reader.trim_text(true); + let mut writer = Writer::new_with_indent(Cursor::new(Vec::new()), b' ', 4); + let mut buf = Vec::new(); + + loop { + match reader.read_event(&mut buf).await? 
{ + Eof => break, + e => assert!(writer.write_event(e).is_ok()), + } + } + + let result = writer.into_inner().into_inner(); + + #[cfg(windows)] + assert!(result.into_iter().eq(txt.bytes().filter(|b| *b != 13))); + + #[cfg(not(windows))] + assert_eq!(result, txt.as_bytes()); + + Ok(()) +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_write_empty_element_attrs() -> Result<()> { + let str_from = r#""#; + let expected = r#""#; + let mut reader = AsyncReader::from_str(str_from); + reader.expand_empty_elements(false); + let mut writer = Writer::new(Cursor::new(Vec::new())); + let mut buf = Vec::new(); + + loop { + match reader.read_event(&mut buf).await? { + Eof => break, + e => assert!(writer.write_event(e).is_ok()), + } + } + + let result = writer.into_inner().into_inner(); + assert_eq!(String::from_utf8(result).unwrap(), expected); + + Ok(()) +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_write_attrs() -> Result<()> { + let str_from = r#""#; + let expected = r#""#; + let mut reader = AsyncReader::from_str(str_from); + reader.trim_text(true); + let mut writer = Writer::new(Cursor::new(Vec::new())); + let mut buf = Vec::new(); + + loop { + let event = match reader.read_event(&mut buf).await? 
{ + Eof => break, + Start(elem) => { + let mut attrs = elem.attributes().collect::>>().unwrap(); + attrs.extend_from_slice(&[("a", "b").into(), ("c", "d").into()]); + let mut elem = BytesStart::owned(b"copy".to_vec(), 4); + elem.extend_attributes(attrs); + elem.push_attribute(("x", "y\"z")); + Start(elem) + } + End(_) => End(BytesEnd::borrowed(b"copy")), + e => e, + }; + assert!(writer.write_event(event).is_ok()); + } + + let result = writer.into_inner().into_inner(); + assert_eq!(result, expected.as_bytes()); + + Ok(()) +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_new_xml_decl_full() { + let mut writer = Writer::new(Vec::new()); + writer + .write_event(Decl(BytesDecl::new(b"1.2", Some(b"utf-X"), Some(b"yo")))) + .expect("writing xml decl should succeed"); + + let result = writer.into_inner(); + assert_eq!( + String::from_utf8(result).expect("utf-8 output"), + "".to_owned(), + "writer output (LHS)" + ); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_new_xml_decl_standalone() { + let mut writer = Writer::new(Vec::new()); + writer + .write_event(Decl(BytesDecl::new(b"1.2", None, Some(b"yo")))) + .expect("writing xml decl should succeed"); + + let result = writer.into_inner(); + assert_eq!( + String::from_utf8(result).expect("utf-8 output"), + "".to_owned(), + "writer output (LHS)" + ); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_new_xml_decl_encoding() { + let mut writer = Writer::new(Vec::new()); + writer + .write_event(Decl(BytesDecl::new(b"1.2", Some(b"utf-X"), None))) + .expect("writing xml decl should succeed"); + + let result = writer.into_inner(); + assert_eq!( + String::from_utf8(result).expect("utf-8 output"), + "".to_owned(), + "writer output (LHS)" + ); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_new_xml_decl_version() { + let mut writer = Writer::new(Vec::new()); + writer + .write_event(Decl(BytesDecl::new(b"1.2", None, None))) + .expect("writing xml decl 
should succeed"); + + let result = writer.into_inner(); + assert_eq!( + String::from_utf8(result).expect("utf-8 output"), + "".to_owned(), + "writer output (LHS)" + ); +} + +/// This test ensures that empty XML declaration attribute values are not a problem. +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_new_xml_decl_empty() { + let mut writer = Writer::new(Vec::new()); + // An empty version should arguably be an error, but we don't expect anyone to actually supply + // an empty version. + writer + .write_event(Decl(BytesDecl::new(b"", Some(b""), Some(b"")))) + .expect("writing xml decl should succeed"); + + let result = writer.into_inner(); + assert_eq!( + String::from_utf8(result).expect("utf-8 output"), + "".to_owned(), + "writer output (LHS)" + ); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_buf_position_err_end_element() { + let mut r = AsyncReader::from_str(""); + r.trim_text(true).check_end_names(true); + + let mut buf = Vec::new(); + + match r.read_event(&mut buf).await { + Err(_) if r.buffer_position() == 2 => (), // error at char 2: no opening tag + Err(e) => panic!( + "expecting buf_pos = 2, found {}, err: {:?}", + r.buffer_position(), + e + ), + e => panic!("expecting error, found {:?}", e), + } +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_buf_position_err_comment() { + let mut r = AsyncReader::from_str(" tag found + assert!(true); + } + Err(e) => panic!( + "expecting buf_pos = 5, found {}, err: {:?}", + r.buffer_position(), + e + ), + e => assert!(false, "expecting error, found {:?}", e), + } +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_buf_position_err_comment_2_buf() { + let mut r = AsyncReader::from_str(" tag found + assert!(true); + } + Err(e) => panic!( + "expecting buf_pos = 4, found {}, err: {:?}", + r.buffer_position(), + e + ), + e => assert!(false, "expecting error, found {:?}", e), + } +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn 
test_buf_position_err_comment_trim_text() { + let mut r = AsyncReader::from_str("\r\n tag found + assert!(true); + } + Err(e) => panic!( + "expecting buf_pos = 5, found {}, err: {:?}", + r.buffer_position(), + e + ), + e => assert!(false, "expecting error, found {:?}", e), + } +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_namespace() { + let mut r = AsyncReader::from_str("in namespace!"); + r.trim_text(true); + + let mut buf = Vec::new(); + let mut ns_buf = Vec::new(); + + if let Ok((None, Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf).await { + } else { + assert!(false, "expecting start element with no namespace"); + } + + if let Ok((Some(a), Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf).await { + if &*a == b"www1" { + assert!(true); + } else { + assert!(false, "expecting namespace to resolve to 'www1'"); + } + } else { + assert!(false, "expecting namespace resolution"); + } +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_default_namespace() { + let mut r = AsyncReader::from_str(""); + r.trim_text(true); + + let mut buf = Vec::new(); + let mut ns_buf = Vec::new(); + + // + if let Ok((None, Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf).await { + } else { + assert!(false, "expecting outer start element with no namespace"); + } + + // + if let Ok((Some(a), Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf).await { + if &*a == b"www1" { + assert!(true); + } else { + assert!(false, "expecting namespace to resolve to 'www1'"); + } + } else { + assert!(false, "expecting namespace resolution"); + } + + // + if let Ok((Some(a), End(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf).await { + if &*a == b"www1" { + assert!(true); + } else { + assert!(false, "expecting namespace to resolve to 'www1'"); + } + } else { + assert!(false, "expecting namespace resolution"); + } + + // very important: a should not be in any namespace. 
The default namespace only applies to + // the sub-document it is defined on. + if let Ok((None, End(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf).await { + } else { + assert!(false, "expecting outer end element with no namespace"); + } +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_default_namespace_reset() { + let mut r = AsyncReader::from_str(""); + r.trim_text(true); + + let mut buf = Vec::new(); + let mut ns_buf = Vec::new(); + + if let Ok((Some(a), Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf).await { + assert_eq!( + &a[..], + b"www1", + "expecting outer start element with to resolve to 'www1'" + ); + } else { + panic!("expecting outer start element with to resolve to 'www1'"); + } + + match r.read_namespaced_event(&mut buf, &mut ns_buf).await { + Ok((None, Start(_))) => (), + e => panic!("expecting inner start element, got {:?}", e), + } + + if let Ok((None, End(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf).await { + } else { + assert!(false, "expecting inner end element"); + } + + if let Ok((Some(a), End(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf).await { + assert_eq!( + &a[..], + b"www1", + "expecting outer end element with to resolve to 'www1'" + ); + } else { + panic!("expecting outer end element with to resolve to 'www1'"); + } +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_escaped_content() { + let mut r = AsyncReader::from_str("<test>"); + r.trim_text(true); + next_eq!(r, Start, b"a"); + let mut buf = Vec::new(); + + match r.read_event(&mut buf).await { + Ok(Text(e)) => { + if &*e != b"<test>" { + panic!( + "content unexpected: expecting '<test>', got '{:?}'", + from_utf8(&*e) + ); + } + match e.unescaped() { + Ok(ref c) => { + if &**c != b"" { + panic!( + "unescaped content unexpected: expecting '<test<', got '{:?}'", + from_utf8(c) + ) + } + } + Err(e) => panic!( + "cannot escape content at position {}: {:?}", + r.buffer_position(), + e + ), + } + } + Ok(e) => 
panic!("Expecting text event, got {:?}", e), + Err(e) => panic!( + "Cannot get next event at position {}: {:?}", + r.buffer_position(), + e + ), + } + next_eq!(r, End, b"a"); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_read_write_roundtrip_results_in_identity() -> Result<()> { + let input = r#" + +
+
+
+
data
+
+ "#; + + let mut reader = AsyncReader::from_str(input); + reader.trim_text(false).expand_empty_elements(false); + let mut writer = Writer::new(Cursor::new(Vec::new())); + let mut buf = Vec::new(); + + loop { + match reader.read_event(&mut buf).await? { + Eof => break, + e => assert!(writer.write_event(e).is_ok()), + } + } + + let result = writer.into_inner().into_inner(); + assert_eq!(result, input.as_bytes()); + + Ok(()) +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_read_write_roundtrip() -> Result<()> { + let input = r#" + +
+
+
+
data <escaped>
+
+ "#; + + let mut reader = AsyncReader::from_str(input); + reader.trim_text(false).expand_empty_elements(false); + let mut writer = Writer::new(Cursor::new(Vec::new())); + let mut buf = Vec::new(); + + loop { + match reader.read_event(&mut buf).await? { + Eof => break, + e => assert!(writer.write_event(e).is_ok()), + } + } + + let result = writer.into_inner().into_inner(); + assert_eq!(String::from_utf8(result).unwrap(), input.to_string()); + + Ok(()) +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_read_write_roundtrip_escape() -> Result<()> { + let input = r#" + +
+
+
+
data <escaped>
+
+ "#; + + let mut reader = AsyncReader::from_str(input); + reader.trim_text(false).expand_empty_elements(false); + let mut writer = Writer::new(Cursor::new(Vec::new())); + let mut buf = Vec::new(); + + loop { + match reader.read_event(&mut buf).await? { + Eof => break, + Text(e) => { + let t = e.escaped(); + assert!(writer + .write_event(Event::Text(BytesText::from_escaped(t.to_vec()))) + .is_ok()); + } + e => assert!(writer.write_event(e).is_ok()), + } + } + + let result = writer.into_inner().into_inner(); + assert_eq!(String::from_utf8(result).unwrap(), input.to_string()); + + Ok(()) +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_read_write_roundtrip_escape_text() -> Result<()> { + let input = r#" + +
+
+
+
data <escaped>
+
+ "#; + + let mut reader = AsyncReader::from_str(input); + reader.trim_text(false).expand_empty_elements(false); + let mut writer = Writer::new(Cursor::new(Vec::new())); + let mut buf = Vec::new(); + + loop { + match reader.read_event(&mut buf).await? { + Eof => break, + Text(e) => { + let t = e.unescape_and_decode(&reader).unwrap(); + assert!(writer + .write_event(Event::Text(BytesText::from_plain_str(&t))) + .is_ok()); + } + e => assert!(writer.write_event(e).is_ok()), + } + } + + let result = writer.into_inner().into_inner(); + assert_eq!(String::from_utf8(result).unwrap(), input.to_string()); + + Ok(()) +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_closing_bracket_in_single_quote_attr() { + let mut r = AsyncReader::from_str(""); + r.trim_text(true); + let mut buf = Vec::new(); + + match r.read_event(&mut buf).await { + Ok(Start(e)) => { + let mut attrs = e.attributes(); + match attrs.next() { + Some(Ok(attr)) => assert_eq!(attr, ("attr".as_bytes(), ">".as_bytes()).into()), + x => panic!("expected attribute 'attr', got {:?}", x), + } + match attrs.next() { + Some(Ok(attr)) => assert_eq!(attr, ("check".as_bytes(), "2".as_bytes()).into()), + x => panic!("expected attribute 'check', got {:?}", x), + } + assert!(attrs.next().is_none(), "expected only two attributes"); + } + x => panic!("expected , got {:?}", x), + } + next_eq!(r, End, b"a"); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_closing_bracket_in_double_quote_attr() { + let mut r = AsyncReader::from_str("\" check=\"2\">"); + r.trim_text(true); + let mut buf = Vec::new(); + + match r.read_event(&mut buf).await { + Ok(Start(e)) => { + let mut attrs = e.attributes(); + match attrs.next() { + Some(Ok(attr)) => assert_eq!(attr, ("attr".as_bytes(), ">".as_bytes()).into()), + x => panic!("expected attribute 'attr', got {:?}", x), + } + match attrs.next() { + Some(Ok(attr)) => assert_eq!(attr, ("check".as_bytes(), "2".as_bytes()).into()), + x => panic!("expected 
attribute 'check', got {:?}", x), + } + assert!(attrs.next().is_none(), "expected only two attributes"); + } + x => panic!("expected , got {:?}", x), + } + next_eq!(r, End, b"a"); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_closing_bracket_in_double_quote_mixed() { + let mut r = AsyncReader::from_str("'\" check=\"'2'\">"); + r.trim_text(true); + let mut buf = Vec::new(); + + match r.read_event(&mut buf).await { + Ok(Start(e)) => { + let mut attrs = e.attributes(); + match attrs.next() { + Some(Ok(attr)) => assert_eq!(attr, ("attr".as_bytes(), "'>'".as_bytes()).into()), + x => panic!("expected attribute 'attr', got {:?}", x), + } + match attrs.next() { + Some(Ok(attr)) => assert_eq!(attr, ("check".as_bytes(), "'2'".as_bytes()).into()), + x => panic!("expected attribute 'check', got {:?}", x), + } + assert!(attrs.next().is_none(), "expected only two attributes"); + } + x => panic!("expected , got {:?}", x), + } + next_eq!(r, End, b"a"); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +async fn test_closing_bracket_in_single_quote_mixed() { + let mut r = AsyncReader::from_str(""); + r.trim_text(true); + let mut buf = Vec::new(); + + match r.read_event(&mut buf).await { + Ok(Start(e)) => { + let mut attrs = e.attributes(); + match attrs.next() { + Some(Ok(attr)) => assert_eq!(attr, ("attr".as_bytes(), "\">\"".as_bytes()).into()), + x => panic!("expected attribute 'attr', got {:?}", x), + } + match attrs.next() { + Some(Ok(attr)) => assert_eq!(attr, ("check".as_bytes(), "\"2\"".as_bytes()).into()), + x => panic!("expected attribute 'check', got {:?}", x), + } + assert!(attrs.next().is_none(), "expected only two attributes"); + } + x => panic!("expected , got {:?}", x), + } + next_eq!(r, End, b"a"); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +#[cfg(not(feature = "encoding"))] +async fn test_unescape_and_decode_without_bom_removes_utf8_bom() { + let input: &str = std::str::from_utf8(b"\xEF\xBB\xBF").unwrap(); + + let mut reader 
= AsyncReader::from_str(&input); + reader.trim_text(true); + + let mut txt = Vec::new(); + let mut buf = Vec::new(); + + loop { + match reader.read_event(&mut buf).await { + Ok(Event::Text(e)) => txt.push(e.unescape_and_decode_without_bom(&reader).unwrap()), + Ok(Event::Eof) => break, + _ => (), + } + } + assert_eq!(txt, vec![""]); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +#[cfg(feature = "encoding")] +async fn test_unescape_and_decode_without_bom_removes_utf16be_bom() { + let mut reader = AsyncReader::from_file("./tests/documents/utf16be.xml") + .await + .unwrap(); + reader.trim_text(true); + + let mut txt = Vec::new(); + let mut buf = Vec::new(); + + loop { + match reader.read_event(&mut buf).await { + Ok(Event::Text(e)) => txt.push(e.unescape_and_decode_without_bom(&mut reader).unwrap()), + Ok(Event::Eof) => break, + _ => (), + } + } + assert_eq!(txt[0], ""); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +#[cfg(feature = "encoding")] +async fn test_unescape_and_decode_without_bom_removes_utf16le_bom() { + let mut reader = AsyncReader::from_file("./tests/documents/utf16le.xml") + .await + .unwrap(); + reader.trim_text(true); + + let mut txt = Vec::new(); + let mut buf = Vec::new(); + + loop { + match reader.read_event(&mut buf).await { + Ok(Event::Text(e)) => txt.push(e.unescape_and_decode_without_bom(&mut reader).unwrap()), + Ok(Event::Eof) => break, + _ => (), + } + } + assert_eq!(txt[0], ""); +} + +#[cfg(feature = "asynchronous")] +#[tokio::test] +#[cfg(not(feature = "encoding"))] +async fn test_unescape_and_decode_without_bom_does_nothing_if_no_bom_exists() { + let input: &str = std::str::from_utf8(b"").unwrap(); + + let mut reader = AsyncReader::from_str(&input); + reader.trim_text(true); + + let mut txt = Vec::new(); + let mut buf = Vec::new(); + + loop { + match reader.read_event(&mut buf).await { + Ok(Event::Text(e)) => txt.push(e.unescape_and_decode_without_bom(&mut reader).unwrap()), + Ok(Event::Eof) => break, + _ => (), + } + } + 
assert_eq!(txt.is_empty(), true); +} diff --git a/tests/xmlrs_reader_tests.rs b/tests/xmlrs_reader_tests.rs index 78a24b25..3da6623f 100644 --- a/tests/xmlrs_reader_tests.rs +++ b/tests/xmlrs_reader_tests.rs @@ -385,6 +385,7 @@ async fn test_async(input: &[u8], output: &[u8], is_short: bool) { loop { buf.clear(); +<<<<<<< HEAD let event = reader.read_namespaced_event(&mut buf, &mut ns_buffer).await; @@ -416,6 +417,39 @@ async fn test_async(input: &[u8], output: &[u8], is_short: bool) { let mut buf = Vec::new(); +======= + + let event = reader.read_namespaced_event(&mut buf, &mut ns_buffer).await; + + let line = xmlrs_display(&event); + if let Some((n, spec)) = spec_lines.next() { + if spec.trim() == "EndDocument" { + break; + } + if line.trim() != spec.trim() { + panic!( + "\n-------------------\n\ + Unexpected event at line {}:\n\ + Expected: {}\nFound: {}\n\ + -------------------\n", + n + 1, + spec, + line + ); + } + } else { + if line == "EndDocument" { + break; + } + panic!("Unexpected event: {}", line); + } + + if !is_short && line.starts_with("StartDocument") { + // advance next Characters(empty space) ... + + let mut buf = Vec::new(); + +>>>>>>> ee5a073... 
Introduce an AsyncReader instead of overloading the Reader if let Ok(Event::Text(ref e)) = reader.read_event(&mut buf).await { if e.iter().any(|b| match *b { b' ' | b'\r' | b'\n' | b'\t' => false, From 5457bed93c5b6eb72dbc3fc1628d4c578f66a521 Mon Sep 17 00:00:00 2001 From: greg Date: Wed, 25 Aug 2021 07:48:16 -0500 Subject: [PATCH 3/8] cleanup rebase --- src/events/attributes.rs | 21 ++++++++++++--------- src/events/mod.rs | 3 +-- src/lib.rs | 5 ----- src/reader/sync.rs | 4 ++-- tests/test.rs | 1 + tests/unit_tests.rs | 3 --- tests/xmlrs_reader_tests.rs | 34 ---------------------------------- 7 files changed, 16 insertions(+), 55 deletions(-) diff --git a/src/events/attributes.rs b/src/events/attributes.rs index 2d44435b..414340de 100644 --- a/src/events/attributes.rs +++ b/src/events/attributes.rs @@ -4,10 +4,8 @@ use crate::errors::{Error, Result}; use crate::escape::{do_unescape, escape}; -use crate::reader::{is_whitespace, Decode, Reader}; -use std::borrow::Cow; -use std::collections::HashMap; -use std::ops::Range; +use crate::reader::{is_whitespace, Decode}; +use std::{borrow::Cow, collections::HashMap, ops::Range}; /// Iterator over XML attributes. 
/// @@ -169,7 +167,8 @@ impl<'a> Attribute<'a> { custom_entities: Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode(&*self.value); - let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; + let unescaped = + do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) } @@ -180,7 +179,8 @@ impl<'a> Attribute<'a> { custom_entities: Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode(&*self.value)?; - let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; + let unescaped = + do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) } @@ -255,7 +255,8 @@ impl<'a> Attribute<'a> { custom_entities: Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode_without_bom(&*self.value); - let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; + let unescaped = + do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) } @@ -266,7 +267,8 @@ impl<'a> Attribute<'a> { custom_entities: Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode_without_bom(&*self.value)?; - let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; + let unescaped = + do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) } } @@ -340,8 +342,9 @@ impl<'a> Iterator for Attributes<'a> { self.position = len; if self.html { attr!($key, 0..0) + } else { + None } - return None; }}; ($key:expr, $val:expr) => { Some(Ok(Attribute { diff --git a/src/events/mod.rs b/src/events/mod.rs index 
76f7880c..d886624b 100644 --- a/src/events/mod.rs +++ b/src/events/mod.rs @@ -39,7 +39,6 @@ pub mod attributes; #[cfg(feature = "encoding_rs")] use encoding_rs::Encoding; -use crate::escape::{do_unescape, escape}; use memchr; use std::borrow::Cow; @@ -50,7 +49,7 @@ use std::str::from_utf8; use self::attributes::{Attribute, Attributes}; use crate::errors::{Error, Result}; use crate::escapei::{do_unescape, escape}; -use crate::reader::{Decode, Reader}; +use crate::reader::{Decode}; /// Opening tag data (`Event::Start`), with optional attributes. /// diff --git a/src/lib.rs b/src/lib.rs index b90ff0f6..f705f459 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -133,15 +133,11 @@ #[cfg(feature = "serialize")] pub mod de; -mod errors; -mod escapei; pub mod escape { //! Manage xml character escapes pub(crate) use crate::escapei::{do_unescape, EscapeError}; pub use crate::escapei::{escape, partial_escape, unescape, unescape_with}; } -pub mod events; -mod reader; #[cfg(feature = "serialize")] pub mod se; @@ -150,7 +146,6 @@ mod escapei; mod utils; mod writer; -pub mod escape; pub mod events; pub mod reader; diff --git a/src/reader/sync.rs b/src/reader/sync.rs index c3cd260d..5d8b243b 100644 --- a/src/reader/sync.rs +++ b/src/reader/sync.rs @@ -4,13 +4,13 @@ use std::borrow::Cow; use std::fs::File; use std::io::{self, BufRead, BufReader}; -use std::{fs::File, path::Path, str::from_utf8}; +use std::{path::Path, str::from_utf8}; #[cfg(feature = "encoding")] use encoding_rs::Encoding; use crate::errors::{Error, Result}; -use crate::events::{attributes::Attribute, BytesDecl, BytesEnd, BytesStart, BytesText, Event}; +use crate::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event}; use super::{is_whitespace, Decode, Decoder, NamespaceBufferIndex, TagState}; diff --git a/tests/test.rs b/tests/test.rs index 17ea9bed..24c52edf 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -4,6 +4,7 @@ extern crate serde; use quick_xml::events::attributes::Attribute; use 
quick_xml::events::Event::*; +use quick_xml::Error; use quick_xml::Reader; #[cfg(feature = "serialize")] use serde::{Deserialize, Serialize}; diff --git a/tests/unit_tests.rs b/tests/unit_tests.rs index 9be94f69..d68af643 100644 --- a/tests/unit_tests.rs +++ b/tests/unit_tests.rs @@ -1,6 +1,3 @@ -use std::io::Cursor; -use std::str::from_utf8; - use quick_xml::events::Event::*; use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event}; use quick_xml::{Reader, Result, Writer}; diff --git a/tests/xmlrs_reader_tests.rs b/tests/xmlrs_reader_tests.rs index 3da6623f..78a24b25 100644 --- a/tests/xmlrs_reader_tests.rs +++ b/tests/xmlrs_reader_tests.rs @@ -385,7 +385,6 @@ async fn test_async(input: &[u8], output: &[u8], is_short: bool) { loop { buf.clear(); -<<<<<<< HEAD let event = reader.read_namespaced_event(&mut buf, &mut ns_buffer).await; @@ -417,39 +416,6 @@ async fn test_async(input: &[u8], output: &[u8], is_short: bool) { let mut buf = Vec::new(); -======= - - let event = reader.read_namespaced_event(&mut buf, &mut ns_buffer).await; - - let line = xmlrs_display(&event); - if let Some((n, spec)) = spec_lines.next() { - if spec.trim() == "EndDocument" { - break; - } - if line.trim() != spec.trim() { - panic!( - "\n-------------------\n\ - Unexpected event at line {}:\n\ - Expected: {}\nFound: {}\n\ - -------------------\n", - n + 1, - spec, - line - ); - } - } else { - if line == "EndDocument" { - break; - } - panic!("Unexpected event: {}", line); - } - - if !is_short && line.starts_with("StartDocument") { - // advance next Characters(empty space) ... - - let mut buf = Vec::new(); - ->>>>>>> ee5a073... 
Introduce an AsyncReader instead of overloading the Reader if let Ok(Event::Text(ref e)) = reader.read_event(&mut buf).await { if e.iter().any(|b| match *b { b' ' | b'\r' | b'\n' | b'\t' => false, From fdc87736e03d1969cd5a4e592d1a29e2caaf22c9 Mon Sep 17 00:00:00 2001 From: greg Date: Wed, 25 Aug 2021 08:30:25 -0500 Subject: [PATCH 4/8] make clippy happy --- src/escapei.rs | 14 ++++---------- src/events/attributes.rs | 2 +- src/reader/mod.rs | 5 +---- src/reader/sync.rs | 5 +++-- src/utils.rs | 14 ++------------ src/writer.rs | 7 ++----- tests/xmlrs_reader_tests.rs | 17 ++++++----------- 7 files changed, 19 insertions(+), 45 deletions(-) diff --git a/src/escapei.rs b/src/escapei.rs index f31675f7..686fffdf 100644 --- a/src/escapei.rs +++ b/src/escapei.rs @@ -64,10 +64,7 @@ impl std::error::Error for EscapeError {} pub fn escape(raw: &[u8]) -> Cow<[u8]> { #[inline] fn to_escape(b: u8) -> bool { - match b { - b'<' | b'>' | b'\'' | b'&' | b'"' => true, - _ => false, - } + matches!(b, b'<' | b'>' | b'\'' | b'&' | b'"') } _escape(raw, to_escape) @@ -80,10 +77,7 @@ pub fn escape(raw: &[u8]) -> Cow<[u8]> { pub fn partial_escape(raw: &[u8]) -> Cow<[u8]> { #[inline] fn to_escape(b: u8) -> bool { - match b { - b'<' | b'>' | b'&' => true, - _ => false, - } + matches!(b, b'<' | b'>' | b'&') } _escape(raw, to_escape) @@ -172,7 +166,7 @@ pub fn do_unescape<'a>( } else if pat.starts_with(b"#") { push_utf8(unescaped, parse_number(&pat[1..], start..end)?); } else if let Some(value) = custom_entities.and_then(|hm| hm.get(pat)) { - unescaped.extend_from_slice(&value); + unescaped.extend_from_slice(value); } else { return Err(EscapeError::UnrecognizedSymbol( start + 1..end, @@ -1680,7 +1674,7 @@ fn parse_number(bytes: &[u8], range: Range) -> Result let code = if bytes.starts_with(b"x") { parse_hexadecimal(&bytes[1..]) } else { - parse_decimal(&bytes) + parse_decimal(bytes) }?; if code == 0 { return Err(EscapeError::EntityWithNull(range)); diff --git a/src/events/attributes.rs 
b/src/events/attributes.rs index 414340de..ed41b00e 100644 --- a/src/events/attributes.rs +++ b/src/events/attributes.rs @@ -404,7 +404,7 @@ impl<'a> Iterator for Attributes<'a> { .iter() .filter(|r| r.len() == end_key - start_key) .find(|r| self.bytes[(*r).clone()] == self.bytes[start_key..end_key]) - .map(|ref r| r.start) + .map(|r| r.start) { err!(Error::DuplicatedAttribute(start_key, start)); } diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 225683a4..f1c1736a 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -119,10 +119,7 @@ enum TagState { /// A function to check whether the byte is a whitespace (blank, new line, carriage return or tab) #[inline] pub(crate) fn is_whitespace(b: u8) -> bool { - match b { - b' ' | b'\r' | b'\n' | b'\t' => true, - _ => false, - } + matches!(b, b' ' | b'\r' | b'\n' | b'\t') } /// A namespace declaration. Can either bind a namespace to a prefix or define the current default diff --git a/src/reader/sync.rs b/src/reader/sync.rs index 5d8b243b..aa71877b 100644 --- a/src/reader/sync.rs +++ b/src/reader/sync.rs @@ -823,6 +823,7 @@ impl Reader> { } impl<'a> Reader<&'a [u8]> { + #[allow(clippy::should_implement_trait)] /// Creates an XML reader from a string slice. 
pub fn from_str(s: &'a str) -> Reader<&'a [u8]> { Reader::from_reader(s.as_bytes()) @@ -1200,7 +1201,7 @@ impl<'a> BufferedInput<'a, 'a, ()> for &'a [u8] { }; *self = &self[i..]; - return Ok(Some(bytes)); + Ok(Some(bytes)) } fn read_bang_element(&mut self, _buf: (), position: &mut usize) -> Result> { @@ -1327,6 +1328,6 @@ impl<'a> BufferedInput<'a, 'a, ()> for &'a [u8] { } fn input_borrowed(event: Event<'a>) -> Event<'a> { - return event; + event } } diff --git a/src/utils.rs b/src/utils.rs index a3fd7c83..45341ab2 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,17 +1,7 @@ -use std::borrow::Cow; use std::fmt::{Formatter, Result}; -pub fn write_cow_string(f: &mut Formatter<'_>, cow_string: &Cow<[u8]>) -> Result { - match cow_string { - Cow::Owned(s) => { - write!(f, "Owned(")?; - write_byte_string(f, &s)?; - } - Cow::Borrowed(s) => { - write!(f, "Borrowed(")?; - write_byte_string(f, s)?; - } - } +pub fn write_cow_string(f: &mut Formatter<'_>, s: &[u8]) -> Result { + write_byte_string(f, s)?; write!(f, ")") } diff --git a/src/writer.rs b/src/writer.rs index fab33abf..26602824 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -109,7 +109,7 @@ impl Writer { Event::Empty(ref e) => self.write_wrapped(b"<", e, b"/>"), Event::Text(ref e) => { next_should_line_break = false; - self.write(&e.escaped()) + self.write(e.escaped()) } Event::Comment(ref e) => self.write_wrapped(b""), Event::CData(ref e) => { @@ -329,10 +329,7 @@ impl Indentation { } fn shrink(&mut self) { - self.indents_len = match self.indents_len.checked_sub(self.indent_size) { - Some(result) => result, - None => 0, - }; + self.indents_len = self.indents_len.saturating_sub(self.indent_size); } } diff --git a/tests/xmlrs_reader_tests.rs b/tests/xmlrs_reader_tests.rs index 78a24b25..69bac59b 100644 --- a/tests/xmlrs_reader_tests.rs +++ b/tests/xmlrs_reader_tests.rs @@ -487,16 +487,12 @@ fn xmlrs_display(opt_event: &Result<(Option<&[u8]>, Event)>) -> String { Ok((ref n, Event::End(ref e))) => 
format!("EndElement({})", namespace_name(n, e.name())), Ok((_, Event::Comment(ref e))) => format!("Comment({})", from_utf8(e).unwrap()), Ok((_, Event::CData(ref e))) => format!("CData({})", from_utf8(e).unwrap()), - Ok((_, Event::Text(ref e))) => { - match e.unescaped() { - Ok(c) => { - match from_utf8(&*c) { - Ok(c) => format!("Characters({})", c), - Err(ref err) => format!("InvalidUtf8({:?}; {})", e.escaped(), err), - } - }, - Err(ref err) => format!("FailedUnescape({:?}; {})", e.escaped(), err), - } + Ok((_, Event::Text(ref e))) => match e.unescaped() { + Ok(c) => match from_utf8(&*c) { + Ok(c) => format!("Characters({})", c), + Err(ref err) => format!("InvalidUtf8({:?}; {})", e.escaped(), err), + }, + Err(ref err) => format!("FailedUnescape({:?}; {})", e.escaped(), err), }, Ok((_, Event::Decl(ref e))) => { let version_cow = e.version().unwrap(); @@ -540,4 +536,3 @@ impl<'a> Iterator for SpecIter<'a> { } } } - From ff9d41bf34ca2769bdae7acaf847489ef775d1e6 Mon Sep 17 00:00:00 2001 From: greg Date: Thu, 2 Sep 2021 17:34:08 -0500 Subject: [PATCH 5/8] Format, couple cleanups, feature import fix --- src/de/mod.rs | 1 - src/events/mod.rs | 21 +++++++++++++-------- src/reader/sync.rs | 2 -- src/utils.rs | 14 ++++++++++++-- tests/unit_tests.rs | 2 +- 5 files changed, 26 insertions(+), 14 deletions(-) diff --git a/src/de/mod.rs b/src/de/mod.rs index 242c14a8..f689463b 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -116,7 +116,6 @@ pub use crate::errors::serialize::DeError; use crate::{ errors::Error, events::{BytesStart, BytesText, Event}, - reader::Decode, reader::Decoder, Reader, }; diff --git a/src/events/mod.rs b/src/events/mod.rs index d886624b..da71c8dc 100644 --- a/src/events/mod.rs +++ b/src/events/mod.rs @@ -39,7 +39,6 @@ pub mod attributes; #[cfg(feature = "encoding_rs")] use encoding_rs::Encoding; - use memchr; use std::borrow::Cow; use std::collections::HashMap; @@ -49,7 +48,7 @@ use std::str::from_utf8; use self::attributes::{Attribute, Attributes}; use 
crate::errors::{Error, Result}; use crate::escapei::{do_unescape, escape}; -use crate::reader::{Decode}; +use crate::reader::Decode; /// Opening tag data (`Event::Start`), with optional attributes. /// @@ -306,7 +305,8 @@ impl<'a> BytesStart<'a> { custom_entities: Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode(&*self); - let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; + let unescaped = + do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) } @@ -318,7 +318,8 @@ impl<'a> BytesStart<'a> { custom_entities: Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode(&*self)?; - let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; + let unescaped = + do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) } @@ -772,7 +773,8 @@ impl<'a> BytesText<'a> { custom_entities: Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode_without_bom(&*self); - let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; + let unescaped = + do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) } @@ -783,7 +785,8 @@ impl<'a> BytesText<'a> { custom_entities: Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode_without_bom(&*self)?; - let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; + let unescaped = + do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) } @@ -822,7 +825,8 @@ impl<'a> BytesText<'a> { custom_entities: Option<&HashMap, Vec>>, ) -> Result 
{ let decoded = reader.decode(&*self); - let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; + let unescaped = + do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) } @@ -833,7 +837,8 @@ impl<'a> BytesText<'a> { custom_entities: Option<&HashMap, Vec>>, ) -> Result { let decoded = reader.decode(&*self)?; - let unescaped = do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; + let unescaped = + do_unescape(decoded.as_bytes(), custom_entities).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) } diff --git a/src/reader/sync.rs b/src/reader/sync.rs index aa71877b..085db31d 100644 --- a/src/reader/sync.rs +++ b/src/reader/sync.rs @@ -1,7 +1,5 @@ //! A module to handle sync `Reader` -#[cfg(feature = "encoding")] -use std::borrow::Cow; use std::fs::File; use std::io::{self, BufRead, BufReader}; use std::{path::Path, str::from_utf8}; diff --git a/src/utils.rs b/src/utils.rs index 45341ab2..a3fd7c83 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,7 +1,17 @@ +use std::borrow::Cow; use std::fmt::{Formatter, Result}; -pub fn write_cow_string(f: &mut Formatter<'_>, s: &[u8]) -> Result { - write_byte_string(f, s)?; +pub fn write_cow_string(f: &mut Formatter<'_>, cow_string: &Cow<[u8]>) -> Result { + match cow_string { + Cow::Owned(s) => { + write!(f, "Owned(")?; + write_byte_string(f, &s)?; + } + Cow::Borrowed(s) => { + write!(f, "Borrowed(")?; + write_byte_string(f, s)?; + } + } write!(f, ")") } diff --git a/tests/unit_tests.rs b/tests/unit_tests.rs index d68af643..433eb17f 100644 --- a/tests/unit_tests.rs +++ b/tests/unit_tests.rs @@ -334,7 +334,7 @@ fn test_write_attrs() -> Result<()> { let event = match reader.read_event(&mut buf)? 
{ Eof => break, Start(elem) => { - let mut attrs = elem.attributes().collect::>>().unwrap(); + let mut attrs = elem.attributes().collect::>>()?; attrs.extend_from_slice(&[("a", "b").into(), ("c", "d").into()]); let mut elem = BytesStart::owned(b"copy".to_vec(), 4); elem.extend_attributes(attrs); From c6d431fec4335ab6d4c3fdbb2db5311f3df62046 Mon Sep 17 00:00:00 2001 From: Greg Baraghimian Date: Sun, 12 Sep 2021 09:39:35 -0400 Subject: [PATCH 6/8] BufRead -> AsyncBufRead in docs --- src/reader/asynchronous.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/reader/asynchronous.rs b/src/reader/asynchronous.rs index 089ab093..9a0a5b31 100644 --- a/src/reader/asynchronous.rs +++ b/src/reader/asynchronous.rs @@ -42,7 +42,7 @@ impl Decode for AsyncReader { /// A low level encoding-agnostic XML event reader. /// -/// Consumes a `BufRead` and streams XML `Event`s. +/// Consumes a `AsyncBufRead` and streams XML `Event`s. /// /// # Examples /// From 7742c05c3b3ffd17c754e272d5e0b0931c52a6c7 Mon Sep 17 00:00:00 2001 From: Andrei Vasiliu Date: Sun, 25 Jul 2021 14:54:20 +0300 Subject: [PATCH 7/8] Add tests for escaped and encoded XML --- tests/xmlrs_reader_tests.rs | 97 ++++++++++++++++++++++++++++++++++--- 1 file changed, 90 insertions(+), 7 deletions(-) diff --git a/tests/xmlrs_reader_tests.rs b/tests/xmlrs_reader_tests.rs index 69bac59b..fbabe714 100644 --- a/tests/xmlrs_reader_tests.rs +++ b/tests/xmlrs_reader_tests.rs @@ -4,6 +4,7 @@ use quick_xml::events::{BytesStart, Event}; #[cfg(feature = "asynchronous")] use quick_xml::AsyncReader; use quick_xml::{Reader, Result}; +use std::borrow::Cow; use std::str::from_utf8; #[cfg(feature = "asynchronous")] use tokio::runtime::Runtime; @@ -64,6 +65,55 @@ fn html5() { ); } +#[test] +fn escaped_characters() { + test( + r#"'a' < '&'"#, + r#" + |StartElement(e [attr=""Hello""]) + |Characters('a' < '&') + |EndElement(e) + |EndDocument + "#, + true, + ) +} + +#[cfg(feature = "escape-html")] +#[test] +fn 
escaped_characters_html() { + test( + r#"╔╗╔╗╔╗"#, + r#" + |StartElement(e [attr="ℏÈℓ𝕝⨀"]) + |Characters(╔╗╔╗╔╗) + |EndElement(e) + |EndDocument + "#, + true, + ) +} + +#[cfg(feature = "encoding")] +#[test] +fn encoded_characters() { + test_bytes( + b"\ + \n\ + \x82\xA0\x82\xA2\x82\xA4\ + ", + " + |StartDocument(1.0, Shift_JIS) + |StartElement(a) + |Characters(あいう) + |EndElement(a) + |EndDocument + " + .as_bytes(), + true, + ) +} + // #[test] // fn sample_3_short() { // test( @@ -101,6 +151,19 @@ fn html5() { // // } +#[test] +// FIXME: Trips on the first byte-order-mark byte +// Expected: StartDocument(1.0, utf-16) +// Found: InvalidUtf8([255, 254]; invalid utf-8 sequence of 1 bytes from index 0) +#[ignore] +fn sample_5_short() { + test_bytes( + include_bytes!("documents/sample_5_utf16bom.xml"), + include_bytes!("documents/sample_5_short.txt"), + true, + ); +} + #[test] fn sample_ns_short() { test( @@ -304,6 +367,13 @@ fn default_namespace_applies_to_end_elem() { fn test_sync(input: &[u8], output: &[u8], is_short: bool) { let mut reader = Reader::from_reader(input); + // Normalize newlines on Windows to just \n, which is what the reader and + // writer use. 
+ // let input = input.replace("\r\n", "\n"); + // let input = input.as_bytes(); + // let output = output.replace("\r\n", "\n"); + // let output = output.as_bytes(); + let mut reader = Reader::from_reader(input); reader .trim_text(is_short) .check_comments(true) @@ -322,8 +392,7 @@ fn test_sync(input: &[u8], output: &[u8], is_short: bool) { buf.clear(); let event = reader.read_namespaced_event(&mut buf, &mut ns_buffer); - - let line = xmlrs_display(&event); + let line = xmlrs_display(&event, &reader); if let Some((n, spec)) = spec_lines.next() { if spec.trim() == "EndDocument" { break; @@ -466,10 +535,21 @@ fn make_attrs(e: &BytesStart) -> ::std::result::Result { Ok(atts.join(", ")) } -fn xmlrs_display(opt_event: &Result<(Option<&[u8]>, Event)>) -> String { +// FIXME: The public API differs based on the "encoding" feature +fn decode<'a>(text: &'a [u8], reader: &Reader<&[u8]>) -> Cow<'a, str> { + #[cfg(feature = "encoding")] + let decoded = reader.decode(text); + + #[cfg(not(feature = "encoding"))] + let decoded = Cow::Borrowed(reader.decode(text).unwrap()); + + decoded +} + +fn xmlrs_display(opt_event: &Result<(Option<&[u8]>, Event)>, reader: &Reader<&[u8]>) -> String { match opt_event { Ok((ref n, Event::Start(ref e))) => { - let name = namespace_name(n, e.name()); + let name = namespace_name(n, decode(e.name(), reader).as_bytes()); match make_attrs(e) { Ok(ref attrs) if attrs.is_empty() => format!("StartElement({})", &name), Ok(ref attrs) => format!("StartElement({} [{}])", &name, &attrs), @@ -477,18 +557,21 @@ fn xmlrs_display(opt_event: &Result<(Option<&[u8]>, Event)>) -> String { } } Ok((ref n, Event::Empty(ref e))) => { - let name = namespace_name(n, e.name()); + let name = namespace_name(n, decode(e.name(), reader).as_bytes()); match make_attrs(e) { Ok(ref attrs) if attrs.is_empty() => format!("EmptyElement({})", &name), Ok(ref attrs) => format!("EmptyElement({} [{}])", &name, &attrs), Err(e) => format!("EmptyElement({}, attr-error: {})", &name, &e), } } - 
Ok((ref n, Event::End(ref e))) => format!("EndElement({})", namespace_name(n, e.name())), + Ok((ref n, Event::End(ref e))) => { + let name = namespace_name(n, decode(e.name(), reader).as_bytes()); + format!("EndElement({})", name) + } Ok((_, Event::Comment(ref e))) => format!("Comment({})", from_utf8(e).unwrap()), Ok((_, Event::CData(ref e))) => format!("CData({})", from_utf8(e).unwrap()), Ok((_, Event::Text(ref e))) => match e.unescaped() { - Ok(c) => match from_utf8(&*c) { + Ok(c) => match from_utf8(decode(&*c, reader).as_bytes()) { Ok(c) => format!("Characters({})", c), Err(ref err) => format!("InvalidUtf8({:?}; {})", e.escaped(), err), }, From 7e2e962bd3eda71844bbda16a59a3811ac933554 Mon Sep 17 00:00:00 2001 From: greg Date: Tue, 21 Sep 2021 22:13:17 -0500 Subject: [PATCH 8/8] fixing tests post cherrypick --- tests/xmlrs_reader_tests.rs | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/tests/xmlrs_reader_tests.rs b/tests/xmlrs_reader_tests.rs index fbabe714..0ec07fa6 100644 --- a/tests/xmlrs_reader_tests.rs +++ b/tests/xmlrs_reader_tests.rs @@ -1,6 +1,7 @@ extern crate quick_xml; use quick_xml::events::{BytesStart, Event}; +use quick_xml::reader::Decode; #[cfg(feature = "asynchronous")] use quick_xml::AsyncReader; use quick_xml::{Reader, Result}; @@ -45,7 +46,7 @@ fn sample_2_full() { ); } -#[cfg(all(not(windows), feature = "escape-html"))] +#[cfg(feature = "escape-html")] #[test] fn html5() { test( @@ -54,27 +55,17 @@ fn html5() { false, ); } - -#[cfg(all(windows, feature = "escape-html"))] -#[test] -fn html5() { - test( - include_bytes!("documents/html5.html"), - include_bytes!("documents/html5-windows.txt"), - false, - ); -} - #[test] fn escaped_characters() { test( - r#"'a' < '&'"#, + r#"'a' < '&'"#.as_bytes(), r#" |StartElement(e [attr=""Hello""]) |Characters('a' < '&') |EndElement(e) |EndDocument - "#, + "# + .as_bytes(), true, ) } @@ -83,13 +74,14 @@ fn escaped_characters() { #[test] fn 
escaped_characters_html() { test( - r#"╔╗╔╗╔╗"#, + r#"╔╗╔╗╔╗"#.as_bytes(), r#" |StartElement(e [attr="ℏÈℓ𝕝⨀"]) |Characters(╔╗╔╗╔╗) |EndElement(e) |EndDocument - "#, + "# + .as_bytes(), true, ) } @@ -97,7 +89,7 @@ fn escaped_characters_html() { #[cfg(feature = "encoding")] #[test] fn encoded_characters() { - test_bytes( + test( b"\ \n\ \x82\xA0\x82\xA2\x82\xA4\ @@ -157,7 +149,7 @@ fn encoded_characters() { // Found: InvalidUtf8([255, 254]; invalid utf-8 sequence of 1 bytes from index 0) #[ignore] fn sample_5_short() { - test_bytes( + test( include_bytes!("documents/sample_5_utf16bom.xml"), include_bytes!("documents/sample_5_short.txt"), true, @@ -365,8 +357,6 @@ fn default_namespace_applies_to_end_elem() { } fn test_sync(input: &[u8], output: &[u8], is_short: bool) { - let mut reader = Reader::from_reader(input); - // Normalize newlines on Windows to just \n, which is what the reader and // writer use. // let input = input.replace("\r\n", "\n"); @@ -457,7 +447,7 @@ async fn test_async(input: &[u8], output: &[u8], is_short: bool) { let event = reader.read_namespaced_event(&mut buf, &mut ns_buffer).await; - let line = xmlrs_display(&event); + let line = xmlrs_display(&event, &reader); if let Some((n, spec)) = spec_lines.next() { if spec.trim() == "EndDocument" { break; @@ -536,7 +526,7 @@ fn make_attrs(e: &BytesStart) -> ::std::result::Result { } // FIXME: The public API differs based on the "encoding" feature -fn decode<'a>(text: &'a [u8], reader: &Reader<&[u8]>) -> Cow<'a, str> { +fn decode<'a>(text: &'a [u8], reader: &impl Decode) -> Cow<'a, str> { #[cfg(feature = "encoding")] let decoded = reader.decode(text); @@ -546,7 +536,7 @@ fn decode<'a>(text: &'a [u8], reader: &Reader<&[u8]>) -> Cow<'a, str> { decoded } -fn xmlrs_display(opt_event: &Result<(Option<&[u8]>, Event)>, reader: &Reader<&[u8]>) -> String { +fn xmlrs_display(opt_event: &Result<(Option<&[u8]>, Event)>, reader: &impl Decode) -> String { match opt_event { Ok((ref n, Event::Start(ref e))) => { let name 
= namespace_name(n, decode(e.name(), reader).as_bytes());