From 50804211b4a22edbfe365002143c3065cd72ad2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Frank=20Pr=C3=B6=C3=9Fdorf?= Date: Wed, 23 Sep 2020 11:06:37 +0300 Subject: [PATCH] Allow using tokio's AsyncBufRead --- Cargo.toml | 5 + README.md | 5 +- examples/issue68.rs | 16 +- examples/nested_readers.rs | 32 +- examples/read_texts.rs | 29 +- src/errors.rs | 2 +- src/events/attributes.rs | 21 +- src/events/mod.rs | 46 +- src/lib.rs | 30 +- src/reader/asynchronous.rs | 1079 +++++++++++++++++++++++++++++ src/reader/mod.rs | 233 +++++++ src/{reader.rs => reader/sync.rs} | 352 ++-------- src/se/mod.rs | 5 +- src/se/var.rs | 20 +- src/writer.rs | 20 +- tests/serde_attrs.rs | 35 +- tests/test.rs | 265 ++++++- tests/unit_tests.rs | 404 +++++++++-- tests/xmlrs_reader_tests.rs | 30 +- 19 files changed, 2173 insertions(+), 456 deletions(-) create mode 100644 src/reader/asynchronous.rs create mode 100644 src/reader/mod.rs rename src/{reader.rs => reader/sync.rs} (81%) diff --git a/Cargo.toml b/Cargo.toml index 401e7a64..d04dbdaa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,7 @@ name = "quick-xml" version = "0.18.1" authors = ["Johann Tuffe "] description = "High performance xml reader and writer" +edition = "2018" documentation = "https://docs.rs/quick-xml" repository = "https://github.com/tafia/quick-xml" @@ -16,13 +17,16 @@ license = "MIT" travis-ci = { repository = "tafia/quick-xml" } [dependencies] +async-recursion = { version = "0.3.1", optional = true } encoding_rs = { version = "0.8.22", optional = true } +tokio = { version = "0.2.22", features = ["fs", "io-util"], optional = true } serde = { version = "1.0", optional = true } memchr = "2.3.3" [dev-dependencies] serde = { version = "1.0", features = ["derive"] } regex = "1" +tokio = { version = "0.2.22", features = ["macros", "rt-threaded"] } [lib] bench = false @@ -31,6 +35,7 @@ bench = false default = [] encoding = ["encoding_rs"] serialize = ["serde"] +asynchronous = ["tokio", "async-recursion"] 
[package.metadata.docs.rs] features = ["serialize"] diff --git a/README.md b/README.md index bab4d3c7..e28b2783 100644 --- a/README.md +++ b/README.md @@ -210,8 +210,8 @@ fn crates_io() -> Result { ### Credits -This has largely been inspired by [serde-xml-rs](https://github.com/RReverser/serde-xml-rs). -quick-xml follows its convention for deserialization, including the +This has largely been inspired by [serde-xml-rs](https://github.com/RReverser/serde-xml-rs). +quick-xml follows its convention for deserialization, including the [`$value`](https://github.com/RReverser/serde-xml-rs#parsing-the-value-of-a-tag) special name. ### Parsing the "value" of a tag @@ -234,6 +234,7 @@ Note that despite not focusing on performance (there are several unecessary copi - `encoding`: support non utf8 xmls - `serialize`: support serde `Serialize`/`Deserialize` +- `asynchronous`: support for `AsyncRead`s in `tokio` ## Performance diff --git a/examples/issue68.rs b/examples/issue68.rs index a6ba1d7f..d738ed84 100644 --- a/examples/issue68.rs +++ b/examples/issue68.rs @@ -1,10 +1,10 @@ #![allow(unused)] -extern crate quick_xml; - use quick_xml::events::Event; use quick_xml::Reader; use std::io::Read; +#[cfg(feature = "asynchronous")] +use tokio::runtime::Runtime; struct Resource { etag: String, @@ -81,8 +81,18 @@ fn parse_report(xml_data: &str) -> Vec { let mut depth = 0; let mut state = State::MultiStatus; + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match reader.read_namespaced_event(&mut buf, &mut ns_buffer) { + #[cfg(feature = "asynchronous")] + let event = runtime + .block_on(async { reader.read_namespaced_event(&mut buf, &mut ns_buffer).await }); + + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_namespaced_event(&mut buf, &mut ns_buffer); + + match event { Ok((namespace_value, Event::Start(e))) => { let namespace_value = namespace_value.unwrap_or_default(); match (depth, state, 
namespace_value, e.local_name()) { diff --git a/examples/nested_readers.rs b/examples/nested_readers.rs index 892fdc92..5dd1dbbc 100644 --- a/examples/nested_readers.rs +++ b/examples/nested_readers.rs @@ -1,6 +1,8 @@ -extern crate quick_xml; use quick_xml::events::Event; use quick_xml::Reader; +#[cfg(feature = "asynchronous")] +use tokio::runtime::Runtime; + // a structure to capture the rows we've extracted // from a ECMA-376 table in document.xml #[derive(Debug, Clone)] @@ -16,10 +18,26 @@ fn main() -> Result<(), quick_xml::Error> { // buffer for nested reader let mut skip_buf = Vec::new(); let mut count = 0; + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + let mut reader = + runtime.block_on(async { Reader::from_file("tests/documents/document.xml").await })?; + + #[cfg(not(feature = "asynchronous"))] let mut reader = Reader::from_file("tests/documents/document.xml")?; + let mut found_tables = Vec::new(); loop { - match reader.read_event(&mut buf)? { + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await })?; + + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf)?; + + match event { Event::Start(element) => match element.name() { b"w:tbl" => { count += 1; @@ -32,7 +50,15 @@ fn main() -> Result<(), quick_xml::Error> { let mut row_index = 0; loop { skip_buf.clear(); - match reader.read_event(&mut skip_buf)? 
{ + + #[cfg(feature = "asynchronous")] + let event = + runtime.block_on(async { reader.read_event(&mut skip_buf).await })?; + + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut skip_buf)?; + + match event { Event::Start(element) => match element.name() { b"w:tr" => { stats.rows.push(vec![]); diff --git a/examples/read_texts.rs b/examples/read_texts.rs index c0bb4778..227911cc 100644 --- a/examples/read_texts.rs +++ b/examples/read_texts.rs @@ -1,4 +1,5 @@ -extern crate quick_xml; +#[cfg(feature = "asynchronous")] +use tokio::runtime::Runtime; fn main() { use quick_xml::events::Event; @@ -13,14 +14,32 @@ fn main() { let mut txt = Vec::new(); let mut buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match reader.read_event(&mut buf) { + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + match event { Ok(Event::Start(ref e)) if e.name() == b"tag2" => { - txt.push( + #[cfg(feature = "asynchronous")] + let text = runtime.block_on(async { reader .read_text(b"tag2", &mut Vec::new()) - .expect("Cannot decode text value"), - ); + .await + .expect("Cannot decode text value") + }); + + #[cfg(not(feature = "asynchronous"))] + let text = reader + .read_text(b"tag2", &mut Vec::new()) + .expect("Cannot decode text value"); + + txt.push(text); println!("{:?}", txt); } Ok(Event::Eof) => break, // exits the loop when reaching end of file diff --git a/src/errors.rs b/src/errors.rs index 777e7fba..6683c9fe 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -33,7 +33,7 @@ pub enum Error { /// Duplicate attribute DuplicatedAttribute(usize, usize), /// Escape error - EscapeError(::escape::EscapeError), + EscapeError(crate::escape::EscapeError), } impl From<::std::io::Error> for Error { diff --git a/src/events/attributes.rs 
b/src/events/attributes.rs index bcdffdb9..b6c2e81f 100644 --- a/src/events/attributes.rs +++ b/src/events/attributes.rs @@ -2,11 +2,10 @@ //! //! Provides an iterator over attributes key/value pairs -use errors::{Error, Result}; -use escape::{escape, unescape}; -use reader::{is_whitespace, Reader}; +use crate::errors::{Error, Result}; +use crate::escape::{escape, unescape}; +use crate::reader::{is_whitespace, Decode}; use std::borrow::Cow; -use std::io::BufRead; use std::ops::Range; /// Iterator over XML attributes. @@ -107,7 +106,7 @@ impl<'a> Attribute<'a> { /// [`unescaped_value()`]: #method.unescaped_value /// [`Reader::decode()`]: ../../reader/struct.Reader.html#method.decode #[cfg(feature = "encoding")] - pub fn unescape_and_decode_value(&self, reader: &Reader) -> Result { + pub fn unescape_and_decode_value(&self, reader: &impl Decode) -> Result { let decoded = reader.decode(&*self.value); let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) @@ -124,7 +123,7 @@ impl<'a> Attribute<'a> { /// [`unescaped_value()`]: #method.unescaped_value /// [`Reader::decode()`]: ../../reader/struct.Reader.html#method.decode #[cfg(not(feature = "encoding"))] - pub fn unescape_and_decode_value(&self, reader: &Reader) -> Result { + pub fn unescape_and_decode_value(&self, reader: &impl Decode) -> Result { let decoded = reader.decode(&*self.value)?; let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) @@ -138,10 +137,7 @@ impl<'a> Attribute<'a> { /// 1. BytesText::unescaped() /// 2. Reader::decode(...) 
#[cfg(feature = "encoding")] - pub fn unescape_and_decode_without_bom( - &self, - reader: &mut Reader, - ) -> Result { + pub fn unescape_and_decode_without_bom(&self, reader: &mut impl Decode) -> Result { let decoded = reader.decode_without_bom(&*self.value); let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) @@ -155,10 +151,7 @@ impl<'a> Attribute<'a> { /// 1. BytesText::unescaped() /// 2. Reader::decode(...) #[cfg(not(feature = "encoding"))] - pub fn unescape_and_decode_without_bom( - &self, - reader: &Reader, - ) -> Result { + pub fn unescape_and_decode_without_bom(&self, reader: &impl Decode) -> Result { let decoded = reader.decode_without_bom(&*self.value)?; let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) diff --git a/src/events/mod.rs b/src/events/mod.rs index 02bd3785..6eee28c1 100644 --- a/src/events/mod.rs +++ b/src/events/mod.rs @@ -5,14 +5,13 @@ pub mod attributes; #[cfg(feature = "encoding_rs")] use encoding_rs::Encoding; use std::borrow::Cow; -use std::io::BufRead; use std::ops::Deref; use std::str::from_utf8; use self::attributes::{Attribute, Attributes}; -use errors::{Error, Result}; -use escape::{escape, unescape}; -use reader::Reader; +use crate::errors::{Error, Result}; +use crate::escape::{escape, unescape}; +use crate::reader::Decode; use memchr; @@ -175,7 +174,7 @@ impl<'a> BytesStart<'a> { /// [`Reader::decode()`]: ../reader/struct.Reader.html#method.decode #[cfg(feature = "encoding")] #[inline] - pub fn unescape_and_decode(&self, reader: &Reader) -> Result { + pub fn unescape_and_decode(&self, reader: &impl Decode) -> Result { let decoded = reader.decode(&*self); let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) @@ -193,7
+192,7 @@ impl<'a> BytesStart<'a> { /// [`Reader::decode()`]: ../reader/struct.Reader.html#method.decode #[cfg(not(feature = "encoding"))] #[inline] - pub fn unescape_and_decode(&self, reader: &Reader) -> Result { + pub fn unescape_and_decode(&self, reader: &impl Decode) -> Result { let decoded = reader.decode(&*self)?; let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) @@ -495,10 +494,7 @@ impl<'a> BytesText<'a> { /// 1. BytesText::unescaped() /// 2. Reader::decode(...) #[cfg(feature = "encoding")] - pub fn unescape_and_decode_without_bom( - &self, - reader: &mut Reader, - ) -> Result { + pub fn unescape_and_decode_without_bom(&self, reader: &mut impl Decode) -> Result { let decoded = reader.decode_without_bom(&*self); let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) @@ -512,10 +508,7 @@ impl<'a> BytesText<'a> { /// 1. BytesText::unescaped() /// 2. Reader::decode(...) #[cfg(not(feature = "encoding"))] - pub fn unescape_and_decode_without_bom( - &self, - reader: &Reader, - ) -> Result { + pub fn unescape_and_decode_without_bom(&self, reader: &impl Decode) -> Result { let decoded = reader.decode_without_bom(&*self)?; let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) @@ -528,7 +521,7 @@ impl<'a> BytesText<'a> { /// 1. BytesText::unescaped() /// 2. Reader::decode(...) 
#[cfg(feature = "encoding")] - pub fn unescape_and_decode(&self, reader: &Reader) -> Result { + pub fn unescape_and_decode(&self, reader: &impl Decode) -> Result { let decoded = reader.decode(&*self); let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) @@ -541,7 +534,7 @@ impl<'a> BytesText<'a> { /// 1. BytesText::unescaped() /// 2. Reader::decode(...) #[cfg(not(feature = "encoding"))] - pub fn unescape_and_decode(&self, reader: &Reader) -> Result { + pub fn unescape_and_decode(&self, reader: &impl Decode) -> Result { let decoded = reader.decode(&*self)?; let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?; String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error())) @@ -663,6 +656,8 @@ impl<'a> AsRef> for Event<'a> { #[cfg(test)] mod test { use super::*; + #[cfg(feature = "asynchronous")] + use tokio::runtime::Runtime; #[test] fn local_name() { @@ -673,11 +668,25 @@ mod test { <:foo attr='bar'>foobusbar foobusbar "#; - let mut rdr = Reader::from_str(xml); + let mut rdr = crate::Reader::from_str(xml); let mut buf = Vec::new(); let mut parsed_local_names = Vec::new(); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match rdr.read_event(&mut buf).expect("unable to read xml event") { + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { + rdr.read_event(&mut buf) + .await + .expect("unable to read xml event") + }); + + #[cfg(not(feature = "asynchronous"))] + let event = rdr.read_event(&mut buf).expect("unable to read xml event"); + + match event { Event::Start(ref e) => parsed_local_names.push( from_utf8(e.local_name()) .expect("unable to build str from local_name") @@ -692,6 +701,7 @@ mod test { _ => {} } } + assert_eq!(parsed_local_names[0], "bus".to_string()); assert_eq!(parsed_local_names[1], "bus".to_string()); 
assert_eq!(parsed_local_names[2], "".to_string()); diff --git a/src/lib.rs b/src/lib.rs index 30156e86..dc7296c3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,7 +9,7 @@ //! //! ### Reader //! -//! ```rust +//! ```ignore //! use quick_xml::Reader; //! use quick_xml::events::Event; //! @@ -24,8 +24,8 @@ //! reader.trim_text(true); //! //! let mut count = 0; -//! let mut txt = Vec::new(); -//! let mut buf = Vec::new(); +//! let mut txt: Vec = Vec::new(); +//! let mut buf: Vec = Vec::new(); //! //! // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s) //! loop { @@ -57,7 +57,7 @@ //! //! ### Writer //! -//! ```rust +//! ```ignore //! use quick_xml::Writer; //! use quick_xml::events::{Event, BytesEnd, BytesStart}; //! use quick_xml::Reader; @@ -68,7 +68,7 @@ //! let mut reader = Reader::from_str(xml); //! reader.trim_text(true); //! let mut writer = Writer::new(Cursor::new(Vec::new())); -//! let mut buf = Vec::new(); +//! let mut buf: Vec = Vec::new(); //! loop { //! match reader.read_event(&mut buf) { //! Ok(Event::Start(ref e)) if e.name() == b"this_tag" => { @@ -105,30 +105,25 @@ //! //! # Features //! -//! quick-xml supports 2 additional features, non activated by default: +//! quick-xml supports 3 additional features, non activated by default: //! - `encoding`: support non utf8 xmls //! - `serialize`: support serde `Serialize`/`Deserialize` +//! - `asynchronous`: support async reading #![forbid(unsafe_code)] #![deny(missing_docs)] #![recursion_limit = "1024"] -#[cfg(feature = "encoding_rs")] -extern crate encoding_rs; -extern crate memchr; -#[cfg(feature = "serialize")] -extern crate serde; - #[cfg(feature = "serialize")] pub mod de; mod errors; mod escapei; pub mod escape { //! 
Manage xml character escapes - pub(crate) use escapei::EscapeError; - pub use escapei::{escape, unescape}; + pub(crate) use crate::escapei::EscapeError; + pub use crate::escapei::{escape, unescape}; } pub mod events; -mod reader; +pub mod reader; #[cfg(feature = "serialize")] pub mod se; mod utils; @@ -138,5 +133,8 @@ mod writer; #[cfg(feature = "serialize")] pub use errors::serialize::DeError; pub use errors::{Error, Result}; -pub use reader::Reader; +#[cfg(feature = "asynchronous")] +pub use reader::asynchronous::Reader; +#[cfg(not(feature = "asynchronous"))] +pub use reader::sync::Reader; pub use writer::Writer; diff --git a/src/reader/asynchronous.rs b/src/reader/asynchronous.rs new file mode 100644 index 00000000..b6ed2e1f --- /dev/null +++ b/src/reader/asynchronous.rs @@ -0,0 +1,1079 @@ +//! A module to handle the async `Reader` + +use async_recursion::async_recursion; +#[cfg(feature = "encoding")] +use encoding_rs::{Encoding, UTF_16BE, UTF_16LE}; +#[cfg(feature = "encoding")] +use std::borrow::Cow; +use std::future::Future; +use std::io; +use std::marker::Unpin; +use std::path::Path; +use std::pin::Pin; +use std::str::from_utf8; +use std::task::{Context, Poll}; +use tokio::fs::File; +use tokio::io::{AsyncBufRead, AsyncBufReadExt, BufReader}; + +use crate::errors::{Error, Result}; +use crate::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event}; + +use memchr; + +use super::{is_whitespace, Decode, Decoder, NamespaceBufferIndex, TagState}; + +impl Decode for Reader { + /// Decodes a UTF8 slice regardless of XML declaration. + /// + /// Decode `bytes` with BOM sniffing and with malformed sequences replaced with the + /// `U+FFFD REPLACEMENT CHARACTER`. 
+ /// + /// # Note + /// + /// If you instead want to use XML declared encoding, use the `encoding` feature + #[inline] + #[cfg(not(feature = "encoding"))] + fn decode<'c>(&self, bytes: &'c [u8]) -> Result<&'c str> { + from_utf8(bytes).map_err(Error::Utf8) + } + + /// Decodes a UTF8 slice without BOM (Byte order mark) regardless of XML declaration. + /// + /// Strips a leading UTF-8 BOM from `bytes` if present; bytes that are not valid UTF-8 are + /// reported as `Error::Utf8`. + /// + /// # Note + /// + /// If you instead want to use XML declared encoding, use the `encoding` feature + #[inline] + #[cfg(not(feature = "encoding"))] + fn decode_without_bom<'c>(&self, bytes: &'c [u8]) -> Result<&'c str> { + if bytes.starts_with(b"\xEF\xBB\xBF") { + from_utf8(&bytes[3..]).map_err(Error::Utf8) + } else { + from_utf8(bytes).map_err(Error::Utf8) + } + } + + /// Decodes a slice without BOM (Byte order mark) using the encoding specified in the XML declaration. + /// + /// Decode `bytes` without BOM and with malformed sequences replaced with the + /// `U+FFFD REPLACEMENT CHARACTER`. + /// + /// If no encoding is specified, defaults to UTF-8. + #[inline] + #[cfg(feature = "encoding")] + fn decode_without_bom<'b, 'c>(&'b mut self, mut bytes: &'c [u8]) -> Cow<'c, str> { + if self.is_encoding_set { + return self.encoding.decode_with_bom_removal(bytes).0; + } + if bytes.starts_with(b"\xEF\xBB\xBF") { + self.is_encoding_set = true; + bytes = &bytes[3..]; + } else if bytes.starts_with(b"\xFF\xFE") { + self.is_encoding_set = true; + self.encoding = UTF_16LE; + bytes = &bytes[2..]; + } else if bytes.starts_with(b"\xFE\xFF") { + self.is_encoding_set = true; + self.encoding = UTF_16BE; + bytes = &bytes[2..]; // the UTF-16BE BOM (FE FF) is two bytes long, not three + }; + self.encoding.decode_without_bom_handling(bytes).0 + } +} + +/// A low level encoding-agnostic XML event reader. +/// +/// Consumes an `AsyncBufRead` and streams XML `Event`s.
+/// +/// # Examples +/// +/// ``` +/// use quick_xml::Reader; +/// use quick_xml::events::Event; +/// +/// #[tokio::main] +/// async fn main() { +/// let xml = r#" +/// Test +/// Test 2 +/// "#; +/// let mut reader = Reader::from_str(xml); +/// reader.trim_text(true); +/// let mut count = 0; +/// let mut txt = Vec::new(); +/// let mut buf = Vec::new(); +/// loop { +/// match reader.read_event(&mut buf).await { +/// Ok(Event::Start(ref e)) => { +/// match e.name() { +/// b"tag1" => println!("attributes values: {:?}", +/// e.attributes().map(|a| a.unwrap().value) +/// .collect::>()), +/// b"tag2" => count += 1, +/// _ => (), +/// } +/// }, +/// Ok(Event::Text(e)) => txt.push(e.unescape_and_decode(&reader).unwrap()), +/// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e), +/// Ok(Event::Eof) => break, +/// _ => (), +/// } +/// buf.clear(); +/// } +/// } +/// ``` +pub struct Reader { + /// reader + reader: B, + /// current buffer position, useful for debugging errors + buf_position: usize, + /// current state Open/Close + tag_state: TagState, + /// expand empty element into an opening and closing element + expand_empty_elements: bool, + /// trims Text events, skip the element if text is empty + trim_text: bool, + /// trims trailing whitespaces from markup names in closing tags `</a >` + trim_markup_names_in_closing_tags: bool, + /// check if End nodes match last Start node + check_end_names: bool, + /// check if comments contain `--` (false per default) + check_comments: bool, + /// all currently Started elements which didn't have a matching + /// End element yet + opened_buffer: Vec, + /// opened name start indexes + opened_starts: Vec, + /// a buffer to manage namespaces + ns_buffer: NamespaceBufferIndex, + #[cfg(feature = "encoding")] + /// the encoding specified in the xml, defaults to utf8 + encoding: &'static Encoding, + #[cfg(feature = "encoding")] + /// check if quick-xml could find out the encoding + is_encoding_set: bool, +} + +impl
Reader { + /// Creates a `Reader` that reads from a reader implementing `AsyncBufRead`. + pub fn from_reader(reader: B) -> Reader { + Reader { + reader, + opened_buffer: Vec::new(), + opened_starts: Vec::new(), + tag_state: TagState::Closed, + expand_empty_elements: false, + trim_text: false, + trim_markup_names_in_closing_tags: true, + check_end_names: true, + buf_position: 0, + check_comments: false, + ns_buffer: NamespaceBufferIndex::default(), + #[cfg(feature = "encoding")] + encoding: ::encoding_rs::UTF_8, + #[cfg(feature = "encoding")] + is_encoding_set: false, + } + } + + /// Changes whether empty elements should be split into a `Start` and an `End` event. + /// + /// When set to `true`, all [`Empty`] events produced by a self-closing tag like `<tag/>` are + /// expanded into a [`Start`] event followed by an [`End`] event. When set to `false` (the + /// default), those tags are represented by an [`Empty`] event instead. + /// + /// (`false` by default) + /// + /// [`Empty`]: events/enum.Event.html#variant.Empty + /// [`Start`]: events/enum.Event.html#variant.Start + /// [`End`]: events/enum.Event.html#variant.End + pub fn expand_empty_elements(&mut self, val: bool) -> &mut Reader { + self.expand_empty_elements = val; + self + } + + /// Changes whether whitespace before and after character data should be removed. + /// + /// When set to `true`, all [`Text`] events are trimmed. If they are empty, no event will be + /// pushed. + /// + /// (`false` by default) + /// + /// [`Text`]: events/enum.Event.html#variant.Text + pub fn trim_text(&mut self, val: bool) -> &mut Reader { + self.trim_text = val; + self + } + + /// Changes whether trailing whitespaces after the markup name are trimmed in closing tags + /// `</a >`. + /// + /// If true the emitted [`End`] event is stripped of trailing whitespace after the markup name.
+ /// + /// Note that if set to `false` and `check_end_names` is true the comparison of markup names is + /// going to fail erroneously if a closing tag contains trailing whitespaces. + /// + /// (`true` by default) + /// + /// [`End`]: events/enum.Event.html#variant.End + pub fn trim_markup_names_in_closing_tags(&mut self, val: bool) -> &mut Reader { + self.trim_markup_names_in_closing_tags = val; + self + } + + /// Changes whether mismatched closing tag names should be detected. + /// + /// When set to `false`, it won't check if a closing tag matches the corresponding opening tag. + /// For example, `<a></b>` will be permitted. + /// + /// If the XML is known to be sane (already processed, etc.) this saves extra time. + /// + /// Note that the emitted [`End`] event will not be modified if this is disabled, i.e. it will + /// contain the data of the mismatched end tag. + /// + /// (`true` by default) + /// + /// [`End`]: events/enum.Event.html#variant.End + pub fn check_end_names(&mut self, val: bool) -> &mut Reader { + self.check_end_names = val; + self + } + + /// Changes whether comments should be validated. + /// + /// When set to `true`, every [`Comment`] event will be checked for not containing `--`, which + /// is not allowed in XML comments. Most of the time we don't want comments at all so we don't + /// really care about comment correctness, thus the default value is `false` to improve + /// performance. + /// + /// (`false` by default) + /// + /// [`Comment`]: events/enum.Event.html#variant.Comment + pub fn check_comments(&mut self, val: bool) -> &mut Reader { + self.check_comments = val; + self + } + + /// Gets the current byte position in the input data. + /// + /// Useful when debugging errors.
+ pub fn buffer_position(&self) -> usize { + // when internal state is Opened, we have actually read until '<', + // which we don't want to show + if let TagState::Opened = self.tag_state { + self.buf_position - 1 + } else { + self.buf_position + } + } + + /// private function to read until '<' is found + /// return a `Text` event + #[async_recursion] + async fn read_until_open<'a, 'b>(&'a mut self, buf: &'b mut Vec) -> Result> { + self.tag_state = TagState::Opened; + let buf_start = buf.len(); + + match read_until(&mut self.reader, b'<', buf, &mut self.buf_position).await { + Ok(0) => Ok(Event::Eof), + Ok(_) => { + let (start, len) = if self.trim_text { + match buf.iter().skip(buf_start).position(|&b| !is_whitespace(b)) { + Some(start) => ( + buf_start + start, + buf.iter() + .rposition(|&b| !is_whitespace(b)) + .map_or_else(|| buf.len(), |p| p + 1), + ), + None => return self.read_event(buf).await, + } + } else { + (buf_start, buf.len()) + }; + Ok(Event::Text(BytesText::from_escaped(&buf[start..len]))) + } + Err(e) => Err(e), + } + } + + /// private function to read until '>' is found + async fn read_until_close<'a, 'b>(&'a mut self, buf: &'b mut Vec) -> Result> { + self.tag_state = TagState::Closed; + + // need to read 1 character to decide whether pay special attention to attribute values + let buf_start = buf.len(); + + let start = match read_one_dont_consume(&mut self.reader).await { + Ok(n) if n.is_none() => return Ok(Event::Eof), + Ok(n) => n.unwrap(), + Err(e) => return Err(Error::Io(e)), + }; + + if start != b'/' && start != b'!' && start != b'?' 
{ + match read_elem_until(&mut self.reader, b'>', buf, &mut self.buf_position).await { + Ok(0) => Ok(Event::Eof), + Ok(_) => { + // we already *know* that we are in this case + self.read_start(&buf[buf_start..]) + } + Err(e) => Err(e), + } + } else { + match read_until(&mut self.reader, b'>', buf, &mut self.buf_position).await { + Ok(0) => Ok(Event::Eof), + Ok(_) => match start { + b'/' => self.read_end(&buf[buf_start..]), + b'!' => self.read_bang(buf_start, buf).await, + b'?' => self.read_question_mark(&buf[buf_start..]), + _ => unreachable!( + "We checked that `start` must be one of [/!?], was {:?} \ + instead.", + start + ), + }, + Err(e) => Err(e), + } + } + } + + /// reads `BytesElement` starting with a `/`, + /// if `self.check_end_names`, checks that element matches last opened element + /// return `End` event + fn read_end<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result> { + // XML standard permits whitespaces after the markup name in closing tags. + // Let's strip them from the buffer before comparing tag names. + let name = if self.trim_markup_names_in_closing_tags { + if let Some(pos_end_name) = buf[1..].iter().rposition(|&b| !b.is_ascii_whitespace()) { + let (name, _) = buf[1..].split_at(pos_end_name + 1); + name + } else { + &buf[1..] + } + } else { + &buf[1..] + }; + if self.check_end_names { + let mismatch_err = |expected: &[u8], found: &[u8], buf_position: &mut usize| { + *buf_position -= buf.len(); + Err(Error::EndEventMismatch { + expected: from_utf8(expected).unwrap_or("").to_owned(), + found: from_utf8(found).unwrap_or("").to_owned(), + }) + }; + match self.opened_starts.pop() { + Some(start) => { + if name != &self.opened_buffer[start..] 
{ + let expected = &self.opened_buffer[start..]; + mismatch_err(expected, name, &mut self.buf_position) + } else { + self.opened_buffer.truncate(start); + Ok(Event::End(BytesEnd::borrowed(name))) + } + } + None => mismatch_err(b"", &buf[1..], &mut self.buf_position), + } + } else { + Ok(Event::End(BytesEnd::borrowed(name))) + } + } + + /// reads `BytesElement` starting with a `!`, + /// return `Comment`, `CData` or `DocType` event + /// + /// Note: depending on the start of the Event, we may need to read more + /// data, thus we need a mutable buffer + async fn read_bang<'a, 'b>( + &'a mut self, + buf_start: usize, + buf: &'b mut Vec, + ) -> Result> { + if buf[buf_start..].starts_with(b"!--") { + while buf.len() < buf_start + 5 || !buf.ends_with(b"--") { + buf.push(b'>'); + match read_until(&mut self.reader, b'>', buf, &mut self.buf_position).await { + Ok(0) => { + // In sync sometimes the last char is included and sometimes it isn't + self.buf_position -= 1; + self.buf_position -= buf.len() - buf_start; + return Err(Error::UnexpectedEof("Comment".to_string())); + } + Ok(_) => (), + Err(e) => return Err(e), + } + } + let len = buf.len(); + if self.check_comments { + // search if '--' not in comments + if let Some(p) = memchr::memchr_iter(b'-', &buf[buf_start + 3..len - 2]) + .position(|p| buf[buf_start + 3 + p + 1] == b'-') + { + self.buf_position -= buf.len() - buf_start + p; + return Err(Error::UnexpectedToken("--".to_string())); + } + } + Ok(Event::Comment(BytesText::from_escaped( + &buf[buf_start + 3..len - 2], + ))) + } else if buf.len() >= buf_start + 8 { + match &buf[buf_start + 1..buf_start + 8] { + b"[CDATA[" => { + while buf.len() < 10 || !buf.ends_with(b"]]") { + buf.push(b'>'); + match read_until(&mut self.reader, b'>', buf, &mut self.buf_position).await + { + Ok(0) => { + self.buf_position -= buf.len() - buf_start; + return Err(Error::UnexpectedEof("CData".to_string())); + } + Ok(_) => (), + Err(e) => return Err(e), + } + } + 
Ok(Event::CData(BytesText::from_escaped( + &buf[buf_start + 8..buf.len() - 2], + ))) + } + b"DOCTYPE" => { + let mut count = buf.iter().skip(buf_start).filter(|&&b| b == b'<').count(); + while count > 0 { + buf.push(b'>'); + match read_until(&mut self.reader, b'>', buf, &mut self.buf_position).await + { + Ok(0) => { + self.buf_position -= buf.len() - buf_start; + return Err(Error::UnexpectedEof("DOCTYPE".to_string())); + } + Ok(n) => { + let start = buf.len() - n; + count += buf.iter().skip(start).filter(|&&b| b == b'<').count(); + count -= 1; + } + Err(e) => return Err(e), + } + } + Ok(Event::DocType(BytesText::from_escaped( + &buf[buf_start + 8..buf.len()], + ))) + } + _ => return Err(Error::UnexpectedBang), + } + } else { + self.buf_position -= buf.len() - buf_start; + return Err(Error::UnexpectedBang); + } + } + + /// reads `BytesElement` starting with a `?`, + /// return `Decl` or `PI` event + #[cfg(feature = "encoding")] + fn read_question_mark<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result> { + let len = buf.len(); + if len > 2 && buf[len - 1] == b'?' { + if len > 5 && &buf[1..4] == b"xml" && is_whitespace(buf[4]) { + let event = BytesDecl::from_start(BytesStart::borrowed(&buf[1..len - 1], 3)); + // Try getting encoding from the declaration event + if let Some(enc) = event.encoder() { + self.encoding = enc; + self.is_encoding_set = true; + } + Ok(Event::Decl(event)) + } else { + Ok(Event::PI(BytesText::from_escaped(&buf[1..len - 1]))) + } + } else { + self.buf_position -= len; + Err(Error::UnexpectedEof("XmlDecl".to_string())) + } + } + + /// reads `BytesElement` starting with a `?`, + /// return `Decl` or `PI` event + #[cfg(not(feature = "encoding"))] + fn read_question_mark<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result> { + let len = buf.len(); + if len > 2 && buf[len - 1] == b'?' 
{ + if len > 5 && &buf[1..4] == b"xml" && is_whitespace(buf[4]) { + let event = BytesDecl::from_start(BytesStart::borrowed(&buf[1..len - 1], 3)); + Ok(Event::Decl(event)) + } else { + Ok(Event::PI(BytesText::from_escaped(&buf[1..len - 1]))) + } + } else { + self.buf_position -= len; + Err(Error::UnexpectedEof("XmlDecl".to_string())) + } + } + + #[inline] + fn close_expanded_empty(&mut self) -> Result> { + self.tag_state = TagState::Closed; + let name = self + .opened_buffer + .split_off(self.opened_starts.pop().unwrap()); + Ok(Event::End(BytesEnd::owned(name))) + } + + /// reads `BytesElement` starting with any character except `/`, `!` or ``?` + /// return `Start` or `Empty` event + fn read_start<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result> { + // TODO: do this directly when reading bufreader ... + let len = buf.len(); + let name_end = buf.iter().position(|&b| is_whitespace(b)).unwrap_or(len); + + if let Some(&b'/') = buf.last() { + let end = if name_end < len { name_end } else { len - 1 }; + if self.expand_empty_elements { + self.tag_state = TagState::Empty; + self.opened_starts.push(self.opened_buffer.len()); + self.opened_buffer.extend(&buf[..end]); + Ok(Event::Start(BytesStart::borrowed(&buf[..len - 1], end))) + } else { + Ok(Event::Empty(BytesStart::borrowed(&buf[..len - 1], end))) + } + } else { + if self.check_end_names { + self.opened_starts.push(self.opened_buffer.len()); + self.opened_buffer.extend(&buf[..name_end]); + } + Ok(Event::Start(BytesStart::borrowed(buf, name_end))) + } + } + + /// Reads the next `Event`. + /// + /// This is the main entry point for reading XML `Event`s. + /// + /// `Event`s borrow `buf` and can be converted to own their data if needed (uses `Cow` + /// internally). + /// + /// Having the possibility to control the internal buffers gives you some additional benefits + /// such as: + /// + /// - Reduce the number of allocations by reusing the same buffer. 
For constrained systems, + /// you can call `buf.clear()` once you are done with processing the event (typically at the + /// end of your loop). + /// - Reserve the buffer length if you know the file size (using `Vec::with_capacity`). + /// + /// # Examples + /// + /// ``` + /// use quick_xml::Reader; + /// use quick_xml::events::Event; + /// + /// #[tokio::main] + /// async fn main() { + /// let xml = r#" + /// Test + /// Test 2 + /// "#; + /// let mut reader = Reader::from_str(xml); + /// reader.trim_text(true); + /// let mut count = 0; + /// let mut buf = Vec::new(); + /// let mut txt = Vec::new(); + /// loop { + /// match reader.read_event(&mut buf).await { + /// Ok(Event::Start(ref e)) => count += 1, + /// Ok(Event::Text(e)) => txt.push(e.unescape_and_decode(&reader).expect("Error!")), + /// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e), + /// Ok(Event::Eof) => break, + /// _ => (), + /// } + /// buf.clear(); + /// } + /// println!("Found {} start events", count); + /// println!("Text events: {:?}", txt); + /// } + /// ``` + #[async_recursion] + pub async fn read_event<'a, 'b>(&'a mut self, buf: &'b mut Vec) -> Result> { + let event = match self.tag_state { + TagState::Opened => self.read_until_close(buf).await, + TagState::Closed => self.read_until_open(buf).await, + TagState::Empty => self.close_expanded_empty(), + TagState::Exit => return Ok(Event::Eof), + }; + match event { + Err(_) | Ok(Event::Eof) => self.tag_state = TagState::Exit, + _ => {} + } + event + } + + /// Resolves a potentially qualified **event name** into (namespace name, local name). + /// + /// *Qualified* attribute names have the form `prefix:local-name` where the`prefix` is defined + /// on any containing XML element via `xmlns:prefix="the:namespace:uri"`. The namespace prefix + /// can be defined on the same element as the attribute in question. + /// + /// *Unqualified* event inherits the current *default namespace*. 
+ #[inline] + pub fn event_namespace<'a, 'b, 'c>( + &'a self, + qname: &'b [u8], + namespace_buffer: &'c [u8], + ) -> (Option<&'c [u8]>, &'b [u8]) { + self.ns_buffer + .resolve_namespace(qname, namespace_buffer, true) + } + + /// Resolves a potentially qualified **attribute name** into (namespace name, local name). + /// + /// *Qualified* attribute names have the form `prefix:local-name` where the`prefix` is defined + /// on any containing XML element via `xmlns:prefix="the:namespace:uri"`. The namespace prefix + /// can be defined on the same element as the attribute in question. + /// + /// *Unqualified* attribute names do *not* inherit the current *default namespace*. + #[inline] + pub fn attribute_namespace<'a, 'b, 'c>( + &'a self, + qname: &'b [u8], + namespace_buffer: &'c [u8], + ) -> (Option<&'c [u8]>, &'b [u8]) { + self.ns_buffer + .resolve_namespace(qname, namespace_buffer, false) + } + + /// Reads the next event and resolves its namespace (if applicable). + /// + /// # Examples + /// + /// ``` + /// use std::str::from_utf8; + /// use quick_xml::Reader; + /// use quick_xml::events::Event; + /// + /// #[tokio::main] + /// async fn main() { + /// let xml = r#" + /// Test + /// Test 2 + /// "#; + /// let mut reader = Reader::from_str(xml); + /// reader.trim_text(true); + /// let mut count = 0; + /// let mut buf = Vec::new(); + /// let mut ns_buf = Vec::new(); + /// let mut txt = Vec::new(); + /// loop { + /// match reader.read_namespaced_event(&mut buf, &mut ns_buf).await { + /// Ok((ref ns, Event::Start(ref e))) => { + /// count += 1; + /// match (*ns, e.local_name()) { + /// (Some(b"www.xxxx"), b"tag1") => (), + /// (Some(b"www.yyyy"), b"tag2") => (), + /// (ns, n) => panic!("Namespace and local name mismatch"), + /// } + /// println!("Resolved namespace: {:?}", ns.and_then(|ns| from_utf8(ns).ok())); + /// } + /// Ok((_, Event::Text(e))) => { + /// txt.push(e.unescape_and_decode(&reader).expect("Error!")) + /// }, + /// Err(e) => panic!("Error at position 
{}: {:?}", reader.buffer_position(), e), + /// Ok((_, Event::Eof)) => break, + /// _ => (), + /// } + /// buf.clear(); + /// } + /// println!("Found {} start events", count); + /// println!("Text events: {:?}", txt); + /// } + /// ``` + pub async fn read_namespaced_event<'a, 'b, 'c>( + &'a mut self, + buf: &'b mut Vec, + namespace_buffer: &'c mut Vec, + ) -> Result<(Option<&'c [u8]>, Event<'b>)> { + self.ns_buffer.pop_empty_namespaces(namespace_buffer); + match self.read_event(buf).await { + Ok(Event::Eof) => Ok((None, Event::Eof)), + Ok(Event::Start(e)) => { + self.ns_buffer.push_new_namespaces(&e, namespace_buffer); + Ok(( + self.ns_buffer + .find_namespace_value(e.name(), &**namespace_buffer), + Event::Start(e), + )) + } + Ok(Event::Empty(e)) => { + // For empty elements we need to 'artificially' keep the namespace scope on the + // stack until the next `next()` call occurs. + // Otherwise the caller has no chance to use `resolve` in the context of the + // namespace declarations that are 'in scope' for the empty element alone. + // Ex: + self.ns_buffer.push_new_namespaces(&e, namespace_buffer); + // notify next `read_namespaced_event()` invocation that it needs to pop this + // namespace scope + self.ns_buffer.pending_pop = true; + Ok(( + self.ns_buffer + .find_namespace_value(e.name(), &**namespace_buffer), + Event::Empty(e), + )) + } + Ok(Event::End(e)) => { + // notify next `read_namespaced_event()` invocation that it needs to pop this + // namespace scope + self.ns_buffer.pending_pop = true; + Ok(( + self.ns_buffer + .find_namespace_value(e.name(), &**namespace_buffer), + Event::End(e), + )) + } + Ok(e) => Ok((None, e)), + Err(e) => Err(e), + } + } + + /// Returns the `Reader`s encoding. + /// + /// The used encoding may change after parsing the XML declaration. + /// + /// This encoding will be used by [`decode`]. 
+ /// + /// [`decode`]: #method.decode + #[cfg(feature = "encoding")] + pub fn encoding(&self) -> &'static Encoding { + self.encoding + } + + /// Get utf8 decoder + #[cfg(feature = "encoding")] + pub fn decoder(&self) -> Decoder { + Decoder { + encoding: self.encoding, + } + } + + /// Get utf8 decoder + #[cfg(not(feature = "encoding"))] + pub fn decoder(&self) -> Decoder { + Decoder + } + + /// Reads until end element is found + /// + /// Manages nested cases where parent and child elements have the same name + pub async fn read_to_end>(&mut self, end: K, buf: &mut Vec) -> Result<()> { + let mut depth = 0; + let end = end.as_ref(); + loop { + match self.read_event(buf).await { + Ok(Event::End(ref e)) if e.name() == end => { + if depth == 0 { + return Ok(()); + } + depth -= 1; + } + Ok(Event::Start(ref e)) if e.name() == end => depth += 1, + Err(e) => return Err(e), + Ok(Event::Eof) => { + return Err(Error::UnexpectedEof(format!("", from_utf8(end)))); + } + _ => (), + } + buf.clear(); + } + } + + /// Reads optional text between start and end tags. + /// + /// If the next event is a [`Text`] event, returns the decoded and unescaped content as a + /// `String`. If the next event is an [`End`] event, returns the empty string. In all other + /// cases, returns an error. + /// + /// Any text will be decoded using the XML encoding specified in the XML declaration (or UTF-8 + /// if none is specified). 
+ /// + /// # Examples + /// + /// ``` + /// use quick_xml::Reader; + /// use quick_xml::events::Event; + /// + /// #[tokio::main] + /// async fn main() { + /// let mut xml = Reader::from_reader(b" + /// <b> + /// + /// " as &[u8]); + /// xml.trim_text(true); + /// + /// let expected = ["", ""]; + /// for &content in expected.iter() { + /// match xml.read_event(&mut Vec::new()).await { + /// Ok(Event::Start(ref e)) => { + /// assert_eq!(&xml.read_text(e.name(), &mut Vec::new()).await.unwrap(), content); + /// }, + /// e => panic!("Expecting Start event, found {:?}", e), + /// } + /// } + /// } + /// ``` + /// + /// [`Text`]: events/enum.Event.html#variant.Text + /// [`End`]: events/enum.Event.html#variant.End + pub async fn read_text>(&mut self, end: K, buf: &mut Vec) -> Result { + let s = match self.read_event(buf).await { + Ok(Event::Text(e)) => e.unescape_and_decode(self), + Ok(Event::End(ref e)) if e.name() == end.as_ref() => return Ok("".to_string()), + Err(e) => return Err(e), + Ok(Event::Eof) => return Err(Error::UnexpectedEof("Text".to_string())), + _ => return Err(Error::TextNotFound), + }; + self.read_to_end(end, buf).await?; + s + } + + /// Consumes `Reader` returning the underlying reader + /// + /// Can be used to compute line and column of a parsing error position + /// + /// # Examples + /// + /// ```ignore + /// use std::{str, io::Cursor}; + /// use quick_xml::Reader; + /// use quick_xml::events::Event; + /// + /// fn into_line_and_column(reader: Reader>) -> (usize, usize) { + /// let end_pos = reader.buffer_position(); + /// let mut cursor = reader.into_underlying_reader(); + /// let s = String::from_utf8(cursor.into_inner()[0..end_pos].to_owned()) + /// .expect("can't make a string"); + /// let mut line = 1; + /// let mut column = 0; + /// for c in s.chars() { + /// if c == '\n' { + /// line += 1; + /// column = 0; + /// } else { + /// column += 1; + /// } + /// } + /// (line, column) + /// } + /// + /// #[tokio::main] + /// async fn main() { + 
/// let xml = r#" + /// Test + /// Test 2 + /// "#; + /// let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes())); + /// let mut buf = Vec::new(); + /// + /// loop { + /// match reader.read_event(&mut buf).await { + /// Ok(Event::Start(ref e)) => match e.name() { + /// b"tag1" | b"tag2" => (), + /// tag => { + /// assert_eq!(b"tag3", tag); + /// assert_eq!((3, 22), into_line_and_column(reader)); + /// break; + /// } + /// }, + /// Ok(Event::Eof) => unreachable!(), + /// _ => (), + /// } + /// buf.clear(); + /// } + /// } + /// ``` + pub fn into_underlying_reader(self) -> B { + self.reader + } +} + +impl Reader> { + /// Creates an XML reader from a file path. + pub async fn from_file>(path: P) -> Result>> { + let file = File::open(path).await.map_err(Error::Io)?; + let reader = BufReader::new(file); + Ok(Reader::from_reader(reader)) + } +} + +impl<'a> Reader<&'a [u8]> { + /// Creates an XML reader from a string slice. + pub fn from_str(s: &'a str) -> Reader<&'a [u8]> { + Reader::from_reader(s.as_bytes()) + } +} + +/// Container for a future that reads one byte from a reader +/// but does not consume the byte, so it can be read again. 
+#[derive(Debug)] +#[must_use = "futures do nothing unless you `.await` or poll them"] +pub struct ReadOneDontConsume<'a, R: ?Sized> { + reader: &'a mut R, +} + +fn read_one_dont_consume<'a, R>(reader: &'a mut R) -> ReadOneDontConsume<'a, R> +where + R: AsyncBufRead + ?Sized + Unpin, +{ + ReadOneDontConsume { reader } +} + +fn read_one_dont_consume_internal( + mut reader: Pin<&mut R>, + cx: &mut Context<'_>, +) -> Poll>> { + match reader.as_mut().poll_fill_buf(cx) { + Poll::Ready(t) => Poll::Ready(t.map(|s| if s.is_empty() { None } else { Some(s[0]) })), + Poll::Pending => Poll::Pending, + } +} + +impl Future for ReadOneDontConsume<'_, R> { + type Output = io::Result>; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let Self { reader } = &mut *self; + read_one_dont_consume_internal(Pin::new(reader), cx) + } +} + +/// read until `byte` is found or end of file +/// return the position of byte +#[inline] +async fn read_until( + r: &mut R, + byte: u8, + buf: &mut Vec, + buf_position: &mut usize, +) -> Result { + let result = r.read_until(byte, buf).await; + + if let Ok(size) = result { + if buf.len() > 0 && buf[buf.len() - 1] == byte { + buf.remove(buf.len() - 1); + } + *buf_position += size; + } + + result.map_err(Error::Io) + + // let mut read = 0; + // let mut done = false; + // while !done { + // let used = { + // let available = match r.fill_buf() { + // Ok(n) if n.is_empty() => break, + // Ok(n) => n, + // Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, + // Err(e) => { + // *position += read; + // return Err(Error::Io(e)); + // } + // }; + + // match memchr::memchr(byte, available) { + // Some(i) => { + // buf.extend_from_slice(&available[..i]); + // done = true; + // i + 1 + // } + // None => { + // buf.extend_from_slice(available); + // available.len() + // } + // } + // }; + // r.consume(used); + // read += used; + // } + // *position += read; + // Ok(read) +} + +/// Derived from `read_until`, but modified to handle 
XML attributes using a minimal state machine. +/// [W3C Extensible Markup Language (XML) 1.1 (2006)](https://www.w3.org/TR/xml11) +/// +/// Attribute values are defined as follows: +/// ```plain +/// AttValue := '"' (([^<&"]) | Reference)* '"' +/// | "'" (([^<&']) | Reference)* "'" +/// ``` +/// (`Reference` is something like `"`, but we don't care about escaped characters at this +/// level) +#[inline] +async fn read_elem_until( + r: &mut R, + end_byte: u8, + buf: &mut Vec, + position: &mut usize, +) -> Result { + #[derive(Clone, Copy)] + enum State { + /// The initial state (inside element, but outside of attribute value) + Elem, + /// Inside a single-quoted attribute value + SingleQ, + /// Inside a double-quoted attribute value + DoubleQ, + } + let mut state = State::Elem; + let mut read = 0; + let mut done = false; + while !done { + let used = { + let available = match r.read_until(end_byte, buf).await { + Ok(n) if n == 0 => { + buf.remove(buf.len() - 1); + return Ok(read); + } + Ok(n) => { + let len = buf.len(); + &buf[len - n..len] + } + Err(ref e) if e.kind() == tokio::io::ErrorKind::Interrupted => continue, + Err(e) => { + *position += read; + return Err(Error::Io(e)); + } + }; + + let mut memiter = memchr::memchr3_iter(end_byte, b'\'', b'"', &available); + let used: usize; + loop { + match memiter.next() { + Some(i) => { + state = match (state, available[i]) { + (State::Elem, b) if b == end_byte => { + // only allowed to match `end_byte` while we are in state `Elem` + done = true; + used = i + 1; + break; + } + (State::Elem, b'\'') => State::SingleQ, + (State::Elem, b'\"') => State::DoubleQ, + + // the only end_byte that gets us out if the same character + (State::SingleQ, b'\'') | (State::DoubleQ, b'\"') => State::Elem, + + // all other bytes: no state change + _ => state, + }; + } + None => { + used = available.len(); + break; + } + } + } + + used + }; + read += used; + } + + buf.remove(buf.len() - 1); + + *position += read; + Ok(read) +} diff --git 
a/src/reader/mod.rs b/src/reader/mod.rs new file mode 100644 index 00000000..f2ec73ef --- /dev/null +++ b/src/reader/mod.rs @@ -0,0 +1,233 @@ +//! A module to handle `Reader` + +use crate::errors::{Error, Result}; +use crate::events::{attributes::Attribute, BytesStart}; +use std::str::from_utf8; + +#[cfg(feature = "asynchronous")] +pub mod asynchronous; +#[cfg(not(feature = "asynchronous"))] +pub mod sync; + +/// Trait for decoding, which is shared by the sync and async `Reader` +pub trait Decode { + /// Decodes a UTF8 slice regardless of XML declaration. + fn decode<'c>(&self, bytes: &'c [u8]) -> Result<&'c str>; + /// Decodes a UTF8 slice without BOM (Byte order mark) regardless of XML declaration. + fn decode_without_bom<'c>(&self, bytes: &'c [u8]) -> Result<&'c str>; +} + +#[derive(Debug)] +enum TagState { + Opened, + Closed, + Empty, + /// Either Eof or Errored + Exit, +} + +/// A function to check whether the byte is a whitespace (blank, new line, carriage return or tab) +#[inline] +pub(crate) fn is_whitespace(b: u8) -> bool { + match b { + b' ' | b'\r' | b'\n' | b'\t' => true, + _ => false, + } +} + +/// A namespace declaration. Can either bind a namespace to a prefix or define the current default +/// namespace. +#[derive(Debug)] +struct Namespace { + /// Index of the namespace in the buffer + start: usize, + /// Length of the prefix + /// * if bigger than start, then binds this namespace to the corresponding slice. + /// * else defines the current default namespace. + prefix_len: usize, + /// The namespace name (the URI) of this namespace declaration. + /// + /// The XML standard specifies that an empty namespace value 'removes' a namespace declaration + /// for the extent of its scope. For prefix declarations that's not very interesting, but it is + /// vital for default namespace declarations. With `xmlns=""` you can revert back to the default + /// behaviour of leaving unqualified element names unqualified. 
+ value_len: usize, + /// Level of nesting at which this namespace was declared. The declaring element is included, + /// i.e., a declaration on the document root has `level = 1`. + /// This is used to pop the namespace when the element gets closed. + level: i32, +} + +impl Namespace { + /// Gets the value slice out of namespace buffer + /// + /// Returns `None` if `value_len == 0` + #[inline] + fn opt_value<'a, 'b>(&'a self, ns_buffer: &'b [u8]) -> Option<&'b [u8]> { + if self.value_len == 0 { + None + } else { + let start = self.start + self.prefix_len; + Some(&ns_buffer[start..start + self.value_len]) + } + } + + /// Check if the namespace matches the potentially qualified name + #[inline] + fn is_match(&self, ns_buffer: &[u8], qname: &[u8]) -> bool { + if self.prefix_len == 0 { + !qname.contains(&b':') + } else { + qname.get(self.prefix_len).map_or(false, |n| *n == b':') + && qname.starts_with(&ns_buffer[self.start..self.start + self.prefix_len]) + } + } +} + +/// A namespace management buffer. +/// +/// Holds all internal logic to push/pop namespaces with their levels. +#[derive(Debug, Default)] +struct NamespaceBufferIndex { + /// a buffer of namespace ranges + slices: Vec, + /// The number of open tags at the moment. We need to keep track of this to know which namespace + /// declarations to remove when we encounter an `End` event. + nesting_level: i32, + /// For `Empty` events keep the 'scope' of the element on the stack artificially. That way, the + /// consumer has a chance to use `resolve` in the context of the empty element. We perform the + /// pop as the first operation in the next `next()` call. 
+ pending_pop: bool, +} + +impl NamespaceBufferIndex { + #[inline] + fn find_namespace_value<'a, 'b, 'c>( + &'a self, + element_name: &'b [u8], + buffer: &'c [u8], + ) -> Option<&'c [u8]> { + self.slices + .iter() + .rfind(|n| n.is_match(buffer, element_name)) + .and_then(|n| n.opt_value(buffer)) + } + + fn pop_empty_namespaces(&mut self, buffer: &mut Vec) { + if !self.pending_pop { + return; + } + self.pending_pop = false; + self.nesting_level -= 1; + let current_level = self.nesting_level; + // from the back (most deeply nested scope), look for the first scope that is still valid + match self.slices.iter().rposition(|n| n.level <= current_level) { + // none of the namespaces are valid, remove all of them + None => { + buffer.clear(); + self.slices.clear(); + } + // drop all namespaces past the last valid namespace + Some(last_valid_pos) => { + if let Some(len) = self.slices.get(last_valid_pos + 1).map(|n| n.start) { + buffer.truncate(len); + self.slices.truncate(last_valid_pos + 1); + } + } + } + } + + fn push_new_namespaces(&mut self, e: &BytesStart, buffer: &mut Vec) { + self.nesting_level += 1; + let level = self.nesting_level; + // adds new namespaces for attributes starting with 'xmlns:' and for the 'xmlns' + // (default namespace) attribute. + for a in e.attributes().with_checks(false) { + if let Ok(Attribute { key: k, value: v }) = a { + if k.starts_with(b"xmlns") { + match k.get(5) { + None => { + let start = buffer.len(); + buffer.extend_from_slice(&*v); + self.slices.push(Namespace { + start: start, + prefix_len: 0, + value_len: v.len(), + level: level, + }); + } + Some(&b':') => { + let start = buffer.len(); + buffer.extend_from_slice(&k[6..]); + buffer.extend_from_slice(&*v); + self.slices.push(Namespace { + start: start, + prefix_len: k.len() - 6, + value_len: v.len(), + level: level, + }); + } + _ => break, + } + } + } else { + break; + } + } + } + + /// Resolves a potentially qualified **attribute name** into (namespace name, local name). 
+ /// + /// *Qualified* attribute names have the form `prefix:local-name` where the`prefix` is defined + /// on any containing XML element via `xmlns:prefix="the:namespace:uri"`. The namespace prefix + /// can be defined on the same element as the attribute in question. + /// + /// *Unqualified* attribute names do *not* inherit the current *default namespace*. + #[inline] + fn resolve_namespace<'a, 'b, 'c>( + &'a self, + qname: &'b [u8], + buffer: &'c [u8], + use_default: bool, + ) -> (Option<&'c [u8]>, &'b [u8]) { + self.slices + .iter() + .rfind(|n| n.is_match(buffer, qname)) + .map_or((None, qname), |n| { + let len = n.prefix_len; + if len > 0 { + (n.opt_value(buffer), &qname[len + 1..]) + } else if use_default { + (n.opt_value(buffer), qname) + } else { + (None, qname) + } + }) + } +} + +/// Utf8 Decoder +#[cfg(not(feature = "encoding"))] +#[derive(Clone, Copy)] +pub struct Decoder; + +/// Utf8 Decoder +#[cfg(feature = "encoding")] +#[derive(Clone, Copy)] +pub struct Decoder { + encoding: &'static Encoding, +} + +impl Decoder { + /// Decode a slice of u8 into a UTF8 str + #[cfg(not(feature = "encoding"))] + pub fn decode<'c>(&self, bytes: &'c [u8]) -> Result<&'c str> { + from_utf8(bytes).map_err(Error::Utf8) + } + + /// Decode a slice of u8 into a Cow str + #[cfg(feature = "encoding")] + pub fn decode<'c>(&self, bytes: &'c [u8]) -> Cow<'c, str> { + self.encoding.decode(bytes).0 + } +} diff --git a/src/reader.rs b/src/reader/sync.rs similarity index 81% rename from src/reader.rs rename to src/reader/sync.rs index 8d04a37d..1347961b 100644 --- a/src/reader.rs +++ b/src/reader/sync.rs @@ -1,4 +1,4 @@ -//! A module to handle `Reader` +//! 
A module to handle sync `Reader` #[cfg(feature = "encoding")] use std::borrow::Cow; @@ -10,17 +10,72 @@ use std::str::from_utf8; #[cfg(feature = "encoding")] use encoding_rs::{Encoding, UTF_16BE, UTF_16LE}; -use errors::{Error, Result}; -use events::{attributes::Attribute, BytesDecl, BytesEnd, BytesStart, BytesText, Event}; +use crate::errors::{Error, Result}; +use crate::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event}; use memchr; -enum TagState { - Opened, - Closed, - Empty, - /// Either Eof or Errored - Exit, +use super::{is_whitespace, Decode, Decoder, NamespaceBufferIndex, TagState}; + +impl Decode for Reader { + /// Decodes a UTF8 slice regardless of XML declaration. + /// + /// Decode `bytes` with BOM sniffing and with malformed sequences replaced with the + /// `U+FFFD REPLACEMENT CHARACTER`. + /// + /// # Note + /// + /// If you instead want to use XML declared encoding, use the `encoding` feature + #[inline] + #[cfg(not(feature = "encoding"))] + fn decode<'c>(&self, bytes: &'c [u8]) -> Result<&'c str> { + from_utf8(bytes).map_err(Error::Utf8) + } + + /// Decodes a UTF8 slice without BOM (Byte order mark) regardless of XML declaration. + /// + /// Decode `bytes` without BOM and with malformed sequences replaced with the + /// `U+FFFD REPLACEMENT CHARACTER`. + /// + /// # Note + /// + /// If you instead want to use XML declared encoding, use the `encoding` feature + #[inline] + #[cfg(not(feature = "encoding"))] + fn decode_without_bom<'c>(&self, bytes: &'c [u8]) -> Result<&'c str> { + if bytes.starts_with(b"\xEF\xBB\xBF") { + from_utf8(&bytes[3..]).map_err(Error::Utf8) + } else { + from_utf8(bytes).map_err(Error::Utf8) + } + } + + /// Decodes a slice using without BOM (Byte order mark) the encoding specified in the XML declaration. + /// + /// Decode `bytes` without BOM and with malformed sequences replaced with the + /// `U+FFFD REPLACEMENT CHARACTER`. + /// + /// If no encoding is specified, defaults to UTF-8. 
+ #[inline] + #[cfg(feature = "encoding")] + fn decode_without_bom<'b, 'c>(&'b mut self, mut bytes: &'c [u8]) -> Cow<'c, str> { + if self.is_encoding_set { + return self.encoding.decode_with_bom_removal(bytes).0; + } + if bytes.starts_with(b"\xEF\xBB\xBF") { + self.is_encoding_set = true; + bytes = &bytes[3..]; + } else if bytes.starts_with(b"\xFF\xFE") { + self.is_encoding_set = true; + self.encoding = UTF_16LE; + bytes = &bytes[2..]; + } else if bytes.starts_with(b"\xFE\xFF") { + self.is_encoding_set = true; + self.encoding = UTF_16BE; + bytes = &bytes[3..]; + }; + self.encoding.decode_without_bom_handling(bytes).0 + } } /// A low level encoding-agnostic XML event reader. @@ -211,6 +266,7 @@ impl Reader { fn read_until_open<'a, 'b>(&'a mut self, buf: &'b mut Vec) -> Result> { self.tag_state = TagState::Opened; let buf_start = buf.len(); + match read_until(&mut self.reader, b'<', buf, &mut self.buf_position) { Ok(0) => Ok(Event::Eof), Ok(_) => { @@ -270,7 +326,7 @@ impl Reader { b'?' => self.read_question_mark(&buf[buf_start..]), _ => unreachable!( "We checked that `start` must be one of [/!?], was {:?} \ - instead.", + instead.", start ), }, @@ -458,6 +514,7 @@ impl Reader { // TODO: do this directly when reading bufreader ... let len = buf.len(); let name_end = buf.iter().position(|&b| is_whitespace(b)).unwrap_or(len); + if let Some(&b'/') = buf.last() { let end = if name_end < len { name_end } else { len - 1 }; if self.expand_empty_elements { @@ -669,77 +726,6 @@ impl Reader { self.encoding } - /// Decodes a slice using the encoding specified in the XML declaration. - /// - /// Decode `bytes` with BOM sniffing and with malformed sequences replaced with the - /// `U+FFFD REPLACEMENT CHARACTER`. - /// - /// If no encoding is specified, defaults to UTF-8. 
- #[inline] - #[cfg(feature = "encoding")] - pub fn decode<'b, 'c>(&'b self, bytes: &'c [u8]) -> Cow<'c, str> { - self.encoding.decode(bytes).0 - } - - /// Decodes a UTF8 slice without BOM (Byte order mark) regardless of XML declaration. - /// - /// Decode `bytes` without BOM and with malformed sequences replaced with the - /// `U+FFFD REPLACEMENT CHARACTER`. - /// - /// # Note - /// - /// If you instead want to use XML declared encoding, use the `encoding` feature - #[inline] - #[cfg(not(feature = "encoding"))] - pub fn decode_without_bom<'c>(&self, bytes: &'c [u8]) -> Result<&'c str> { - if bytes.starts_with(b"\xEF\xBB\xBF") { - from_utf8(&bytes[3..]).map_err(Error::Utf8) - } else { - from_utf8(bytes).map_err(Error::Utf8) - } - } - - /// Decodes a slice using without BOM (Byte order mark) the encoding specified in the XML declaration. - /// - /// Decode `bytes` without BOM and with malformed sequences replaced with the - /// `U+FFFD REPLACEMENT CHARACTER`. - /// - /// If no encoding is specified, defaults to UTF-8. - #[inline] - #[cfg(feature = "encoding")] - pub fn decode_without_bom<'b, 'c>(&'b mut self, mut bytes: &'c [u8]) -> Cow<'c, str> { - if self.is_encoding_set { - return self.encoding.decode_with_bom_removal(bytes).0; - } - if bytes.starts_with(b"\xEF\xBB\xBF") { - self.is_encoding_set = true; - bytes = &bytes[3..]; - } else if bytes.starts_with(b"\xFF\xFE") { - self.is_encoding_set = true; - self.encoding = UTF_16LE; - bytes = &bytes[2..]; - } else if bytes.starts_with(b"\xFE\xFF") { - self.is_encoding_set = true; - self.encoding = UTF_16BE; - bytes = &bytes[3..]; - }; - self.encoding.decode_without_bom_handling(bytes).0 - } - - /// Decodes a UTF8 slice regardless of XML declaration. - /// - /// Decode `bytes` with BOM sniffing and with malformed sequences replaced with the - /// `U+FFFD REPLACEMENT CHARACTER`. 
- /// - /// # Note - /// - /// If you instead want to use XML declared encoding, use the `encoding` feature - #[inline] - #[cfg(not(feature = "encoding"))] - pub fn decode<'c>(&self, bytes: &'c [u8]) -> Result<&'c str> { - from_utf8(bytes).map_err(Error::Utf8) - } - /// Get utf8 decoder #[cfg(feature = "encoding")] pub fn decoder(&self) -> Decoder { @@ -1019,207 +1005,3 @@ fn read_elem_until( *position += read; Ok(read) } - -/// A function to check whether the byte is a whitespace (blank, new line, carriage return or tab) -#[inline] -pub(crate) fn is_whitespace(b: u8) -> bool { - match b { - b' ' | b'\r' | b'\n' | b'\t' => true, - _ => false, - } -} - -/// A namespace declaration. Can either bind a namespace to a prefix or define the current default -/// namespace. -#[derive(Debug)] -struct Namespace { - /// Index of the namespace in the buffer - start: usize, - /// Length of the prefix - /// * if bigger than start, then binds this namespace to the corresponding slice. - /// * else defines the current default namespace. - prefix_len: usize, - /// The namespace name (the URI) of this namespace declaration. - /// - /// The XML standard specifies that an empty namespace value 'removes' a namespace declaration - /// for the extent of its scope. For prefix declarations that's not very interesting, but it is - /// vital for default namespace declarations. With `xmlns=""` you can revert back to the default - /// behaviour of leaving unqualified element names unqualified. - value_len: usize, - /// Level of nesting at which this namespace was declared. The declaring element is included, - /// i.e., a declaration on the document root has `level = 1`. - /// This is used to pop the namespace when the element gets closed. 
- level: i32, -} - -impl Namespace { - /// Gets the value slice out of namespace buffer - /// - /// Returns `None` if `value_len == 0` - #[inline] - fn opt_value<'a, 'b>(&'a self, ns_buffer: &'b [u8]) -> Option<&'b [u8]> { - if self.value_len == 0 { - None - } else { - let start = self.start + self.prefix_len; - Some(&ns_buffer[start..start + self.value_len]) - } - } - - /// Check if the namespace matches the potentially qualified name - #[inline] - fn is_match(&self, ns_buffer: &[u8], qname: &[u8]) -> bool { - if self.prefix_len == 0 { - !qname.contains(&b':') - } else { - qname.get(self.prefix_len).map_or(false, |n| *n == b':') - && qname.starts_with(&ns_buffer[self.start..self.start + self.prefix_len]) - } - } -} - -/// A namespace management buffer. -/// -/// Holds all internal logic to push/pop namespaces with their levels. -#[derive(Debug, Default)] -struct NamespaceBufferIndex { - /// a buffer of namespace ranges - slices: Vec, - /// The number of open tags at the moment. We need to keep track of this to know which namespace - /// declarations to remove when we encounter an `End` event. - nesting_level: i32, - /// For `Empty` events keep the 'scope' of the element on the stack artificially. That way, the - /// consumer has a chance to use `resolve` in the context of the empty element. We perform the - /// pop as the first operation in the next `next()` call. 
- pending_pop: bool, -} - -impl NamespaceBufferIndex { - #[inline] - fn find_namespace_value<'a, 'b, 'c>( - &'a self, - element_name: &'b [u8], - buffer: &'c [u8], - ) -> Option<&'c [u8]> { - self.slices - .iter() - .rfind(|n| n.is_match(buffer, element_name)) - .and_then(|n| n.opt_value(buffer)) - } - - fn pop_empty_namespaces(&mut self, buffer: &mut Vec) { - if !self.pending_pop { - return; - } - self.pending_pop = false; - self.nesting_level -= 1; - let current_level = self.nesting_level; - // from the back (most deeply nested scope), look for the first scope that is still valid - match self.slices.iter().rposition(|n| n.level <= current_level) { - // none of the namespaces are valid, remove all of them - None => { - buffer.clear(); - self.slices.clear(); - } - // drop all namespaces past the last valid namespace - Some(last_valid_pos) => { - if let Some(len) = self.slices.get(last_valid_pos + 1).map(|n| n.start) { - buffer.truncate(len); - self.slices.truncate(last_valid_pos + 1); - } - } - } - } - - fn push_new_namespaces(&mut self, e: &BytesStart, buffer: &mut Vec) { - self.nesting_level += 1; - let level = self.nesting_level; - // adds new namespaces for attributes starting with 'xmlns:' and for the 'xmlns' - // (default namespace) attribute. - for a in e.attributes().with_checks(false) { - if let Ok(Attribute { key: k, value: v }) = a { - if k.starts_with(b"xmlns") { - match k.get(5) { - None => { - let start = buffer.len(); - buffer.extend_from_slice(&*v); - self.slices.push(Namespace { - start, - prefix_len: 0, - value_len: v.len(), - level, - }); - } - Some(&b':') => { - let start = buffer.len(); - buffer.extend_from_slice(&k[6..]); - buffer.extend_from_slice(&*v); - self.slices.push(Namespace { - start, - prefix_len: k.len() - 6, - value_len: v.len(), - level, - }); - } - _ => break, - } - } - } else { - break; - } - } - } - - /// Resolves a potentially qualified **attribute name** into (namespace name, local name). 
- /// - /// *Qualified* attribute names have the form `prefix:local-name` where the`prefix` is defined - /// on any containing XML element via `xmlns:prefix="the:namespace:uri"`. The namespace prefix - /// can be defined on the same element as the attribute in question. - /// - /// *Unqualified* attribute names do *not* inherit the current *default namespace*. - #[inline] - fn resolve_namespace<'a, 'b, 'c>( - &'a self, - qname: &'b [u8], - buffer: &'c [u8], - use_default: bool, - ) -> (Option<&'c [u8]>, &'b [u8]) { - self.slices - .iter() - .rfind(|n| n.is_match(buffer, qname)) - .map_or((None, qname), |n| { - let len = n.prefix_len; - if len > 0 { - (n.opt_value(buffer), &qname[len + 1..]) - } else if use_default { - (n.opt_value(buffer), qname) - } else { - (None, qname) - } - }) - } -} - -/// Utf8 Decoder -#[cfg(not(feature = "encoding"))] -#[derive(Clone, Copy)] -pub struct Decoder; - -/// Utf8 Decoder -#[cfg(feature = "encoding")] -#[derive(Clone, Copy)] -pub struct Decoder { - encoding: &'static Encoding, -} - -impl Decoder { - #[cfg(not(feature = "encoding"))] - pub fn decode<'c>(&self, bytes: &'c [u8]) -> Result<&'c str> { - from_utf8(bytes).map_err(Error::Utf8) - } - - #[cfg(feature = "encoding")] - pub fn decode<'c>(&self, bytes: &'c [u8]) -> Cow<'c, str> { - self.encoding.decode(bytes).0 - } -} diff --git a/src/se/mod.rs b/src/se/mod.rs index 2b89e654..b3a8e51d 100644 --- a/src/se/mod.rs +++ b/src/se/mod.rs @@ -209,9 +209,8 @@ impl<'w, W: Write> ser::Serializer for &'w mut Serializer { name: &'static str, _len: usize, ) -> Result { - write!(self.writer.inner(), "<{}", name).map_err(|err| { - DeError::Custom(format!("serialize struct {} failed: {}", name, err)) - })?; + write!(self.writer.inner(), "<{}", name) + .map_err(|err| DeError::Custom(format!("serialize struct {} failed: {}", name, err)))?; Ok(Struct::new(self, name)) } diff --git a/src/se/var.rs b/src/se/var.rs index dd4eb338..e348fc8f 100644 --- a/src/se/var.rs +++ b/src/se/var.rs @@ -83,7 
+83,13 @@ where { /// Create a new `Struct` pub fn new(parent: &'w mut Serializer, name: &'w str) -> Struct<'w, W> { - Struct { parent, name, attrs: Vec::new(), children: Vec::new(), buffer: Vec::new() } + Struct { + parent, + name, + attrs: Vec::new(), + children: Vec::new(), + buffer: Vec::new(), + } } } @@ -120,15 +126,9 @@ where } fn end(self) -> Result { - self.parent - .writer - .write(&self.attrs)?; - self.parent - .writer - .write(">".as_bytes())?; - self.parent - .writer - .write(&self.children)?; + self.parent.writer.write(&self.attrs)?; + self.parent.writer.write(">".as_bytes())?; + self.parent.writer.write(&self.children)?; self.parent .writer .write_event(Event::End(BytesEnd::borrowed(self.name.as_bytes())))?; diff --git a/src/writer.rs b/src/writer.rs index 6b866e58..cab49d6d 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -2,8 +2,8 @@ use std::io::Write; -use errors::{Error, Result}; -use events::Event; +use crate::errors::{Error, Result}; +use crate::events::Event; /// XML writer. /// @@ -11,9 +11,9 @@ use events::Event; /// /// # Examples /// -/// ```rust -/// # extern crate quick_xml; -/// # fn main() { +/// ```ignore +/// extern crate quick_xml; +/// fn main() { /// use quick_xml::{Reader, Writer}; /// use quick_xml::events::{Event, BytesEnd, BytesStart}; /// use std::io::Cursor; @@ -27,7 +27,7 @@ use events::Event; /// match reader.read_event(&mut buf) { /// Ok(Event::Start(ref e)) if e.name() == b"this_tag" => { /// -/// // crates a new element ... alternatively we could reuse `e` by calling +/// // creates a new element ... 
alternatively we could reuse `e` by calling /// // `e.into_owned()` /// let mut elem = BytesStart::owned(b"my_elem".to_vec(), "my_elem".len()); /// @@ -54,7 +54,7 @@ use events::Event; /// let result = writer.into_inner().into_inner(); /// let expected = r#"text"#; /// assert_eq!(result, expected.as_bytes()); -/// # } +/// } /// ``` #[derive(Clone)] pub struct Writer { @@ -136,8 +136,7 @@ impl Writer { if let Some(ref i) = self.indent { if i.should_line_break { self.writer.write_all(b"\n").map_err(Error::Io)?; - self - .writer + self.writer .write_all(&i.indents[..i.indents_len]) .map_err(Error::Io)?; } @@ -160,8 +159,7 @@ impl Writer { pub fn write_indent(&mut self) -> Result<()> { if let Some(ref i) = self.indent { self.writer.write_all(b"\n").map_err(Error::Io)?; - self - .writer + self.writer .write_all(&i.indents[..i.indents_len]) .map_err(Error::Io)?; } diff --git a/tests/serde_attrs.rs b/tests/serde_attrs.rs index 32ca657d..3fd4e67c 100644 --- a/tests/serde_attrs.rs +++ b/tests/serde_attrs.rs @@ -1,39 +1,52 @@ #![cfg(feature = "serialize")] extern crate quick_xml; -extern crate serde; extern crate regex; +extern crate serde; use quick_xml::se::to_string; -use serde::Serialize; use regex::Regex; +use serde::Serialize; use std::borrow::Cow; #[derive(Serialize)] -#[serde(rename="classroom")] +#[serde(rename = "classroom")] struct Classroom { pub students: Vec, pub number: String, - pub adviser: Person + pub adviser: Person, } #[derive(Serialize)] -#[serde(rename="person")] +#[serde(rename = "person")] struct Person { pub name: String, - pub age: u32 + pub age: u32, } #[derive(Serialize)] -#[serde(rename="empty")] +#[serde(rename = "empty")] struct Empty {} #[test] fn test_nested() { - let s1 = Person { name: "sherlock".to_string(), age: 20 }; - let s2 = Person { name: "harry".to_string(), age: 19 }; - let t = Person { name: "albus".to_string(), age: 88 }; - let doc = Classroom { students: vec![s1, s2], number: "3-1".to_string(), adviser: t }; + let s1 = Person 
{ + name: "sherlock".to_string(), + age: 20, + }; + let s2 = Person { + name: "harry".to_string(), + age: 19, + }; + let t = Person { + name: "albus".to_string(), + age: 88, + }; + let doc = Classroom { + students: vec![s1, s2], + number: "3-1".to_string(), + adviser: t, + }; let xml = quick_xml::se::to_string(&doc).unwrap(); let str = r#" diff --git a/tests/test.rs b/tests/test.rs index aeefa2b5..301292a4 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -1,15 +1,12 @@ -extern crate quick_xml; -#[cfg(feature = "serialize")] -extern crate serde; - use quick_xml::events::attributes::Attribute; use quick_xml::events::Event::*; use quick_xml::Reader; -use std::borrow::Cow; -use std::io::Cursor; - #[cfg(feature = "serialize")] use serde::{Deserialize, Serialize}; +use std::borrow::Cow; +use std::io::Cursor; +#[cfg(feature = "asynchronous")] +use tokio::runtime::Runtime; #[test] fn test_sample() { @@ -17,8 +14,45 @@ fn test_sample() { let mut buf = Vec::new(); let mut r = Reader::from_reader(src); let mut count = 0; + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + loop { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_event(&mut buf).await }); + + match event.unwrap() { + Start(_) => count += 1, + Decl(e) => println!("{:?}", e.version()), + Eof => break, + _ => (), + } + buf.clear(); + } + println!("{}", count); +} + +#[test] +fn test_sample_async() { + let src: &[u8] = include_bytes!("sample_rss.xml"); + let mut buf = Vec::new(); + let mut r = Reader::from_reader(src); + let mut count = 0; + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match r.read_event(&mut buf).unwrap() { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = 
runtime.block_on(async { r.read_event(&mut buf).await }); + + match event.unwrap() { Start(_) => count += 1, Decl(e) => println!("{:?}", e.version()), Eof => break, @@ -35,7 +69,16 @@ fn test_attributes_empty() { let mut r = Reader::from_reader(src as &[u8]); r.trim_text(true).expand_empty_elements(false); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_event(&mut buf).await }); + + match event { Ok(Empty(e)) => { let mut atts = e.attributes(); match atts.next() { @@ -61,13 +104,23 @@ fn test_attributes_empty() { } } +#[cfg(not(feature = "asynchronous"))] #[test] fn test_attribute_equal() { let src = b""; let mut r = Reader::from_reader(src as &[u8]); r.trim_text(true).expand_empty_elements(false); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_event(&mut buf).await }); + + match event { Ok(Empty(e)) => { let mut atts = e.attributes(); match atts.next() { @@ -86,14 +139,24 @@ fn test_attribute_equal() { } } +#[cfg(not(feature = "asynchronous"))] #[test] fn test_comment_starting_with_gt() { let src = b"-->"; let mut r = Reader::from_reader(src as &[u8]); r.trim_text(true).expand_empty_elements(false); let mut buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match r.read_event(&mut buf) { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = 
runtime.block_on(async { r.read_event(&mut buf).await }); + + match event { Ok(Comment(ref e)) if &**e == b">" => break, Ok(Eof) => panic!("Expecting Comment"), _ => (), @@ -112,8 +175,16 @@ fn test_attributes_empty_ns() { r.trim_text(true).expand_empty_elements(false); let mut buf = Vec::new(); let mut ns_buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); - let e = match r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + let e = match event { Ok((None, Empty(e))) => e, e => panic!("Expecting Empty event, got {:?}", e), }; @@ -149,6 +220,7 @@ fn test_attributes_empty_ns() { /// Single empty element with qualified attributes. /// Empty element expansion: enabled /// The code path for namespace handling is slightly different for `Empty` vs. `Start+End`. 
+#[cfg(not(feature = "asynchronous"))] #[test] fn test_attributes_empty_ns_expanded() { let src = b""; @@ -157,8 +229,18 @@ fn test_attributes_empty_ns_expanded() { r.trim_text(true).expand_empty_elements(true); let mut buf = Vec::new(); let mut ns_buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + { - let e = match r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = + runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + let e = match event { Ok((None, Start(e))) => e, e => panic!("Expecting Empty event, got {:?}", e), }; @@ -191,7 +273,13 @@ fn test_attributes_empty_ns_expanded() { } } - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + match event { Ok((None, End(e))) => assert_eq!(b"a", e.name()), e => panic!("Expecting End event, got {:?}", e), } @@ -205,10 +293,19 @@ fn test_default_ns_shadowing_empty() { r.trim_text(true).expand_empty_elements(false); let mut buf = Vec::new(); let mut ns_buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); // { - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = + runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + match event { Ok((Some(ns), Start(e))) => { assert_eq!(&ns[..], b"urn:example:o"); assert_eq!(e.name(), b"e"); @@ -219,7 +316,14 @@ fn 
test_default_ns_shadowing_empty() { // { - let e = match r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = + runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + let e = match event { Ok((Some(ns), Empty(e))) => { assert_eq!(::std::str::from_utf8(ns).unwrap(), "urn:example:i"); assert_eq!(e.name(), b"e"); @@ -250,7 +354,14 @@ fn test_default_ns_shadowing_empty() { } // - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + match event { Ok((Some(ns), End(e))) => { assert_eq!(&ns[..], b"urn:example:o"); assert_eq!(e.name(), b"e"); @@ -259,6 +370,7 @@ fn test_default_ns_shadowing_empty() { } } +#[cfg(not(feature = "asynchronous"))] #[test] fn test_default_ns_shadowing_expanded() { let src = b""; @@ -267,10 +379,19 @@ fn test_default_ns_shadowing_expanded() { r.trim_text(true).expand_empty_elements(true); let mut buf = Vec::new(); let mut ns_buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); // { - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = + runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + match event { Ok((Some(ns), Start(e))) => { assert_eq!(&ns[..], b"urn:example:o"); assert_eq!(e.name(), b"e"); @@ -282,7 +403,14 @@ fn test_default_ns_shadowing_expanded() { // { - let e = match r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(not(feature = "asynchronous"))] + 
let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = + runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + let e = match event { Ok((Some(ns), Start(e))) => { assert_eq!(&ns[..], b"urn:example:i"); assert_eq!(e.name(), b"e"); @@ -312,15 +440,28 @@ fn test_default_ns_shadowing_expanded() { } // virtual - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + match event { Ok((Some(ns), End(e))) => { assert_eq!(&ns[..], b"urn:example:i"); assert_eq!(e.name(), b"e"); } e => panic!("Expected End event (), got {:?}", e), } + // - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + match event { Ok((Some(ns), End(e))) => { assert_eq!(&ns[..], b"urn:example:o"); assert_eq!(e.name(), b"e"); @@ -336,8 +477,17 @@ fn test_koi8_r_encoding() { let mut r = Reader::from_reader(src as &[u8]); r.trim_text(true).expand_empty_elements(false); let mut buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match r.read_event(&mut buf) { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_event(&mut buf).await }); + + match event { Ok(Text(e)) => { e.unescape_and_decode(&r).unwrap(); } @@ -355,14 +505,24 @@ fn fuzz_53() { let cursor = Cursor::new(data); let mut reader = Reader::from_reader(cursor); let mut buf = vec![]; + 
#[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match reader.read_event(&mut buf) { + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { Ok(quick_xml::events::Event::Eof) | Err(..) => break, _ => buf.clear(), } } } +#[cfg(not(feature = "asynchronous"))] #[test] fn test_issue94() { let data = br#" @@ -371,8 +531,17 @@ fn test_issue94() { let mut reader = Reader::from_reader(&data[..]); reader.trim_text(true); let mut buf = vec![]; + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match reader.read_event(&mut buf) { + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { Ok(quick_xml::events::Event::Eof) | Err(..) 
=> break, _ => buf.clear(), } @@ -388,8 +557,17 @@ fn fuzz_101() { let cursor = Cursor::new(data); let mut reader = Reader::from_reader(cursor); let mut buf = vec![]; + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match reader.read_event(&mut buf) { + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { Ok(Start(ref e)) | Ok(Empty(ref e)) => { if e.unescaped().is_err() { break; @@ -420,14 +598,30 @@ fn test_default_namespace() { // let mut buf = Vec::new(); let mut ns_buf = Vec::new(); - if let Ok((None, Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + if let Ok((None, Start(_))) = event { } else { panic!("expecting outer start element with no namespace"); } // { - let event = match r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = + runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + let event = match event { Ok((Some(b"www1"), Start(event))) => event, Ok((Some(_), Start(_))) => panic!("expecting namespace to resolve to 'www1'"), _ => panic!("expecting namespace resolution"), @@ -445,7 +639,13 @@ fn test_default_namespace() { } // - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + 
#[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + match event { Ok((Some(b"www1"), End(_))) => (), Ok((Some(_), End(_))) => panic!("expecting namespace to resolve to 'www1'"), _ => panic!("expecting namespace resolution"), @@ -453,7 +653,14 @@ fn test_default_namespace() { // very important: a should not be in any namespace. The default namespace only applies to // the sub-document it is defined on. - if let Ok((None, End(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + if let Ok((None, End(_))) = event { } else { panic!("expecting outer end element with no namespace"); } diff --git a/tests/unit_tests.rs b/tests/unit_tests.rs index e8cdb009..1cfe0d58 100644 --- a/tests/unit_tests.rs +++ b/tests/unit_tests.rs @@ -1,16 +1,25 @@ -extern crate quick_xml; - -use std::io::Cursor; -use std::str::from_utf8; - use quick_xml::events::Event::*; use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event}; use quick_xml::{Reader, Result, Writer}; +use std::io::Cursor; +use std::str::from_utf8; +#[cfg(feature = "asynchronous")] +use tokio::runtime::Runtime; macro_rules! next_eq_name { ($r:expr, $t:tt, $bytes:expr) => { let mut buf = Vec::new(); - match $r.read_event(&mut buf).unwrap() { + + #[cfg(not(feature = "asynchronous"))] + let event = $r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { $r.read_event(&mut buf).await }); + + match event.unwrap() { $t(ref e) if e.name() == $bytes => (), e => panic!( "expecting {}({:?}), found {:?}", @@ -26,7 +35,17 @@ macro_rules! 
next_eq_name { macro_rules! next_eq_content { ($r:expr, $t:tt, $bytes:expr) => { let mut buf = Vec::new(); - match $r.read_event(&mut buf).unwrap() { + + #[cfg(not(feature = "asynchronous"))] + let event = $r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { $r.read_event(&mut buf).await }); + + match event.unwrap() { $t(ref e) if &**e == $bytes => (), e => panic!( "expecting {}({:?}), found {:?}", @@ -127,7 +146,17 @@ fn test_xml_decl() { let mut r = Reader::from_str(""); r.trim_text(true); let mut buf = Vec::new(); - match r.read_event(&mut buf).unwrap() { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_event(&mut buf).await }); + + match event.unwrap() { Decl(ref e) => { match e.version() { Ok(v) => assert_eq!( @@ -207,8 +236,18 @@ fn test_writer() { reader.trim_text(true); let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match reader.read_event(&mut buf) { + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { Ok(Eof) => break, Ok(e) => assert!(writer.write_event(e).is_ok()), Err(e) => panic!(e), @@ -226,8 +265,18 @@ fn test_writer_borrow() { reader.trim_text(true); let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - 
match reader.read_event(&mut buf) { + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { Ok(Eof) => break, Ok(e) => assert!(writer.write_event(&e).is_ok()), // either `e` or `&e` Err(e) => panic!(e), @@ -245,8 +294,18 @@ fn test_writer_indent() { reader.trim_text(true); let mut writer = Writer::new_with_indent(Cursor::new(Vec::new()), b' ', 4); let mut buf = Vec::new(); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match reader.read_event(&mut buf) { + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { Ok(Eof) => break, Ok(e) => assert!(writer.write_event(e).is_ok()), Err(e) => panic!(e), @@ -254,7 +313,6 @@ fn test_writer_indent() { } let result = writer.into_inner().into_inner(); - // println!("{:?}", String::from_utf8_lossy(&result)); assert_eq!(result, txt.as_bytes()); } @@ -266,8 +324,18 @@ fn test_write_empty_element_attrs() { reader.expand_empty_elements(false); let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match reader.read_event(&mut buf) { + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { Ok(Eof) => break, Ok(e) => assert!(writer.write_event(e).is_ok()), Err(e) => panic!(e), @@ -286,8 +354,18 @@ fn test_write_attrs() { reader.trim_text(true); let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); + + #[cfg(feature = 
"asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - let event = match reader.read_event(&mut buf) { + #[cfg(not(feature = "asynchronous"))] + let ev = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let ev = runtime.block_on(async { reader.read_event(&mut buf).await }); + + let event = match ev { Ok(Eof) => break, Ok(Start(elem)) => { let mut attrs = elem.attributes().collect::>>().unwrap(); @@ -392,7 +470,17 @@ fn test_buf_position_err_end_element() { r.trim_text(true).check_end_names(true); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_event(&mut buf).await }); + + match event { Err(_) if r.buffer_position() == 2 => (), // error at char 2: no opening tag Err(e) => panic!( "expecting buf_pos = 2, found {}, err: {:?}", @@ -412,7 +500,17 @@ fn test_buf_position_err_comment() { assert_eq!(r.buffer_position(), 3); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_event(&mut buf).await }); + + match event { Err(_) if r.buffer_position() == 4 => { // error at char 5: no closing --> tag found assert!(true); @@ -431,18 +529,34 @@ fn test_buf_position_err_comment_2_buf() { let mut r = Reader::from_str(" tag found assert!(true); } Err(e) => panic!( - "expecting buf_pos = 5, found {}, err: {:?}", + "expecting buf_pos = 4, found {}, err: {:?}", r.buffer_position(), e ), @@ -459,7 +573,17 @@ fn test_buf_position_err_comment_trim_text() { 
assert_eq!(r.buffer_position(), 3); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_event(&mut buf).await }); + + match event { Err(_) if r.buffer_position() == 7 => { // error at char 5: no closing --> tag found assert!(true); @@ -480,12 +604,28 @@ fn test_namespace() { let mut buf = Vec::new(); let mut ns_buf = Vec::new(); - if let Ok((None, Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + if let Ok((None, Start(_))) = event { } else { assert!(false, "expecting start element with no namespace"); } - if let Ok((Some(a), Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + if let Ok((Some(a), Start(_))) = event { if &*a == b"www1" { assert!(true); } else { @@ -501,16 +641,32 @@ fn test_default_namespace() { let mut r = Reader::from_str(""); r.trim_text(true); - // let mut buf = Vec::new(); let mut ns_buf = Vec::new(); - if let Ok((None, Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + // + #[cfg(not(feature = "asynchronous"))] + let event = 
r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + if let Ok((None, Start(_))) = event { } else { assert!(false, "expecting outer start element with no namespace"); } // - if let Ok((Some(a), Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + if let Ok((Some(a), Start(_))) = event { if &*a == b"www1" { assert!(true); } else { @@ -521,7 +677,14 @@ fn test_default_namespace() { } // - if let Ok((Some(a), End(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + if let Ok((Some(a), End(_))) = event { if &*a == b"www1" { assert!(true); } else { @@ -533,7 +696,14 @@ fn test_default_namespace() { // very important: a should not be in any namespace. The default namespace only applies to // the sub-document it is defined on. 
- if let Ok((None, End(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + if let Ok((None, End(_))) = event { } else { assert!(false, "expecting outer end element with no namespace"); } @@ -546,7 +716,16 @@ fn test_default_namespace_reset() { let mut buf = Vec::new(); let mut ns_buf = Vec::new(); - if let Ok((Some(a), Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + if let Ok((Some(a), Start(_))) = event { assert_eq!( &a[..], b"www1", @@ -556,16 +735,35 @@ fn test_default_namespace_reset() { panic!("expecting outer start element with to resolve to 'www1'"); } - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + match event { Ok((None, Start(_))) => (), e => panic!("expecting inner start element, got {:?}", e), } - if let Ok((None, End(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + if let Ok((None, End(_))) = event { } else { assert!(false, "expecting inner end element"); } - if let 
Ok((Some(a), End(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { + #[cfg(not(feature = "asynchronous"))] + let event = r.read_namespaced_event(&mut buf, &mut ns_buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_namespaced_event(&mut buf, &mut ns_buf).await }); + + if let Ok((Some(a), End(_))) = event { assert_eq!( &a[..], b"www1", @@ -582,7 +780,17 @@ fn test_escaped_content() { r.trim_text(true); next_eq!(r, Start, b"a"); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_event(&mut buf).await }); + + match event { Ok(Text(e)) => { if &*e != b"<test>" { panic!( @@ -631,8 +839,17 @@ fn test_read_write_roundtrip_results_in_identity() { reader.trim_text(false).expand_empty_elements(false); let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match reader.read_event(&mut buf) { + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { Ok(Eof) => break, Ok(e) => assert!(writer.write_event(e).is_ok()), Err(e) => panic!(e), @@ -658,8 +875,17 @@ fn test_read_write_roundtrip() { reader.trim_text(false).expand_empty_elements(false); let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match reader.read_event(&mut buf) { + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); 
+ + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { Ok(Eof) => break, Ok(e) => assert!(writer.write_event(e).is_ok()), Err(e) => panic!(e), @@ -685,8 +911,17 @@ fn test_read_write_roundtrip_escape() { reader.trim_text(false).expand_empty_elements(false); let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match reader.read_event(&mut buf) { + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { Ok(Eof) => break, Ok(Text(e)) => { let t = e.escaped(); @@ -718,8 +953,17 @@ fn test_read_write_roundtrip_escape_text() { reader.trim_text(false).expand_empty_elements(false); let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + loop { - match reader.read_event(&mut buf) { + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { Ok(Eof) => break, Ok(Text(e)) => { let t = e.unescape_and_decode(&reader).unwrap(); @@ -741,7 +985,17 @@ fn test_closing_bracket_in_single_quote_attr() { let mut r = Reader::from_str(""); r.trim_text(true); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_event(&mut buf).await }); + + match 
event { Ok(Start(e)) => { let mut attrs = e.attributes(); match attrs.next() { @@ -764,7 +1018,17 @@ fn test_closing_bracket_in_double_quote_attr() { let mut r = Reader::from_str("\" check=\"2\">"); r.trim_text(true); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_event(&mut buf).await }); + + match event { Ok(Start(e)) => { let mut attrs = e.attributes(); match attrs.next() { @@ -787,7 +1051,17 @@ fn test_closing_bracket_in_double_quote_mixed() { let mut r = Reader::from_str("'\" check=\"'2'\">"); r.trim_text(true); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_event(&mut buf).await }); + + match event { Ok(Start(e)) => { let mut attrs = e.attributes(); match attrs.next() { @@ -810,7 +1084,17 @@ fn test_closing_bracket_in_single_quote_mixed() { let mut r = Reader::from_str(""); r.trim_text(true); let mut buf = Vec::new(); - match r.read_event(&mut buf) { + + #[cfg(not(feature = "asynchronous"))] + let event = r.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { r.read_event(&mut buf).await }); + + match event { Ok(Start(e)) => { let mut attrs = e.attributes(); match attrs.next() { @@ -838,9 +1122,17 @@ fn test_unescape_and_decode_without_bom_removes_utf8_bom() { let mut txt = Vec::new(); let mut buf = Vec::new(); + #[cfg(feature = 
"asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); loop { - match reader.read_event(&mut buf) { + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { Ok(Event::Text(e)) => txt.push(e.unescape_and_decode_without_bom(&reader).unwrap()), Ok(Event::Eof) => break, _ => (), @@ -857,9 +1149,17 @@ fn test_unescape_and_decode_without_bom_removes_utf16be_bom() { let mut txt = Vec::new(); let mut buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); loop { - match reader.read_event(&mut buf) { + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { Ok(Event::Text(e)) => txt.push(e.unescape_and_decode_without_bom(&mut reader).unwrap()), Ok(Event::Eof) => break, _ => (), @@ -876,9 +1176,17 @@ fn test_unescape_and_decode_without_bom_removes_utf16le_bom() { let mut txt = Vec::new(); let mut buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); loop { - match reader.read_event(&mut buf) { + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { Ok(Event::Text(e)) => txt.push(e.unescape_and_decode_without_bom(&mut reader).unwrap()), Ok(Event::Eof) => break, _ => (), @@ -897,9 +1205,17 @@ fn test_unescape_and_decode_without_bom_does_nothing_if_no_bom_exists() { let mut txt = Vec::new(); let mut buf = Vec::new(); + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); loop 
{ - match reader.read_event(&mut buf) { + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + match event { Ok(Event::Text(e)) => txt.push(e.unescape_and_decode_without_bom(&mut reader).unwrap()), Ok(Event::Eof) => break, _ => (), diff --git a/tests/xmlrs_reader_tests.rs b/tests/xmlrs_reader_tests.rs index 10df35c9..804071e8 100644 --- a/tests/xmlrs_reader_tests.rs +++ b/tests/xmlrs_reader_tests.rs @@ -3,6 +3,8 @@ extern crate quick_xml; use quick_xml::events::{BytesStart, Event}; use quick_xml::{Reader, Result}; use std::str::from_utf8; +#[cfg(feature = "asynchronous")] +use tokio::runtime::Runtime; #[test] fn sample_1_short() { @@ -278,6 +280,9 @@ fn default_namespace_applies_to_end_elem() { } fn test(input: &[u8], output: &[u8], is_short: bool) { + #[cfg(feature = "asynchronous")] + let mut runtime = Runtime::new().expect("Runtime cannot be initialized"); + let mut reader = Reader::from_reader(input); reader .trim_text(is_short) @@ -290,12 +295,26 @@ fn test(input: &[u8], output: &[u8], is_short: bool) { if !is_short { // discard first whitespace + + #[cfg(feature = "asynchronous")] + runtime.block_on(async { + reader.read_event(&mut buf).await.unwrap(); + }); + + #[cfg(not(feature = "asynchronous"))] reader.read_event(&mut buf).unwrap(); } loop { buf.clear(); + + #[cfg(feature = "asynchronous")] + let event = runtime + .block_on(async { reader.read_namespaced_event(&mut buf, &mut ns_buffer).await }); + + #[cfg(not(feature = "asynchronous"))] let event = reader.read_namespaced_event(&mut buf, &mut ns_buffer); + let line = xmlrs_display(&event); if let Some((n, spec)) = spec_lines.next() { if spec.trim() == "EndDocument" { @@ -321,7 +340,16 @@ fn test(input: &[u8], output: &[u8], is_short: bool) { if !is_short && line.starts_with("StartDocument") { // advance next Characters(empty space) ... 
- if let Ok(Event::Text(ref e)) = reader.read_event(&mut Vec::new()) { + + let mut buf = Vec::new(); + + #[cfg(feature = "asynchronous")] + let event = runtime.block_on(async { reader.read_event(&mut buf).await }); + + #[cfg(not(feature = "asynchronous"))] + let event = reader.read_event(&mut buf); + + if let Ok(Event::Text(ref e)) = event { if e.iter().any(|b| match *b { b' ' | b'\r' | b'\n' | b'\t' => false, _ => true,