diff --git a/Cargo.toml b/Cargo.toml
index e13202e2..46e66ea5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -16,6 +16,8 @@ document-features = { version = "0.2", optional = true }
encoding_rs = { version = "0.8", optional = true }
serde = { version = "1.0", optional = true }
memchr = "2.5"
+tokio = { version = "1.19", optional = true, default-features = false, features = ["io-util"] }
+async-recursion = { version = "1.0", optional = true }
[dev-dependencies]
criterion = "0.3"
@@ -23,6 +25,8 @@ pretty_assertions = "1.2"
regex = "1"
serde = { version = "1.0", features = ["derive"] }
serde-value = "0.7"
+tokio = { version = "1.20", default-features = false, features = ["macros", "rt-multi-thread"] }
+tokio-test = "0.4"
[lib]
bench = false
@@ -101,6 +105,19 @@ serialize = ["serde"]
## Enables support for recognizing all [HTML 5 entities](https://dev.w3.org/html5/html-author/charref)
escape-html = []
+## Enables support for asynchronous reading from `tokio`'s I/O traits.
+##
+## This can be used for example with `Reader::from_async_reader(read)` where `read`
+## is some type implementing `tokio::io::AsyncBufRead`.
+async = ["tokio", "async-recursion"]
+
+## Enables support for asynchronous reading from files using `tokio`. This feature
+## automatically enables the `async` feature as well.
+##
+## This can be used for example with `Reader::from_file_async(path)` where `path`
+## is a file path.
+async-fs = ["async", "tokio/fs"]
+
[package.metadata.docs.rs]
all-features = true
@@ -119,3 +136,7 @@ required-features = ["serialize"]
[[test]]
name = "serde-migrated"
required-features = ["serialize"]
+
+[[test]]
+name = "async_test"
+required-features = ["async"]
diff --git a/README.md b/README.md
index 4cbcac0d..0299e615 100644
--- a/README.md
+++ b/README.md
@@ -30,7 +30,11 @@ let xml = r#""#;
-let mut reader = Reader::from_str(xml);
+let mut reader = Reader::from_reader(xml.as_bytes());
+// If you want to read from a string or byte slice without buffering, use:
+// let mut reader = Reader::from_str(xml);
+// In that case, `Vec` is *not* needed for buffering below and you should use
+// `read_event` instead of `read_event_into`.
reader.trim_text(true);
let mut count = 0;
@@ -75,9 +79,8 @@ let xml = r#"text"#;
let mut reader = Reader::from_str(xml);
reader.trim_text(true);
let mut writer = Writer::new(Cursor::new(Vec::new()));
-let mut buf = Vec::new();
loop {
- match reader.read_event_into(&mut buf) {
+ match reader.read_event() {
Ok(Event::Start(ref e)) if e.name() == b"this_tag" => {
// crates a new element ... alternatively we could reuse `e` by calling
@@ -101,7 +104,6 @@ loop {
Ok(e) => assert!(writer.write_event(&e).is_ok()),
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
}
- buf.clear();
}
let result = writer.into_inner().into_inner();
diff --git a/benches/macrobenches.rs b/benches/macrobenches.rs
index 3358f3a4..a4e2719e 100644
--- a/benches/macrobenches.rs
+++ b/benches/macrobenches.rs
@@ -19,7 +19,7 @@ static PLAYERS: &[u8] = include_bytes!("../tests/documents/players.xml");
// TODO: read the namespaces too
// TODO: use fully normalized attribute values
fn parse_document(doc: &[u8]) -> XmlResult<()> {
- let mut r = Reader::from_reader(doc);
+ let mut r = Reader::from_bytes(doc);
loop {
match r.read_event()? {
Event::Start(e) | Event::Empty(e) => {
diff --git a/benches/microbenches.rs b/benches/microbenches.rs
index 8bbe1a67..ee52b27b 100644
--- a/benches/microbenches.rs
+++ b/benches/microbenches.rs
@@ -29,17 +29,15 @@ fn read_event(c: &mut Criterion) {
let mut group = c.benchmark_group("read_event");
group.bench_function("trim_text = false", |b| {
b.iter(|| {
- let mut r = Reader::from_reader(SAMPLE);
+ let mut r = Reader::from_bytes(SAMPLE);
r.check_end_names(false).check_comments(false);
let mut count = criterion::black_box(0);
- let mut buf = Vec::new();
loop {
- match r.read_event_into(&mut buf) {
+ match r.read_event() {
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
Ok(Event::Eof) => break,
_ => (),
}
- buf.clear();
}
assert_eq!(
count, 1550,
@@ -50,19 +48,17 @@ fn read_event(c: &mut Criterion) {
group.bench_function("trim_text = true", |b| {
b.iter(|| {
- let mut r = Reader::from_reader(SAMPLE);
+ let mut r = Reader::from_bytes(SAMPLE);
r.check_end_names(false)
.check_comments(false)
.trim_text(true);
let mut count = criterion::black_box(0);
- let mut buf = Vec::new();
loop {
- match r.read_event_into(&mut buf) {
+ match r.read_event() {
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
Ok(Event::Eof) => break,
_ => (),
}
- buf.clear();
}
assert_eq!(
count, 1550,
@@ -79,18 +75,16 @@ fn read_namespaced_event(c: &mut Criterion) {
let mut group = c.benchmark_group("read_namespaced_event");
group.bench_function("trim_text = false", |b| {
b.iter(|| {
- let mut r = Reader::from_reader(SAMPLE);
+ let mut r = Reader::from_bytes(SAMPLE);
r.check_end_names(false).check_comments(false);
let mut count = criterion::black_box(0);
- let mut buf = Vec::new();
let mut ns_buf = Vec::new();
loop {
- match r.read_namespaced_event(&mut buf, &mut ns_buf) {
+ match r.read_namespaced_event(&mut ns_buf) {
Ok((_, Event::Start(_))) | Ok((_, Event::Empty(_))) => count += 1,
Ok((_, Event::Eof)) => break,
_ => (),
}
- buf.clear();
}
assert_eq!(
count, 1550,
@@ -101,20 +95,18 @@ fn read_namespaced_event(c: &mut Criterion) {
group.bench_function("trim_text = true", |b| {
b.iter(|| {
- let mut r = Reader::from_reader(SAMPLE);
+ let mut r = Reader::from_bytes(SAMPLE);
r.check_end_names(false)
.check_comments(false)
.trim_text(true);
let mut count = criterion::black_box(0);
- let mut buf = Vec::new();
let mut ns_buf = Vec::new();
loop {
- match r.read_namespaced_event(&mut buf, &mut ns_buf) {
+ match r.read_namespaced_event(&mut ns_buf) {
Ok((_, Event::Start(_))) | Ok((_, Event::Empty(_))) => count += 1,
Ok((_, Event::Eof)) => break,
_ => (),
}
- buf.clear();
}
assert_eq!(
count, 1550,
@@ -130,78 +122,66 @@ fn one_event(c: &mut Criterion) {
let mut group = c.benchmark_group("One event");
group.bench_function("StartText", |b| {
let src = "Hello world!".repeat(512 / 12).into_bytes();
- let mut buf = Vec::with_capacity(1024);
b.iter(|| {
- let mut r = Reader::from_reader(src.as_ref());
+ let mut r = Reader::from_bytes(src.as_ref());
let mut nbtxt = criterion::black_box(0);
r.check_end_names(false).check_comments(false);
- match r.read_event_into(&mut buf) {
+ match r.read_event() {
Ok(Event::StartText(e)) => nbtxt += e.len(),
something_else => panic!("Did not expect {:?}", something_else),
};
- buf.clear();
-
assert_eq!(nbtxt, 504);
})
});
group.bench_function("Start", |b| {
let src = format!(r#""#, "world".repeat(512 / 5)).into_bytes();
- let mut buf = Vec::with_capacity(1024);
b.iter(|| {
- let mut r = Reader::from_reader(src.as_ref());
+ let mut r = Reader::from_bytes(src.as_ref());
let mut nbtxt = criterion::black_box(0);
r.check_end_names(false)
.check_comments(false)
.trim_text(true);
- match r.read_event_into(&mut buf) {
+ match r.read_event() {
Ok(Event::Start(ref e)) => nbtxt += e.len(),
something_else => panic!("Did not expect {:?}", something_else),
};
- buf.clear();
-
assert_eq!(nbtxt, 525);
})
});
group.bench_function("Comment", |b| {
let src = format!(r#""#, "world".repeat(512 / 5)).into_bytes();
- let mut buf = Vec::with_capacity(1024);
b.iter(|| {
- let mut r = Reader::from_reader(src.as_ref());
+ let mut r = Reader::from_bytes(src.as_ref());
let mut nbtxt = criterion::black_box(0);
r.check_end_names(false)
.check_comments(false)
.trim_text(true);
- match r.read_event_into(&mut buf) {
+ match r.read_event() {
Ok(Event::Comment(e)) => nbtxt += e.decode_and_unescape(&r).unwrap().len(),
something_else => panic!("Did not expect {:?}", something_else),
};
- buf.clear();
-
assert_eq!(nbtxt, 520);
})
});
group.bench_function("CData", |b| {
let src = format!(r#""#, "world".repeat(512 / 5)).into_bytes();
- let mut buf = Vec::with_capacity(1024);
b.iter(|| {
- let mut r = Reader::from_reader(src.as_ref());
+ let mut r = Reader::from_bytes(src.as_ref());
let mut nbtxt = criterion::black_box(0);
r.check_end_names(false)
.check_comments(false)
.trim_text(true);
- match r.read_event_into(&mut buf) {
+ match r.read_event() {
Ok(Event::CData(ref e)) => nbtxt += e.len(),
something_else => panic!("Did not expect {:?}", something_else),
};
- buf.clear();
-
assert_eq!(nbtxt, 518);
})
});
@@ -213,12 +193,11 @@ fn attributes(c: &mut Criterion) {
let mut group = c.benchmark_group("attributes");
group.bench_function("with_checks = true", |b| {
b.iter(|| {
- let mut r = Reader::from_reader(PLAYERS);
+ let mut r = Reader::from_bytes(PLAYERS);
r.check_end_names(false).check_comments(false);
let mut count = criterion::black_box(0);
- let mut buf = Vec::new();
loop {
- match r.read_event_into(&mut buf) {
+ match r.read_event() {
Ok(Event::Empty(e)) => {
for attr in e.attributes() {
let _attr = attr.unwrap();
@@ -228,7 +207,6 @@ fn attributes(c: &mut Criterion) {
Ok(Event::Eof) => break,
_ => (),
}
- buf.clear();
}
assert_eq!(count, 1041);
})
@@ -236,12 +214,11 @@ fn attributes(c: &mut Criterion) {
group.bench_function("with_checks = false", |b| {
b.iter(|| {
- let mut r = Reader::from_reader(PLAYERS);
+ let mut r = Reader::from_bytes(PLAYERS);
r.check_end_names(false).check_comments(false);
let mut count = criterion::black_box(0);
- let mut buf = Vec::new();
loop {
- match r.read_event_into(&mut buf) {
+ match r.read_event() {
Ok(Event::Empty(e)) => {
for attr in e.attributes().with_checks(false) {
let _attr = attr.unwrap();
@@ -251,7 +228,6 @@ fn attributes(c: &mut Criterion) {
Ok(Event::Eof) => break,
_ => (),
}
- buf.clear();
}
assert_eq!(count, 1041);
})
@@ -259,12 +235,11 @@ fn attributes(c: &mut Criterion) {
group.bench_function("try_get_attribute", |b| {
b.iter(|| {
- let mut r = Reader::from_reader(PLAYERS);
+ let mut r = Reader::from_bytes(PLAYERS);
r.check_end_names(false).check_comments(false);
let mut count = criterion::black_box(0);
- let mut buf = Vec::new();
loop {
- match r.read_event_into(&mut buf) {
+ match r.read_event() {
Ok(Event::Empty(e)) if e.name() == QName(b"player") => {
for name in ["num", "status", "avg"] {
if let Some(_attr) = e.try_get_attribute(name).unwrap() {
@@ -279,7 +254,6 @@ fn attributes(c: &mut Criterion) {
Ok(Event::Eof) => break,
_ => (),
}
- buf.clear();
}
assert_eq!(count, 150);
})
diff --git a/examples/custom_entities.rs b/examples/custom_entities.rs
index 4d59d49e..3c31d4d1 100644
--- a/examples/custom_entities.rs
+++ b/examples/custom_entities.rs
@@ -27,12 +27,11 @@ fn main() -> Result<(), Box> {
let mut reader = Reader::from_str(DATA);
reader.trim_text(true);
- let mut buf = Vec::new();
let mut custom_entities: HashMap = HashMap::new();
let entity_re = Regex::new(r#""#)?;
loop {
- match reader.read_event_into(&mut buf) {
+ match reader.read_event() {
Ok(Event::DocType(ref e)) => {
for cap in entity_re.captures_iter(&e) {
custom_entities.insert(
diff --git a/examples/read_buffered.rs b/examples/read_buffered.rs
new file mode 100644
index 00000000..25b28ee2
--- /dev/null
+++ b/examples/read_buffered.rs
@@ -0,0 +1,38 @@
+// This example demonstrates how a reader (for example when reading from a file)
+// can be buffered. In that case, data read from the file is written to a supplied
+// buffer and returned XML events borrow from that buffer.
+// That way, allocations can be kept to a minimum.
+
+fn main() -> Result<(), quick_xml::Error> {
+    use quick_xml::events::Event;
+    use quick_xml::Reader;
+
+    let mut reader = Reader::from_file("tests/documents/document.xml")?;
+    reader.trim_text(true);
+
+    let mut buf = Vec::new();
+
+    let mut count = 0;
+
+    loop {
+        match reader.read_event_into(&mut buf) {
+            Ok(Event::Start(ref e)) => {
+                let name = e.name();
+                let name = reader.decoder().decode(name.as_ref())?;
+                println!("read start event {:?}", name.as_ref());
+                count += 1;
+            }
+            Ok(Event::Eof) => break, // exits the loop when reaching end of file
+            Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
+            _ => (), // There are several other `Event`s we do not consider here
+        }
+        // The event borrowed from `buf` is no longer used at this point, so the
+        // buffer can be emptied and its allocation reused for the next event.
+        // Otherwise the bytes of every event read so far would pile up in `buf`.
+        buf.clear();
+    }
+
+    println!("read {} start events in total", count);
+
+    Ok(())
+}
diff --git a/examples/read_texts.rs b/examples/read_texts.rs
index 40d71e63..70be0b5c 100644
--- a/examples/read_texts.rs
+++ b/examples/read_texts.rs
@@ -10,14 +10,13 @@ fn main() {
reader.trim_text(true);
let mut txt = Vec::new();
- let mut buf = Vec::new();
loop {
- match reader.read_event_into(&mut buf) {
+ match reader.read_event() {
Ok(Event::Start(ref e)) if e.name().as_ref() == b"tag2" => {
txt.push(
reader
- .read_text_into(QName(b"tag2"), &mut Vec::new())
+ .read_text(QName(b"tag2"))
.expect("Cannot decode text value"),
);
println!("{:?}", txt);
@@ -26,6 +25,5 @@ fn main() {
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
_ => (), // There are several other `Event`s we do not consider here
}
- buf.clear();
}
}
diff --git a/src/de/mod.rs b/src/de/mod.rs
index e564e041..5b0b44d7 100644
--- a/src/de/mod.rs
+++ b/src/de/mod.rs
@@ -695,7 +695,7 @@ impl<'de> Deserializer<'de, SliceReader<'de>> {
/// Create new deserializer that will borrow data from the specified borrowing reader
#[inline]
- fn from_borrowing_reader(mut reader: Reader<&'de [u8]>) -> Self {
+ fn from_borrowing_reader(mut reader: Reader>) -> Self {
reader
.expand_empty_elements(true)
.check_end_names(true)
@@ -930,7 +930,7 @@ pub trait XmlRead<'i> {
/// You cannot create it, it is created automatically when you call
/// [`Deserializer::from_reader`]
pub struct IoReader {
- reader: Reader,
+ reader: Reader>,
buf: Vec,
}
@@ -975,7 +975,7 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader {
/// You cannot create it, it is created automatically when you call
/// [`Deserializer::from_str`] or [`Deserializer::from_slice`]
pub struct SliceReader<'de> {
- reader: Reader<&'de [u8]>,
+ reader: Reader>,
}
impl<'de> XmlRead<'de> for SliceReader<'de> {
diff --git a/src/lib.rs b/src/lib.rs
index 70a6c31d..be8903c0 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -35,7 +35,11 @@
//!
//! "#;
//!
-//! let mut reader = Reader::from_str(xml);
+//! let mut reader = Reader::from_reader(xml.as_bytes());
+//! // If you want to read from a string or byte slice without buffering, use:
+//! // let mut reader = Reader::from_str(xml);
+//! // In that case, `Vec` is *not* needed for buffering below and you should use
+//! // `read_event` instead of `read_event_into`.
//! reader.trim_text(true);
//!
//! let mut count = 0;
@@ -84,9 +88,8 @@
//! let mut reader = Reader::from_str(xml);
//! reader.trim_text(true);
//! let mut writer = Writer::new(Cursor::new(Vec::new()));
-//! let mut buf = Vec::new();
//! loop {
-//! match reader.read_event_into(&mut buf) {
+//! match reader.read_event() {
//! Ok(Event::Start(ref e)) if e.name().as_ref() == b"this_tag" => {
//!
//! // crates a new element ... alternatively we could reuse `e` by calling
@@ -111,7 +114,6 @@
//! // Ok(e) => assert!(writer.write(&buf).is_ok()),
//! Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
//! }
-//! buf.clear();
//! }
//!
//! let result = writer.into_inner().into_inner();
@@ -156,5 +158,7 @@ mod writer;
#[cfg(feature = "serialize")]
pub use crate::errors::serialize::DeError;
pub use crate::errors::{Error, Result};
-pub use crate::reader::{Decoder, Reader};
+#[cfg(feature = "async")]
+pub use crate::reader::AsyncReader;
+pub use crate::reader::{Decoder, IoReader, Reader, SliceReader};
pub use crate::writer::{ElementWriter, Writer};
diff --git a/src/reader.rs b/src/reader.rs
index bb496932..ce14a08d 100644
--- a/src/reader.rs
+++ b/src/reader.rs
@@ -1,8 +1,8 @@
//! A module to handle `Reader`
use std::borrow::Cow;
-use std::io::{self, BufRead, BufReader};
-use std::{fs::File, path::Path, str::from_utf8};
+use std::ops::{Deref, DerefMut};
+use std::str::from_utf8;
#[cfg(feature = "encoding")]
use encoding_rs::{Encoding, UTF_16BE, UTF_16LE, UTF_8};
@@ -13,6 +13,16 @@ use crate::name::{LocalName, NamespaceResolver, QName, ResolveResult};
use memchr;
+#[cfg(feature = "async")]
+mod async_reader;
+mod io_reader;
+mod slice_reader;
+
+#[cfg(feature = "async")]
+pub use self::async_reader::AsyncReader;
+pub use self::io_reader::IoReader;
+pub use self::slice_reader::SliceReader;
+
/// Possible reader states. The state transition diagram (`true` and `false` shows
/// value of [`Reader::expand_empty_elements()`] option):
///
@@ -103,6 +113,15 @@ impl EncodingRef {
}
}
+/// A trait for the underlying abstraction handling the actual reading part for the [`Reader`].
+pub trait InnerReader: Deref + DerefMut {
+ /// The real type of the inner reader.
+ type Reader;
+
+ /// Consumes this abstraction, returning the underlying reader.
+ fn into_inner(self) -> Self::Reader;
+}
+
////////////////////////////////////////////////////////////////////////////////////////////////////
/// A low level encoding-agnostic XML event reader.
@@ -123,9 +142,8 @@ impl EncodingRef {
/// reader.trim_text(true);
/// let mut count = 0;
/// let mut txt = Vec::new();
-/// let mut buf = Vec::new();
/// loop {
-/// match reader.read_event_into(&mut buf) {
+/// match reader.read_event() {
/// Ok(Event::Start(ref e)) => {
/// match e.name().as_ref() {
/// b"tag1" => println!("attributes values: {:?}",
@@ -140,7 +158,6 @@ impl EncodingRef {
/// Ok(Event::Eof) => break,
/// _ => (),
/// }
-/// buf.clear();
/// }
/// ```
#[derive(Clone)]
@@ -200,7 +217,7 @@ pub struct Reader {
/// Builder methods
impl Reader {
/// Creates a `Reader` that reads from a given reader.
- pub fn from_reader(reader: R) -> Self {
+ fn from_reader_internal(reader: R) -> Self {
Self {
reader,
opened_buffer: Vec::new(),
@@ -323,7 +340,7 @@ impl Reader {
}
/// Getters
-impl Reader {
+impl> Reader {
/// Consumes `Reader` returning the underlying reader
///
/// Can be used to compute line and column of a parsing error position
@@ -333,7 +350,7 @@ impl Reader {
/// ```
/// # use pretty_assertions::assert_eq;
/// use std::{str, io::Cursor};
- /// use quick_xml::Reader;
+ /// use quick_xml::{IoReader, Reader};
/// use quick_xml::events::Event;
///
/// let xml = r#"
@@ -343,7 +360,7 @@ impl Reader {
/// let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
/// let mut buf = Vec::new();
///
- /// fn into_line_and_column(reader: Reader>) -> (usize, usize) {
+ /// fn into_line_and_column(reader: Reader>>) -> (usize, usize) {
/// let end_pos = reader.buffer_position();
/// let mut cursor = reader.into_inner();
/// let s = String::from_utf8(cursor.into_inner()[0..end_pos].to_owned())
@@ -378,7 +395,7 @@ impl Reader {
/// }
/// ```
pub fn into_inner(self) -> R {
- self.reader
+ self.reader.into_inner()
}
/// Gets a reference to the underlying reader.
@@ -390,7 +407,10 @@ impl Reader {
pub fn get_mut(&mut self) -> &mut R {
&mut self.reader
}
+}
+/// Getters that are not specific to any inner reader implementation
+impl Reader {
/// Gets the current byte position in the input data.
///
/// Useful when debugging errors.
@@ -461,424 +481,8 @@ impl Reader {
}
}
-/// Read methods
-impl Reader {
- /// Reads the next `Event`.
- ///
- /// This is the main entry point for reading XML `Event`s.
- ///
- /// `Event`s borrow `buf` and can be converted to own their data if needed (uses `Cow`
- /// internally).
- ///
- /// Having the possibility to control the internal buffers gives you some additional benefits
- /// such as:
- ///
- /// - Reduce the number of allocations by reusing the same buffer. For constrained systems,
- /// you can call `buf.clear()` once you are done with processing the event (typically at the
- /// end of your loop).
- /// - Reserve the buffer length if you know the file size (using `Vec::with_capacity`).
- ///
- /// # Examples
- ///
- /// ```
- /// use quick_xml::Reader;
- /// use quick_xml::events::Event;
- ///
- /// let xml = r#"
- /// Test
- /// Test 2
- /// "#;
- /// let mut reader = Reader::from_str(xml);
- /// reader.trim_text(true);
- /// let mut count = 0;
- /// let mut buf = Vec::new();
- /// let mut txt = Vec::new();
- /// loop {
- /// match reader.read_event_into(&mut buf) {
- /// Ok(Event::Start(ref e)) => count += 1,
- /// Ok(Event::Text(e)) => txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()),
- /// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
- /// Ok(Event::Eof) => break,
- /// _ => (),
- /// }
- /// buf.clear();
- /// }
- /// println!("Found {} start events", count);
- /// println!("Text events: {:?}", txt);
- /// ```
- #[inline]
- pub fn read_event_into<'b>(&mut self, buf: &'b mut Vec) -> Result> {
- self.read_event_impl(buf)
- }
-
- /// Reads the next event and resolves its namespace (if applicable).
- ///
- /// # Examples
- ///
- /// ```
- /// use std::str::from_utf8;
- /// use quick_xml::Reader;
- /// use quick_xml::events::Event;
- /// use quick_xml::name::ResolveResult::*;
- ///
- /// let xml = r#"
- /// Test
- /// Test 2
- /// "#;
- /// let mut reader = Reader::from_str(xml);
- /// reader.trim_text(true);
- /// let mut count = 0;
- /// let mut buf = Vec::new();
- /// let mut ns_buf = Vec::new();
- /// let mut txt = Vec::new();
- /// loop {
- /// match reader.read_namespaced_event(&mut buf, &mut ns_buf) {
- /// Ok((Bound(ns), Event::Start(e))) => {
- /// count += 1;
- /// match (ns.as_ref(), e.local_name().as_ref()) {
- /// (b"www.xxxx", b"tag1") => (),
- /// (b"www.yyyy", b"tag2") => (),
- /// (ns, n) => panic!("Namespace and local name mismatch"),
- /// }
- /// println!("Resolved namespace: {:?}", ns);
- /// }
- /// Ok((Unbound, Event::Start(_))) => {
- /// panic!("Element not in any namespace")
- /// },
- /// Ok((Unknown(p), Event::Start(_))) => {
- /// panic!("Undeclared namespace prefix {:?}", String::from_utf8(p))
- /// }
- /// Ok((_, Event::Text(e))) => {
- /// txt.push(e.decode_and_unescape(&reader).unwrap().into_owned())
- /// },
- /// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
- /// Ok((_, Event::Eof)) => break,
- /// _ => (),
- /// }
- /// buf.clear();
- /// }
- /// println!("Found {} start events", count);
- /// println!("Text events: {:?}", txt);
- /// ```
- pub fn read_namespaced_event<'b, 'ns>(
- &mut self,
- buf: &'b mut Vec,
- namespace_buffer: &'ns mut Vec,
- ) -> Result<(ResolveResult<'ns>, Event<'b>)> {
- if self.pending_pop {
- self.ns_resolver.pop(namespace_buffer);
- }
- self.pending_pop = false;
- match self.read_event_into(buf) {
- Ok(Event::Eof) => Ok((ResolveResult::Unbound, Event::Eof)),
- Ok(Event::Start(e)) => {
- self.ns_resolver.push(&e, namespace_buffer);
- Ok((
- self.ns_resolver.find(e.name(), namespace_buffer),
- Event::Start(e),
- ))
- }
- Ok(Event::Empty(e)) => {
- // For empty elements we need to 'artificially' keep the namespace scope on the
- // stack until the next `next()` call occurs.
- // Otherwise the caller has no chance to use `resolve` in the context of the
- // namespace declarations that are 'in scope' for the empty element alone.
- // Ex:
- self.ns_resolver.push(&e, namespace_buffer);
- // notify next `read_namespaced_event()` invocation that it needs to pop this
- // namespace scope
- self.pending_pop = true;
- Ok((
- self.ns_resolver.find(e.name(), namespace_buffer),
- Event::Empty(e),
- ))
- }
- Ok(Event::End(e)) => {
- // notify next `read_namespaced_event()` invocation that it needs to pop this
- // namespace scope
- self.pending_pop = true;
- Ok((
- self.ns_resolver.find(e.name(), namespace_buffer),
- Event::End(e),
- ))
- }
- Ok(e) => Ok((ResolveResult::Unbound, e)),
- Err(e) => Err(e),
- }
- }
-
- /// Reads until end element is found using provided buffer as intermediate
- /// storage for events content. This function is supposed to be called after
- /// you already read a [`Start`] event.
- ///
- /// Manages nested cases where parent and child elements have the same name.
- ///
- /// If corresponding [`End`] event will not be found, the [`Error::UnexpectedEof`]
- /// will be returned. In particularly, that error will be returned if you call
- /// this method without consuming the corresponding [`Start`] event first.
- ///
- /// If your reader created from a string slice or byte array slice, it is
- /// better to use [`read_to_end()`] method, because it will not copy bytes
- /// into intermediate buffer.
- ///
- /// The provided `buf` buffer will be filled only by one event content at time.
- /// Before reading of each event the buffer will be cleared. If you know an
- /// appropriate size of each event, you can preallocate the buffer to reduce
- /// number of reallocations.
- ///
- /// The `end` parameter should contain name of the end element _in the reader
- /// encoding_. It is good practice to always get that parameter using
- /// [`BytesStart::to_end()`] method.
- ///
- /// The correctness of the skipped events does not checked, if you disabled
- /// the [`check_end_names`] option.
- ///
- /// # Namespaces
- ///
- /// While the [`Reader`] does not support namespace resolution, namespaces
- /// does not change the algorithm for comparing names. Although the names
- /// `a:name` and `b:name` where both prefixes `a` and `b` resolves to the
- /// same namespace, are semantically equivalent, `` cannot close
- /// ``, because according to [the specification]
- ///
- /// > The end of every element that begins with a **start-tag** MUST be marked
- /// > by an **end-tag** containing a name that echoes the element's type as
- /// > given in the **start-tag**
- ///
- /// # Examples
- ///
- /// This example shows, how you can skip XML content after you read the
- /// start event.
- ///
- /// ```
- /// # use pretty_assertions::assert_eq;
- /// use quick_xml::events::{BytesStart, Event};
- /// use quick_xml::Reader;
- ///
- /// let mut reader = Reader::from_str(r#"
- ///
- ///
- ///
- ///
- ///
- ///
- ///
- ///
- /// "#);
- /// reader.trim_text(true);
- /// let mut buf = Vec::new();
- ///
- /// let start = BytesStart::borrowed_name(b"outer");
- /// let end = start.to_end().into_owned();
- ///
- /// // First, we read a start event...
- /// assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Start(start));
- ///
- /// //...then, we could skip all events to the corresponding end event.
- /// // This call will correctly handle nested elements.
- /// // Note, however, that this method does not handle namespaces.
- /// reader.read_to_end_into(end.name(), &mut buf).unwrap();
- ///
- /// // At the end we should get an Eof event, because we ate the whole XML
- /// assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof);
- /// ```
- ///
- /// [`Start`]: Event::Start
- /// [`End`]: Event::End
- /// [`read_to_end()`]: Self::read_to_end
- /// [`check_end_names`]: Self::check_end_names
- /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
- pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec) -> Result<()> {
- let mut depth = 0;
- loop {
- buf.clear();
- match self.read_event_into(buf) {
- Err(e) => return Err(e),
-
- Ok(Event::Start(e)) if e.name() == end => depth += 1,
- Ok(Event::End(e)) if e.name() == end => {
- if depth == 0 {
- return Ok(());
- }
- depth -= 1;
- }
- Ok(Event::Eof) => {
- let name = self.decoder().decode(end.as_ref());
- return Err(Error::UnexpectedEof(format!("{:?}>", name)));
- }
- _ => (),
- }
- }
- }
-
- /// Reads optional text between start and end tags.
- ///
- /// If the next event is a [`Text`] event, returns the decoded and unescaped content as a
- /// `String`. If the next event is an [`End`] event, returns the empty string. In all other
- /// cases, returns an error.
- ///
- /// Any text will be decoded using the XML encoding specified in the XML declaration (or UTF-8
- /// if none is specified).
- ///
- /// # Examples
- ///
- /// ```
- /// # use pretty_assertions::assert_eq;
- /// use quick_xml::Reader;
- /// use quick_xml::events::Event;
- ///
- /// let mut xml = Reader::from_reader(b"
- /// <b>
- ///
- /// " as &[u8]);
- /// xml.trim_text(true);
- ///
- /// let expected = ["", ""];
- /// for &content in expected.iter() {
- /// match xml.read_event_into(&mut Vec::new()) {
- /// Ok(Event::Start(ref e)) => {
- /// assert_eq!(&xml.read_text_into(e.name(), &mut Vec::new()).unwrap(), content);
- /// },
- /// e => panic!("Expecting Start event, found {:?}", e),
- /// }
- /// }
- /// ```
- ///
- /// [`Text`]: Event::Text
- /// [`End`]: Event::End
- pub fn read_text_into(&mut self, end: QName, buf: &mut Vec) -> Result {
- let s = match self.read_event_into(buf) {
- Err(e) => return Err(e),
-
- Ok(Event::Text(e)) => e.decode_and_unescape(self)?.into_owned(),
- Ok(Event::End(e)) if e.name() == end => return Ok("".to_string()),
- Ok(Event::Eof) => return Err(Error::UnexpectedEof("Text".to_string())),
- _ => return Err(Error::TextNotFound),
- };
- self.read_to_end_into(end, buf)?;
- Ok(s)
- }
-}
-
-/// Private methods
+/// Common parsing code for all reader implementations.
impl Reader {
- /// Read text into the given buffer, and return an event that borrows from
- /// either that buffer or from the input itself, based on the type of the
- /// reader.
- fn read_event_impl<'i, B>(&mut self, buf: B) -> Result>
- where
- R: XmlSource<'i, B>,
- {
- let event = match self.tag_state {
- TagState::Init => self.read_until_open(buf, true),
- TagState::Closed => self.read_until_open(buf, false),
- TagState::Opened => self.read_until_close(buf),
- TagState::Empty => self.close_expanded_empty(),
- TagState::Exit => return Ok(Event::Eof),
- };
- match event {
- Err(_) | Ok(Event::Eof) => self.tag_state = TagState::Exit,
- _ => {}
- }
- event
- }
-
- /// Read until '<' is found and moves reader to an `Opened` state.
- ///
- /// Return a `StartText` event if `first` is `true` and a `Text` event otherwise
- fn read_until_open<'i, B>(&mut self, buf: B, first: bool) -> Result>
- where
- R: XmlSource<'i, B>,
- {
- self.tag_state = TagState::Opened;
-
- if self.trim_text_start {
- self.reader.skip_whitespace(&mut self.buf_position)?;
- }
-
- // If we already at the `<` symbol, do not try to return an empty Text event
- if self.reader.skip_one(b'<', &mut self.buf_position)? {
- return self.read_event_impl(buf);
- }
-
- match self
- .reader
- .read_bytes_until(b'<', buf, &mut self.buf_position)
- {
- Ok(Some(bytes)) => {
- #[cfg(feature = "encoding")]
- if first && self.encoding.can_be_refined() {
- if let Some(encoding) = detect_encoding(bytes) {
- self.encoding = EncodingRef::BomDetected(encoding);
- }
- }
-
- let content = if self.trim_text_end {
- // Skip the ending '<
- let len = bytes
- .iter()
- .rposition(|&b| !is_whitespace(b))
- .map_or_else(|| bytes.len(), |p| p + 1);
- &bytes[..len]
- } else {
- bytes
- };
-
- Ok(if first {
- Event::StartText(BytesText::from_escaped(content).into())
- } else {
- Event::Text(BytesText::from_escaped(content))
- })
- }
- Ok(None) => Ok(Event::Eof),
- Err(e) => Err(e),
- }
- }
-
- /// Private function to read until `>` is found. This function expects that
- /// it was called just after encounter a `<` symbol.
- fn read_until_close<'i, B>(&mut self, buf: B) -> Result>
- where
- R: XmlSource<'i, B>,
- {
- self.tag_state = TagState::Closed;
-
- match self.reader.peek_one() {
- // ` match self.reader.read_bang_element(buf, &mut self.buf_position) {
- Ok(None) => Ok(Event::Eof),
- Ok(Some((bang_type, bytes))) => self.read_bang(bang_type, bytes),
- Err(e) => Err(e),
- },
- // `` - closing tag
- Ok(Some(b'/')) => match self
- .reader
- .read_bytes_until(b'>', buf, &mut self.buf_position)
- {
- Ok(None) => Ok(Event::Eof),
- Ok(Some(bytes)) => self.read_end(bytes),
- Err(e) => Err(e),
- },
- // `` - processing instruction
- Ok(Some(b'?')) => match self
- .reader
- .read_bytes_until(b'>', buf, &mut self.buf_position)
- {
- Ok(None) => Ok(Event::Eof),
- Ok(Some(bytes)) => self.read_question_mark(bytes),
- Err(e) => Err(e),
- },
- // `<...` - opening or self-closed tag
- Ok(Some(_)) => match self.reader.read_element(buf, &mut self.buf_position) {
- Ok(None) => Ok(Event::Eof),
- Ok(Some(bytes)) => self.read_start(bytes),
- Err(e) => Err(e),
- },
- Ok(None) => Ok(Event::Eof),
- Err(e) => Err(e),
- }
- }
-
/// reads `BytesElement` starting with a `!`,
/// return `Comment`, `CData` or `DocType` event
fn read_bang<'b>(&mut self, bang_type: BangType, buf: &'b [u8]) -> Result> {
@@ -1015,519 +619,49 @@ impl Reader {
Ok(Event::Start(BytesStart::borrowed(buf, name_end)))
}
}
-}
-impl Reader> {
- /// Creates an XML reader from a file path.
- pub fn from_file>(path: P) -> Result {
- let file = File::open(path).map_err(Error::Io)?;
- let reader = BufReader::new(file);
- Ok(Self::from_reader(reader))
- }
-}
-
-impl<'a> Reader<&'a [u8]> {
- /// Creates an XML reader from a string slice.
- pub fn from_str(s: &'a str) -> Self {
- // Rust strings are guaranteed to be UTF-8, so lock the encoding
- #[cfg(feature = "encoding")]
- {
- let mut reader = Self::from_reader(s.as_bytes());
- reader.encoding = EncodingRef::Explicit(UTF_8);
- reader
- }
-
- #[cfg(not(feature = "encoding"))]
- Self::from_reader(s.as_bytes())
- }
-
- /// Creates an XML reader from a slice of bytes.
- pub fn from_bytes(s: &'a [u8]) -> Self {
- Self::from_reader(s)
- }
-
- /// Read an event that borrows from the input rather than a buffer.
- #[inline]
- pub fn read_event(&mut self) -> Result> {
- self.read_event_impl(())
- }
-
- /// Reads until end element is found. This function is supposed to be called
- /// after you already read a [`Start`] event.
- ///
- /// Manages nested cases where parent and child elements have the same name.
- ///
- /// If corresponding [`End`] event will not be found, the [`Error::UnexpectedEof`]
- /// will be returned. In particularly, that error will be returned if you call
- /// this method without consuming the corresponding [`Start`] event first.
- ///
- /// The `end` parameter should contain name of the end element _in the reader
- /// encoding_. It is good practice to always get that parameter using
- /// [`BytesStart::to_end()`] method.
- ///
- /// The correctness of the skipped events does not checked, if you disabled
- /// the [`check_end_names`] option.
- ///
- /// # Namespaces
- ///
- /// While the [`Reader`] does not support namespace resolution, namespaces
- /// does not change the algorithm for comparing names. Although the names
- /// `a:name` and `b:name` where both prefixes `a` and `b` resolves to the
- /// same namespace, are semantically equivalent, `` cannot close
- /// ``, because according to [the specification]
- ///
- /// > The end of every element that begins with a **start-tag** MUST be marked
- /// > by an **end-tag** containing a name that echoes the element's type as
- /// > given in the **start-tag**
- ///
- /// # Examples
- ///
- /// This example shows, how you can skip XML content after you read the
- /// start event.
- ///
- /// ```
- /// # use pretty_assertions::assert_eq;
- /// use quick_xml::events::{BytesStart, Event};
- /// use quick_xml::Reader;
- ///
- /// let mut reader = Reader::from_str(r#"
- ///
- ///
- ///
- ///
- ///
- ///
- ///
- ///
- /// "#);
- /// reader.trim_text(true);
- ///
- /// let start = BytesStart::borrowed_name(b"outer");
- /// let end = start.to_end().into_owned();
- ///
- /// // First, we read a start event...
- /// assert_eq!(reader.read_event().unwrap(), Event::Start(start));
- ///
- /// //...then, we could skip all events to the corresponding end event.
- /// // This call will correctly handle nested elements.
- /// // Note, however, that this method does not handle namespaces.
- /// reader.read_to_end(end.name()).unwrap();
- ///
- /// // At the end we should get an Eof event, because we ate the whole XML
- /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
- /// ```
- ///
- /// [`Start`]: Event::Start
- /// [`End`]: Event::End
- /// [`check_end_names`]: Self::check_end_names
- /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
- pub fn read_to_end(&mut self, end: QName) -> Result<()> {
- let mut depth = 0;
- loop {
- match self.read_event() {
- Err(e) => return Err(e),
-
- Ok(Event::Start(e)) if e.name() == end => depth += 1,
- Ok(Event::End(e)) if e.name() == end => {
- if depth == 0 {
- return Ok(());
- }
- depth -= 1;
- }
- Ok(Event::Eof) => {
- let name = self.decoder().decode(end.as_ref());
- return Err(Error::UnexpectedEof(format!("{:?}>", name)));
- }
- _ => (),
- }
- }
- }
-}
-
-/// Represents an input for a reader that can return borrowed data.
-///
-/// There are two implementors of this trait: generic one that read data from
-/// `Self`, copies some part of it into a provided buffer of type `B` and then
-/// returns data that borrow from that buffer.
-///
-/// The other implementor is for `&[u8]` and instead of copying data returns
-/// borrowed data from `Self` instead. This implementation allows zero-copy
-/// deserialization.
-///
-/// # Parameters
-/// - `'r`: lifetime of a buffer from which events will borrow
-/// - `B`: a type of a buffer that can be used to store data read from `Self` and
-/// from which events can borrow
-trait XmlSource<'r, B> {
- /// Read input until `byte` is found or end of input is reached.
- ///
- /// Returns a slice of data read up to `byte`, which does not include into result.
- /// If input (`Self`) is exhausted, returns `None`.
- ///
- /// # Example
- ///
- /// ```ignore
- /// let mut position = 0;
- /// let mut input = b"abc*def".as_ref();
- /// // ^= 4
- ///
- /// assert_eq!(
- /// input.read_bytes_until(b'*', (), &mut position).unwrap(),
- /// Some(b"abc".as_ref())
- /// );
- /// assert_eq!(position, 4); // position after the symbol matched
- /// ```
- ///
- /// # Parameters
- /// - `byte`: Byte for search
- /// - `buf`: Buffer that could be filled from an input (`Self`) and
- /// from which [events] could borrow their data
- /// - `position`: Will be increased by amount of bytes consumed
- ///
- /// [events]: crate::events::Event
- fn read_bytes_until(
- &mut self,
- byte: u8,
- buf: B,
- position: &mut usize,
- ) -> Result