Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Split Reader into SliceReader and BufferedReader #425

Closed
wants to merge 8 commits into from
4 changes: 4 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,9 @@
- [#423]: All escaping functions now accepts and returns strings instead of byte slices
- [#423]: Removed `BytesText::from_plain` because it internally did escaping of a byte array,
but since now escaping works on strings. Use `BytesText::from_plain_str` instead
- [#425]: Split the internal implementation of `Reader` into multiple files to better separate the
buffered and unbuffered implementations. The buffered methods, e.g. `read_event_into(&mut buf)`,
will no longer be available when reading from a slice.

### New Tests

Expand Down Expand Up @@ -167,6 +170,7 @@
[#418]: https://github.com/tafia/quick-xml/pull/418
[#421]: https://github.com/tafia/quick-xml/pull/421
[#423]: https://github.com/tafia/quick-xml/pull/423
[#425]: https://github.com/tafia/quick-xml/pull/425

## 0.23.0 -- 2022-05-08

Expand Down
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,11 @@ let xml = r#"<tag1 att1 = "test">
<tag2><!--Test comment-->Test</tag2>
<tag2>Test 2</tag2>
</tag1>"#;
let mut reader = Reader::from_str(xml);
let mut reader = Reader::from_reader(xml.as_bytes());
// If you want to read from a string or byte slice without buffering, use:
// let mut reader = Reader::from_str(xml);
// In that case, `Vec` is *not* needed for buffering below and you should use
// `read_event` instead of `read_event_into`.
reader.trim_text(true);

let mut count = 0;
Expand Down
2 changes: 1 addition & 1 deletion benches/macrobenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ static PLAYERS: &[u8] = include_bytes!("../tests/documents/players.xml");
// TODO: read the namespaces too
// TODO: use fully normalized attribute values
fn parse_document(doc: &[u8]) -> XmlResult<()> {
let mut r = Reader::from_reader(doc);
let mut r = Reader::from_bytes(doc);
loop {
match r.read_event()? {
Event::Start(e) | Event::Empty(e) => {
Expand Down
70 changes: 22 additions & 48 deletions benches/microbenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,15 @@ fn read_event(c: &mut Criterion) {
let mut group = c.benchmark_group("read_event");
group.bench_function("trim_text = false", |b| {
b.iter(|| {
let mut r = Reader::from_reader(SAMPLE);
let mut r = Reader::from_bytes(SAMPLE);
r.check_end_names(false).check_comments(false);
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
Ok(Event::Eof) => break,
_ => (),
}
buf.clear();
}
assert_eq!(
count, 1550,
Expand All @@ -50,19 +48,17 @@ fn read_event(c: &mut Criterion) {

group.bench_function("trim_text = true", |b| {
b.iter(|| {
let mut r = Reader::from_reader(SAMPLE);
let mut r = Reader::from_bytes(SAMPLE);
r.check_end_names(false)
.check_comments(false)
.trim_text(true);
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
Ok(Event::Eof) => break,
_ => (),
}
buf.clear();
}
assert_eq!(
count, 1550,
Expand All @@ -79,18 +75,16 @@ fn read_namespaced_event(c: &mut Criterion) {
let mut group = c.benchmark_group("read_namespaced_event");
group.bench_function("trim_text = false", |b| {
b.iter(|| {
let mut r = Reader::from_reader(SAMPLE);
let mut r = Reader::from_bytes(SAMPLE);
r.check_end_names(false).check_comments(false);
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
let mut ns_buf = Vec::new();
loop {
match r.read_namespaced_event(&mut buf, &mut ns_buf) {
match r.read_namespaced_event(&mut ns_buf) {
Ok((_, Event::Start(_))) | Ok((_, Event::Empty(_))) => count += 1,
Ok((_, Event::Eof)) => break,
_ => (),
}
buf.clear();
}
assert_eq!(
count, 1550,
Expand All @@ -101,20 +95,18 @@ fn read_namespaced_event(c: &mut Criterion) {

group.bench_function("trim_text = true", |b| {
b.iter(|| {
let mut r = Reader::from_reader(SAMPLE);
let mut r = Reader::from_bytes(SAMPLE);
r.check_end_names(false)
.check_comments(false)
.trim_text(true);
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
let mut ns_buf = Vec::new();
loop {
match r.read_namespaced_event(&mut buf, &mut ns_buf) {
match r.read_namespaced_event(&mut ns_buf) {
Ok((_, Event::Start(_))) | Ok((_, Event::Empty(_))) => count += 1,
Ok((_, Event::Eof)) => break,
_ => (),
}
buf.clear();
}
assert_eq!(
count, 1550,
Expand All @@ -130,78 +122,66 @@ fn one_event(c: &mut Criterion) {
let mut group = c.benchmark_group("One event");
group.bench_function("StartText", |b| {
let src = "Hello world!".repeat(512 / 12).into_bytes();
let mut buf = Vec::with_capacity(1024);
b.iter(|| {
let mut r = Reader::from_reader(src.as_ref());
let mut r = Reader::from_bytes(src.as_ref());
let mut nbtxt = criterion::black_box(0);
r.check_end_names(false).check_comments(false);
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::StartText(e)) => nbtxt += e.len(),
something_else => panic!("Did not expect {:?}", something_else),
};

buf.clear();

assert_eq!(nbtxt, 504);
})
});

group.bench_function("Start", |b| {
let src = format!(r#"<hello target="{}">"#, "world".repeat(512 / 5)).into_bytes();
let mut buf = Vec::with_capacity(1024);
b.iter(|| {
let mut r = Reader::from_reader(src.as_ref());
let mut r = Reader::from_bytes(src.as_ref());
let mut nbtxt = criterion::black_box(0);
r.check_end_names(false)
.check_comments(false)
.trim_text(true);
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::Start(ref e)) => nbtxt += e.len(),
something_else => panic!("Did not expect {:?}", something_else),
};

buf.clear();

assert_eq!(nbtxt, 525);
})
});

group.bench_function("Comment", |b| {
let src = format!(r#"<!-- hello "{}" -->"#, "world".repeat(512 / 5)).into_bytes();
let mut buf = Vec::with_capacity(1024);
b.iter(|| {
let mut r = Reader::from_reader(src.as_ref());
let mut r = Reader::from_bytes(src.as_ref());
let mut nbtxt = criterion::black_box(0);
r.check_end_names(false)
.check_comments(false)
.trim_text(true);
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::Comment(e)) => nbtxt += e.decode_and_unescape(&r).unwrap().len(),
something_else => panic!("Did not expect {:?}", something_else),
};

buf.clear();

assert_eq!(nbtxt, 520);
})
});

group.bench_function("CData", |b| {
let src = format!(r#"<![CDATA[hello "{}"]]>"#, "world".repeat(512 / 5)).into_bytes();
let mut buf = Vec::with_capacity(1024);
b.iter(|| {
let mut r = Reader::from_reader(src.as_ref());
let mut r = Reader::from_bytes(src.as_ref());
let mut nbtxt = criterion::black_box(0);
r.check_end_names(false)
.check_comments(false)
.trim_text(true);
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::CData(ref e)) => nbtxt += e.len(),
something_else => panic!("Did not expect {:?}", something_else),
};

buf.clear();

assert_eq!(nbtxt, 518);
})
});
Expand All @@ -213,12 +193,11 @@ fn attributes(c: &mut Criterion) {
let mut group = c.benchmark_group("attributes");
group.bench_function("with_checks = true", |b| {
b.iter(|| {
let mut r = Reader::from_reader(PLAYERS);
let mut r = Reader::from_bytes(PLAYERS);
r.check_end_names(false).check_comments(false);
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::Empty(e)) => {
for attr in e.attributes() {
let _attr = attr.unwrap();
Expand All @@ -228,20 +207,18 @@ fn attributes(c: &mut Criterion) {
Ok(Event::Eof) => break,
_ => (),
}
buf.clear();
}
assert_eq!(count, 1041);
})
});

group.bench_function("with_checks = false", |b| {
b.iter(|| {
let mut r = Reader::from_reader(PLAYERS);
let mut r = Reader::from_bytes(PLAYERS);
r.check_end_names(false).check_comments(false);
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::Empty(e)) => {
for attr in e.attributes().with_checks(false) {
let _attr = attr.unwrap();
Expand All @@ -251,20 +228,18 @@ fn attributes(c: &mut Criterion) {
Ok(Event::Eof) => break,
_ => (),
}
buf.clear();
}
assert_eq!(count, 1041);
})
});

group.bench_function("try_get_attribute", |b| {
b.iter(|| {
let mut r = Reader::from_reader(PLAYERS);
let mut r = Reader::from_bytes(PLAYERS);
r.check_end_names(false).check_comments(false);
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::Empty(e)) if e.name() == QName(b"player") => {
for name in ["num", "status", "avg"] {
if let Some(_attr) = e.try_get_attribute(name).unwrap() {
Expand All @@ -279,7 +254,6 @@ fn attributes(c: &mut Criterion) {
Ok(Event::Eof) => break,
_ => (),
}
buf.clear();
}
assert_eq!(count, 150);
})
Expand Down
34 changes: 34 additions & 0 deletions examples/read_buffered.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// This example demonstrates how a reader (for example when reading from a file)
// can be buffered. In that case, data read from the file is written to a supplied
// buffer and returned XML events borrow from that buffer.
// That way, allocations can be kept to a minimum.

fn main() -> Result<(), quick_xml::Error> {
use quick_xml::events::Event;
use quick_xml::Reader;

let mut reader = Reader::from_file("tests/documents/document.xml")?;
reader.trim_text(true);

let mut buf = Vec::new();

let mut count = 0;

loop {
match reader.read_event_into(&mut buf) {
Ok(Event::Start(ref e)) => {
let name = e.name();
let name = reader.decoder().decode(name.as_ref())?;
println!("read start event {:?}", name.as_ref());
count += 1;
}
Ok(Event::Eof) => break, // exits the loop when reaching end of file
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
_ => (), // There are several other `Event`s we do not consider here
}
}

println!("read {} start events in total", count);

Ok(())
}
6 changes: 2 additions & 4 deletions examples/read_texts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,13 @@ fn main() {
reader.trim_text(true);

let mut txt = Vec::new();
let mut buf = Vec::new();

loop {
match reader.read_event_into(&mut buf) {
match reader.read_event() {
Ok(Event::Start(ref e)) if e.name().as_ref() == b"tag2" => {
txt.push(
reader
.read_text_into(QName(b"tag2"), &mut Vec::new())
.read_text(QName(b"tag2"))
.expect("Cannot decode text value"),
);
println!("{:?}", txt);
Expand All @@ -26,6 +25,5 @@ fn main() {
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
_ => (), // There are several other `Event`s we do not consider here
}
buf.clear();
}
}
6 changes: 3 additions & 3 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -695,7 +695,7 @@ impl<'de> Deserializer<'de, SliceReader<'de>> {

/// Create new deserializer that will borrow data from the specified borrowing reader
#[inline]
fn from_borrowing_reader(mut reader: Reader<&'de [u8]>) -> Self {
fn from_borrowing_reader(mut reader: Reader<crate::SliceReader<'de>>) -> Self {
reader
.expand_empty_elements(true)
.check_end_names(true)
Expand Down Expand Up @@ -930,7 +930,7 @@ pub trait XmlRead<'i> {
/// You cannot create it, it is created automatically when you call
/// [`Deserializer::from_reader`]
pub struct IoReader<R: BufRead> {
reader: Reader<R>,
reader: Reader<crate::BufferedReader<R>>,
buf: Vec<u8>,
}

Expand Down Expand Up @@ -975,7 +975,7 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
/// You cannot create it, it is created automatically when you call
/// [`Deserializer::from_str`] or [`Deserializer::from_slice`]
pub struct SliceReader<'de> {
reader: Reader<&'de [u8]>,
reader: Reader<crate::SliceReader<'de>>,
}

impl<'de> XmlRead<'de> for SliceReader<'de> {
Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,5 +65,5 @@ mod writer;
#[cfg(feature = "serialize")]
pub use crate::errors::serialize::DeError;
pub use crate::errors::{Error, Result};
pub use crate::reader::{Decoder, Reader};
pub use crate::reader::{BufferedReader, Decoder, Reader, SliceReader};
pub use crate::writer::{ElementWriter, Writer};
Loading