Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow using tokio's AsyncBufRead [Rebased] #314

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,17 @@ license = "MIT"
travis-ci = { repository = "tafia/quick-xml" }

[dependencies]
async-recursion = { version = "0.3.2", optional = true }
encoding_rs = { version = "0.8.26", optional = true }
tokio = { version = "1.4.0", features = ["fs", "io-util"], optional = true }
serde = { version = "1.0", optional = true }
memchr = "2.3.4"

[dev-dependencies]
serde = { version = "1.0", features = ["derive"] }
serde-value = "0.7"
regex = "1"
tokio = { version = "1.4.0", features = ["macros", "rt-multi-thread"] }

[lib]
bench = false
Expand All @@ -33,6 +36,7 @@ default = []
encoding = ["encoding_rs"]
serialize = ["serde"]
escape-html = []
asynchronous = ["tokio", "async-recursion"]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefer to name it async


[package.metadata.docs.rs]
features = ["serialize"]
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -213,8 +213,8 @@ fn crates_io() -> Result<Html, DeError> {

### Credits

This has largely been inspired by [serde-xml-rs](https://github.com/RReverser/serde-xml-rs).
quick-xml follows its convention for deserialization, including the
This has largely been inspired by [serde-xml-rs](https://github.com/RReverser/serde-xml-rs).
quick-xml follows its convention for deserialization, including the
[`$value`](https://github.com/RReverser/serde-xml-rs#parsing-the-value-of-a-tag) special name.

### Parsing the "value" of a tag
Expand Down Expand Up @@ -251,6 +251,7 @@ Note that despite not focusing on performance (there are several unnecessary copi

- `encoding`: support non utf8 xmls
- `serialize`: support serde `Serialize`/`Deserialize`
- `asynchronous`: support for `AsyncRead`s in `tokio`

## Performance

Expand Down
70 changes: 67 additions & 3 deletions examples/custom_entities.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,13 @@ extern crate quick_xml;
extern crate regex;

use quick_xml::events::Event;
#[cfg(feature = "asynchronous")]
use quick_xml::AsyncReader;
use quick_xml::Reader;
use regex::bytes::Regex;
use std::collections::HashMap;
#[cfg(feature = "asynchronous")]
use tokio::runtime::Runtime;

const DATA: &str = r#"

Expand All @@ -25,16 +29,18 @@ const DATA: &str = r#"

"#;

fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut reader = Reader::from_str(DATA);
fn custom_entities(data: &str) -> Result<(), Box<dyn std::error::Error>> {
let mut reader = Reader::from_str(data);
reader.trim_text(true);

let mut buf = Vec::new();
let mut custom_entities = HashMap::new();
let entity_re = Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#)?;

loop {
match reader.read_event(&mut buf) {
let event = reader.read_event(&mut buf);

match event {
Ok(Event::DocType(ref e)) => {
for cap in entity_re.captures_iter(&e) {
custom_entities.insert(cap[1].to_vec(), cap[2].to_vec());
Expand Down Expand Up @@ -69,3 +75,61 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
}
Ok(())
}

#[cfg(feature = "asynchronous")]
async fn custom_entities_async(data: &str) -> Result<(), Box<dyn std::error::Error>> {
let mut reader = AsyncReader::from_str(data);
reader.trim_text(true);

let mut buf = Vec::new();
let mut custom_entities = HashMap::new();
let entity_re = Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#)?;

loop {
match reader.read_event(&mut buf).await {
Ok(Event::DocType(ref e)) => {
for cap in entity_re.captures_iter(&e) {
custom_entities.insert(cap[1].to_vec(), cap[2].to_vec());
}
}
Ok(Event::Start(ref e)) => match e.name() {
b"test" => println!(
"attributes values: {:?}",
e.attributes()
.map(|a| a
.unwrap()
.unescape_and_decode_value_with_custom_entities(
&reader,
&custom_entities
)
.unwrap())
.collect::<Vec<_>>()
),
_ => (),
},
Ok(Event::Text(ref e)) => {
println!(
"text value: {}",
e.unescape_and_decode_with_custom_entities(&reader, &custom_entities)
.unwrap()
);
}
Ok(Event::Eof) => break,
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
_ => (),
}
}
Ok(())
}

/// Runs the blocking example and, when the `asynchronous` feature is enabled,
/// the async one as well on a freshly created Tokio runtime.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    custom_entities(DATA)?;

    // Everything that needs Tokio lives in one feature-gated block.
    #[cfg(feature = "asynchronous")]
    {
        let runtime = Runtime::new().expect("Runtime cannot be initialized");
        runtime.block_on(custom_entities_async(DATA))?;
    }

    Ok(())
}
83 changes: 72 additions & 11 deletions examples/issue68.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
#![allow(unused)]

extern crate quick_xml;

use quick_xml::events::Event;
#[cfg(feature = "asynchronous")]
use quick_xml::AsyncReader;
use quick_xml::Reader;
use std::io::Read;
#[cfg(feature = "asynchronous")]
use tokio::runtime::Runtime;

struct Resource {
etag: String,
Expand Down Expand Up @@ -55,6 +57,68 @@ impl Response {
}
}

/// Parser position used while walking a `DAV:` multistatus report.
///
/// Declared at module level so that both `parse_report` and
/// `parse_report_async` can use it.
#[derive(Copy, Clone)]
enum State {
    /// Outside the `multistatus` element.
    Root,
    /// Inside `multistatus`, between `response` elements.
    MultiStatus,
    /// Inside a `response` element.
    Response,
    /// NOTE(review): not reached by the visible async parser; presumably a
    /// successful property status. Confirm against `parse_report`.
    Success,
    /// NOTE(review): not reached by the visible async parser; presumably a
    /// failed property status. Confirm against `parse_report`.
    Error,
}

/// Async variant of `parse_report`: walks `xml_data` with namespaced events
/// and tracks depth/state for the `DAV:` `multistatus` / `response` / `href`
/// elements.
///
/// The walk stops at end of input or at the first reader error (the error is
/// discarded). `result` is never written to, so the returned vector is always
/// empty; the function exists to exercise the reader, not to build resources.
///
/// # Panics
/// Panics if the `href` start tag cannot be unescaped/decoded.
#[cfg(feature = "asynchronous")]
async fn parse_report_async(xml_data: &str) -> Vec<Resource> {
    let result: Vec<Resource> = Vec::new();

    let mut reader = AsyncReader::from_str(xml_data);
    reader.trim_text(true);

    let mut count = 0;
    let mut buf = Vec::new();
    let mut ns_buffer = Vec::new();

    let mut responses: Vec<Response> = Vec::new();
    let mut current_response = Response::new();
    let mut current_prop = Prop::new();

    let mut depth = 0;
    let mut state = State::MultiStatus;

    loop {
        match reader.read_namespaced_event(&mut buf, &mut ns_buffer).await {
            Ok((ns, Event::Start(start))) => {
                let ns = ns.unwrap_or_default();
                match (depth, state, ns, start.local_name()) {
                    (0, State::Root, b"DAV:", b"multistatus") => {
                        state = State::MultiStatus;
                    }
                    (1, State::MultiStatus, b"DAV:", b"response") => {
                        state = State::Response;
                        current_response = Response::new();
                    }
                    (2, State::Response, b"DAV:", b"href") => {
                        current_response.href = start.unescape_and_decode(&reader).unwrap();
                    }
                    _ => {}
                }
                // Every opening tag goes one level deeper, matched or not.
                depth += 1;
            }
            Ok((ns, Event::End(end))) => {
                let ns = ns.unwrap_or_default();
                match (depth, state, &*ns, end.local_name()) {
                    (1, State::MultiStatus, b"DAV:", b"multistatus") => {
                        state = State::Root;
                    }
                    (2, State::MultiStatus, b"DAV:", b"multistatus") => {
                        state = State::MultiStatus;
                    }
                    _ => {}
                }
                depth -= 1;
            }
            // End of input and reader errors both simply end the walk.
            Ok((_, Event::Eof)) | Err(_) => break,
            _ => {}
        }
    }

    result
}

fn parse_report(xml_data: &str) -> Vec<Resource> {
let result = Vec::<Resource>::new();

Expand All @@ -65,15 +129,6 @@ fn parse_report(xml_data: &str) -> Vec<Resource> {
let mut buf = Vec::new();
let mut ns_buffer = Vec::new();

#[derive(Clone, Copy)]
enum State {
Root,
MultiStatus,
Response,
Success,
Error,
};

let mut responses = Vec::<Response>::new();
let mut current_response = Response::new();
let mut current_prop = Prop::new();
Expand Down Expand Up @@ -138,4 +193,10 @@ fn main() {
"#;

parse_report(test_data);

#[cfg(feature = "asynchronous")]
let runtime = Runtime::new().expect("Runtime cannot be initialized");

#[cfg(feature = "asynchronous")]
runtime.block_on(async { parse_report_async(test_data).await });
}
97 changes: 92 additions & 5 deletions examples/nested_readers.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,26 @@
extern crate quick_xml;
use quick_xml::events::Event;
#[cfg(feature = "asynchronous")]
use quick_xml::AsyncReader;
use quick_xml::Reader;
#[cfg(feature = "asynchronous")]
use tokio::runtime::Runtime;

/// The rows extracted from one ECMA-376 table (`w:tbl`) in `document.xml`.
#[derive(Clone, Debug)]
struct TableStat {
    /// 1-based ordinal of the table, in the order the tables were encountered.
    index: u8,
    /// One inner vector per `w:tr` row, holding one entry per `w:tc` cell.
    rows: Vec<Vec<String>>,
}
// demonstrate how to nest readers
// This is useful for when you need to traverse
// a few levels of a document to extract things.
fn main() -> Result<(), quick_xml::Error> {

fn nest_readers() -> Result<(), quick_xml::Error> {
let mut buf = Vec::new();
// buffer for nested reader
let mut skip_buf = Vec::new();
let mut count = 0;

let mut reader = Reader::from_file("tests/documents/document.xml")?;

let mut found_tables = Vec::new();
loop {
match reader.read_event(&mut buf)? {
Expand All @@ -32,6 +36,7 @@ fn main() -> Result<(), quick_xml::Error> {
let mut row_index = 0;
loop {
skip_buf.clear();

match reader.read_event(&mut skip_buf)? {
Event::Start(element) => match element.name() {
b"w:tr" => {
Expand Down Expand Up @@ -73,3 +78,85 @@ fn main() -> Result<(), quick_xml::Error> {
assert_eq!(found_tables[1].rows[1].len(), 4);
Ok(())
}

/// Async twin of `nest_readers`: scans `tests/documents/document.xml` for
/// `w:tbl` elements and, for each one, runs a nested read loop (with its own
/// buffer) that records the table's `w:tr` rows and `w:tc` cells in a
/// `TableStat`.
///
/// # Errors
/// Propagates any `quick_xml::Error` raised while opening the file or reading
/// an event.
///
/// # Panics
/// Panics if a `w:tc` is seen before any `w:tr` inside a table, if an element
/// name is not valid UTF-8, or if the collected tables do not match the layout
/// asserted at the end (two tables of two rows with four cells each).
#[cfg(feature = "asynchronous")]
async fn nest_readers_async() -> Result<(), quick_xml::Error> {
    let mut buf = Vec::new();
    // buffer for nested reader
    let mut skip_buf = Vec::new();
    let mut count = 0;

    let mut reader = AsyncReader::from_file("tests/documents/document.xml").await?;

    let mut found_tables = Vec::new();
    loop {
        match reader.read_event(&mut buf).await? {
            Event::Start(element) => match element.name() {
                b"w:tbl" => {
                    count += 1;
                    let mut stats = TableStat {
                        index: count,
                        rows: vec![],
                    };
                    // must define stateful variables
                    // outside the nested loop else they are overwritten
                    let mut row_index = 0;
                    loop {
                        skip_buf.clear();

                        match reader.read_event(&mut skip_buf).await? {
                            Event::Start(element) => match element.name() {
                                b"w:tr" => {
                                    stats.rows.push(vec![]);
                                    row_index = stats.rows.len() - 1;
                                }
                                b"w:tc" => {
                                    stats.rows[row_index]
                                        .push(String::from_utf8(element.name().to_vec()).unwrap());
                                }
                                _ => {}
                            },
                            Event::End(element) => {
                                if element.name() == b"w:tbl" {
                                    found_tables.push(stats);
                                    break;
                                }
                            }
                            // The input ended while the table was still open:
                            // its closing tag can never arrive, so leave the
                            // nested loop instead of polling forever. The
                            // unfinished table is not recorded.
                            Event::Eof => break,
                            _ => {}
                        }
                    }
                }
                _ => {}
            },
            Event::Eof => break,
            _ => {}
        }
        buf.clear();
    }
    assert_eq!(found_tables.len(), 2);
    // pretty print the table
    println!("{:#?}", found_tables);
    assert_eq!(found_tables[0].rows.len(), 2);
    assert_eq!(found_tables[0].rows[0].len(), 4);
    assert_eq!(found_tables[0].rows[1].len(), 4);

    assert_eq!(found_tables[1].rows.len(), 2);
    assert_eq!(found_tables[1].rows[0].len(), 4);
    assert_eq!(found_tables[1].rows[1].len(), 4);
    Ok(())
}

// Demonstrates how to nest readers, which is useful when a few levels of a
// document have to be traversed to extract things. With the `asynchronous`
// feature enabled the async variant runs first, then the blocking one.
fn main() -> Result<(), quick_xml::Error> {
    #[cfg(feature = "asynchronous")]
    let runtime = Runtime::new().expect("Runtime cannot be initialized");

    // The future is handed to the runtime directly; no wrapper block needed.
    #[cfg(feature = "asynchronous")]
    runtime.block_on(nest_readers_async())?;

    nest_readers()
}
Loading