Skip to content

Commit

Permalink
Introduce an AsyncReader instead of overloading the Reader
Browse files Browse the repository at this point in the history
  • Loading branch information
endor committed Mar 29, 2021
1 parent 75bf989 commit e5d300e
Show file tree
Hide file tree
Showing 20 changed files with 2,715 additions and 718 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ travis-ci = { repository = "tafia/quick-xml" }
[dependencies]
async-recursion = { version = "0.3.2", optional = true }
encoding_rs = { version = "0.8.26", optional = true }
tokio = { version = "0.2.22", features = ["fs", "io-util"], optional = true }
tokio = { version = "1.4.0", features = ["fs", "io-util"], optional = true }
serde = { version = "1.0", optional = true }
memchr = "2.3.4"

[dev-dependencies]
serde = { version = "1.0", features = ["derive"] }
serde-value = "0.7"
regex = "1"
tokio = { version = "0.2.22", features = ["macros", "rt-threaded"] }
tokio = { version = "1.4.0", features = ["macros", "rt-multi-thread"] }

[lib]
bench = false
Expand Down
71 changes: 62 additions & 9 deletions examples/custom_entities.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ extern crate quick_xml;
extern crate regex;

use quick_xml::events::Event;
#[cfg(feature = "asynchronous")]
use quick_xml::AsyncReader;
use quick_xml::Reader;
use regex::bytes::Regex;
use std::collections::HashMap;
Expand All @@ -27,22 +29,15 @@ const DATA: &str = r#"
"#;

fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut reader = Reader::from_str(DATA);
fn custom_entities(data: &str) -> Result<(), Box<dyn std::error::Error>> {
let mut reader = Reader::from_str(data);
reader.trim_text(true);

let mut buf = Vec::new();
let mut custom_entities = HashMap::new();
let entity_re = Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#)?;

#[cfg(feature = "asynchronous")]
let mut runtime = Runtime::new().expect("Runtime cannot be initialized");

loop {
#[cfg(feature = "asynchronous")]
let event = runtime.block_on(async { reader.read_event(&mut buf).await });

#[cfg(not(feature = "asynchronous"))]
let event = reader.read_event(&mut buf);

match event {
Expand Down Expand Up @@ -80,3 +75,61 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
}
Ok(())
}

/// Async twin of `custom_entities`: drives an `AsyncReader` over `data`,
/// collecting `<!ENTITY name "value">` declarations found in the DOCTYPE, then
/// printing the unescaped attribute values of every `test` start tag and the
/// unescaped content of every text event.
///
/// Panics if the reader reports an error or if unescaping fails; the only
/// failure that surfaces through the returned `Result` is regex construction.
#[cfg(feature = "asynchronous")]
async fn custom_entities_async(data: &str) -> Result<(), Box<dyn std::error::Error>> {
    let mut reader = AsyncReader::from_str(data);
    reader.trim_text(true);

    let entity_decl = Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#)?;
    let mut known_entities = HashMap::new();
    let mut event_buf = Vec::new();

    loop {
        match reader.read_event(&mut event_buf).await {
            Ok(Event::Eof) => break,
            Ok(Event::DocType(ref doctype)) => {
                // Capture group 1 is the entity name, group 2 its replacement text.
                for decl in entity_decl.captures_iter(&doctype) {
                    known_entities.insert(decl[1].to_vec(), decl[2].to_vec());
                }
            }
            // Only `test` elements are reported; every other start tag falls
            // through to the catch-all arm.
            Ok(Event::Start(ref tag)) if tag.name() == b"test" => {
                let values = tag
                    .attributes()
                    .map(|attr| {
                        attr.unwrap()
                            .unescape_and_decode_value_with_custom_entities(
                                &reader,
                                &known_entities,
                            )
                            .unwrap()
                    })
                    .collect::<Vec<_>>();
                println!("attributes values: {:?}", values);
            }
            Ok(Event::Text(ref text)) => {
                let content = text
                    .unescape_and_decode_with_custom_entities(&reader, &known_entities)
                    .unwrap();
                println!("text value: {}", content);
            }
            Err(err) => panic!(
                "Error at position {}: {:?}",
                reader.buffer_position(),
                err
            ),
            _ => (),
        }
    }

    Ok(())
}

/// Entry point: runs the blocking example first and, when the `asynchronous`
/// feature is enabled, repeats it through `custom_entities_async` on a freshly
/// created `Runtime`.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    custom_entities(DATA)?;

    #[cfg(feature = "asynchronous")]
    {
        let rt = Runtime::new().expect("Runtime cannot be initialized");
        // `block_on` accepts the future directly; no wrapping `async` block needed.
        rt.block_on(custom_entities_async(DATA))?;
    }

    Ok(())
}
91 changes: 71 additions & 20 deletions examples/issue68.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#![allow(unused)]

use quick_xml::events::Event;
#[cfg(feature = "asynchronous")]
use quick_xml::AsyncReader;
use quick_xml::Reader;
use std::io::Read;
#[cfg(feature = "asynchronous")]
Expand Down Expand Up @@ -55,44 +57,87 @@ impl Response {
}
}

fn parse_report(xml_data: &str) -> Vec<Resource> {
/// Position of the report parser inside the `DAV:` multistatus document.
///
/// The parsers in this example match on `(depth, state, namespace, local name)`
/// tuples, so the state must be cheap to copy. `Debug`, `PartialEq` and `Eq`
/// are derived as well so a state can be logged and compared in assertions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
    /// Outside the `multistatus` element (set again once it is closed).
    Root,
    /// Inside `multistatus`; also the state the parsers start in.
    MultiStatus,
    /// Inside a `response` element.
    Response,
    /// Not referenced by the parsing code visible here — presumably reserved
    /// for a successful status; confirm against the full example.
    Success,
    /// Not referenced by the parsing code visible here — presumably reserved
    /// for a failed status; confirm against the full example.
    Error,
}

/// Asynchronous counterpart of `parse_report`: pulls namespaced events out of
/// `xml_data` through an `AsyncReader`, tracking the element depth and a
/// `State` value as it goes.
///
/// Returns `result`, which is created empty and never pushed to in this body,
/// so the returned vector is always empty as written.
// NOTE(review): this listing comes from a rendered diff; the `Reader::from_str`
// binding, the function-local `State` enum and the `runtime.block_on`
// statements below look like pre-change lines shown next to their
// replacements — confirm against the checked-out file.
#[cfg(feature = "asynchronous")]
async fn parse_report_async(xml_data: &str) -> Vec<Resource> {
let result = Vec::<Resource>::new();

let mut reader = Reader::from_str(xml_data);
// Shadows the binding above; only this reader is awaited in the `match` below.
let mut reader = AsyncReader::from_str(xml_data);
reader.trim_text(true);

let mut count = 0;
let mut buf = Vec::new();
let mut ns_buffer = Vec::new();

#[derive(Clone, Copy)]
enum State {
Root,
MultiStatus,
Response,
Success,
Error,
};

let mut responses = Vec::<Response>::new();
let mut current_response = Response::new();
let mut current_prop = Prop::new();

// `depth` counts the currently open elements: +1 on every Start, -1 on every End.
let mut depth = 0;
// NOTE(review): the walk starts in `MultiStatus`, so the
// `(0, State::Root, ...)` arm below cannot match the first element —
// confirm this is intended for the issue reproduction.
let mut state = State::MultiStatus;

#[cfg(feature = "asynchronous")]
let mut runtime = Runtime::new().expect("Runtime cannot be initialized");

loop {
#[cfg(feature = "asynchronous")]
let event = runtime
.block_on(async { reader.read_namespaced_event(&mut buf, &mut ns_buffer).await });
match reader.read_namespaced_event(&mut buf, &mut ns_buffer).await {
Ok((namespace_value, Event::Start(e))) => {
// A missing namespace falls back to the type's default value.
let namespace_value = namespace_value.unwrap_or_default();
match (depth, state, namespace_value, e.local_name()) {
(0, State::Root, b"DAV:", b"multistatus") => state = State::MultiStatus,
(1, State::MultiStatus, b"DAV:", b"response") => {
state = State::Response;
current_response = Response::new();
}
(2, State::Response, b"DAV:", b"href") => {
// Panics if the element cannot be unescaped and decoded.
current_response.href = e.unescape_and_decode(&reader).unwrap();
}
_ => {}
}
depth += 1;
}
Ok((namespace_value, Event::End(e))) => {
let namespace_value = namespace_value.unwrap_or_default();
let local_name = e.local_name();
match (depth, state, &*namespace_value, local_name) {
(1, State::MultiStatus, b"DAV:", b"multistatus") => state = State::Root,
(2, State::MultiStatus, b"DAV:", b"multistatus") => state = State::MultiStatus,
_ => {}
}
depth -= 1;
}
Ok((_, Event::Eof)) => break,
// Stop on the first read error; the error value itself is dropped.
Err(e) => break,
_ => (),
}
}
// Nothing above pushes into `result`, so this is always the empty vector.
result
}

fn parse_report(xml_data: &str) -> Vec<Resource> {
let result = Vec::<Resource>::new();

#[cfg(not(feature = "asynchronous"))]
let event = reader.read_namespaced_event(&mut buf, &mut ns_buffer);
let mut reader = Reader::from_str(xml_data);
reader.trim_text(true);

match event {
let mut count = 0;
let mut buf = Vec::new();
let mut ns_buffer = Vec::new();

let mut responses = Vec::<Response>::new();
let mut current_response = Response::new();
let mut current_prop = Prop::new();

let mut depth = 0;
let mut state = State::MultiStatus;

loop {
match reader.read_namespaced_event(&mut buf, &mut ns_buffer) {
Ok((namespace_value, Event::Start(e))) => {
let namespace_value = namespace_value.unwrap_or_default();
match (depth, state, namespace_value, e.local_name()) {
Expand Down Expand Up @@ -148,4 +193,10 @@ fn main() {
"#;

parse_report(test_data);

#[cfg(feature = "asynchronous")]
let runtime = Runtime::new().expect("Runtime cannot be initialized");

#[cfg(feature = "asynchronous")]
runtime.block_on(async { parse_report_async(test_data).await });
}
111 changes: 86 additions & 25 deletions examples/nested_readers.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use quick_xml::events::Event;
#[cfg(feature = "asynchronous")]
use quick_xml::AsyncReader;
use quick_xml::Reader;
#[cfg(feature = "asynchronous")]
use tokio::runtime::Runtime;
Expand All @@ -10,34 +12,18 @@ struct TableStat {
index: u8,
rows: Vec<Vec<String>>,
}
// demonstrate how to nest readers
// This is useful for when you need to traverse
// a few levels of a document to extract things.
fn main() -> Result<(), quick_xml::Error> {

fn nest_readers() -> Result<(), quick_xml::Error> {
let mut buf = Vec::new();
// buffer for nested reader
let mut skip_buf = Vec::new();
let mut count = 0;

#[cfg(feature = "asynchronous")]
let mut runtime = Runtime::new().expect("Runtime cannot be initialized");

#[cfg(feature = "asynchronous")]
let mut reader =
runtime.block_on(async { Reader::from_file("tests/documents/document.xml").await })?;

#[cfg(not(feature = "asynchronous"))]
let mut reader = Reader::from_file("tests/documents/document.xml")?;

let mut found_tables = Vec::new();
loop {
#[cfg(feature = "asynchronous")]
let event = runtime.block_on(async { reader.read_event(&mut buf).await })?;

#[cfg(not(feature = "asynchronous"))]
let event = reader.read_event(&mut buf)?;

match event {
match reader.read_event(&mut buf)? {
Event::Start(element) => match element.name() {
b"w:tbl" => {
count += 1;
Expand All @@ -51,14 +37,74 @@ fn main() -> Result<(), quick_xml::Error> {
loop {
skip_buf.clear();

#[cfg(feature = "asynchronous")]
let event =
runtime.block_on(async { reader.read_event(&mut skip_buf).await })?;
match reader.read_event(&mut skip_buf)? {
Event::Start(element) => match element.name() {
b"w:tr" => {
stats.rows.push(vec![]);
row_index = stats.rows.len() - 1;
}
b"w:tc" => {
stats.rows[row_index]
.push(String::from_utf8(element.name().to_vec()).unwrap());
}
_ => {}
},
Event::End(element) => {
if element.name() == b"w:tbl" {
found_tables.push(stats);
break;
}
}
_ => {}
}
}
}
_ => {}
},
Event::Eof => break,
_ => {}
}
buf.clear();
}
assert_eq!(found_tables.len(), 2);
// pretty print the table
println!("{:#?}", found_tables);
assert_eq!(found_tables[0].rows.len(), 2);
assert_eq!(found_tables[0].rows[0].len(), 4);
assert_eq!(found_tables[0].rows[1].len(), 4);

#[cfg(not(feature = "asynchronous"))]
let event = reader.read_event(&mut skip_buf)?;
assert_eq!(found_tables[1].rows.len(), 2);
assert_eq!(found_tables[1].rows[0].len(), 4);
assert_eq!(found_tables[1].rows[1].len(), 4);
Ok(())
}

#[cfg(feature = "asynchronous")]
async fn nest_readers_async() -> Result<(), quick_xml::Error> {
let mut buf = Vec::new();
// buffer for nested reader
let mut skip_buf = Vec::new();
let mut count = 0;

let mut reader = AsyncReader::from_file("tests/documents/document.xml").await?;

let mut found_tables = Vec::new();
loop {
match reader.read_event(&mut buf).await? {
Event::Start(element) => match element.name() {
b"w:tbl" => {
count += 1;
let mut stats = TableStat {
index: count,
rows: vec![],
};
// must define stateful variables
// outside the nested loop else they are overwritten
let mut row_index = 0;
loop {
skip_buf.clear();

match event {
match reader.read_event(&mut skip_buf).await? {
Event::Start(element) => match element.name() {
b"w:tr" => {
stats.rows.push(vec![]);
Expand Down Expand Up @@ -99,3 +145,18 @@ fn main() -> Result<(), quick_xml::Error> {
assert_eq!(found_tables[1].rows[1].len(), 4);
Ok(())
}

/// Demonstrates how to nest readers, which is useful when a few levels of a
/// document have to be traversed to extract things.
///
/// With the `asynchronous` feature enabled the async variant runs first on a
/// freshly created `Runtime`; the blocking variant always runs afterwards.
fn main() -> Result<(), quick_xml::Error> {
    #[cfg(feature = "asynchronous")]
    let rt = Runtime::new().expect("Runtime cannot be initialized");

    // `block_on` accepts the future directly; no wrapping `async` block needed.
    #[cfg(feature = "asynchronous")]
    rt.block_on(nest_readers_async())?;

    nest_readers()
}
Loading

0 comments on commit e5d300e

Please sign in to comment.