Skip to content

Commit

Permalink
Merge pull request #428 from Mingun/encoding
Browse files Browse the repository at this point in the history
Change all event and Attributes constructors to accept strings
  • Loading branch information
dralley authored Jul 24, 2022
2 parents e7f30d2 + 5467b6c commit e738b68
Show file tree
Hide file tree
Showing 20 changed files with 405 additions and 437 deletions.
24 changes: 17 additions & 7 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,13 +129,12 @@

- [#415]: Changed custom entity unescaping API to accept closures rather than a mapping of entity to
replacement text. This avoids needing to allocate a map and provides the user with more flexibility.
- [#415]: Renamed many functions following the pattern `unescape_and_decode*` to `decode_and_unescape*`
to better communicate their function. Renamed functions following the pattern `*_with_custom_entities`
to `decode_and_unescape_with` to be more consistent across the API.
- [#415]: `BytesText::escaped()` renamed to `BytesText::escape()`, `BytesText::unescaped()` renamed to
`BytesText::unescape()`, `BytesText::unescaped_with()` renamed to `BytesText::unescape_with()`,
`Attribute::escaped_value()` renamed to `Attribute::escape_value()`, and `Attribute::escaped_value_with()`
renamed to `Attribute::escape_value_with()` for consistency across the API.
- [#415]: Renamed functions for consistency across the API:
|Old Name |New Name
|------------------------|-------------------------------------------
|`*_with_custom_entities`|`*_with`
|`BytesText::unescaped()`|`BytesText::unescape()`
|`Attribute::unescaped_*`|`Attribute::unescape_*`

- [#416]: `BytesStart::to_borrowed` renamed to `BytesStart::borrow`, the same method
added to all events
Expand All @@ -150,6 +149,16 @@
- [#423]: Removed `BytesText::from_plain` because it internally did escaping of a byte array,
but since now escaping works on strings. Use `BytesText::from_plain_str` instead

- [#428]: Removed `BytesText::escaped()`. Use `.as_ref()` provided by `Deref` impl instead.
- [#428]: Removed `BytesText::from_escaped()`. Use constructors from strings instead,
because writer anyway works in UTF-8 only
- [#428]: Removed `BytesCData::new()`. Use constructors from strings instead,
because writer anyway works in UTF-8 only
- [#428]: Changed the event and `Attributes` constructors to accept a `&str` slices instead of `&[u8]` slices.
Handmade events has always been assumed to store their content UTF-8 encoded.
- [#428]: Removed `Decoder` parameter from `_and_decode` versions of functions for
`BytesText` (remember, that those functions was renamed in #415).

### New Tests

- [#9]: Added tests for incorrect nested tags in input
Expand Down Expand Up @@ -180,6 +189,7 @@
[#418]: https://github.com/tafia/quick-xml/pull/418
[#421]: https://github.com/tafia/quick-xml/pull/421
[#423]: https://github.com/tafia/quick-xml/pull/423
[#428]: https://github.com/tafia/quick-xml/pull/428
[#434]: https://github.com/tafia/quick-xml/pull/434
[#437]: https://github.com/tafia/quick-xml/pull/437

Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ loop {
_ => (),
}
}
Ok(Event::Text(e)) => txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()),
Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),

// There are several other `Event`s we do not consider here
_ => (),
Expand All @@ -80,7 +80,7 @@ loop {

// crates a new element ... alternatively we could reuse `e` by calling
// `e.into_owned()`
let mut elem = BytesStart::owned_name(b"my_elem".to_vec());
let mut elem = BytesStart::owned_name("my_elem");

// collect existing attributes
elem.extend_attributes(e.attributes().map(|attr| attr.unwrap()));
Expand All @@ -92,7 +92,7 @@ loop {
assert!(writer.write_event(Event::Start(elem)).is_ok());
},
Ok(Event::End(e)) if e.name().as_ref() == b"this_tag" => {
assert!(writer.write_event(Event::End(BytesEnd::borrowed(b"my_elem"))).is_ok());
assert!(writer.write_event(Event::End(BytesEnd::borrowed("my_elem"))).is_ok());
},
Ok(Event::Eof) => break,
// we can either move or borrow the event to write, depending on your use-case
Expand Down
2 changes: 1 addition & 1 deletion benches/macrobenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ fn parse_document(doc: &[u8]) -> XmlResult<()> {
}
}
Event::Text(e) => {
criterion::black_box(e.decode_and_unescape(&r)?);
criterion::black_box(e.unescape()?);
}
Event::CData(e) => {
criterion::black_box(e.into_inner());
Expand Down
2 changes: 1 addition & 1 deletion benches/microbenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ fn one_event(c: &mut Criterion) {
.check_comments(false)
.trim_text(true);
match r.read_event_into(&mut buf) {
Ok(Event::Comment(e)) => nbtxt += e.decode_and_unescape(&r).unwrap().len(),
Ok(Event::Comment(e)) => nbtxt += e.unescape().unwrap().len(),
something_else => panic!("Did not expect {:?}", something_else),
};

Expand Down
4 changes: 1 addition & 3 deletions examples/custom_entities.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
Ok(Event::Text(ref e)) => {
println!(
"text value: {}",
e.decode_and_unescape_with(&reader, |ent| custom_entities
.get(ent)
.map(|s| s.as_str()))
e.unescape_with(|ent| custom_entities.get(ent).map(|s| s.as_str()))
.unwrap()
);
}
Expand Down
132 changes: 58 additions & 74 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -612,16 +612,15 @@ where
unescape: bool,
allow_start: bool,
) -> Result<Cow<'de, str>, DeError> {
let decoder = self.reader.decoder();
match self.next()? {
DeEvent::Text(e) => Ok(e.decode(decoder, unescape)?),
DeEvent::CData(e) => Ok(e.decode(decoder)?),
DeEvent::Text(e) => Ok(e.decode(unescape)?),
DeEvent::CData(e) => Ok(e.decode()?),
DeEvent::Start(e) if allow_start => {
// allow one nested level
let inner = self.next()?;
let t = match inner {
DeEvent::Text(t) => t.decode(decoder, unescape)?,
DeEvent::CData(t) => t.decode(decoder)?,
DeEvent::Text(t) => t.decode(unescape)?,
DeEvent::CData(t) => t.decode()?,
DeEvent::Start(s) => {
return Err(DeError::UnexpectedStart(s.name().as_ref().to_owned()))
}
Expand Down Expand Up @@ -1042,13 +1041,10 @@ mod tests {
assert_eq!(de.read, vec![]);
assert_eq!(de.write, vec![]);

assert_eq!(
de.next().unwrap(),
Start(BytesStart::borrowed_name(b"root"))
);
assert_eq!(de.next().unwrap(), Start(BytesStart::borrowed_name("root")));
assert_eq!(
de.peek().unwrap(),
&Start(BytesStart::borrowed_name(b"inner"))
&Start(BytesStart::borrowed_name("inner"))
);

// Should skip first <inner> tree
Expand All @@ -1057,11 +1053,11 @@ mod tests {
assert_eq!(
de.write,
vec![
Start(BytesStart::borrowed_name(b"inner")),
Start(BytesStart::borrowed_name("inner")),
Text(BytesText::from_escaped_str("text")),
Start(BytesStart::borrowed_name(b"inner")),
End(BytesEnd::borrowed(b"inner")),
End(BytesEnd::borrowed(b"inner")),
Start(BytesStart::borrowed_name("inner")),
End(BytesEnd::borrowed("inner")),
End(BytesEnd::borrowed("inner")),
]
);

Expand All @@ -1073,11 +1069,8 @@ mod tests {
// </inner>
// <target/>
// </root>
assert_eq!(
de.next().unwrap(),
Start(BytesStart::borrowed_name(b"next"))
);
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"next")));
assert_eq!(de.next().unwrap(), Start(BytesStart::borrowed_name("next")));
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("next")));

// We finish writing. Next call to `next()` should start replay that messages:
//
Expand All @@ -1094,27 +1087,27 @@ mod tests {
assert_eq!(
de.read,
vec![
Start(BytesStart::borrowed_name(b"inner")),
Start(BytesStart::borrowed_name("inner")),
Text(BytesText::from_escaped_str("text")),
Start(BytesStart::borrowed_name(b"inner")),
End(BytesEnd::borrowed(b"inner")),
End(BytesEnd::borrowed(b"inner")),
Start(BytesStart::borrowed_name("inner")),
End(BytesEnd::borrowed("inner")),
End(BytesEnd::borrowed("inner")),
]
);
assert_eq!(de.write, vec![]);
assert_eq!(
de.next().unwrap(),
Start(BytesStart::borrowed_name(b"inner"))
Start(BytesStart::borrowed_name("inner"))
);

// Skip `#text` node and consume <inner/> after it
de.skip().unwrap();
assert_eq!(
de.read,
vec![
Start(BytesStart::borrowed_name(b"inner")),
End(BytesEnd::borrowed(b"inner")),
End(BytesEnd::borrowed(b"inner")),
Start(BytesStart::borrowed_name("inner")),
End(BytesEnd::borrowed("inner")),
End(BytesEnd::borrowed("inner")),
]
);
assert_eq!(
Expand All @@ -1128,9 +1121,9 @@ mod tests {

assert_eq!(
de.next().unwrap(),
Start(BytesStart::borrowed_name(b"inner"))
Start(BytesStart::borrowed_name("inner"))
);
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"inner")));
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("inner")));

// We finish writing. Next call to `next()` should start replay messages:
//
Expand All @@ -1146,21 +1139,21 @@ mod tests {
de.read,
vec![
Text(BytesText::from_escaped_str("text")),
End(BytesEnd::borrowed(b"inner")),
End(BytesEnd::borrowed("inner")),
]
);
assert_eq!(de.write, vec![]);
assert_eq!(
de.next().unwrap(),
Text(BytesText::from_escaped_str("text"))
);
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"inner")));
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("inner")));
assert_eq!(
de.next().unwrap(),
Start(BytesStart::borrowed_name(b"target"))
Start(BytesStart::borrowed_name("target"))
);
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"target")));
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"root")));
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("target")));
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("root")));
}

/// Checks that `read_to_end()` behaves correctly after `skip()`
Expand All @@ -1184,22 +1177,19 @@ mod tests {
assert_eq!(de.read, vec![]);
assert_eq!(de.write, vec![]);

assert_eq!(
de.next().unwrap(),
Start(BytesStart::borrowed_name(b"root"))
);
assert_eq!(de.next().unwrap(), Start(BytesStart::borrowed_name("root")));

// Skip the <skip> tree
de.skip().unwrap();
assert_eq!(de.read, vec![]);
assert_eq!(
de.write,
vec![
Start(BytesStart::borrowed_name(b"skip")),
Start(BytesStart::borrowed_name("skip")),
Text(BytesText::from_escaped_str("text")),
Start(BytesStart::borrowed_name(b"skip")),
End(BytesEnd::borrowed(b"skip")),
End(BytesEnd::borrowed(b"skip")),
Start(BytesStart::borrowed_name("skip")),
End(BytesEnd::borrowed("skip")),
End(BytesEnd::borrowed("skip")),
]
);

Expand All @@ -1212,18 +1202,18 @@ mod tests {
// </root>
assert_eq!(
de.next().unwrap(),
Start(BytesStart::borrowed_name(b"target"))
Start(BytesStart::borrowed_name("target"))
);
de.read_to_end(QName(b"target")).unwrap();
assert_eq!(de.read, vec![]);
assert_eq!(
de.write,
vec![
Start(BytesStart::borrowed_name(b"skip")),
Start(BytesStart::borrowed_name("skip")),
Text(BytesText::from_escaped_str("text")),
Start(BytesStart::borrowed_name(b"skip")),
End(BytesEnd::borrowed(b"skip")),
End(BytesEnd::borrowed(b"skip")),
Start(BytesStart::borrowed_name("skip")),
End(BytesEnd::borrowed("skip")),
End(BytesEnd::borrowed("skip")),
]
);

Expand All @@ -1241,22 +1231,19 @@ mod tests {
assert_eq!(
de.read,
vec![
Start(BytesStart::borrowed_name(b"skip")),
Start(BytesStart::borrowed_name("skip")),
Text(BytesText::from_escaped_str("text")),
Start(BytesStart::borrowed_name(b"skip")),
End(BytesEnd::borrowed(b"skip")),
End(BytesEnd::borrowed(b"skip")),
Start(BytesStart::borrowed_name("skip")),
End(BytesEnd::borrowed("skip")),
End(BytesEnd::borrowed("skip")),
]
);
assert_eq!(de.write, vec![]);

assert_eq!(
de.next().unwrap(),
Start(BytesStart::borrowed_name(b"skip"))
);
assert_eq!(de.next().unwrap(), Start(BytesStart::borrowed_name("skip")));
de.read_to_end(QName(b"skip")).unwrap();

assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"root")));
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("root")));
}

/// Checks that limiting buffer size works correctly
Expand Down Expand Up @@ -1306,34 +1293,31 @@ mod tests {
"#,
);

assert_eq!(
de.next().unwrap(),
Start(BytesStart::borrowed_name(b"root"))
);
assert_eq!(de.next().unwrap(), Start(BytesStart::borrowed_name("root")));

assert_eq!(
de.next().unwrap(),
Start(BytesStart::borrowed(br#"tag a="1""#, 3))
Start(BytesStart::borrowed(r#"tag a="1""#, 3))
);
assert_eq!(de.read_to_end(QName(b"tag")).unwrap(), ());

assert_eq!(
de.next().unwrap(),
Start(BytesStart::borrowed(br#"tag a="2""#, 3))
Start(BytesStart::borrowed(r#"tag a="2""#, 3))
);
assert_eq!(
de.next().unwrap(),
CData(BytesCData::from_str("cdata content"))
);
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"tag")));
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("tag")));

assert_eq!(
de.next().unwrap(),
Start(BytesStart::borrowed(b"self-closed", 11))
Start(BytesStart::borrowed_name("self-closed"))
);
assert_eq!(de.read_to_end(QName(b"self-closed")).unwrap(), ());

assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"root")));
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed("root")));
assert_eq!(de.next().unwrap(), Eof);
}

Expand Down Expand Up @@ -1402,17 +1386,17 @@ mod tests {
events,
vec![
Start(BytesStart::borrowed(
br#"item name="hello" source="world.rs""#,
r#"item name="hello" source="world.rs""#,
4
)),
Text(BytesText::from_escaped_str("Some text")),
End(BytesEnd::borrowed(b"item")),
Start(BytesStart::borrowed(b"item2", 5)),
End(BytesEnd::borrowed(b"item2")),
Start(BytesStart::borrowed(b"item3", 5)),
End(BytesEnd::borrowed(b"item3")),
Start(BytesStart::borrowed(br#"item4 value="world" "#, 5)),
End(BytesEnd::borrowed(b"item4")),
End(BytesEnd::borrowed("item")),
Start(BytesStart::borrowed("item2", 5)),
End(BytesEnd::borrowed("item2")),
Start(BytesStart::borrowed("item3", 5)),
End(BytesEnd::borrowed("item3")),
Start(BytesStart::borrowed(r#"item4 value="world" "#, 5)),
End(BytesEnd::borrowed("item4")),
]
)
}
Expand All @@ -1432,7 +1416,7 @@ mod tests {

assert_eq!(
reader.next().unwrap(),
DeEvent::Start(BytesStart::borrowed(b"item ", 4))
DeEvent::Start(BytesStart::borrowed("item ", 4))
);
reader.read_to_end(QName(b"item")).unwrap();
assert_eq!(reader.next().unwrap(), DeEvent::Eof);
Expand Down
Loading

0 comments on commit e738b68

Please sign in to comment.