Skip to content

Commit

Permalink
Store decoder inside the events
Browse files Browse the repository at this point in the history
  • Loading branch information
Mingun committed Jul 24, 2022
1 parent f3d6584 commit 4af1fc4
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 31 deletions.
60 changes: 40 additions & 20 deletions src/events/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ impl<'a> BytesStart<'a> {
///
/// # Warning
///
/// `&content` is not checked to be a valid name
/// `name` is not checked to be a valid name
#[inline]
pub fn borrowed_name(name: &'a str) -> BytesStart<'a> {
Self::borrowed(name, name.len())
Expand Down Expand Up @@ -694,24 +694,30 @@ pub struct BytesText<'a> {
/// document encoding when the event comes from the reader and should be in the
/// document encoding when the event is passed to the writer
content: Cow<'a, [u8]>,
/// Encoding in which the `content` is stored inside the event
decoder: Decoder,
}

impl<'a> BytesText<'a> {
/// Creates a new `BytesText` from an escaped byte sequence.
/// Creates a new `BytesText` from an escaped byte sequence in the specified encoding.
#[inline]
pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C) -> Self {
pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
Self {
content: content.into(),
decoder,
}
}

/// Creates a new `BytesText` from an escaped string.
#[inline]
pub fn from_escaped_str<C: Into<Cow<'a, str>>>(content: C) -> Self {
Self::wrap(match content.into() {
Cow::Owned(o) => Cow::Owned(o.into_bytes()),
Cow::Borrowed(b) => Cow::Borrowed(b.as_bytes()),
})
Self::wrap(
match content.into() {
Cow::Owned(o) => Cow::Owned(o.into_bytes()),
Cow::Borrowed(b) => Cow::Borrowed(b.as_bytes()),
},
Decoder::utf8(),
)
}

/// Creates a new `BytesText` from a string. The string is expected not to
Expand All @@ -723,6 +729,7 @@ impl<'a> BytesText<'a> {
Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
Cow::Owned(s) => Cow::Owned(s.into_bytes()),
},
decoder: Decoder::utf8(),
}
}

Expand All @@ -732,6 +739,7 @@ impl<'a> BytesText<'a> {
pub fn into_owned(self) -> BytesText<'static> {
BytesText {
content: self.content.into_owned().into(),
decoder: self.decoder,
}
}

Expand All @@ -746,6 +754,7 @@ impl<'a> BytesText<'a> {
pub fn borrow(&self) -> BytesText {
    // View the stored bytes as a plain slice; nothing is copied.
    let bytes: &[u8] = &self.content;
    BytesText {
        content: Cow::Borrowed(bytes),
        decoder: self.decoder,
    }
}

Expand Down Expand Up @@ -864,21 +873,24 @@ impl<'a> From<BytesStartText<'a>> for BytesText<'a> {
#[derive(Clone, Eq, PartialEq)]
pub struct BytesCData<'a> {
/// Bytes of the event content, either borrowed for `'a` or owned
content: Cow<'a, [u8]>,
/// Encoding in which the `content` is stored inside the event
decoder: Decoder,
}

impl<'a> BytesCData<'a> {
/// Creates a new `BytesCData` from a byte sequence.
/// Creates a new `BytesCData` from a byte sequence in the specified encoding.
#[inline]
pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C) -> Self {
pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
Self {
content: content.into(),
decoder,
}
}

/// Creates a new `BytesCData` from a string
#[inline]
pub fn from_str(content: &'a str) -> Self {
Self::wrap(content.as_bytes())
Self::wrap(content.as_bytes(), Decoder::utf8())
}

/// Ensures that all data is owned to extend the object's lifetime if
Expand All @@ -887,6 +899,7 @@ impl<'a> BytesCData<'a> {
pub fn into_owned(self) -> BytesCData<'static> {
BytesCData {
content: self.content.into_owned().into(),
decoder: self.decoder,
}
}

Expand All @@ -901,6 +914,7 @@ impl<'a> BytesCData<'a> {
pub fn borrow(&self) -> BytesCData {
    // Reborrow the stored bytes as a slice; nothing is copied.
    let decoder = self.decoder;
    let content = Cow::Borrowed(&*self.content);
    BytesCData { content, decoder }
}

Expand All @@ -918,11 +932,14 @@ impl<'a> BytesCData<'a> {
/// | `"` | `&quot;`
pub fn escape(self, decoder: Decoder) -> Result<BytesText<'a>> {
let decoded = self.decode(decoder)?;
Ok(BytesText::wrap(match escape(&decoded) {
// Because the result is borrowed, no replacements were done and we can use the original content
Cow::Borrowed(_) => self.content,
Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
}))
Ok(BytesText::wrap(
match escape(&decoded) {
// Because the result is borrowed, no replacements were done and we can use the original content
Cow::Borrowed(_) => self.content,
Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
},
Decoder::utf8(),
))
}

/// Converts this CDATA content to an escaped version, that can be written
Expand All @@ -940,11 +957,14 @@ impl<'a> BytesCData<'a> {
/// | `&` | `&amp;`
pub fn partial_escape(self, decoder: Decoder) -> Result<BytesText<'a>> {
let decoded = self.decode(decoder)?;
Ok(BytesText::wrap(match partial_escape(&decoded) {
// Because the result is borrowed, no replacements were done and we can use the original content
Cow::Borrowed(_) => self.content,
Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
}))
Ok(BytesText::wrap(
match partial_escape(&decoded) {
// Because the result is borrowed, no replacements were done and we can use the original content
Cow::Borrowed(_) => self.content,
Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
},
Decoder::utf8(),
))
}

/// Gets content of this text buffer in the specified encoding
Expand Down
25 changes: 14 additions & 11 deletions src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -514,9 +514,9 @@ impl<R> Reader<R> {
};

Ok(if first {
Event::StartText(BytesText::wrap(content).into())
Event::StartText(BytesText::wrap(content, self.decoder()).into())
} else {
Event::Text(BytesText::wrap(content))
Event::Text(BytesText::wrap(content, self.decoder()))
})
}
Ok(None) => Ok(Event::Eof),
Expand Down Expand Up @@ -587,18 +587,24 @@ impl<R> Reader<R> {
return Err(Error::UnexpectedToken("--".to_string()));
}
}
Ok(Event::Comment(BytesText::wrap(&buf[3..len - 2])))
Ok(Event::Comment(BytesText::wrap(
&buf[3..len - 2],
self.decoder(),
)))
}
BangType::CData if uncased_starts_with(buf, b"![CDATA[") => {
Ok(Event::CData(BytesCData::wrap(&buf[8..])))
Ok(Event::CData(BytesCData::wrap(&buf[8..], self.decoder())))
}
BangType::DocType if uncased_starts_with(buf, b"!DOCTYPE") => {
let start = buf[8..]
.iter()
.position(|b| !is_whitespace(*b))
.unwrap_or_else(|| len - 8);
debug_assert!(start < len - 8, "DocType must have a name");
Ok(Event::DocType(BytesText::wrap(&buf[8 + start..])))
Ok(Event::DocType(BytesText::wrap(
&buf[8 + start..],
self.decoder(),
)))
}
_ => Err(bang_type.to_err()),
}
Expand Down Expand Up @@ -663,7 +669,7 @@ impl<R> Reader<R> {

Ok(Event::Decl(event))
} else {
Ok(Event::PI(BytesText::wrap(&buf[1..len - 1])))
Ok(Event::PI(BytesText::wrap(&buf[1..len - 1], self.decoder())))
}
} else {
self.buf_position -= len;
Expand Down Expand Up @@ -935,7 +941,7 @@ pub(crate) fn is_whitespace(b: u8) -> bool {
/// any XML declarations are ignored.
///
/// [utf16]: https://github.com/tafia/quick-xml/issues/158
#[derive(Clone, Copy, Debug)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Decoder {
#[cfg(feature = "encoding")]
encoding: &'static Encoding,
Expand Down Expand Up @@ -1025,9 +1031,6 @@ impl Decoder {
}
}

/// This implementation is required for tests of other parts of the library
#[cfg(test)]
#[cfg(feature = "serialize")]
impl Decoder {
pub(crate) fn utf8() -> Self {
Decoder {
Expand All @@ -1036,7 +1039,7 @@ impl Decoder {
}
}

#[cfg(feature = "encoding")]
#[cfg(all(test, feature = "encoding", feature = "serialize"))]
pub(crate) fn utf16() -> Self {
Decoder { encoding: UTF_16LE }
}
Expand Down

0 comments on commit 4af1fc4

Please sign in to comment.