From 0a03feb2896866066ca6c7ccdc0785f52e6c8f1e Mon Sep 17 00:00:00 2001 From: Jonathan Giddy Date: Sat, 3 Jul 2021 11:51:15 +0100 Subject: [PATCH] Add end tag handler for elements --- src/rewritable_units/element.rs | 82 ++++++++++++++++++++++- src/rewritable_units/tokens/end_tag.rs | 17 +++++ src/rewritable_units/tokens/text_chunk.rs | 12 ++-- src/rewriter/settings.rs | 2 +- 4 files changed, 105 insertions(+), 8 deletions(-) diff --git a/src/rewritable_units/element.rs b/src/rewritable_units/element.rs index 5e992a4d..e3b2e207 100644 --- a/src/rewritable_units/element.rs +++ b/src/rewritable_units/element.rs @@ -1,6 +1,6 @@ use super::{Attribute, AttributeNameError, ContentType, EndTag, Mutations, StartTag}; use crate::base::Bytes; -use crate::rewriter::EndTagHandler; +use crate::rewriter::{EndTagHandler, HandlerResult}; use encoding_rs::Encoding; use std::any::Any; use std::fmt::{self, Debug}; @@ -30,6 +30,14 @@ pub enum TagNameError { UnencodableCharacter, } +/// An error that occurs when an end tag handler cannot be set because the element has no end tag. +#[derive(Error, Debug, PartialEq, Copy, Clone)] +pub enum EndTagError { + /// The tag has no end tag. + #[error("No end tag.")] + NoEndTag, +} + /// An HTML element rewritable unit. /// /// Exposes API for examination and modification of a parsed HTML element. @@ -37,6 +45,7 @@ pub struct Element<'r, 't> { start_tag: &'r mut StartTag<'t>, end_tag_mutations: Option, modified_end_tag_name: Option>, + end_tag_handler: Option>, can_have_content: bool, should_remove_content: bool, encoding: &'static Encoding, @@ -51,6 +60,7 @@ impl<'r, 't> Element<'r, 't> { start_tag, end_tag_mutations: None, modified_end_tag_name: None, + end_tag_handler: None, can_have_content, should_remove_content: false, encoding, @@ -457,11 +467,73 @@ impl<'r, 't> Element<'r, 't> { self.should_remove_content } + /// Sets a handler to run when the end tag is reached. + /// + /// Subsequent calls to the method on the same element replace the previous handler. 
+ /// + /// # Example + /// + /// ``` + /// use lol_html::html_content::ContentType; + /// use lol_html::{element, rewrite_str, text, RewriteStrSettings}; + /// let buffer = std::rc::Rc::new(std::cell::RefCell::new(String::new())); + /// let html = rewrite_str( + /// "<span>Short</span><span>13 characters</span>", + /// RewriteStrSettings { + /// element_content_handlers: vec![ + /// element!("span", |el| { + /// // Truncate string for each new span. + /// buffer.borrow_mut().clear(); + /// let buffer = buffer.clone(); + /// el.on_end_tag(move |end| { + /// let s = buffer.borrow(); + /// if s.len() == 13 { + /// // add text before the end tag + /// end.before("!", ContentType::Text); + /// } else { + /// // replace the end tag with an uppercase version + /// end.remove(); + /// let name = end.name().to_uppercase(); + /// end.after(&format!("</{}>", name), ContentType::Html); + /// } + /// Ok(()) + /// })?; + /// Ok(()) + /// }), + /// text!("span", |t| { + /// // Save the text contents for the end tag handler. + /// buffer.borrow_mut().push_str(t.as_str()); + /// Ok(()) + /// }), + /// ], + /// ..RewriteStrSettings::default() + /// }, + /// ) + /// .unwrap(); + /// + /// assert_eq!(html, "<span>Short</SPAN><span>13 characters!</span>"); + /// ``` + pub fn on_end_tag( + &mut self, + handler: impl FnMut(&mut EndTag) -> HandlerResult + 'static, + ) -> Result<(), EndTagError> { + if self.can_have_content { + self.end_tag_handler = Some(Box::new(handler)); + Ok(()) + } else { + Err(EndTagError::NoEndTag) + } + } + pub(crate) fn into_end_tag_handler(self) -> Option> { let end_tag_mutations = self.end_tag_mutations; let modified_end_tag_name = self.modified_end_tag_name; + let end_tag_handler = self.end_tag_handler; - if end_tag_mutations.is_some() || modified_end_tag_name.is_some() { + if end_tag_mutations.is_some() + || modified_end_tag_name.is_some() + || end_tag_handler.is_some() + { Some(Box::new(move |end_tag: &mut EndTag| { if let Some(name) = modified_end_tag_name { end_tag.set_name(name); @@ -471,7 +543,11 @@ impl<'r, 
't> Element<'r, 't> { end_tag.mutations = mutations; } - Ok(()) + if let Some(handler) = end_tag_handler { + handler(end_tag) + } else { + Ok(()) + } })) } else { None diff --git a/src/rewritable_units/tokens/end_tag.rs b/src/rewritable_units/tokens/end_tag.rs index 3dadbe09..c2ac2248 100644 --- a/src/rewritable_units/tokens/end_tag.rs +++ b/src/rewritable_units/tokens/end_tag.rs @@ -1,5 +1,6 @@ use super::{Mutations, Token}; use crate::base::Bytes; +use crate::rewritable_units::ContentType; use encoding_rs::Encoding; use std::fmt::{self, Debug}; @@ -35,6 +36,22 @@ impl<'i> EndTag<'i> { self.raw = None; } + #[inline] + pub fn before(&mut self, content: &str, content_type: ContentType) { + self.mutations.before(content, content_type); + } + + #[inline] + pub fn after(&mut self, content: &str, content_type: ContentType) { + self.mutations.after(content, content_type); + } + + /// Removes the end tag. + #[inline] + pub fn remove(&mut self) { + self.mutations.remove(); + } + #[inline] fn raw(&self) -> Option<&Bytes> { self.raw.as_ref() diff --git a/src/rewritable_units/tokens/text_chunk.rs b/src/rewritable_units/tokens/text_chunk.rs index efc97422..503ff624 100644 --- a/src/rewritable_units/tokens/text_chunk.rs +++ b/src/rewritable_units/tokens/text_chunk.rs @@ -12,11 +12,15 @@ use std::fmt::{self, Debug}; /// text node can be represented by multiple text chunks. The size of a chunk depends on multiple /// parameters, such as decoding buffer size and input chunk size. /// -/// It is up to a user of the rewriter to buffer content of chunks to get whole text node content -/// where desired. Last chunk in a text node can be determined by calling [`last_in_text_node`] -/// method of the chunk. +/// It is up to a user of the rewriter to buffer content of chunks to get the whole text node +/// content where desired. The last chunk in a text node can be determined by calling +/// [`last_in_text_node`] method of the chunk. 
/// -/// Note that the last chunk in a text node can have empty textual content. +/// Note that in the sequence `"<span>red-<b>or</b>-blue</span>"` the `span` element contains three text +/// nodes: `"red-"`, `"or"`, and `"-blue"`. Each of these can produce multiple text chunks and each will +/// produce one text chunk where [`last_in_text_node`] returns `true`. The last chunk in a text +/// node can have empty textual content. To perform an action once on the text contents of an +/// element, see [`Element::on_end_tag`][crate::rewritable_units::Element::on_end_tag]. /// /// # Example /// ``` diff --git a/src/rewriter/settings.rs b/src/rewriter/settings.rs index 2de97b5d..c3fbd50e 100644 --- a/src/rewriter/settings.rs +++ b/src/rewriter/settings.rs @@ -5,7 +5,7 @@ use super::AsciiCompatibleEncoding; use std::borrow::Cow; use std::error::Error; -pub(super) type HandlerResult = Result<(), Box>; +pub(crate) type HandlerResult = Result<(), Box>; pub type DoctypeHandler<'h> = Box HandlerResult + 'h>; pub type CommentHandler<'h> = Box HandlerResult + 'h>; pub type TextHandler<'h> = Box HandlerResult + 'h>;