diff --git a/src/html5/parser.rs b/src/html5/parser.rs
index ddc835f2f..511e3f198 100644
--- a/src/html5/parser.rs
+++ b/src/html5/parser.rs
@@ -2116,6 +2116,10 @@ impl<'chars> Html5Parser<'chars> {
/// Handle insertion mode "in_body"
fn handle_in_body(&mut self) {
match &self.current_token.clone() {
+ Token::Text(value) if self.current_token.is_mixed_null() => {
+ let tokens = self.split_mixed_token_null(value);
+ self.tokenizer.insert_tokens_at_queue_start(tokens);
+ }
Token::Text(..) if self.current_token.is_null() => {
self.parse_error("null character not allowed in in body insertion mode");
// ignore token
@@ -4021,6 +4025,33 @@ impl<'chars> Html5Parser<'chars> {
tokens
}
+
+    /// Splits `text` into consecutive runs of `\0` characters and runs of non-`\0` characters.
+    ///
+    /// Each run becomes its own `Token::Text`, in input order; empty `text` gives an empty vector.
+    ///
+    /// @todo: refactor this into split_mixed_token as well, but add a collection of groups callables
+    fn split_mixed_token_null(&self, text: &str) -> Vec<Token> {
+        let mut tokens = Vec::new();
+        let mut run = String::new();
+        // True when `run` currently holds `\0` characters; ignored while `run` is empty.
+        let mut run_is_null = false;
+
+        for ch in text.chars() {
+            let is_null = ch == '\0';
+            if is_null != run_is_null && !run.is_empty() {
+                // Hand the finished run over without copying; `take` leaves `run` empty.
+                tokens.push(Token::Text(std::mem::take(&mut run)));
+            }
+            run.push(ch);
+            run_is_null = is_null;
+        }
+
+        if !run.is_empty() {
+            tokens.push(Token::Text(run));
+        }
+        tokens
+    }
}
#[cfg(test)]
diff --git a/src/html5/tokenizer/token.rs b/src/html5/tokenizer/token.rs
index 377551d75..f286ceb1b 100644
--- a/src/html5/tokenizer/token.rs
+++ b/src/html5/tokenizer/token.rs
@@ -31,6 +31,7 @@ pub enum Token {
}
impl Token {
+ /// Returns true when there is a mixture of white and non-white and \0 characters in the token
pub(crate) fn is_mixed(&self) -> bool {
// Check if there are white characters AND non-white characters in the token
if let Token::Text(value) = self {
@@ -53,6 +54,16 @@ impl Token {
false
}
}
+
+    /// Returns true when the token is a text token that holds at least one \0 character
+    /// together with at least one character that is not \0. Any other token yields false.
+    pub(crate) fn is_mixed_null(&self) -> bool {
+        match self {
+            // Both kinds of character must occur somewhere in the text.
+            Token::Text(text) => text.contains('\0') && text.chars().any(|c| c != '\0'),
+            _ => false,
+        }
+    }
}
impl Token {