From ccb37c5e1ac968941de0ab2600b0a84cbd0b9e6a Mon Sep 17 00:00:00 2001
From: Joshua Thijssen
Date: Sat, 23 Sep 2023 14:37:15 +0200
Subject: [PATCH] Fixed a few CI issues

---
 src/bin/parser_test.rs                     | 10 +++++-----
 src/bin/tokenizer_test.rs                  |  4 +++-
 src/html5_parser/input_stream.rs           |  2 +-
 src/html5_parser/node.rs                   | 12 +++++++++---
 src/html5_parser/parser/adoption_agency.rs |  3 ++-
 src/html5_parser/parser/mod.rs             | 16 ++++------------
 src/html5_parser/parser/quirks.rs          | 12 ++++++++----
 src/html5_parser/tokenizer/mod.rs          | 13 ++++++++++---
 8 files changed, 42 insertions(+), 30 deletions(-)

diff --git a/src/bin/parser_test.rs b/src/bin/parser_test.rs
index c5d233f1f..f8e521478 100755
--- a/src/bin/parser_test.rs
+++ b/src/bin/parser_test.rs
@@ -81,11 +81,11 @@ fn read_tests(file_path: PathBuf) -> io::Result> {
     };
 
     let mut section: Option<&str> = None;
-    let mut line_num: usize = 0;
-    for line in reader.lines() {
-        line_num += 1;
-
-        let line = line?;
+    for (line_num, line) in reader.lines().enumerate() {
+        if line.is_err() {
+            continue;
+        }
+        let line = line.unwrap();
 
         if line.starts_with("#data") {
             if !current_test.data.is_empty()
diff --git a/src/bin/tokenizer_test.rs b/src/bin/tokenizer_test.rs
index 0a6601482..2d0f4ada1 100755
--- a/src/bin/tokenizer_test.rs
+++ b/src/bin/tokenizer_test.rs
@@ -223,7 +223,9 @@ fn match_error(tokenizer: &Tokenizer, expected_err: &Error) -> ErrorResult {
     // it's not always correct, it might be a off-by-one position.
     let mut result = ErrorResult::Failure;
     for got_err in tokenizer.get_error_logger().get_errors() {
-        if got_err.message == expected_err.code && (got_err.line as i64 != expected_err.line || got_err.col as i64 != expected_err.col) {
+        if got_err.message == expected_err.code
+            && (got_err.line as i64 != expected_err.line || got_err.col as i64 != expected_err.col)
+        {
             // println!("❌ Expected error '{}' at {}:{}", expected_err.code, expected_err.line, expected_err.col);
             result = ErrorResult::PositionFailure;
             break;
diff --git a/src/html5_parser/input_stream.rs b/src/html5_parser/input_stream.rs
index f3825df4f..039d434e4 100644
--- a/src/html5_parser/input_stream.rs
+++ b/src/html5_parser/input_stream.rs
@@ -1,7 +1,7 @@
 use crate::html5_parser::tokenizer::{CHAR_CR, CHAR_LF};
 use std::fs::File;
-use std::{fmt, io};
 use std::io::Read;
+use std::{fmt, io};
 
 // Encoding defines the way the buffer stream is read, as what defines a "character".
 #[derive(PartialEq)]
diff --git a/src/html5_parser/node.rs b/src/html5_parser/node.rs
index 0be8b239f..75bd5d9ed 100644
--- a/src/html5_parser/node.rs
+++ b/src/html5_parser/node.rs
@@ -113,13 +113,19 @@ impl Node {
     // Returns true if the given node is "special" node based on the namespace and name
     pub fn is_special(&self) -> bool {
-        if self.namespace == Some(HTML_NAMESPACE.into()) && SPECIAL_HTML_ELEMENTS.contains(&self.name.as_str()) {
+        if self.namespace == Some(HTML_NAMESPACE.into())
+            && SPECIAL_HTML_ELEMENTS.contains(&self.name.as_str())
+        {
             return true;
         }
 
-        if self.namespace == Some(MATHML_NAMESPACE.into()) && SPECIAL_MATHML_ELEMENTS.contains(&self.name.as_str()) {
+        if self.namespace == Some(MATHML_NAMESPACE.into())
+            && SPECIAL_MATHML_ELEMENTS.contains(&self.name.as_str())
+        {
             return true;
         }
 
-        if self.namespace == Some(SVG_NAMESPACE.into()) && SPECIAL_SVG_ELEMENTS.contains(&self.name.as_str()) {
+        if self.namespace == Some(SVG_NAMESPACE.into())
+            && SPECIAL_SVG_ELEMENTS.contains(&self.name.as_str())
+        {
             return true;
         }
diff --git a/src/html5_parser/parser/adoption_agency.rs b/src/html5_parser/parser/adoption_agency.rs
index 172e976ea..a25e32d01 100755
--- a/src/html5_parser/parser/adoption_agency.rs
+++ b/src/html5_parser/parser/adoption_agency.rs
@@ -54,7 +54,8 @@ impl<'a> Html5Parser<'a> {
                 ref name,
                 ref attributes,
                 ..
-            } = temp_node.data {
+            } = temp_node.data
+            {
                 if name == subject && !attributes.is_empty() {
                     formatting_element_idx = idx;
                     formatting_element_id = node_id;
diff --git a/src/html5_parser/parser/mod.rs b/src/html5_parser/parser/mod.rs
index cfc0196a7..dfde728e0 100644
--- a/src/html5_parser/parser/mod.rs
+++ b/src/html5_parser/parser/mod.rs
@@ -1625,18 +1625,10 @@ impl<'a> Html5Parser<'a> {
             }
             Token::StartTagToken {
                 name, attributes, ..
-            } => {
-                Node::new_element(name, attributes.clone(), namespace)
-            }
-            Token::EndTagToken { name, .. } => {
-                Node::new_element(name, HashMap::new(), namespace)
-            }
-            Token::CommentToken { value } => {
-                Node::new_comment(value)
-            }
-            Token::TextToken { value } => {
-                Node::new_text(value.to_string().as_str())
-            }
+            } => Node::new_element(name, attributes.clone(), namespace),
+            Token::EndTagToken { name, .. } => Node::new_element(name, HashMap::new(), namespace),
+            Token::CommentToken { value } => Node::new_comment(value),
+            Token::TextToken { value } => Node::new_text(value.to_string().as_str()),
             Token::EofToken => {
                 panic!("EOF token not allowed");
             }
diff --git a/src/html5_parser/parser/quirks.rs b/src/html5_parser/parser/quirks.rs
index 239039649..09b9cc31c 100644
--- a/src/html5_parser/parser/quirks.rs
+++ b/src/html5_parser/parser/quirks.rs
@@ -32,9 +32,11 @@ impl<'a> Html5Parser<'a> {
                 return QuirksMode::Quirks;
             }
 
-            if sys_identifier.is_none() && QUIRKS_PUB_IDENTIFIER_PREFIX_MISSING_SYS
+            if sys_identifier.is_none()
+                && QUIRKS_PUB_IDENTIFIER_PREFIX_MISSING_SYS
                     .iter()
-                    .any(|&prefix| pub_id.as_str().starts_with(prefix)) {
+                    .any(|&prefix| pub_id.as_str().starts_with(prefix))
+            {
                 return QuirksMode::Quirks;
             }
 
@@ -45,9 +47,11 @@ impl<'a> Html5Parser<'a> {
                 return QuirksMode::LimitedQuirks;
             }
 
-            if sys_identifier.is_some() && LIMITED_QUIRKS_PUB_IDENTIFIER_PREFIX
+            if sys_identifier.is_some()
+                && LIMITED_QUIRKS_PUB_IDENTIFIER_PREFIX
                     .iter()
-                    .any(|&prefix| pub_id.as_str().starts_with(prefix)) {
+                    .any(|&prefix| pub_id.as_str().starts_with(prefix))
+            {
                 return QuirksMode::LimitedQuirks;
             }
         }
diff --git a/src/html5_parser/tokenizer/mod.rs b/src/html5_parser/tokenizer/mod.rs
index 61d0401bf..a54ffe008 100644
--- a/src/html5_parser/tokenizer/mod.rs
+++ b/src/html5_parser/tokenizer/mod.rs
@@ -91,7 +91,10 @@ macro_rules! set_public_identifier {
 macro_rules! add_public_identifier {
     ($self:expr, $c:expr) => {
         match &mut $self.current_token {
-            Some(Token::DocTypeToken { pub_identifier: Some(pid), .. }) => {
+            Some(Token::DocTypeToken {
+                pub_identifier: Some(pid),
+                ..
+            }) => {
                 pid.push($c);
             }
             _ => {}
@@ -113,7 +116,10 @@ macro_rules! set_system_identifier {
 macro_rules! add_system_identifier {
     ($self:expr, $c:expr) => {
         match &mut $self.current_token {
-            Some(Token::DocTypeToken { sys_identifier: Some(sid), .. }) => {
+            Some(Token::DocTypeToken {
+                sys_identifier: Some(sid),
+                ..
+            }) => {
                 sid.push($c);
             }
             _ => {}
@@ -2291,7 +2297,8 @@ impl<'a> Tokenizer<'a> {
 
     // Set force_quirk mode in current token
     fn set_quirks_mode(&mut self, quirky: bool) {
-        if let Token::DocTypeToken { force_quirks, .. } = &mut self.current_token.as_mut().unwrap() {
+        if let Token::DocTypeToken { force_quirks, .. } = &mut self.current_token.as_mut().unwrap()
+        {
             *force_quirks = quirky;
         }
     }