diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index c5a10c3de..2a57f52ad 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -12,31 +12,110 @@ env: jobs: build: runs-on: ubuntu-latest - steps: - uses: actions/checkout@v2 - - name: Setup Rust - uses: actions-rs/toolchain@v1 + - uses: actions-rs/toolchain@v1 with: - toolchain: stable profile: minimal - components: rustfmt, clippy - - name: Cache Cargo registry + toolchain: stable + override: true + - name: Cache cargo registry uses: actions/cache@v2 with: path: ~/.cargo/registry - key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} - - name: Cache target + key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }} + - name: Cache cargo index + uses: actions/cache@v2 + with: + path: ~/.cargo/git + key: ${{ runner.os }}-cargo-git-${{ hashFiles('**/Cargo.lock') }} + - name: Cache cargo build uses: actions/cache@v2 with: path: target - key: ${{ runner.os }}-target-${{ hashFiles('**/Cargo.lock') }} - + key: ${{ runner.os }}-cargo-build-target-${{ hashFiles('**/Cargo.lock') }} - name: Build run: cargo build --verbose + + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - name: Cache cargo registry + uses: actions/cache@v2 + with: + path: ~/.cargo/registry + key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }} + - name: Cache cargo index + uses: actions/cache@v2 + with: + path: ~/.cargo/git + key: ${{ runner.os }}-cargo-git-${{ hashFiles('**/Cargo.lock') }} + - name: Cache cargo build + uses: actions/cache@v2 + with: + path: target + key: ${{ runner.os }}-cargo-build-target-${{ hashFiles('**/Cargo.lock') }} - name: Run tests run: cargo test --verbose - - name: Check formatting - run: cargo fmt -- --check + + clippy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + components: clippy + override: true + - name: Cache cargo registry + uses: actions/cache@v2 + with: + path: ~/.cargo/registry + key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }} + - name: Cache cargo index + uses: actions/cache@v2 + with: + path: ~/.cargo/git + key: ${{ runner.os }}-cargo-git-${{ hashFiles('**/Cargo.lock') }} + - name: Cache cargo build + uses: actions/cache@v2 + with: + path: target + key: ${{ runner.os }}-cargo-build-target-${{ hashFiles('**/Cargo.lock') }} - name: Run Clippy run: cargo clippy -- -D warnings + + fmt: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + components: rustfmt + override: true + - name: Cache cargo registry + uses: actions/cache@v2 + with: + path: ~/.cargo/registry + key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }} + - name: Cache cargo index + uses: actions/cache@v2 + with: + path: ~/.cargo/git + key: ${{ runner.os }}-cargo-git-${{ hashFiles('**/Cargo.lock') }} + - name: Cache cargo build + uses: actions/cache@v2 + with: + path: target + key: ${{ runner.os }}-cargo-build-target-${{ hashFiles('**/Cargo.lock') }} + - name: Run fmt + run: cargo fmt -- --check diff --git a/src/bin/parser_test.rs b/src/bin/parser_test.rs index 2706e02d8..c5d233f1f 100755 --- a/src/bin/parser_test.rs +++ b/src/bin/parser_test.rs @@ -158,7 +158,7 @@ fn run_tree_test(test_idx: usize, test: &Test, results: &mut TestResults) { let mut parser = Html5Parser::new(&mut is); let (document, _parse_errors) = parser.parse(); - match_document_tree(&document, &test.document); + match_document_tree(document, &test.document); // if parse_errors.len() != test.errors.len() { // println!("❌ Unexpected errors found (wanted {}, got {}): ", test.errors.len(), parse_errors.len()); @@ -349,5 +349,6 @@ fn match_error(got_err: &Error, expected_err: &Error) -> ErrorResult { "⚠️ Unexpected error position '{}' at {}:{} (got: {}:{})", expected_err.code, expected_err.line, expected_err.col, got_err.line, got_err.col ); - return ErrorResult::PositionFailure; + + ErrorResult::PositionFailure } diff --git a/src/bin/tokenizer_test.rs b/src/bin/tokenizer_test.rs index dae82f6f3..0a6601482 100755 --- a/src/bin/tokenizer_test.rs +++ b/src/bin/tokenizer_test.rs @@ -174,7 +174,7 @@ fn run_token_test(test: &Test, results: &mut TestResults) { // Check error messages for error in &test.errors { - match match_error(&tokenizer, &error) { + match match_error(&tokenizer, error) { ErrorResult::Failure => { results.assertions += 1; results.failed += 1; @@ -223,12 +223,10 @@ fn match_error(tokenizer: &Tokenizer, expected_err: &Error) -> ErrorResult { // it's not always correct, it might be a off-by-one position. let mut result = ErrorResult::Failure; for got_err in tokenizer.get_error_logger().get_errors() { - if got_err.message == expected_err.code { - if got_err.line as i64 != expected_err.line || got_err.col as i64 != expected_err.col { - // println!("❌ Expected error '{}' at {}:{}", expected_err.code, expected_err.line, expected_err.col); - result = ErrorResult::PositionFailure; - break; - } + if got_err.message == expected_err.code && (got_err.line as i64 != expected_err.line || got_err.col as i64 != expected_err.col) { + // println!("❌ Expected error '{}' at {}:{}", expected_err.code, expected_err.line, expected_err.col); + result = ErrorResult::PositionFailure; + break; } } @@ -342,7 +340,7 @@ fn check_match_starttag( return Err(()); } - if expected_attrs.is_none() && attributes.len() == 0 { + if expected_attrs.is_none() && attributes.is_empty() { // No attributes to check return Ok(()); } @@ -452,7 +450,7 @@ fn check_match_doctype( let expected_sys = expected.get(3).unwrap().as_str(); let expected_quirk = expected.get(4).unwrap().as_bool(); - if expected_name.is_none() && !name.is_none() { + if expected_name.is_none() && name.is_some() { println!( "❌ Incorrect doctype (no name expected, but got '{}')", name.unwrap() diff --git a/src/html5_parser/dom/dom.rs b/src/html5_parser/dom/dom.rs deleted file mode 100755 index 55d4b5197..000000000 --- a/src/html5_parser/dom/dom.rs +++ /dev/null @@ -1,65 +0,0 @@ -use std::collections::HashMap; - -pub struct ShadowRoot { - pub mode: ShadowRootMode, - pub delegates_focus: bool, - pub slot_assignment: SlotAssignmentMode, - pub host: Box, - // pub onslotchange: Option, -} - -pub enum SlotAssignmentMode { - Manual, - Named, -} - -pub enum ShadowRootMode { - Open, - Closed, -} - -pub struct Element { - pub namespace_uri: Option, - pub prefix: Option, - pub local_name: String, - pub tag_name: String, - pub id: String, - pub class_name: String, - pub class_list: Vec, - pub slot: String, - pub attributes: HashMap, - pub shadow_root: Option>, -} - -pub struct HtmlElement { - // Element fields - pub namespace_uri: Option, - pub prefix: Option, - pub local_name: String, - pub tag_name: String, - pub id: String, - pub class_name: String, - pub class_list: Vec, - pub slot: String, - pub attributes: HashMap, - pub shadow_root: Option, - - // HTML Element - pub title: String, - pub lang: String, - pub translate: bool, - pub dir: String, - - pub hidden: Option, - pub insert: bool, - pub access_key: String, - pub access_key_label: String, - pub draggable: bool, - pub spellcheck: bool, - pub autocapitalize: String, - - pub inner_text: String, - pub outer_text: String, - - pub popover: Option, -} diff --git a/src/html5_parser/dom/mod.rs b/src/html5_parser/dom/mod.rs index 8d04d6c57..55d4b5197 100755 --- a/src/html5_parser/dom/mod.rs +++ b/src/html5_parser/dom/mod.rs @@ -1 +1,65 @@ -pub mod dom; +use std::collections::HashMap; + +pub struct ShadowRoot { + pub mode: ShadowRootMode, + pub delegates_focus: bool, + pub slot_assignment: SlotAssignmentMode, + pub host: Box, + // pub onslotchange: Option, +} + +pub enum SlotAssignmentMode { + Manual, + Named, +} + +pub enum ShadowRootMode { + Open, + Closed, +} + +pub struct Element { + pub namespace_uri: Option, + pub prefix: Option, + pub local_name: String, + pub tag_name: String, + pub id: String, + pub class_name: String, + pub class_list: Vec, + pub slot: String, + pub attributes: HashMap, + pub shadow_root: Option>, +} + +pub struct HtmlElement { + // Element fields + pub namespace_uri: Option, + pub prefix: Option, + pub local_name: String, + pub tag_name: String, + pub id: String, + pub class_name: String, + pub class_list: Vec, + pub slot: String, + pub attributes: HashMap, + pub shadow_root: Option, + + // HTML Element + pub title: String, + pub lang: String, + pub translate: bool, + pub dir: String, + + pub hidden: Option, + pub insert: bool, + pub access_key: String, + pub access_key_label: String, + pub draggable: bool, + pub spellcheck: bool, + pub autocapitalize: String, + + pub inner_text: String, + pub outer_text: String, + + pub popover: Option, +} diff --git a/src/html5_parser/error_logger.rs b/src/html5_parser/error_logger.rs index 219a0757f..0d35029d3 100755 --- a/src/html5_parser/error_logger.rs +++ b/src/html5_parser/error_logger.rs @@ -173,6 +173,12 @@ impl ErrorLogger { } } +impl Default for ErrorLogger { + fn default() -> Self { + Self::new() + } +} + impl ErrorLogger { // Returns a cloned instance of the errors pub fn get_errors(&self) -> Vec { @@ -183,7 +189,7 @@ impl ErrorLogger { pub fn add_error(&mut self, pos: Position, message: &str) { // Check if the error already exists, if so, don't add it again for err in &self.errors { - if err.line == pos.line && err.col == pos.col && err.message == message.to_string() { + if err.line == pos.line && err.col == pos.col && err.message == *message { return; } } diff --git a/src/html5_parser/input_stream.rs b/src/html5_parser/input_stream.rs index b8f229cb0..f3825df4f 100644 --- a/src/html5_parser/input_stream.rs +++ b/src/html5_parser/input_stream.rs @@ -1,6 +1,6 @@ use crate::html5_parser::tokenizer::{CHAR_CR, CHAR_LF}; use std::fs::File; -use std::io; +use std::{fmt, io}; use std::io::Read; // Encoding defines the way the buffer stream is read, as what defines a "character". @@ -32,10 +32,11 @@ impl Position { pub fn new(offset: usize, line: usize, col: usize) -> Self { Position { offset, line, col } } +} - // Returns a string representation of the position - pub fn to_string(&self) -> String { - format!("{}:{}:{}", self.offset, self.line, self.col) +impl fmt::Display for Position { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}:{}:{}", self.offset, self.line, self.col) } } @@ -48,24 +49,15 @@ pub enum Element { impl Element { pub fn is_eof(&self) -> bool { - match self { - Element::Eof => true, - _ => false, - } + matches!(self, Element::Eof) } pub fn is_utf8(&self) -> bool { - match self { - Element::Utf8(_) => true, - _ => false, - } + matches!(self, Element::Utf8(_)) } pub fn is_surrogate(&self) -> bool { - match self { - Element::Surrogate(_) => true, - _ => false, - } + matches!(self, Element::Surrogate(_)) } pub fn u32(&self) -> u32 { @@ -83,12 +75,14 @@ impl Element { Element::Eof => 0x0000 as char, } } +} - pub fn to_string(&self) -> String { +impl fmt::Display for Element { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - Element::Utf8(ch) => ch.to_string(), - Element::Surrogate(surrogate) => format!("U+{:04X}", surrogate), // Or some other representation - Element::Eof => "EOF".to_string(), // Or an empty string + Element::Utf8(ch) => write!(f, "{}", ch), + Element::Surrogate(surrogate) => write!(f, "U+{:04X}", surrogate), + Element::Eof => write!(f, "EOF"), } } } @@ -114,6 +108,12 @@ pub enum SeekMode { SeekEnd, // Seek (backwards) from the end of the stream } +impl Default for InputStream { + fn default() -> Self { + Self::new() + } +} + impl InputStream { // Create a new default empty input stream pub fn new() -> Self { @@ -167,7 +167,7 @@ impl InputStream { } SeekMode::SeekCur => { if offset.is_negative() { - self.position.offset - offset.abs() as usize + self.position.offset - offset.unsigned_abs() } else { self.position.offset + offset as usize } @@ -177,7 +177,7 @@ impl InputStream { if offset.abs() > self.length as isize { 0 } else { - self.length - offset.abs() as usize + self.length - offset.unsigned_abs() } } }; @@ -219,11 +219,11 @@ impl InputStream { } // Set position values - return Position { + Position { offset: abs_offset, line: last_line + 1, col: abs_offset - last_offset + 1, - }; + } } pub fn tell(&self) -> usize { @@ -254,7 +254,7 @@ impl InputStream { unsafe { str_buf = std::str::from_utf8_unchecked(&self.u8_buffer) .replace("\u{000D}\u{000A}", "\u{000A}") - .replace("\u{000D}", "\u{000A}"); + .replace('\u{000D}', "\u{000A}"); } // Convert the utf8 string into characters so we can use easy indexing @@ -308,7 +308,7 @@ impl InputStream { } } - return result; + result } // Populates the current buffer with the contents of given file f @@ -340,25 +340,24 @@ impl InputStream { } // If we still can move forward in the stream, move forwards - return if self.position.offset < self.length { - let c = self.buffer[self.position.offset].clone(); + if self.position.offset < self.length { + let c = self.buffer[self.position.offset]; self.seek(SeekMode::SeekCur, 1); - c - } else { - // otherwise, we have reached the end of the stream - self.has_read_eof = true; + return c; + } - self.seek(SeekMode::SeekEnd, 0); + // otherwise, we have reached the end of the stream + self.has_read_eof = true; - // // This is a kind of dummy position so the end of the files are read correctly. - // self.position = Position{ - // offset: self.position.offset, - // line: self.position.line, - // col: self.position.col, - // }; + self.seek(SeekMode::SeekEnd, 0); - Element::Eof - }; + // // This is a kind of dummy position so the end of the files are read correctly. + // self.position = Position{ + // offset: self.position.offset, + // line: self.position.line, + // col: self.position.col, + // }; + Element::Eof } pub(crate) fn unread(&mut self) { @@ -404,7 +403,7 @@ impl InputStream { } // Check the next char to see if it's a '\n' - let c = self.buffer[last_offset].clone(); + let c = self.buffer[last_offset]; if c == Element::Utf8('\n') { self.line_offsets.push(last_offset + 1); } @@ -421,25 +420,25 @@ mod test { #[test] fn test_stream() { let mut is = InputStream::new(); - assert_eq!(is.eof(), true); + assert_eq!(is.eof()); is.read_from_str("foo", Some(Encoding::ASCII)); assert_eq!(is.length, 3); - assert_eq!(is.eof(), false); + assert_ne!(is.eof()); assert_eq!(is.chars_left(), 3); is.read_from_str("f👽f", Some(Encoding::UTF8)); assert_eq!(is.length, 3); - assert_eq!(is.eof(), false); + assert_ne!(is.eof()); assert_eq!(is.chars_left(), 3); assert_eq!(is.read_char().utf8(), 'f'); assert_eq!(is.chars_left(), 2); - assert_eq!(is.eof(), false); + assert_ne!(is.eof()); assert_eq!(is.read_char().utf8(), '👽'); - assert_eq!(is.eof(), false); + assert_ne!(is.eof()); assert_eq!(is.chars_left(), 1); assert_eq!(is.read_char().utf8(), 'f'); - assert_eq!(is.eof(), true); + assert!(is.eof()); assert_eq!(is.chars_left(), 0); is.reset(); @@ -451,7 +450,7 @@ mod test { assert_eq!(is.read_char().utf8(), '?'); assert_eq!(is.read_char().utf8(), '?'); assert_eq!(is.read_char().utf8(), 'f'); - assert_eq!(is.read_char().is_eof(), true); + assert!(is.read_char().is_eof()); is.unread(); // unread eof is.unread(); // unread 'f' @@ -476,21 +475,21 @@ mod test { assert_eq!(is.read_char().utf8(), 'c'); is.unread(); assert_eq!(is.read_char().utf8(), 'c'); - assert_eq!(is.read_char().is_eof(), true); + assert!(is.read_char().is_eof()); is.unread(); - assert_eq!(is.read_char().is_eof(), true); + assert!(is.read_char().is_eof()); } #[test] fn test_certainty() { let mut is = InputStream::new(); - assert_eq!(is.is_certain_encoding(), false); + assert_ne!(is.is_certain_encoding()); is.set_confidence(Confidence::Certain); - assert_eq!(is.is_certain_encoding(), true); + assert!(is.is_certain_encoding()); is.set_confidence(Confidence::Tentative); - assert_eq!(is.is_certain_encoding(), false); + assert_ne!(is.is_certain_encoding()); } #[test] diff --git a/src/html5_parser/node.rs b/src/html5_parser/node.rs index b186764ae..0be8b239f 100644 --- a/src/html5_parser/node.rs +++ b/src/html5_parser/node.rs @@ -76,7 +76,7 @@ impl Node { children: vec![], data: NodeData::Element { name: name.to_string(), - attributes: attributes, + attributes, }, name: name.to_string(), namespace: Some(namespace.into()), @@ -113,20 +113,14 @@ impl Node { // Returns true if the given node is "special" node based on the namespace and name pub fn is_special(&self) -> bool { - if self.namespace == Some(HTML_NAMESPACE.into()) { - if SPECIAL_HTML_ELEMENTS.contains(&self.name.as_str()) { - return true; - } + if self.namespace == Some(HTML_NAMESPACE.into()) && SPECIAL_HTML_ELEMENTS.contains(&self.name.as_str()) { + return true; } - if self.namespace == Some(MATHML_NAMESPACE.into()) { - if SPECIAL_MATHML_ELEMENTS.contains(&self.name.as_str()) { - return true; - } + if self.namespace == Some(MATHML_NAMESPACE.into()) && SPECIAL_MATHML_ELEMENTS.contains(&self.name.as_str()) { + return true; } - if self.namespace == Some(SVG_NAMESPACE.into()) { - if SPECIAL_SVG_ELEMENTS.contains(&self.name.as_str()) { - return true; - } + if self.namespace == Some(SVG_NAMESPACE.into()) && SPECIAL_SVG_ELEMENTS.contains(&self.name.as_str()) { + return true; } false @@ -395,7 +389,7 @@ mod test { node.data, NodeData::Element { name: "div".to_string(), - attributes: attributes, + attributes, } ); } @@ -409,7 +403,7 @@ mod test { node.data, NodeData::Element { name: "div".to_string(), - attributes: attributes, + attributes, } ); } diff --git a/src/html5_parser/parser/adoption_agency.rs b/src/html5_parser/parser/adoption_agency.rs index 4d89f7e9c..172e976ea 100755 --- a/src/html5_parser/parser/adoption_agency.rs +++ b/src/html5_parser/parser/adoption_agency.rs @@ -50,20 +50,17 @@ impl<'a> Html5Parser<'a> { ActiveElement::Marker => break, ActiveElement::Node(node_id) => { let temp_node = self.document.get_node_by_id(node_id).unwrap().clone(); - match temp_node.data { - NodeData::Element { - ref name, - ref attributes, - .. - } => { - if name == subject && attributes.len() > 0 { - formatting_element_idx = idx; - formatting_element_id = node_id; - formatting_element_name = String::from(name); - formatting_element_attributes = attributes.clone(); - } + if let NodeData::Element { + ref name, + ref attributes, + .. + } = temp_node.data { + if name == subject && !attributes.is_empty() { + formatting_element_idx = idx; + formatting_element_id = node_id; + formatting_element_name = String::from(name); + formatting_element_attributes = attributes.clone(); } - _ => {} } } } @@ -215,7 +212,7 @@ impl<'a> Html5Parser<'a> { ); // Step 4.16 - if furthest_block_children.len() > 0 { + if !furthest_block_children.is_empty() { for &child in furthest_block_children.iter() { self.document.append(child, new_element.id) } diff --git a/src/html5_parser/parser/document.rs b/src/html5_parser/parser/document.rs index 6f1d49431..da94d5077 100755 --- a/src/html5_parser/parser/document.rs +++ b/src/html5_parser/parser/document.rs @@ -15,13 +15,23 @@ pub struct Document { pub quirks_mode: QuirksMode, // Quirks mode } +impl Default for Document { + fn default() -> Self { + Self { + arena: NodeArena::new(), + doctype: DocumentType::HTML, + quirks_mode: QuirksMode::NoQuirks, + } + } +} + impl Document { // Creates a new document pub fn new() -> Self { let mut arena = NodeArena::new(); arena.add_node(Node::new_document()); Self { - arena: arena, + arena, doctype: DocumentType::HTML, quirks_mode: QuirksMode::NoQuirks, } @@ -86,7 +96,7 @@ impl Document { for child_id in &node.children { if let Some(child) = self.arena.get_node(*child_id) { - self.display_tree(&child, indent + 2, f)?; + self.display_tree(child, indent + 2, f)?; } } diff --git a/src/html5_parser/parser/mod.rs b/src/html5_parser/parser/mod.rs index 6b0b45825..cfc0196a7 100644 --- a/src/html5_parser/parser/mod.rs +++ b/src/html5_parser/parser/mod.rs @@ -1626,16 +1626,16 @@ impl<'a> Html5Parser<'a> { Token::StartTagToken { name, attributes, .. } => { - return Node::new_element(name, attributes.clone(), namespace); + Node::new_element(name, attributes.clone(), namespace) } Token::EndTagToken { name, .. } => { - return Node::new_element(name, HashMap::new(), namespace); + Node::new_element(name, HashMap::new(), namespace) } Token::CommentToken { value } => { - return Node::new_comment(value); + Node::new_comment(value) } Token::TextToken { value } => { - return Node::new_text(format!("{}", value).as_str()); + Node::new_text(value.to_string().as_str()) } Token::EofToken => { panic!("EOF token not allowed"); @@ -1669,7 +1669,7 @@ impl<'a> Html5Parser<'a> { // This function will pop elements off the stack until it reaches the first element that matches // our condition (which can be changed with the except and thoroughly parameters) fn generate_all_implied_end_tags(&mut self, except: Option<&str>, thoroughly: bool) { - while self.open_elements.len() > 0 { + while !self.open_elements.is_empty() { let val = current_node!(self).name.clone(); if except.is_some() && except.unwrap() == val { @@ -1764,7 +1764,7 @@ impl<'a> Html5Parser<'a> { return; } if node.name == "template" { - self.insertion_mode = self.template_insertion_mode.last().unwrap().clone(); + self.insertion_mode = *self.template_insertion_mode.last().unwrap(); return; } if node.name == "head" && !last { @@ -1798,7 +1798,7 @@ impl<'a> Html5Parser<'a> { // Pop all elements back to a table context fn clear_stack_back_to_table_context(&mut self) { - while self.open_elements.len() > 0 { + while !self.open_elements.is_empty() { if ["tbody", "tfoot", "thead", "template", "html"] .contains(¤t_node!(self).name.as_str()) { @@ -1810,7 +1810,7 @@ impl<'a> Html5Parser<'a> { // Pop all elements back to a table row context fn clear_stack_back_to_table_row_context(&mut self) { - while self.open_elements.len() > 0 { + while !self.open_elements.is_empty() { let val = current_node!(self).name.clone(); if ["tr", "template", "html"].contains(&val.as_str()) { return; @@ -1975,7 +1975,7 @@ impl<'a> Html5Parser<'a> { return; } - if self.frameset_ok == false { + if !self.frameset_ok { // ignore token return; } @@ -2137,7 +2137,7 @@ impl<'a> Html5Parser<'a> { } Token::EndTagToken { name, .. } if name == "form" => { if !open_elements_has!(self, "template") { - let node_id = self.form_element.clone(); + let node_id = self.form_element; self.form_element = None; if node_id.is_none() || !self.in_scope(name, Scope::Regular) { @@ -2964,57 +2964,48 @@ impl<'a> Html5Parser<'a> { // Adjusts attributes names in the given token for SVG fn adjust_svg_attributes(&self, token: &mut Token) { - match token { - Token::StartTagToken { attributes, .. } => { - let mut new_attributes = HashMap::new(); - for (name, value) in attributes.iter() { - if SVG_ADJUSTMENTS.contains_key(name) { - let new_name = SVG_ADJUSTMENTS.get(name).unwrap(); - new_attributes.insert(new_name.to_string(), value.clone()); - } else { - new_attributes.insert(name.clone(), value.clone()); - } + if let Token::StartTagToken { attributes, .. } = token { + let mut new_attributes = HashMap::new(); + for (name, value) in attributes.iter() { + if SVG_ADJUSTMENTS.contains_key(name) { + let new_name = SVG_ADJUSTMENTS.get(name).unwrap(); + new_attributes.insert(new_name.to_string(), value.clone()); + } else { + new_attributes.insert(name.clone(), value.clone()); } - *attributes = new_attributes; } - _ => {} + *attributes = new_attributes; } } // Adjust attribute names in the given token for MathML fn adjust_mathml_attributes(&self, token: &mut Token) { - match token { - Token::StartTagToken { attributes, .. } => { - let mut new_attributes = HashMap::new(); - for (name, value) in attributes.iter() { - if MATHML_ADJUSTMENTS.contains_key(name) { - let new_name = SVG_ADJUSTMENTS.get(name).unwrap(); - new_attributes.insert(new_name.to_string(), value.clone()); - } else { - new_attributes.insert(name.clone(), value.clone()); - } + if let Token::StartTagToken { attributes, .. } = token { + let mut new_attributes = HashMap::new(); + for (name, value) in attributes.iter() { + if MATHML_ADJUSTMENTS.contains_key(name) { + let new_name = SVG_ADJUSTMENTS.get(name).unwrap(); + new_attributes.insert(new_name.to_string(), value.clone()); + } else { + new_attributes.insert(name.clone(), value.clone()); } - *attributes = new_attributes; } - _ => {} + *attributes = new_attributes; } } fn adjust_foreign_attributes(&self, token: &mut Token) { - match token { - Token::StartTagToken { attributes, .. } => { - let mut new_attributes = HashMap::new(); - for (name, value) in attributes.iter() { - if XML_ADJUSTMENTS.contains_key(name) { - let new_name = SVG_ADJUSTMENTS.get(name).unwrap(); - new_attributes.insert(new_name.to_string(), value.clone()); - } else { - new_attributes.insert(name.clone(), value.clone()); - } + if let Token::StartTagToken { attributes, .. } = token { + let mut new_attributes = HashMap::new(); + for (name, value) in attributes.iter() { + if XML_ADJUSTMENTS.contains_key(name) { + let new_name = SVG_ADJUSTMENTS.get(name).unwrap(); + new_attributes.insert(new_name.to_string(), value.clone()); + } else { + new_attributes.insert(name.clone(), value.clone()); } - *attributes = new_attributes; } - _ => {} + *attributes = new_attributes; } } @@ -3043,7 +3034,7 @@ impl<'a> Html5Parser<'a> { self.open_elements.push(node_id); // return element - return node_id; + node_id } fn parse_raw_data(&mut self) { @@ -3093,6 +3084,6 @@ impl<'a> Html5Parser<'a> { // be the content } - return adjusted_insertion_location; + adjusted_insertion_location } } diff --git a/src/html5_parser/parser/quirks.rs b/src/html5_parser/parser/quirks.rs index 0c706d4ec..239039649 100644 --- a/src/html5_parser/parser/quirks.rs +++ b/src/html5_parser/parser/quirks.rs @@ -20,65 +20,59 @@ impl<'a> Html5Parser<'a> { return QuirksMode::Quirks; } - if pub_identifer.is_some() { - let pub_id = pub_identifer.unwrap().to_lowercase(); + if let Some(value) = pub_identifer { + let pub_id = value.to_lowercase(); if QUIRKS_PUB_IDENTIFIER_EQ.contains(&pub_id.as_str()) { return QuirksMode::Quirks; } if QUIRKS_PUB_IDENTIFIER_PREFIX .iter() - .any(|&prefix| pub_id.as_str().starts_with(&prefix)) + .any(|&prefix| pub_id.as_str().starts_with(prefix)) { return QuirksMode::Quirks; } - if sys_identifier.is_none() { - if QUIRKS_PUB_IDENTIFIER_PREFIX_MISSING_SYS + if sys_identifier.is_none() && QUIRKS_PUB_IDENTIFIER_PREFIX_MISSING_SYS .iter() - .any(|&prefix| pub_id.as_str().starts_with(&prefix)) - { - return QuirksMode::Quirks; - } + .any(|&prefix| pub_id.as_str().starts_with(prefix)) { + return QuirksMode::Quirks; } if LIMITED_QUIRKS_PUB_IDENTIFIER_PREFIX .iter() - .any(|&prefix| pub_id.as_str().starts_with(&prefix)) + .any(|&prefix| pub_id.as_str().starts_with(prefix)) { return QuirksMode::LimitedQuirks; } - if sys_identifier.is_some() { - if LIMITED_QUIRKS_PUB_IDENTIFIER_PREFIX + if sys_identifier.is_some() && LIMITED_QUIRKS_PUB_IDENTIFIER_PREFIX .iter() - .any(|&prefix| pub_id.as_str().starts_with(&prefix)) - { - return QuirksMode::LimitedQuirks; - } + .any(|&prefix| pub_id.as_str().starts_with(prefix)) { + return QuirksMode::LimitedQuirks; } } - if sys_identifier.is_some() { - let sys_id = sys_identifier.unwrap().to_lowercase(); + if let Some(value) = sys_identifier { + let sys_id = value.to_lowercase(); if QUIRKS_SYS_IDENTIFIER_EQ .iter() - .any(|&prefix| sys_id.as_str().starts_with(&prefix)) + .any(|&prefix| sys_id.as_str().starts_with(prefix)) { return QuirksMode::Quirks; } } - return QuirksMode::NoQuirks; + QuirksMode::NoQuirks } } -static QUIRKS_PUB_IDENTIFIER_EQ: &'static [&'static str] = &[ +static QUIRKS_PUB_IDENTIFIER_EQ: &[&str] = &[ "-//W3O//DTD W3 HTML Strict 3.0//EN//", "-/W3C/DTD HTML 4.0 Transitional/EN", "HTML", ]; -static QUIRKS_PUB_IDENTIFIER_PREFIX: &'static [&'static str] = &[ +static QUIRKS_PUB_IDENTIFIER_PREFIX: &[&str] = &[ "+//Silmaril//dtd html Pro v0r11 19970101//", "-//AS//DTD HTML 3.0 asWedit + extensions//", "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//", @@ -136,20 +130,20 @@ static QUIRKS_PUB_IDENTIFIER_PREFIX: &'static [&'static str] = &[ "-//WebTechs//DTD Mozilla HTML//", ]; -static QUIRKS_PUB_IDENTIFIER_PREFIX_MISSING_SYS: &'static [&'static str] = &[ +static QUIRKS_PUB_IDENTIFIER_PREFIX_MISSING_SYS: &[&str] = &[ "-//W3C//DTD HTML 4.01 Frameset//", "-//W3C//DTD HTML 4.01 Transitional//", ]; -static QUIRKS_SYS_IDENTIFIER_EQ: &'static [&'static str] = +static QUIRKS_SYS_IDENTIFIER_EQ: &[&str] = &["http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"]; -static LIMITED_QUIRKS_PUB_IDENTIFIER_PREFIX: &'static [&'static str] = &[ +static LIMITED_QUIRKS_PUB_IDENTIFIER_PREFIX: &[&str] = &[ "-//W3C//DTD XHTML 1.0 Frameset//", "-//W3C//DTD XHTML 1.0 Transitional//", ]; -static LIMITED_QUIRKS_PUB_IDENTIFIER_PREFIX_NOT_MISSING_SYS: &'static [&'static str] = &[ +static LIMITED_QUIRKS_PUB_IDENTIFIER_PREFIX_NOT_MISSING_SYS: &[&str] = &[ "-//W3C//DTD HTML 4.01 Frameset//", "-//W3C//DTD HTML 4.01 Transitional//", ]; diff --git a/src/html5_parser/tokenizer/character_reference.rs b/src/html5_parser/tokenizer/character_reference.rs index 4f84a1374..8dadce2a7 100644 --- a/src/html5_parser/tokenizer/character_reference.rs +++ b/src/html5_parser/tokenizer/character_reference.rs @@ -10,15 +10,15 @@ use lazy_static::lazy_static; // Different states for the character references pub enum CcrState { - CharacterReferenceState, - NamedCharacterReferenceState, - AmbiguousAmpersandState, - NumericCharacterReferenceState, - HexadecimalCharacterReferenceStartState, - DecimalCharacterReferenceStartState, - HexadecimalCharacterReferenceState, - DecimalCharacterReferenceState, - NumericalCharacterReferenceEndState, + CharacterReference, + NamedCharacterReference, + AmbiguousAmpersand, + NumericCharacterReference, + HexadecimalCharacterReferenceStart, + DecimalCharacterReferenceStart, + HexadecimalCharacterReference, + DecimalCharacterReference, + NumericalCharacterReferenceEnd, } macro_rules! consume_temp_buffer { @@ -44,12 +44,12 @@ impl<'a> Tokenizer<'a> { _additional_allowed_char: Option, as_attribute: bool, ) { - let mut ccr_state = CcrState::CharacterReferenceState; + let mut ccr_state = CcrState::CharacterReference; let mut char_ref_code: Option = Some(0); loop { match ccr_state { - CcrState::CharacterReferenceState => { + CcrState::CharacterReference => { self.temporary_buffer = vec!['&']; let c = read_char!(self); @@ -62,11 +62,11 @@ impl<'a> Tokenizer<'a> { | Element::Utf8('a'..='z') | Element::Utf8('0'..='9') => { self.stream.unread(); - ccr_state = CcrState::NamedCharacterReferenceState; + ccr_state = CcrState::NamedCharacterReference; } Element::Utf8('#') => { self.temporary_buffer.push(c.utf8()); - ccr_state = CcrState::NumericCharacterReferenceState; + ccr_state = CcrState::NumericCharacterReference; } _ => { consume_temp_buffer!(self, as_attribute); @@ -76,12 +76,12 @@ impl<'a> Tokenizer<'a> { } } } - CcrState::NamedCharacterReferenceState => { + CcrState::NamedCharacterReference => { if let Some(entity) = self.find_entity() { self.stream.seek(SeekCur, entity.len() as isize); let c = self.stream.look_ahead(0); if as_attribute - && entity.chars().last().unwrap() != ';' + && !entity.ends_with(';') && c.is_utf8() && (c.utf8() == '=' || c.utf8().is_ascii_alphanumeric()) { @@ -106,7 +106,7 @@ impl<'a> Tokenizer<'a> { } self.temporary_buffer.clear(); - if entity.chars().last().unwrap() != ';' { + if !entity.ends_with(';') { // We need to return the position where we expected the ';' self.stream.read_char(); // We can't use skip, as this might interfere with EOF stuff (fix it) self.parse_error(ParserError::MissingSemicolonAfterCharacterReference); @@ -117,9 +117,9 @@ impl<'a> Tokenizer<'a> { } consume_temp_buffer!(self, as_attribute); - ccr_state = CcrState::AmbiguousAmpersandState; + ccr_state = CcrState::AmbiguousAmpersand; } - CcrState::AmbiguousAmpersandState => { + CcrState::AmbiguousAmpersand => { let c = read_char!(self); match c { // Element::Eof => return, @@ -143,7 +143,7 @@ impl<'a> Tokenizer<'a> { } } } - CcrState::NumericCharacterReferenceState => { + CcrState::NumericCharacterReference => { char_ref_code = Some(0); let c = read_char!(self); @@ -151,15 +151,15 @@ impl<'a> Tokenizer<'a> { // Element::Eof => ccr_state = CcrState::NumericalCharacterReferenceEndState, Element::Utf8('X') | Element::Utf8('x') => { self.temporary_buffer.push(c.utf8()); - ccr_state = CcrState::HexadecimalCharacterReferenceStartState; + ccr_state = CcrState::HexadecimalCharacterReferenceStart; } _ => { self.stream.unread(); - ccr_state = CcrState::DecimalCharacterReferenceStartState; + ccr_state = CcrState::DecimalCharacterReferenceStart; } } } - CcrState::HexadecimalCharacterReferenceStartState => { + CcrState::HexadecimalCharacterReferenceStart => { let c = read_char!(self); match c { // Element::Eof => ccr_state = CcrState::NumericalCharacterReferenceEndState, @@ -167,7 +167,7 @@ impl<'a> Tokenizer<'a> { | Element::Utf8('A'..='F') | Element::Utf8('a'..='f') => { self.stream.unread(); - ccr_state = CcrState::HexadecimalCharacterReferenceState + ccr_state = CcrState::HexadecimalCharacterReference } _ => { self.parse_error( @@ -180,12 +180,12 @@ impl<'a> Tokenizer<'a> { } } } - CcrState::DecimalCharacterReferenceStartState => { + CcrState::DecimalCharacterReferenceStart => { let c = read_char!(self); match c { Element::Utf8('0'..='9') => { self.stream.unread(); - ccr_state = CcrState::DecimalCharacterReferenceState; + ccr_state = CcrState::DecimalCharacterReference; } _ => { self.parse_error( @@ -198,7 +198,7 @@ impl<'a> Tokenizer<'a> { } } } - CcrState::HexadecimalCharacterReferenceState => { + CcrState::HexadecimalCharacterReference => { let c = read_char!(self); match c { // Element::Eof => ccr_state = CcrState::NumericalCharacterReferenceEndState, @@ -227,16 +227,16 @@ impl<'a> Tokenizer<'a> { } } Element::Utf8(';') => { - ccr_state = CcrState::NumericalCharacterReferenceEndState; + ccr_state = CcrState::NumericalCharacterReferenceEnd; } _ => { self.parse_error(ParserError::MissingSemicolonAfterCharacterReference); self.stream.unread(); - ccr_state = CcrState::NumericalCharacterReferenceEndState; + ccr_state = CcrState::NumericalCharacterReferenceEnd; } } } - CcrState::DecimalCharacterReferenceState => { + CcrState::DecimalCharacterReference => { let c = read_char!(self); match c { // Element::Eof => ccr_state = CcrState::NumericalCharacterReferenceEndState, @@ -249,16 +249,16 @@ impl<'a> Tokenizer<'a> { } } Element::Utf8(';') => { - ccr_state = CcrState::NumericalCharacterReferenceEndState; + ccr_state = CcrState::NumericalCharacterReferenceEnd; } _ => { self.parse_error(ParserError::MissingSemicolonAfterCharacterReference); self.stream.unread(); - ccr_state = CcrState::NumericalCharacterReferenceEndState; + ccr_state = CcrState::NumericalCharacterReferenceEnd; } } } - CcrState::NumericalCharacterReferenceEndState => { + CcrState::NumericalCharacterReferenceEnd => { let overflow = char_ref_code.is_none(); let mut char_ref_code = char_ref_code.unwrap_or(0); @@ -310,7 +310,7 @@ impl<'a> Tokenizer<'a> { } pub(crate) fn is_surrogate(&self, num: u32) -> bool { - num >= 0xD800 && num <= 0xDFFF + (0xD800..=0xDFFF).contains(&num) } pub(crate) fn is_noncharacter(&self, num: u32) -> bool { @@ -330,7 +330,7 @@ impl<'a> Tokenizer<'a> { return false; } - return (0x0001..=0x001F).contains(&num) || (0x007F..=0x009F).contains(&num); + (0x0001..=0x001F).contains(&num) || (0x007F..=0x009F).contains(&num) } // Finds the longest entity from the current position in the stream. Returns the entity diff --git a/src/html5_parser/tokenizer/mod.rs b/src/html5_parser/tokenizer/mod.rs index 5f2dae05f..61d0401bf 100644 --- a/src/html5_parser/tokenizer/mod.rs +++ b/src/html5_parser/tokenizer/mod.rs @@ -91,10 +91,8 @@ macro_rules! set_public_identifier { macro_rules! add_public_identifier { ($self:expr, $c:expr) => { match &mut $self.current_token { - Some(Token::DocTypeToken { pub_identifier, .. }) => { - if let Some(pid) = pub_identifier { - pid.push($c); - } + Some(Token::DocTypeToken { pub_identifier: Some(pid), .. }) => { + pid.push($c); } _ => {} } @@ -111,13 +109,12 @@ macro_rules! set_system_identifier { } }; } + macro_rules! add_system_identifier { ($self:expr, $c:expr) => { match &mut $self.current_token { - Some(Token::DocTypeToken { sys_identifier, .. }) => { - if let Some(sid) = sys_identifier { - sid.push($c); - } + Some(Token::DocTypeToken { sys_identifier: Some(sid), .. }) => { + sid.push($c); } _ => {} } @@ -210,23 +207,23 @@ impl<'a> Tokenizer<'a> { current_attr_value: String::new(), current_attrs: HashMap::new(), temporary_buffer: vec![], - error_logger: error_logger, + error_logger, }; } pub(crate) fn get_position(&self) -> Position { - return self.stream.position; + self.stream.position } // Retrieves the next token from the input stream or Token::EOF when the end is reached pub fn next_token(&mut self) -> Token { self.consume_stream(); - if self.token_queue.len() == 0 { + if self.token_queue.is_empty() { return Token::EofToken {}; } - return self.token_queue.remove(0); + self.token_queue.remove(0) } pub fn get_error_logger(&self) -> Ref { @@ -237,7 +234,7 @@ impl<'a> Tokenizer<'a> { fn consume_stream(&mut self) { loop { // Something is already in the token buffer, so we can return it. - if self.token_queue.len() > 0 { + if !self.token_queue.is_empty() { return; } @@ -263,7 +260,7 @@ impl<'a> Tokenizer<'a> { } } State::CharacterReferenceInDataState => { - _ = self.consume_character_reference(None, false); + self.consume_character_reference(None, false); self.state = State::DataState; } State::RcDataState => { @@ -287,7 +284,7 @@ impl<'a> Tokenizer<'a> { } State::CharacterReferenceInRcDataState => { // consume character reference - _ = self.consume_character_reference(None, false); + self.consume_character_reference(None, false); self.state = State::RcDataState; } State::RawTextState => { @@ -1257,7 +1254,7 @@ impl<'a> Tokenizer<'a> { match c { Element::Utf8('"') => self.state = State::AfterAttributeValueQuotedState, Element::Utf8('&') => { - _ = self.consume_character_reference(Some(Element::Utf8('"')), true) + self.consume_character_reference(Some(Element::Utf8('"')), true); } Element::Utf8(CHAR_NUL) => { self.parse_error(ParserError::UnexpectedNullCharacter); @@ -1277,7 +1274,7 @@ impl<'a> Tokenizer<'a> { match c { Element::Utf8('\'') => self.state = State::AfterAttributeValueQuotedState, Element::Utf8('&') => { - _ = self.consume_character_reference(Some(Element::Utf8('\'')), true) + self.consume_character_reference(Some(Element::Utf8('\'')), true); } Element::Utf8(CHAR_NUL) => { self.parse_error(ParserError::UnexpectedNullCharacter); @@ -1302,7 +1299,7 @@ impl<'a> Tokenizer<'a> { self.state = State::BeforeAttributeNameState; } Element::Utf8('&') => { - _ = self.consume_character_reference(Some(Element::Utf8('>')), true) + self.consume_character_reference(Some(Element::Utf8('>')), true); } Element::Utf8('>') => { self.store_and_clear_current_attribute(); @@ -2247,7 +2244,7 @@ impl<'a> Tokenizer<'a> { } // Return true when the given end_token matches the stored start token (ie: 'table' matches when last_start_token = 'table') - fn is_appropriate_end_token(&self, end_token: &Vec) -> bool { + fn is_appropriate_end_token(&self, end_token: &[char]) -> bool { let s: String = end_token.iter().collect(); self.last_start_token == s } @@ -2259,7 +2256,7 @@ impl<'a> Tokenizer<'a> { // Returns true if there is anything in the consume buffer pub fn has_consumed_data(&self) -> bool { - return self.consumed.len() > 0; + !self.consumed.is_empty() } // Clears the current consume buffer @@ -2294,21 +2291,15 @@ impl<'a> Tokenizer<'a> { // Set force_quirk mode in current token fn set_quirks_mode(&mut self, quirky: bool) { - match &mut self.current_token.as_mut().unwrap() { - Token::DocTypeToken { force_quirks, .. } => { - *force_quirks = quirky; - } - _ => {} + if let Token::DocTypeToken { force_quirks, .. } = &mut self.current_token.as_mut().unwrap() { + *force_quirks = quirky; } } // Adds a new attribute to the current token fn set_add_attribute_to_current_token(&mut self, name: String, value: String) { - match &mut self.current_token.as_mut().unwrap() { - Token::StartTagToken { attributes, .. } => { - attributes.insert(name.clone(), value.clone()); - } - _ => {} + if let Token::StartTagToken { attributes, .. } = &mut self.current_token.as_mut().unwrap() { + attributes.insert(name.clone(), value.clone()); } self.current_attr_name.clear() @@ -2329,7 +2320,7 @@ impl<'a> Tokenizer<'a> { // This function checks to see if there is already an attribute name like the one in current_attr_name. fn attr_already_exists(&mut self) -> bool { - return self.current_attrs.contains_key(&self.current_attr_name); + self.current_attrs.contains_key(&self.current_attr_name) } // Saves the current attribute name and value onto the current_attrs stack, if there is anything to store diff --git a/src/html5_parser/tokenizer/token.rs b/src/html5_parser/tokenizer/token.rs index c82dec586..b2d114d14 100644 --- a/src/html5_parser/tokenizer/token.rs +++ b/src/html5_parser/tokenizer/token.rs @@ -58,11 +58,7 @@ impl Token { // Returns true when the token is an EOF token pub fn is_eof(&self) -> bool { - if let Token::EofToken = self { - true - } else { - false - } + matches!(self, Token::EofToken) } // Returns true if the text token is empty or only contains whitespace