From 12ac8a87ef1b17a8acb6f0768daccf57b1be452c Mon Sep 17 00:00:00 2001 From: Joshua Thijssen Date: Fri, 3 Nov 2023 14:48:27 +0100 Subject: [PATCH 01/14] Refactor of the tree-construction test harness --- src/bin/html5-parser-test.rs | 36 +- src/bin/parser-test.rs | 305 +++++------- src/html5/parser.rs | 256 +++++----- src/html5/parser/helper.rs | 29 +- src/testing.rs | 1 + src/testing/tree_construction.rs | 551 +++++++++------------ src/testing/tree_construction/fixture.rs | 95 ++++ src/testing/tree_construction/generator.rs | 110 ++++ src/testing/tree_construction/parser.rs | 18 +- src/testing/tree_construction/result.rs | 69 +++ tests/tree_construction.rs | 53 +- 11 files changed, 841 insertions(+), 682 deletions(-) create mode 100644 src/testing/tree_construction/fixture.rs create mode 100644 src/testing/tree_construction/generator.rs create mode 100644 src/testing/tree_construction/result.rs diff --git a/src/bin/html5-parser-test.rs b/src/bin/html5-parser-test.rs index fe2118581..4d80f0788 100755 --- a/src/bin/html5-parser-test.rs +++ b/src/bin/html5-parser-test.rs @@ -1,28 +1,40 @@ -use gosub_engine::testing::tree_construction::fixture_from_filename; -use gosub_engine::testing::FIXTURE_ROOT; +use gosub_engine::testing::tree_construction::fixture::{ + get_fixture_root_path, read_fixture_from_path, +}; +use gosub_engine::testing::tree_construction::Harness; use gosub_engine::types::Result; use std::io::Write; -use std::path::Path; +use std::path::{Path, PathBuf}; use walkdir::WalkDir; fn main() -> Result<()> { - let mut files = get_files_from_path(format!("{}/tree-construction", FIXTURE_ROOT).as_str()); + let mut files = get_files_from_path(get_fixture_root_path()); files.sort(); let mut total = 0; let mut failed = 0; for file in files.iter() { - let fixture = fixture_from_filename(file)?; + // if file != "math.dat" { + // continue; + // } + let fixture = read_fixture_from_path(&get_fixture_root_path().join(file))?; + print!("Test: ({:3}) {} [", fixture.tests.len(), file); let _ = std::io::stdout().flush(); + let mut harness = Harness::new(); + // Run tests - for test in fixture.tests { - let results = test.run().expect("problem running tree construction test"); - for result in results { + for test in fixture.tests.iter() { + for &scripting_enabled in test.script_modes() { + let result = harness + .run_test(test.clone(), scripting_enabled) + .expect("problem parsing"); + total += 1; - if result.success() { + + if result.is_success() { print!("."); } else { print!("X"); @@ -45,10 +57,10 @@ fn main() -> Result<()> { Ok(()) } -fn get_files_from_path(root_dir: &str) -> Vec { +fn get_files_from_path(dir: PathBuf) -> Vec { let mut files = Vec::new(); - for entry in WalkDir::new(root_dir) + for entry in WalkDir::new(dir.clone()) .follow_links(true) .into_iter() .flatten() @@ -58,7 +70,7 @@ fn get_files_from_path(root_dir: &str) -> Vec { if extension == "dat" { if let Ok(relative_path) = entry .path() - .strip_prefix(root_dir) + .strip_prefix(dir.clone()) .map(Path::to_str) .map(|s| s.unwrap().to_string()) { diff --git a/src/bin/parser-test.rs b/src/bin/parser-test.rs index 1ac1c7ec7..63776e3a1 100755 --- a/src/bin/parser-test.rs +++ b/src/bin/parser-test.rs @@ -1,12 +1,10 @@ -use gosub_engine::{ - testing::{ - self, - tree_construction::{ErrorResult, NodeResult, SubtreeResult, Test, TestResult}, - }, - types::Result, -}; +use gosub_engine::testing::tree_construction::fixture::read_fixtures; +use gosub_engine::testing::tree_construction::result::ResultStatus; +use gosub_engine::testing::tree_construction::Harness; +use gosub_engine::testing::tree_construction::Test; -pub struct TestResults { +/// Holds the results from all tests that are executed +pub struct GlobalTestResults { /// Number of tests (as defined in the suite) tests: usize, /// Number of assertions (different combinations of input/output per test) @@ -21,8 +19,8 @@ pub struct TestResults { tests_failed: Vec<(usize, usize, String)>, } -fn main() -> Result<()> { - let mut results = TestResults { +fn main() { + let mut results = GlobalTestResults { tests: 0, assertions: 0, succeeded: 0, @@ -31,8 +29,8 @@ fn main() -> Result<()> { tests_failed: Vec::new(), }; - let filenames = Some(&["doctype01.dat"][..]); - let fixtures = testing::tree_construction::fixtures(filenames).expect("fixtures"); + let filenames = Some(&["template.dat"][..]); + let fixtures = read_fixtures(filenames).expect("fixtures"); for fixture_file in fixtures { println!( @@ -43,9 +41,9 @@ fn main() -> Result<()> { let mut test_idx = 1; for test in fixture_file.tests { - // if test_idx == 81 { - run_tree_test(test_idx, &test, &mut results); - // } + if test_idx == 1 { + run_test(test_idx, test, &mut results); + } test_idx += 1; } @@ -67,10 +65,9 @@ fn main() -> Result<()> { println!(" {}", data); } } - Ok(()) } -fn run_tree_test(test_idx: usize, test: &Test, all_results: &mut TestResults) { +fn run_test(_test_idx: usize, test: Test, all_results: &mut GlobalTestResults) { #[cfg(feature = "debug_parser_verbose")] println!( "๐Ÿงช Running test #{test_idx}: {}:{}", @@ -79,189 +76,127 @@ fn run_tree_test(test_idx: usize, test: &Test, all_results: &mut TestResults) { all_results.tests += 1; - let results = test.run().expect("problem running tree construction test"); + let mut harness = Harness::new(); + let result = harness + .run_test(test.clone(), false) + .expect("problem parsing"); - for result in results { - #[cfg(feature = "debug_parser")] - print_test_result(&result); + // #[cfg(feature = "debug_parser")] + // print_test_result(&result); - // Check the document tree, which counts as a single assertion + for entry in &result.tree_results { all_results.assertions += 1; - if result.success() { - all_results.succeeded += 1; - } else { - all_results.failed += 1; - } - - let errors = test.errors(); - if result.actual_errors.len() != errors.len() { - #[cfg(feature = "debug_parser")] - println!( - "โš ๏ธ Unexpected errors found (wanted {}, got {}): ", - errors.len(), - result.actual_errors.len() - ); - - // for want_err in &test.errors { - // println!( - // " * Want: '{}' at {}:{}", - // want_err.code, want_err.line, want_err.col - // ); - // } - // for got_err in &parse_errors { - // println!( - // " * Got: '{}' at {}:{}", - // got_err.message, got_err.line, got_err.col - // ); - // } - // results.assertions += 1; - // results.failed += 1; - } else { - #[cfg(feature = "debug_parser")] - println!("โœ… Found {} errors", result.actual_errors.len()); - } + match entry.result { + ResultStatus::Success => { + all_results.succeeded += 1; - // For now, we skip the tests that checks for errors as most of the errors do not match - // with the actual tests, as these errors as specific from html5lib. Either we reuse them - // or have some kind of mapping to our own errors if we decide to use our custom errors. + #[cfg(feature = "debug_parser")] + println!("โœ… {}", entry.actual); + } + ResultStatus::Missing => { + all_results.failed += 1; - // // Check each error messages - // let mut idx = 0; - // for error in &test.errors { - // if parse_errors.get(idx).is_none() { - // println!("โŒ Expected error '{}' at {}:{}", error.code, error.line, error.col); - // results.assertions += 1; - // results.failed += 1; - // continue; - // } - // - // let err = parse_errors.get(idx).unwrap(); - // let got_error = Error{ - // code: err.message.to_string(), - // line: err.line as i64, - // col: err.col as i64, - // }; - // - // match match_error(&got_error, &error) { - // ErrorResult::Failure => { - // results.assertions += 1; - // results.failed += 1; - // }, - // ErrorResult::PositionFailure => { - // results.assertions += 1; - // results.failed += 1; - // results.failed_position += 1; - // }, - // ErrorResult::Success => { - // results.assertions += 1; - // results.succeeded += 1; - // } - // } - // - // idx += 1; - // } + #[cfg(feature = "debug_parser")] + println!("โŒ {} (missing)", entry.expected); + } + ResultStatus::Additional => { + all_results.failed += 1; - // Display additional data if there a failure is found - if !result.success() { - all_results - .tests_failed - .push((test_idx, test.line, test.data().to_string())); + #[cfg(feature = "debug_parser")] + println!("โŒ {} (unexpected)", entry.actual); + } + ResultStatus::Mismatch => { + all_results.failed += 1; - if cfg!(feature = "debug_parser") { - println!("----------------------------------------"); - println!("๐Ÿ“„ Input stream: "); - println!("{}", test.data()); - println!("----------------------------------------"); - println!("๐ŸŒณ Generated tree: "); - println!("{}", result.actual_document); - println!("----------------------------------------"); - println!("๐ŸŒณ Expected tree: "); - for line in &test.document { - println!("{line}"); - } + #[cfg(feature = "debug_parser")] + println!("โŒ {} (wanted: {})", entry.actual, entry.expected); } + _ => {} } - - #[cfg(feature = "debug_parser")] - println!("----------------------------------------"); } -} -#[allow(dead_code)] -fn print_test_result(result: &TestResult) { - // We need a better tree match system. Right now we match the tree based on the (debug) output - // of the tree. Instead, we should generate a document-tree from the expected output and compare - // it against the current generated tree. - print_node_result(&result.root) -} - -#[allow(dead_code)] -fn print_node_result(result: &SubtreeResult) { - match &result.node { - Some(NodeResult::ElementMatchSuccess { actual }) => { - println!("โœ… {actual}"); - } - - Some(NodeResult::AttributeMatchFailure { name, expected, .. }) => { - println!("โŒ {expected}, Found unexpected attribute: {name}"); - } - - Some(NodeResult::ElementMatchFailure { - actual, expected, .. - }) => { - println!("โŒ {expected}, Found unexpected element node: {actual}"); - } - - Some(NodeResult::TextMatchSuccess { expected }) => { - println!("โœ… {expected}"); - } + for entry in &result.error_results { + all_results.assertions += 1; - Some(NodeResult::TextMatchFailure { expected, text, .. }) => { - println!("โŒ {expected}, Found unexpected text node: {text}"); - } + match entry.result { + ResultStatus::Success => { + all_results.succeeded += 1; - Some(NodeResult::DocTypeMatchFailure { - actual, expected, .. - }) => { - println!("โŒ {actual}, Found unexpected doctype node: {expected}"); - } - - Some(NodeResult::CommentMatchFailure { - expected, comment, .. - }) => { - println!("โŒ {expected}, Found unexpected comment node: {comment}"); + #[cfg(feature = "debug_parser")] + println!( + "โœ… ({}:{}) {}", + entry.actual.line, entry.actual.col, entry.actual.message + ); + } + ResultStatus::Missing => { + all_results.failed += 1; + + #[cfg(feature = "debug_parser")] + println!( + "โŒ ({}:{}) {} (missing)", + entry.expected.line, entry.expected.col, entry.expected.message + ); + } + ResultStatus::Additional => { + all_results.failed += 1; + + #[cfg(feature = "debug_parser")] + println!( + "โŒ ({}:{}) {} (unexpected)", + entry.actual.line, entry.actual.col, entry.actual.message + ); + } + ResultStatus::Mismatch => { + all_results.failed += 1; + + #[cfg(feature = "debug_parser")] + println!( + "โŒ ({}:{}) {} (wanted: {})", + entry.actual.line, + entry.actual.col, + entry.actual.message, + entry.expected.message + ); + } + ResultStatus::IncorrectPosition => { + all_results.failed += 1; + all_results.failed_position += 1; + + #[cfg(feature = "debug_parser")] + println!( + "โŒ ({}:{}) (wanted: ({}::{})) {}", + entry.actual.line, + entry.actual.col, + entry.expected.line, + entry.expected.col, + entry.expected.message + ); + } } - - None => {} } - result.children.iter().for_each(print_node_result); -} - -#[allow(dead_code)] -fn match_error(result: ErrorResult) { - match result { - ErrorResult::Success { actual } => { - println!( - "โœ… Found parse error '{}' at {}:{}", - actual.code, actual.line, actual.col - ); - } - - ErrorResult::Failure { expected, .. } => { - println!( - "โŒ Expected error '{}' at {}:{}", - expected.code, expected.line, expected.col - ); - } - - ErrorResult::PositionFailure { actual, expected } => { - // Found an error with the same code, but different line/pos - println!( - "โš ๏ธ Unexpected error position '{}' at {}:{} (got: {}:{})", - expected.code, expected.line, expected.col, actual.line, actual.col - ); - } - } + // // Display additional data if there a failure is found + // if !result.success() { + // all_results + // .tests_failed + // .push((test_idx, test.line, test.data().to_string())); + // + // if cfg!(feature = "debug_parser") { + // println!("----------------------------------------"); + // println!("๐Ÿ“„ Input stream: "); + // println!("{}", test.data()); + // println!("----------------------------------------"); + // println!("๐ŸŒณ Generated tree: "); + // println!("{}", result.actual_document); + // println!("----------------------------------------"); + // println!("๐ŸŒณ Expected tree: "); + // for line in &test.document { + // println!("{line}"); + // } + // } + // } + // + // #[cfg(feature = "debug_parser")] + // println!("----------------------------------------"); } diff --git a/src/html5/parser.rs b/src/html5/parser.rs index 615df3087..3d07c1aab 100644 --- a/src/html5/parser.rs +++ b/src/html5/parser.rs @@ -401,7 +401,7 @@ impl<'chars> Html5Parser<'chars> { } /// Internal parser function that does the actual parsing - pub fn do_parse(&mut self) -> Result> { + fn do_parse(&mut self) -> Result> { let mut dispatcher_mode = DispatcherMode::Html; loop { @@ -1203,7 +1203,7 @@ impl<'chars> Html5Parser<'chars> { Token::EndTag { name, .. } if name == "tbody" || name == "tfoot" || name == "thead" => { - if !self.is_in_scope(name, Scope::Table) { + if !self.is_in_scope(name, HTML_NAMESPACE, Scope::Table) { self.parse_error("tbody, tfoot or thead tag not allowed in in table body insertion mode"); // ignore token return; @@ -1218,9 +1218,9 @@ impl<'chars> Html5Parser<'chars> { if ["caption", "col", "colgroup", "tbody", "tfoot", "thead"] .contains(&name.as_str()) => { - if !self.is_in_scope("tbody", Scope::Table) - && !self.is_in_scope("tfoot", Scope::Table) - && !self.is_in_scope("thead", Scope::Table) + if !self.is_in_scope("tbody", HTML_NAMESPACE, Scope::Table) + && !self.is_in_scope("tfoot", HTML_NAMESPACE, Scope::Table) + && !self.is_in_scope("thead", HTML_NAMESPACE, Scope::Table) { self.parse_error("caption, col, colgroup, tbody, tfoot or thead tag not allowed in in table body insertion mode"); // ignore token @@ -1234,9 +1234,9 @@ impl<'chars> Html5Parser<'chars> { self.reprocess_token = true; } Token::EndTag { name, .. } if name == "table" => { - if !self.is_in_scope("tbody", Scope::Table) - && !self.is_in_scope("tfoot", Scope::Table) - && !self.is_in_scope("thead", Scope::Table) + if !self.is_in_scope("tbody", HTML_NAMESPACE, Scope::Table) + && !self.is_in_scope("tfoot", HTML_NAMESPACE, Scope::Table) + && !self.is_in_scope("thead", HTML_NAMESPACE, Scope::Table) { self.parse_error( "table end tag not allowed in in table body insertion mode", @@ -1275,7 +1275,7 @@ impl<'chars> Html5Parser<'chars> { self.active_formatting_elements_push_marker(); } Token::EndTag { name, .. } if name == "tr" => { - if !self.is_in_scope("tr", Scope::Table) { + if !self.is_in_scope("tr", HTML_NAMESPACE, Scope::Table) { self.parse_error("tr tag not allowed in in row insertion mode"); // ignore token return; @@ -1292,7 +1292,7 @@ impl<'chars> Html5Parser<'chars> { ] .contains(&name.as_str()) => { - if !self.is_in_scope("tr", Scope::Table) { + if !self.is_in_scope("tr", HTML_NAMESPACE, Scope::Table) { self.parse_error("caption, col, colgroup, tbody, tfoot or thead tag not allowed in in row insertion mode"); // ignore token return; @@ -1305,7 +1305,7 @@ impl<'chars> Html5Parser<'chars> { self.reprocess_token = true; } Token::EndTag { name, .. } if name == "table" => { - if !self.is_in_scope("tr", Scope::Table) { + if !self.is_in_scope("tr", HTML_NAMESPACE, Scope::Table) { self.parse_error("table tag not allowed in in row insertion mode"); // ignore token return; @@ -1320,13 +1320,13 @@ impl<'chars> Html5Parser<'chars> { Token::EndTag { name, .. } if name == "tbody" || name == "tfoot" || name == "thead" => { - if !self.is_in_scope(name, Scope::Table) { + if !self.is_in_scope(name, HTML_NAMESPACE, Scope::Table) { self.parse_error("tbody, tfoot or thead tag not allowed in in table body insertion mode"); // ignore token return; } - if !self.is_in_scope("tr", Scope::Table) { + if !self.is_in_scope("tr", HTML_NAMESPACE, Scope::Table) { // ignore token return; } @@ -1357,7 +1357,7 @@ impl<'chars> Html5Parser<'chars> { Token::EndTag { name, .. } if name == "th" || name == "td" => { let token_name = name.clone(); - if !self.is_in_scope(name.as_str(), Scope::Table) { + if !self.is_in_scope(name.as_str(), HTML_NAMESPACE, Scope::Table) { self.parse_error("th or td tag not allowed in in cell insertion mode"); // ignore token return; @@ -1381,8 +1381,8 @@ impl<'chars> Html5Parser<'chars> { ] .contains(&name.as_str()) => { - if !self.is_in_scope("td", Scope::Table) - && !self.is_in_scope("th", Scope::Table) + if !self.is_in_scope("td", HTML_NAMESPACE, Scope::Table) + && !self.is_in_scope("th", HTML_NAMESPACE, Scope::Table) { // fragment case self.parse_error("caption, col, colgroup, tbody, tfoot or thead tag not allowed in in cell insertion mode"); @@ -1410,7 +1410,7 @@ impl<'chars> Html5Parser<'chars> { || name == "thead" || name == "tr" => { - if !self.is_in_scope(name.as_str(), Scope::Table) { + if !self.is_in_scope(name.as_str(), HTML_NAMESPACE, Scope::Table) { self.parse_error("tbody, tfoot or thead tag not allowed in in table body insertion mode"); // ignore token return; @@ -1453,7 +1453,7 @@ impl<'chars> Html5Parser<'chars> { { self.parse_error("caption, table, tbody, tfoot, thead, tr, td or th tag not allowed in in select in table insertion mode"); - if !self.is_in_scope(name, Scope::Table) { + if !self.is_in_scope(name, HTML_NAMESPACE, Scope::Table) { // ignore token return; } @@ -1982,10 +1982,10 @@ impl<'chars> Html5Parser<'chars> { } /// Checks if the given element is in given scope - fn is_in_scope(&self, tag: &str, scope: Scope) -> bool { + fn is_in_scope(&self, tag: &str, namespace: &str, scope: Scope) -> bool { for &node_id in self.open_elements.iter().rev() { let node = get_node_by_id!(self.document, node_id).clone(); - if node.name == tag { + if node.name == tag && node.is_namespace(namespace) { return true; } let default_html_scope = [ @@ -2188,7 +2188,7 @@ impl<'chars> Html5Parser<'chars> { } } Token::EndTag { name, .. } if name == "body" => { - if !self.is_in_scope("body", Scope::Regular) { + if !self.is_in_scope("body", HTML_NAMESPACE, Scope::Regular) { self.parse_error("body end tag not in scope"); // ignore token return; @@ -2199,7 +2199,7 @@ impl<'chars> Html5Parser<'chars> { self.insertion_mode = InsertionMode::AfterBody; } Token::EndTag { name, .. } if name == "html" => { - if !self.is_in_scope("body", Scope::Regular) { + if !self.is_in_scope("body", HTML_NAMESPACE, Scope::Regular) { self.parse_error("body end tag not in scope"); // ignore token return; @@ -2237,7 +2237,7 @@ impl<'chars> Html5Parser<'chars> { || name == "summary" || name == "ul" => { - if self.is_in_scope("p", Scope::Button) { + if self.is_in_scope("p", HTML_NAMESPACE, Scope::Button) { self.close_p_element(); } @@ -2251,7 +2251,7 @@ impl<'chars> Html5Parser<'chars> { || name == "h5" || name == "h6" => { - if self.is_in_scope("p", Scope::Button) { + if self.is_in_scope("p", HTML_NAMESPACE, Scope::Button) { self.close_p_element(); } @@ -2264,7 +2264,7 @@ impl<'chars> Html5Parser<'chars> { self.insert_html_element(&self.current_token.clone()); } Token::StartTag { name, .. } if name == "pre" || name == "listing" => { - if self.is_in_scope("p", Scope::Button) { + if self.is_in_scope("p", HTML_NAMESPACE, Scope::Button) { self.close_p_element(); } @@ -2281,7 +2281,7 @@ impl<'chars> Html5Parser<'chars> { return; } - if self.is_in_scope("p", Scope::Button) { + if self.is_in_scope("p", HTML_NAMESPACE, Scope::Button) { self.close_p_element(); } @@ -2316,7 +2316,7 @@ impl<'chars> Html5Parser<'chars> { idx -= 1; } - if self.is_in_scope("p", Scope::Button) { + if self.is_in_scope("p", HTML_NAMESPACE, Scope::Button) { self.close_p_element(); } @@ -2348,14 +2348,14 @@ impl<'chars> Html5Parser<'chars> { idx -= 1; } - if self.is_in_scope("p", Scope::Button) { + if self.is_in_scope("p", HTML_NAMESPACE, Scope::Button) { self.close_p_element(); } self.insert_html_element(&self.current_token.clone()); } Token::StartTag { name, .. } if name == "plaintext" => { - if self.is_in_scope("p", Scope::Button) { + if self.is_in_scope("p", HTML_NAMESPACE, Scope::Button) { self.close_p_element(); } @@ -2364,7 +2364,7 @@ impl<'chars> Html5Parser<'chars> { self.tokenizer.state = State::PLAINTEXT; } Token::StartTag { name, .. } if name == "button" => { - if self.is_in_scope("button", Scope::Regular) { + if self.is_in_scope("button", HTML_NAMESPACE, Scope::Regular) { self.parse_error("button tag not allowed in in body insertion mode"); self.generate_implied_end_tags(None, false); self.pop_until_named("button"); @@ -2403,7 +2403,7 @@ impl<'chars> Html5Parser<'chars> { || name == "summary" || name == "ul" => { - if !self.is_in_scope(name, Scope::Regular) { + if !self.is_in_scope(name, HTML_NAMESPACE, Scope::Regular) { self.parse_error("end tag not in scope"); // ignore token return; @@ -2423,7 +2423,8 @@ impl<'chars> Html5Parser<'chars> { let node_id = self.form_element; self.form_element = None; - if node_id.is_none() || !self.is_in_scope(name, Scope::Regular) { + if node_id.is_none() || !self.is_in_scope(name, HTML_NAMESPACE, Scope::Regular) + { self.parse_error("end tag not in scope"); // ignore token return; @@ -2442,7 +2443,7 @@ impl<'chars> Html5Parser<'chars> { } self.open_elements_remove(node_id); } else { - if !self.is_in_scope(name, Scope::Regular) { + if !self.is_in_scope(name, HTML_NAMESPACE, Scope::Regular) { self.parse_error("end tag not in scope"); // ignore token return; @@ -2459,7 +2460,7 @@ impl<'chars> Html5Parser<'chars> { } } Token::EndTag { name, .. } if name == "p" => { - if !self.is_in_scope(name, Scope::Button) { + if !self.is_in_scope(name, HTML_NAMESPACE, Scope::Button) { self.parse_error("end tag not in scope"); let token = Token::StartTag { @@ -2473,7 +2474,7 @@ impl<'chars> Html5Parser<'chars> { self.close_p_element(); } Token::EndTag { name, .. } if name == "li" => { - if !self.is_in_scope(name, Scope::ListItem) { + if !self.is_in_scope(name, HTML_NAMESPACE, Scope::ListItem) { self.parse_error("end tag not in scope"); // ignore token return; @@ -2488,7 +2489,7 @@ impl<'chars> Html5Parser<'chars> { self.pop_until_named(name); } Token::EndTag { name, .. } if name == "dd" || name == "dt" => { - if !self.is_in_scope(name, Scope::Regular) { + if !self.is_in_scope(name, HTML_NAMESPACE, Scope::Regular) { self.parse_error("end tag not in scope"); // ignore token return; @@ -2512,7 +2513,7 @@ impl<'chars> Html5Parser<'chars> { { if ["h1", "h2", "h3", "h4", "h5", "h6"] .iter() - .map(|tag| self.is_in_scope(tag, Scope::Regular)) + .map(|tag| self.is_in_scope(tag, HTML_NAMESPACE, Scope::Regular)) .any(|res| res) { self.generate_implied_end_tags(Some(name), false); @@ -2568,7 +2569,7 @@ impl<'chars> Html5Parser<'chars> { Token::StartTag { name, .. } if name == "nobr" => { self.reconstruct_formatting(); - if self.is_in_scope("nobr", Scope::Regular) { + if self.is_in_scope("nobr", HTML_NAMESPACE, Scope::Regular) { self.parse_error("nobr tag in scope"); self.adoption_agency_algorithm(&self.current_token.clone()); self.reconstruct_formatting(); @@ -2611,7 +2612,7 @@ impl<'chars> Html5Parser<'chars> { Token::EndTag { name, .. } if name == "applet" || name == "marquee" || name == "object" => { - if !self.is_in_scope(name, Scope::Regular) { + if !self.is_in_scope(name, HTML_NAMESPACE, Scope::Regular) { self.parse_error("end tag not in scope"); // ignore token return; @@ -2628,7 +2629,7 @@ impl<'chars> Html5Parser<'chars> { } Token::StartTag { name, .. } if name == "table" => { if self.document.get_mut().quirks_mode != QuirksMode::Quirks - && self.is_in_scope("p", Scope::Button) + && self.is_in_scope("p", HTML_NAMESPACE, Scope::Button) { self.close_p_element(); } @@ -2710,7 +2711,7 @@ impl<'chars> Html5Parser<'chars> { is_self_closing, .. } if name == "hr" => { - if self.is_in_scope("p", Scope::Button) { + if self.is_in_scope("p", HTML_NAMESPACE, Scope::Button) { self.close_p_element(); } @@ -2744,7 +2745,7 @@ impl<'chars> Html5Parser<'chars> { self.insertion_mode = InsertionMode::Text; } Token::StartTag { name, .. } if name == "xmp" => { - if self.is_in_scope("p", Scope::Button) { + if self.is_in_scope("p", HTML_NAMESPACE, Scope::Button) { self.close_p_element(); } @@ -2790,7 +2791,7 @@ impl<'chars> Html5Parser<'chars> { self.insert_html_element(&self.current_token.clone()); } Token::StartTag { name, .. } if name == "rb" || name == "rtc" => { - if self.is_in_scope("ruby", Scope::Regular) { + if self.is_in_scope("ruby", HTML_NAMESPACE, Scope::Regular) { self.generate_implied_end_tags(None, false); } @@ -2801,7 +2802,7 @@ impl<'chars> Html5Parser<'chars> { self.insert_html_element(&self.current_token.clone()); } Token::StartTag { name, .. } if name == "rp" || name == "rt" => { - if self.is_in_scope("ruby", Scope::Regular) { + if self.is_in_scope("ruby", HTML_NAMESPACE, Scope::Regular) { self.generate_implied_end_tags(Some("rtc"), false); } @@ -2966,6 +2967,22 @@ impl<'chars> Html5Parser<'chars> { Token::StartTag { name, .. } if name == "template" => { let node_id = self.insert_html_element(&self.current_token.clone()); + self.active_formatting_elements_push_marker(); + self.frameset_ok = false; + self.insertion_mode = InsertionMode::InTemplate; + self.template_insertion_mode.push(InsertionMode::InTemplate); + + // Let adjusted insert location + // intended parent + // document = indented parent's node document + + // check shadow root != none + // or allow declarative shadow roots == true + // or adjusted current node is not topmost element in stack open elements + // then insert html element for token + // else: + // + { let current_node_id = current_node!(self).id; @@ -2976,11 +2993,6 @@ impl<'chars> Html5Parser<'chars> { data.template_contents = Some(DocumentFragment::new(doc, current_node_id)); } } - - self.active_formatting_elements_push_marker(); - self.frameset_ok = false; - self.insertion_mode = InsertionMode::InTemplate; - self.template_insertion_mode.push(InsertionMode::InTemplate); } Token::EndTag { name, .. } if name == "template" => { if !self.open_elements_has("template") { @@ -3352,7 +3364,7 @@ impl<'chars> Html5Parser<'chars> { } } Token::EndTag { name, .. } if name == "select" => { - if !self.is_in_scope("select", Scope::Select) { + if !self.is_in_scope("select", HTML_NAMESPACE, Scope::Select) { // fragment case self.parse_error("select end tag not allowed in in select insertion mode"); // ignore token @@ -3365,7 +3377,7 @@ impl<'chars> Html5Parser<'chars> { Token::StartTag { name, .. } if name == "select" => { self.parse_error("select tag not allowed in in select insertion mode"); - if !self.is_in_scope("select", Scope::Select) { + if !self.is_in_scope("select", HTML_NAMESPACE, Scope::Select) { // fragment case // ignore token return; @@ -3381,7 +3393,7 @@ impl<'chars> Html5Parser<'chars> { "input, keygen or textarea tag not allowed in in select insertion mode", ); - if !self.is_in_scope("select", Scope::Select) { + if !self.is_in_scope("select", HTML_NAMESPACE, Scope::Select) { // fragment case // ignore token return; @@ -3884,10 +3896,10 @@ mod test { node_create!(parser, "div"); node_create!(parser, "p"); node_create!(parser, "button"); - assert!(parser.is_in_scope("p", Scope::Regular)); - assert!(!parser.is_in_scope("p", Scope::Button)); - assert!(parser.is_in_scope("p", Scope::ListItem)); - assert!(!parser.is_in_scope("p", Scope::Select)); + assert!(parser.is_in_scope("p", HTML_NAMESPACE, Scope::Regular)); + assert!(!parser.is_in_scope("p", HTML_NAMESPACE, Scope::Button)); + assert!(parser.is_in_scope("p", HTML_NAMESPACE, Scope::ListItem)); + assert!(!parser.is_in_scope("p", HTML_NAMESPACE, Scope::Select)); } #[test] @@ -3896,10 +3908,10 @@ mod test { let mut parser = Html5Parser::new_parser(chars); parser.open_elements.clear(); - assert!(!parser.is_in_scope("p", Scope::Regular)); - assert!(!parser.is_in_scope("p", Scope::Button)); - assert!(!parser.is_in_scope("p", Scope::ListItem)); - assert!(!parser.is_in_scope("p", Scope::Select)); + assert!(!parser.is_in_scope("p", HTML_NAMESPACE, Scope::Regular)); + assert!(!parser.is_in_scope("p", HTML_NAMESPACE, Scope::Button)); + assert!(!parser.is_in_scope("p", HTML_NAMESPACE, Scope::ListItem)); + assert!(!parser.is_in_scope("p", HTML_NAMESPACE, Scope::Select)); } #[test] @@ -3912,10 +3924,10 @@ mod test { node_create!(parser, "p"); node_create!(parser, "button"); - assert!(!parser.is_in_scope("foo", Scope::Regular)); - assert!(!parser.is_in_scope("foo", Scope::Button)); - assert!(!parser.is_in_scope("foo", Scope::ListItem)); - assert!(!parser.is_in_scope("foo", Scope::Select)); + assert!(!parser.is_in_scope("foo", HTML_NAMESPACE, Scope::Regular)); + assert!(!parser.is_in_scope("foo", HTML_NAMESPACE, Scope::Button)); + assert!(!parser.is_in_scope("foo", HTML_NAMESPACE, Scope::ListItem)); + assert!(!parser.is_in_scope("foo", HTML_NAMESPACE, Scope::Select)); } #[test] @@ -3931,29 +3943,29 @@ mod test { node_create!(parser, "p"); node_create!(parser, "span"); - assert!(parser.is_in_scope("p", Scope::Regular)); - assert!(parser.is_in_scope("p", Scope::ListItem)); - assert!(parser.is_in_scope("p", Scope::Button)); - assert!(parser.is_in_scope("p", Scope::Table)); - assert!(!parser.is_in_scope("p", Scope::Select)); - - assert!(!parser.is_in_scope("div", Scope::Regular)); - assert!(!parser.is_in_scope("div", Scope::ListItem)); - assert!(!parser.is_in_scope("div", Scope::Button)); - assert!(!parser.is_in_scope("div", Scope::Table)); - assert!(!parser.is_in_scope("div", Scope::Select)); - - assert!(!parser.is_in_scope("tr", Scope::Regular)); - assert!(!parser.is_in_scope("tr", Scope::ListItem)); - assert!(!parser.is_in_scope("tr", Scope::Button)); - assert!(parser.is_in_scope("tr", Scope::Table)); - assert!(!parser.is_in_scope("tr", Scope::Select)); - - assert!(!parser.is_in_scope("xmp", Scope::Regular)); - assert!(!parser.is_in_scope("xmp", Scope::ListItem)); - assert!(!parser.is_in_scope("xmp", Scope::Button)); - assert!(!parser.is_in_scope("xmp", Scope::Table)); - assert!(!parser.is_in_scope("xmp", Scope::Select)); + assert!(parser.is_in_scope("p", HTML_NAMESPACE, Scope::Regular)); + assert!(parser.is_in_scope("p", HTML_NAMESPACE, Scope::ListItem)); + assert!(parser.is_in_scope("p", HTML_NAMESPACE, Scope::Button)); + assert!(parser.is_in_scope("p", HTML_NAMESPACE, Scope::Table)); + assert!(!parser.is_in_scope("p", HTML_NAMESPACE, Scope::Select)); + + assert!(!parser.is_in_scope("div", HTML_NAMESPACE, Scope::Regular)); + assert!(!parser.is_in_scope("div", HTML_NAMESPACE, Scope::ListItem)); + assert!(!parser.is_in_scope("div", HTML_NAMESPACE, Scope::Button)); + assert!(!parser.is_in_scope("div", HTML_NAMESPACE, Scope::Table)); + assert!(!parser.is_in_scope("div", HTML_NAMESPACE, Scope::Select)); + + assert!(!parser.is_in_scope("tr", HTML_NAMESPACE, Scope::Regular)); + assert!(!parser.is_in_scope("tr", HTML_NAMESPACE, Scope::ListItem)); + assert!(!parser.is_in_scope("tr", HTML_NAMESPACE, Scope::Button)); + assert!(parser.is_in_scope("tr", HTML_NAMESPACE, Scope::Table)); + assert!(!parser.is_in_scope("tr", HTML_NAMESPACE, Scope::Select)); + + assert!(!parser.is_in_scope("xmp", HTML_NAMESPACE, Scope::Regular)); + assert!(!parser.is_in_scope("xmp", HTML_NAMESPACE, Scope::ListItem)); + assert!(!parser.is_in_scope("xmp", HTML_NAMESPACE, Scope::Button)); + assert!(!parser.is_in_scope("xmp", HTML_NAMESPACE, Scope::Table)); + assert!(!parser.is_in_scope("xmp", HTML_NAMESPACE, Scope::Select)); } #[test] @@ -3968,11 +3980,11 @@ mod test { node_create!(parser, "div"); node_create!(parser, "button"); - assert!(parser.is_in_scope("li", Scope::Regular)); - assert!(parser.is_in_scope("li", Scope::ListItem)); - assert!(!parser.is_in_scope("li", Scope::Button)); - assert!(parser.is_in_scope("li", Scope::Table)); - assert!(!parser.is_in_scope("li", Scope::Select)); + assert!(parser.is_in_scope("li", HTML_NAMESPACE, Scope::Regular)); + assert!(parser.is_in_scope("li", HTML_NAMESPACE, Scope::ListItem)); + assert!(!parser.is_in_scope("li", HTML_NAMESPACE, Scope::Button)); + assert!(parser.is_in_scope("li", HTML_NAMESPACE, Scope::Table)); + assert!(!parser.is_in_scope("li", HTML_NAMESPACE, Scope::Select)); } #[test] @@ -3987,11 +3999,11 @@ mod test { node_create!(parser, "li"); node_create!(parser, "p"); - assert!(parser.is_in_scope("li", Scope::Regular)); - assert!(parser.is_in_scope("li", Scope::ListItem)); - assert!(parser.is_in_scope("li", Scope::Button)); - assert!(parser.is_in_scope("li", Scope::Table)); - assert!(!parser.is_in_scope("li", Scope::Select)); + assert!(parser.is_in_scope("li", HTML_NAMESPACE, Scope::Regular)); + assert!(parser.is_in_scope("li", HTML_NAMESPACE, Scope::ListItem)); + assert!(parser.is_in_scope("li", HTML_NAMESPACE, Scope::Button)); + assert!(parser.is_in_scope("li", HTML_NAMESPACE, Scope::Table)); + assert!(!parser.is_in_scope("li", HTML_NAMESPACE, Scope::Select)); } #[test] @@ -4008,11 +4020,11 @@ mod test { node_create!(parser, "button"); node_create!(parser, "span"); - assert!(parser.is_in_scope("td", Scope::Regular)); - assert!(parser.is_in_scope("td", Scope::ListItem)); - assert!(!parser.is_in_scope("td", Scope::Button)); - assert!(parser.is_in_scope("td", Scope::Table)); - assert!(!parser.is_in_scope("td", Scope::Select)); + assert!(parser.is_in_scope("td", HTML_NAMESPACE, Scope::Regular)); + assert!(parser.is_in_scope("td", HTML_NAMESPACE, Scope::ListItem)); + assert!(!parser.is_in_scope("td", HTML_NAMESPACE, Scope::Button)); + assert!(parser.is_in_scope("td", HTML_NAMESPACE, Scope::Table)); + assert!(!parser.is_in_scope("td", HTML_NAMESPACE, Scope::Select)); } #[test] @@ -4028,11 +4040,11 @@ mod test { node_create!(parser, "a"); node_create!(parser, "span"); - assert!(!parser.is_in_scope("div", Scope::Regular)); - assert!(!parser.is_in_scope("div", Scope::ListItem)); - assert!(!parser.is_in_scope("div", Scope::Button)); - assert!(parser.is_in_scope("div", Scope::Table)); - assert!(!parser.is_in_scope("div", Scope::Select)); + assert!(!parser.is_in_scope("div", HTML_NAMESPACE, Scope::Regular)); + assert!(!parser.is_in_scope("div", HTML_NAMESPACE, Scope::ListItem)); + assert!(!parser.is_in_scope("div", HTML_NAMESPACE, Scope::Button)); + assert!(parser.is_in_scope("div", HTML_NAMESPACE, Scope::Table)); + assert!(!parser.is_in_scope("div", HTML_NAMESPACE, Scope::Select)); } #[test] @@ -4048,11 +4060,11 @@ mod test { node_create!(parser, "marquee"); node_create!(parser, "p"); - assert!(!parser.is_in_scope("ul", Scope::Regular)); - assert!(!parser.is_in_scope("ul", Scope::ListItem)); - assert!(!parser.is_in_scope("ul", Scope::Button)); - assert!(parser.is_in_scope("ul", Scope::Table)); - assert!(!parser.is_in_scope("ul", Scope::Select)); + assert!(!parser.is_in_scope("ul", HTML_NAMESPACE, Scope::Regular)); + assert!(!parser.is_in_scope("ul", HTML_NAMESPACE, Scope::ListItem)); + assert!(!parser.is_in_scope("ul", HTML_NAMESPACE, Scope::Button)); + assert!(parser.is_in_scope("ul", HTML_NAMESPACE, Scope::Table)); + assert!(!parser.is_in_scope("ul", HTML_NAMESPACE, Scope::Select)); } #[test] @@ -4067,11 +4079,11 @@ mod test { node_create!(parser, "caption"); node_create!(parser, "p"); - assert!(!parser.is_in_scope("table", Scope::Regular)); - assert!(!parser.is_in_scope("table", Scope::ListItem)); - assert!(!parser.is_in_scope("table", Scope::Button)); - assert!(parser.is_in_scope("table", Scope::Table)); - assert!(!parser.is_in_scope("table", Scope::Select)); + assert!(!parser.is_in_scope("table", HTML_NAMESPACE, Scope::Regular)); + assert!(!parser.is_in_scope("table", HTML_NAMESPACE, Scope::ListItem)); + assert!(!parser.is_in_scope("table", HTML_NAMESPACE, Scope::Button)); + assert!(parser.is_in_scope("table", HTML_NAMESPACE, Scope::Table)); + assert!(!parser.is_in_scope("table", HTML_NAMESPACE, Scope::Select)); } #[test] @@ -4085,11 +4097,11 @@ mod test { node_create!(parser, "optgroup"); node_create!(parser, "option"); - assert!(parser.is_in_scope("select", Scope::Regular)); - assert!(parser.is_in_scope("select", Scope::ListItem)); - assert!(parser.is_in_scope("select", Scope::Button)); - assert!(parser.is_in_scope("select", Scope::Table)); - assert!(parser.is_in_scope("select", Scope::Select)); + assert!(parser.is_in_scope("select", HTML_NAMESPACE, Scope::Regular)); + assert!(parser.is_in_scope("select", HTML_NAMESPACE, Scope::ListItem)); + assert!(parser.is_in_scope("select", HTML_NAMESPACE, Scope::Button)); + assert!(parser.is_in_scope("select", HTML_NAMESPACE, Scope::Table)); + assert!(parser.is_in_scope("select", HTML_NAMESPACE, Scope::Select)); } #[test] diff --git a/src/html5/parser/helper.rs b/src/html5/parser/helper.rs index 889f2f052..2582614c4 100644 --- a/src/html5/parser/helper.rs +++ b/src/html5/parser/helper.rs @@ -90,12 +90,7 @@ impl Html5Parser<'_> { } } - pub fn insert_text_helper( - &mut self, - node: NodeId, - position: InsertionPositionMode, - token: &Token, - ) { + pub fn insert_text_helper(&mut self, position: InsertionPositionMode, token: &Token) { match position { InsertionPositionMode::Sibling { handle, @@ -107,7 +102,8 @@ impl Html5Parser<'_> { let position = parent_node.children.iter().position(|&x| x == before); match position { None | Some(0) => { - doc.attach_node_to_parent(node, parent, position); + let node = self.create_node(token, HTML_NAMESPACE); + doc.add_node(node, parent, position); } Some(index) => { let last_node_id = parent_node.children[index - 1]; @@ -120,7 +116,9 @@ impl Html5Parser<'_> { value.push_str(&token.to_string()); return; }; - doc.attach_node_to_parent(node, parent, Some(index)); + + let node = self.create_node(token, HTML_NAMESPACE); + doc.add_node(node, parent, Some(index)); } } } @@ -138,10 +136,13 @@ impl Html5Parser<'_> { value.push_str(&token.to_string()); return; }; - doc.attach_node_to_parent(node, parent, None); + let node = self.create_node(token, HTML_NAMESPACE); + doc.add_node(node, parent, None); return; } - doc.attach_node_to_parent(node, parent, None); + + let node = self.create_node(token, HTML_NAMESPACE); + doc.add_node(node, parent, None); } } } @@ -239,11 +240,9 @@ impl Html5Parser<'_> { } pub fn insert_text_element(&mut self, token: &Token) { - let node = self.create_node(token, HTML_NAMESPACE); - let node_id = self.document.get_mut().add_new_node(node); let insertion_position = self.appropriate_place_insert(None); - // TODO, for text element, if the insertion_position is Docuement, should not do next step. - self.insert_text_helper(node_id, insertion_position, token); + // TODO, for text element, if the insertion_position is Document, should not do next step. + self.insert_text_helper(insertion_position, token); } // @todo: where is the fragment case handled? (substep 4: https://html.spec.whatwg.org/multipage/parsing.html#appropriate-place-for-inserting-a-node) @@ -363,7 +362,7 @@ impl Html5Parser<'_> { }; // step 4.5 - if !self.is_in_scope(&format_elem_node.name, Scope::Regular) { + if !self.is_in_scope(&format_elem_node.name, HTML_NAMESPACE, Scope::Regular) { self.parse_error("format_element_node not in regular scope"); return; } diff --git a/src/testing.rs b/src/testing.rs index 3d66b2abf..e0deb4be0 100644 --- a/src/testing.rs +++ b/src/testing.rs @@ -3,3 +3,4 @@ pub mod tokenizer; pub mod tree_construction; pub const FIXTURE_ROOT: &str = "./tests/data/html5lib-tests"; +pub const TREE_CONSTRUCTION_PATH: &str = "tree-construction"; diff --git a/src/testing/tree_construction.rs b/src/testing/tree_construction.rs index e953ec5bb..5030c4bbb 100644 --- a/src/testing/tree_construction.rs +++ b/src/testing/tree_construction.rs @@ -1,16 +1,19 @@ -mod parser; +pub mod fixture; +mod generator; +pub(crate) mod parser; +pub mod result; -use self::parser::{ErrorSpec, ScriptMode, TestSpec, QUOTED_DOUBLE_NEWLINE}; -use super::FIXTURE_ROOT; -use crate::html5::node::data::doctype::DocTypeData; use crate::html5::node::{HTML_NAMESPACE, MATHML_NAMESPACE, SVG_NAMESPACE}; use crate::html5::parser::document::DocumentBuilder; use crate::html5::parser::tree_builder::TreeBuilder; use crate::html5::parser::Html5ParserOptions; +use crate::testing::tree_construction::generator::TreeOutputGenerator; +use crate::testing::tree_construction::parser::{ScriptMode, TestSpec}; +use crate::testing::tree_construction::result::{ResultStatus, TreeLineResult}; use crate::{ bytes::CharIterator, html5::{ - node::{NodeData, NodeId}, + node::NodeId, parser::{ document::{Document, DocumentHandle}, Html5Parser, @@ -18,28 +21,7 @@ use crate::{ }, types::{ParseError, Result}, }; -use std::{ - fs, - path::{Path, PathBuf}, -}; - -/// Holds all tests as found in the given fixture file -#[derive(Debug, PartialEq)] -pub struct FixtureFile { - pub tests: Vec, - pub path: String, -} - -/// Holds information about an error -#[derive(Clone, Debug, PartialEq)] -pub struct TestError { - /// The code or message of the error - pub code: String, - /// The line number (1-based) where the error occurred - pub line: i64, - /// The column number (1-based) where the error occurred - pub col: i64, -} +use result::TestResult; /// Holds a single parser test #[derive(Debug, PartialEq)] @@ -50,188 +32,80 @@ pub struct Test { pub line: usize, /// The specification of the test provided in the test file pub spec: TestSpec, - /// The document tree that is expected to be parsed + /// The document tree as found in the spec converted to an array pub document: Vec, } -/// Holds the result of a single "node" (which is either an element, text or comment) -pub enum NodeResult { - /// An attribute of an element node did not match - AttributeMatchFailure { - name: String, - actual: String, - expected: String, - }, - - /// The actual element does not match the expected element - ElementMatchFailure { - name: String, - actual: String, - expected: String, - }, - - /// The element matches the expected element - ElementMatchSuccess { - actual: String, - }, - - /// A text node did not match - TextMatchFailure { - actual: String, - expected: String, - text: String, - }, - - // A doctype node did not match - DocTypeMatchFailure { - actual: String, - expected: String, - }, - - /// A comment node did not match - CommentMatchFailure { - actual: String, - expected: String, - comment: String, - }, - - /// A text node matches - TextMatchSuccess { - expected: String, - }, -} - -pub struct SubtreeResult { - pub node: Option, - pub children: Vec, - next_expected_idx: Option, -} - -impl SubtreeResult { - pub fn valid(&self) -> bool { - self.next_expected_idx.is_some() - } -} - -#[derive(PartialEq)] -pub enum ErrorResult { - /// Found the correct error - Success { actual: TestError }, - /// Didn't find the error (not even with incorrect position) - Failure { - actual: TestError, - expected: TestError, - }, - /// Found the error, but on an incorrect position - PositionFailure { - actual: TestError, - expected: TestError, - }, -} - -pub struct TestResult { - pub root: SubtreeResult, - pub actual_document: DocumentHandle, - pub actual_errors: Vec, -} - -impl TestResult { - pub fn success(&self) -> bool { - self.root.valid() +impl Clone for Test { + fn clone(&self) -> Self { + Self { + file_path: self.file_path.clone(), + line: self.line, + spec: self.spec.clone(), + document: self.document.clone(), + } } } impl Test { - pub fn data(&self) -> &str { - self.spec.data.strip_suffix('\n').unwrap_or_default() + /// Returns the script modes that should be tested as an array + pub fn script_modes(&self) -> &[bool] { + match self.spec.script_mode { + ScriptMode::ScriptOff => &[false], + ScriptMode::ScriptOn => &[true], + ScriptMode::Both => &[false, true], + } } - pub fn errors(&self) -> &Vec { - &self.spec.errors + pub fn get_document_as_str(&self) -> &str { + return self.spec.document.as_str(); } +} - /// Runs the test and returns the result - pub fn run(&self) -> Result> { - let mut results = vec![]; - - for &scripting_enabled in self.script_modes() { - let (actual_document, actual_errors) = self.parse(scripting_enabled)?; - let root = self.match_document_tree(&actual_document.get()); - results.push(TestResult { - root, - actual_document, - actual_errors, - }); +impl Default for Test { + fn default() -> Self { + Self { + file_path: "".to_string(), + line: 0, + spec: TestSpec::default(), + document: vec![], } - - Ok(results) } +} - /// Verifies that the tree construction code obtains the right result - pub fn assert_valid(&self) { - let results = self.run().expect("failed to parse"); - - fn assert_tree(tree: &SubtreeResult) { - match &tree.node { - Some(NodeResult::ElementMatchSuccess { .. }) - | Some(NodeResult::TextMatchSuccess { .. }) - | None => {} - - Some(NodeResult::TextMatchFailure { - actual, expected, .. - }) => { - panic!("text match failed, wanted: [{expected}], got: [{actual}]"); - } - - Some(NodeResult::DocTypeMatchFailure { - actual, expected, .. - }) => { - panic!("doctype match failed, wanted: [{expected}], got: [{actual}]"); - } - - Some(NodeResult::ElementMatchFailure { - actual, - expected, - name, - }) => { - panic!("element [{name}] match failed, wanted: [{expected}], got: [{actual}]"); - } +/// Harness is a wrapper to run tree-construction tests +#[derive(Default)] +pub struct Harness { + // Test that is currently being run + test: Test, + /// Next line in the document array + next_document_line: usize, +} - Some(NodeResult::AttributeMatchFailure { - name, - actual, - expected, - }) => { - panic!( - "attribute [{name}] match failed, wanted: [{expected}], got: [{actual}]" - ); - } +impl Harness { + /// Generated a new harness instance. It uses a dummy test that is replaced when run_test is called + pub fn new() -> Self { + Self::default() + } - Some(NodeResult::CommentMatchFailure { - actual, expected, .. - }) => { - panic!("comment match failed, wanted: [{expected}], got: [{actual}]"); - } - } + /// Runs a single test and returns the test result of that run + pub fn run_test(&mut self, test: Test, scripting_enabled: bool) -> Result { + self.test = test; + self.next_document_line = 0; - tree.children.iter().for_each(assert_tree); - } + let (actual_document, actual_errors) = self.do_parse(scripting_enabled)?; + let result = self.generate_test_result(Document::clone(&actual_document), &actual_errors); - for result in results { - assert_tree(&result.root); - assert!(result.success(), "invalid tree-construction result"); - } + Ok(result) } - /// Run the parser and return the document and errors - pub fn parse(&self, scripting_enabled: bool) -> Result<(DocumentHandle, Vec)> { - // let mut is_fragment = false; + /// Run the html5 parser and return the document tree and errors + fn do_parse(&mut self, scripting_enabled: bool) -> Result<(DocumentHandle, Vec)> { let mut context_node = None; let document; - let is_fragment; - if let Some(fragment) = self.spec.document_fragment.clone() { + if let Some(fragment) = self.test.spec.document_fragment.clone() { // First, create a (fake) main document that contains only the fragment as node let main_document = DocumentBuilder::new_document(); let mut main_document = Document::clone(&main_document); @@ -267,11 +141,10 @@ impl Test { document = DocumentBuilder::new_document(); }; - // Create a new parser let options = Html5ParserOptions { scripting_enabled }; let mut chars = CharIterator::new(); - chars.read_from_str(self.data(), None); + chars.read_from_str(self.test.spec.data.as_str(), None); let parse_errors = if is_fragment { Html5Parser::parse_fragment( @@ -287,8 +160,171 @@ impl Test { Ok((document, parse_errors)) } + /// Retrieves the next line from the spec document + fn get_next_line(&mut self) -> Option { + let mut line = String::new(); + let mut is_multi_line_text = false; + + loop { + // If we are at the end of the document, we return None + if self.next_document_line >= self.test.document.len() { + return None; + } + + // Get the next line + let tmp = self.test.document[self.next_document_line].to_owned(); + self.next_document_line += 1; + + // If we have a starting quote, but not an ending quote, we are a multi-line text + if tmp.starts_with('\"') && !tmp.ends_with('\"') { + is_multi_line_text = true; + } + + // Add the line to the current line if we are a multiline + if is_multi_line_text { + line.push_str(&tmp); + } else { + line = tmp; + } + + // Only break if we're in a multi-line text and we found the ending double-quote + if is_multi_line_text && line.ends_with('\"') { + break; + } + + // if we are not a multi-line, we can just break + if !is_multi_line_text { + break; + } + + // Otherwise we continue with the next line (multi-line text) + } + + Some(line.to_string()) + } + + fn generate_test_result( + &mut self, + document: DocumentHandle, + _parse_errors: &[ParseError], + ) -> TestResult { + let mut result = TestResult::default(); + + let generator = TreeOutputGenerator::new(document); + let actual = generator.generate(); + + let mut line_idx = 1; + for actual_line in actual { + let mut status = ResultStatus::Success; + + let expected_line = self.get_next_line(); + match expected_line.clone() { + Some(expected_line) => { + if actual_line != expected_line { + status = ResultStatus::Mismatch; + } + } + None => { + status = ResultStatus::Missing; + } + } + + result.tree_results.push(TreeLineResult { + index: line_idx, + result: status, + expected: expected_line.unwrap_or_default().to_string(), + actual: actual_line.to_string(), + }); + line_idx += 1; + } + + // Check if we have additional lines and if so, add as errors + loop { + let expected_line = self.get_next_line(); + if expected_line.is_none() { + break; + } + + result.tree_results.push(TreeLineResult { + index: line_idx, + result: ResultStatus::Additional, + expected: expected_line.expect("").to_string(), + actual: "".into(), + }); + line_idx += 1; + } + + // if ! result.is_success() { + // let actual = generator.generate(); + // let expected = self.test.document.clone(); + // println!("\n\nactual : {:?}", actual); + // println!("expected : {:?}\n\n", expected); + // } + + result + } +} + +/* + /// Verifies that the tree construction code obtains the right result + pub fn assert_valid(&self) { + let results = self.run().expect("failed to parse"); + + fn assert_tree(tree: &SubtreeResult) { + match &tree.node { + Some(NodeResult::ElementMatchSuccess { .. }) + | Some(NodeResult::TextMatchSuccess { .. }) + | None => {} + + Some(NodeResult::TextMatchFailure { + actual, expected, .. + }) => { + panic!("text match failed, wanted: [{expected}], got: [{actual}]"); + } + + Some(NodeResult::DocTypeMatchFailure { + actual, expected, .. + }) => { + panic!("doctype match failed, wanted: [{expected}], got: [{actual}]"); + } + + Some(NodeResult::ElementMatchFailure { + actual, + expected, + name, + }) => { + panic!("element [{name}] match failed, wanted: [{expected}], got: [{actual}]"); + } + + Some(NodeResult::AttributeMatchFailure { + name, + actual, + expected, + }) => { + panic!( + "attribute [{name}] match failed, wanted: [{expected}], got: [{actual}]" + ); + } + + Some(NodeResult::CommentMatchFailure { + actual, expected, .. + }) => { + panic!("comment match failed, wanted: [{expected}], got: [{actual}]"); + } + } + + tree.children.iter().for_each(assert_tree); + } + + for result in results { + assert_tree(&result.result); + assert!(result.success(), "invalid tree-construction result"); + } + } + + /// Returns true if the whole document tree matches the expected result - pub fn match_document_tree(&self, document: &Document) -> SubtreeResult { + fn match_document_tree(&self, document: &Document) -> SubtreeResult { if self.spec.document_fragment.is_some() { self.match_node(NodeId::from(1), 0, 0, document) } else { @@ -310,10 +346,10 @@ impl Test { let node_result = match &node.data { NodeData::DocType(DocTypeData { - name, - pub_identifier, - sys_identifier, - }) => { + name, + pub_identifier, + sys_identifier, + }) => { let doctype_text = if pub_identifier.is_empty() && sys_identifier.is_empty() { // name.to_string() @@ -328,9 +364,7 @@ impl Test { doctype_text.trim(), ); - let expected = self.document[next_expected_idx as usize].to_owned(); - next_expected_idx += 1; - + let expected = self.get_next_line().expect("line"); if actual != expected { let node = Some(NodeResult::DocTypeMatchFailure { actual, @@ -366,8 +400,7 @@ impl Test { element.name() ); - let expected = self.document[next_expected_idx as usize].to_owned(); - next_expected_idx += 1; + let expected = self.get_next_line().expect("line"); if actual != expected { let node = Some(NodeResult::ElementMatchFailure { @@ -393,8 +426,7 @@ impl Test { sorted_attrs.sort_by(|a, b| a.0.cmp(b.0)); for attr in sorted_attrs { - let expected = self.document[next_expected_idx as usize].to_owned(); - next_expected_idx += 1; + let expected = self.get_next_line().expect("line"); let actual = format!( "|{}{}=\"{}\"", @@ -427,23 +459,7 @@ impl Test { text.value() ); - // Text might be split over multiple lines, read all lines until we find the closing - // quote. - let mut expected = String::new(); - loop { - let tmp = self.document[next_expected_idx as usize].to_owned(); - next_expected_idx += 1; - - expected.push_str(&tmp); - - if tmp.ends_with('\"') { - break; - } else { - // each line is terminated with a newline - expected.push('\n'); - } - } - + let expected = self.get_next_line().expect("line"); if actual != expected { let node = Some(NodeResult::TextMatchFailure { actual, @@ -466,8 +482,7 @@ impl Test { " ".repeat(indent as usize * 2 + 1), comment.value() ); - let expected = self.document[next_expected_idx as usize].to_owned(); - next_expected_idx += 1; + let expected = self.get_next_line().expect("line"); if actual != expected { let node = Some(NodeResult::CommentMatchFailure { @@ -535,103 +550,5 @@ impl Test { actual: actual.to_owned(), } } - - pub fn script_modes(&self) -> &[bool] { - match self.spec.script_mode { - ScriptMode::ScriptOff => &[false], - ScriptMode::ScriptOn => &[true], - ScriptMode::Both => &[false, true], - } - } -} - -pub fn fixture_from_filename(filename: &str) -> Result { - let path = PathBuf::from(FIXTURE_ROOT) - .join("tree-construction") - .join(filename); - fixture_from_path(&path) -} - -// Split into an array of lines. Combine lines in cases where a subsequent line does not -// have a "|" prefix using an "\n" delimiter. Otherwise strip "\n" from lines. -fn document(s: &str) -> Vec { - let mut document = s - .replace(QUOTED_DOUBLE_NEWLINE, "\"\n\n\"") - .split('|') - .skip(1) - .filter_map(|l| { - if l.is_empty() { - None - } else { - Some(format!("|{}", l.trim_end())) - } - }) - .collect::>(); - - // TODO: drop the following line - document.push("".into()); - document -} - -/// Read a given test file and extract all test data -pub fn fixture_from_path(path: &PathBuf) -> Result { - let input = fs::read_to_string(path)?; - let path = path.to_string_lossy().into_owned(); - - let tests = parser::parse_str(&input)? - .into_iter() - .map(|spec| Test { - file_path: path.to_string(), - line: spec.position.line, - document: document(&spec.document), - spec, - }) - .collect::>(); - - Ok(FixtureFile { - tests, - path: path.to_string(), - }) -} - -fn use_fixture(filenames: &[&str], path: &Path) -> bool { - if !path.is_file() || path.extension().expect("file ending") != "dat" { - return false; - } - - if filenames.is_empty() { - return true; - } - - filenames.iter().any(|filename| path.ends_with(filename)) -} - -pub fn fixtures(filenames: Option<&[&str]>) -> Result> { - let root = PathBuf::from(FIXTURE_ROOT).join("tree-construction"); - let filenames = filenames.unwrap_or_default(); - let mut files = vec![]; - - for entry in fs::read_dir(root)? { - let path = entry?.path(); - - if path.is_file() { - if !use_fixture(filenames, &path) { - continue; - } - - let file = fixture_from_path(&path)?; - files.push(file); - } else { - for subentry in fs::read_dir(path)? { - let path = subentry?.path(); - if !use_fixture(filenames, &path) { - continue; - } - let file = fixture_from_path(&path)?; - files.push(file); - } - } - } - - Ok(files) } + */ diff --git a/src/testing/tree_construction/fixture.rs b/src/testing/tree_construction/fixture.rs new file mode 100644 index 000000000..e8e6dc39c --- /dev/null +++ b/src/testing/tree_construction/fixture.rs @@ -0,0 +1,95 @@ +use crate::testing::tree_construction::parser::{parse_fixture, QUOTED_DOUBLE_NEWLINE}; +use crate::testing::tree_construction::Test; +use crate::testing::{FIXTURE_ROOT, TREE_CONSTRUCTION_PATH}; +use crate::types::Error; +use std::fs; +use std::path::{Path, PathBuf}; + +/// Holds all tests as found in the given fixture file +#[derive(Debug, PartialEq)] +pub struct FixtureFile { + /// All the tests extracted from this fixture file + pub tests: Vec, + /// Path to the fixture file + pub path: String, +} + +/// Reads a given test file and extract all test data +pub fn read_fixture_from_path(path: &PathBuf) -> Result { + let input = fs::read_to_string(path)?; + let path = path.to_string_lossy().into_owned(); + + let tests = parse_fixture(&input)? + .into_iter() + .map(|spec| Test { + file_path: path.to_string(), + line: spec.position.line, + document: create_document_array(&spec.document), + spec, + }) + .collect::>(); + + Ok(FixtureFile { + tests, + path: path.to_string(), + }) +} + +/// Returns true when the fixture at 'path' is a correct fixture file and is allowed to be used +/// according to the list of given filenames. If no filenames are given, all fixtures are used. +fn use_fixture(filenames: &[&str], path: &Path) -> bool { + if !path.is_file() || path.extension().expect("file ending") != "dat" { + return false; + } + + if filenames.is_empty() { + return true; + } + + filenames.iter().any(|filename| path.ends_with(filename)) +} + +/// Returns the root path for the fixtures +pub fn get_fixture_root_path() -> PathBuf { + PathBuf::from(FIXTURE_ROOT).join(TREE_CONSTRUCTION_PATH) +} + +/// Read tree construction fixtures from the given path. If no filenames are given, all +/// fixtures are read, otherwise only the fixes with the given filenames are read. +pub fn read_fixtures(filenames: Option<&[&str]>) -> Result, Error> { + let filenames = filenames.unwrap_or_default(); + let mut files = vec![]; + + for entry in fs::read_dir(get_fixture_root_path())? { + let path = entry?.path(); + + // Check if the fixture is a correct fixture file and if it's allowed to be used + if !use_fixture(filenames, &path) { + continue; + } + + let file = read_fixture_from_path(&path)?; + files.push(file); + } + + Ok(files) +} + +// Split a string into an array of lines. Combine lines in cases where a subsequent line does not +// have a "|" prefix using an "\n" delimiter. Otherwise strip "\n" from lines. +fn create_document_array(s: &str) -> Vec { + let document = s + .replace(QUOTED_DOUBLE_NEWLINE, "\"\n\n\"") + .split('|') + .skip(1) + .filter_map(|l| { + if l.is_empty() { + None + } else { + Some(format!("|{}", l.trim_end())) + } + }) + .collect::>(); + + document +} diff --git a/src/testing/tree_construction/generator.rs b/src/testing/tree_construction/generator.rs new file mode 100644 index 000000000..d5a5db53f --- /dev/null +++ b/src/testing/tree_construction/generator.rs @@ -0,0 +1,110 @@ +use crate::html5::node::{Node, NodeData, NodeTrait, NodeType}; +use crate::html5::node::{MATHML_NAMESPACE, SVG_NAMESPACE, XLINK_NAMESPACE, XMLNS_NAMESPACE}; +use crate::html5::parser::document::DocumentHandle; + +/// Generates a tree output that can be used for matching with the expected output +pub struct TreeOutputGenerator { + document: DocumentHandle, +} + +impl TreeOutputGenerator { + /// Initializes a new tree output generator + pub fn new(document: DocumentHandle) -> Self { + Self { document } + } + + /// Generates a tree + pub fn generate(&self) -> Vec { + self.output_treeline(self.document.get().get_root(), 0) + } + + /// Generates an array of indented tree line and its children. Note that text lines can have newlines in them + fn output_treeline(&self, node: &Node, indent_level: usize) -> Vec { + let mut indent_level = indent_level; + let mut output = Vec::new(); + + // We can skip the document node, as it is always the root node (either a document node, or + // a html node when it's a fragment) + if indent_level > 0 { + output.push(format!( + "| {}{}", + " ".repeat(indent_level - 1), + self.output_node(node) + )); + + if node.type_of() == NodeType::Element { + if let NodeData::Element(element) = &node.data { + let mut sorted_attrs = vec![]; + for attr in element.attributes.iter() { + sorted_attrs.push(attr); + } + sorted_attrs.sort_by(|a, b| a.0.cmp(b.0)); + + for attr in sorted_attrs.iter() { + output.push(format!( + "| {}{}=\"{}\"", + " ".repeat(indent_level), + attr.0, + attr.1 + )); + } + } + } + + // Template tags have an extra "content" node in the test tree ouput + if node.name == "template" { + output.push(format!("| {}content", " ".repeat(indent_level))); + indent_level += 1; + } + } + + for child_id in node.children.iter() { + let doc = self.document.get(); + let child_node = doc.get_node_by_id(*child_id).expect("node not found"); + + output.append(&mut self.output_treeline(child_node, indent_level + 1)); + } + + output + } + + /// Generate the output for a single node + fn output_node(&self, node: &Node) -> String { + match node.data.clone() { + NodeData::Element(element) => { + match node.namespace.clone() { + Some(ns) => { + let ns_prefix = match ns.as_str() { + MATHML_NAMESPACE => "math ", + SVG_NAMESPACE => "svg ", + XMLNS_NAMESPACE => "xml ", + XLINK_NAMESPACE => "xlink ", + _ => "", + }; + format!("<{}{}>", ns_prefix, element.name()) + } + None => format!("<{}>", element.name()), + } + // format!("<{}{}>", ns_prefix, element.name()) + } + NodeData::Text(text) => format!("\"{}\"", text.value()), + NodeData::Comment(comment) => format!("", comment.value()), + NodeData::DocType(doctype) => { + let doctype_text = + if doctype.pub_identifier.is_empty() && doctype.sys_identifier.is_empty() { + // + doctype.name.to_string() + } else { + // + format!( + r#"{0} "{1}" "{2}""#, + doctype.name, doctype.pub_identifier, doctype.sys_identifier + ) + }; + + format!("", doctype_text.trim()) + } + _ => "".to_string(), + } + } +} diff --git a/src/testing/tree_construction/parser.rs b/src/testing/tree_construction/parser.rs index 3c7839c4b..6456ab540 100644 --- a/src/testing/tree_construction/parser.rs +++ b/src/testing/tree_construction/parser.rs @@ -15,13 +15,13 @@ pub const QUOTED_DOUBLE_NEWLINE: &str = ":quoted-double-newline:"; type Span<'a> = LocatedSpan<&'a str>; -#[derive(Copy, Clone, Debug, PartialEq)] +#[derive(Copy, Clone, Debug, PartialEq, Default)] pub struct Position { pub line: usize, pub col: usize, } -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone)] pub enum ErrorSpec { Message(String), @@ -42,14 +42,15 @@ pub enum ErrorSpec { }, } -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone, Default)] pub enum ScriptMode { ScriptOn, ScriptOff, + #[default] Both, } -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Default, Clone)] pub struct TestSpec { /// #data section pub data: String, @@ -296,7 +297,7 @@ fn test(i: Span) -> IResult { TestSpec { position, - data: data.to_string(), + data: data.to_string().trim_end().to_string(), errors, new_errors: new_errors.unwrap_or_default(), script_mode, @@ -307,7 +308,7 @@ fn test(i: Span) -> IResult { )(i) } -pub fn parse_str(i: &str) -> Result> { +pub fn parse_fixture(i: &str) -> Result> { // Deal with a corner case that makes it hard to parse tricky01.dat. let input = i.replace("\"\n\n\"", QUOTED_DOUBLE_NEWLINE).to_owned() + "\n"; @@ -333,6 +334,9 @@ mod tests { #[test] fn parse_data() { + let (_, s) = data("#data\n Test \n#errors\n".into()).unwrap(); + assert_eq!(*s.fragment(), "Test \n"); + let (_, s) = data("#data\n Test \n#errors".into()).unwrap(); assert_eq!(*s.fragment(), "Test \n"); @@ -387,7 +391,7 @@ Test "#, ); - assert_eq!(test.data, "Test\n"); + assert_eq!(test.data, "Test"); assert_eq!( test.errors, &[ErrorSpec::Location { diff --git a/src/testing/tree_construction/result.rs b/src/testing/tree_construction/result.rs new file mode 100644 index 000000000..5fc983ca8 --- /dev/null +++ b/src/testing/tree_construction/result.rs @@ -0,0 +1,69 @@ +#[derive(PartialEq)] +pub enum ResultStatus { + /// This is a correct match + Success, + /// This line is found in expected, but wasn't found in actual (too few lines) + Missing, + /// This line is found in actual, but wasn't found in expected (too many lines) + Additional, + /// The error is correct, but is throwed on an incorrect position + IncorrectPosition, + // The tree line or message mismatches between expected and actual + Mismatch, +} + +/// The result for a single line in the tree +pub struct TreeLineResult { + /// Index (line number) inside the generated tree + pub index: usize, + /// Actual found line + pub actual: String, + /// Expected line + pub expected: String, + /// Result + pub result: ResultStatus, +} + +/// Defines a message that is generated on a certain line and column +pub struct ErrorMessagePosition { + /// The error message + pub message: String, + /// The line on which the error was thrown by the parser + pub line: usize, + /// The column on which the error was thrown by the parser + pub col: usize, +} + +/// The result for a single error +pub struct ErrorResult { + /// The actual error found during testing + pub actual: ErrorMessagePosition, + /// The expected error that should have been found + pub expected: ErrorMessagePosition, + /// Index of the error (0 based) + pub index: usize, + /// The actual result + pub result: ResultStatus, +} + +/// A combined result from a single test with all the result from the tree and parser errors +#[derive(Default)] +pub struct TestResult { + /// Results of each line in the node tree (as generated by the parser) + pub tree_results: Vec, + /// Results of each error generated by the parser + pub error_results: Vec, +} + +impl TestResult { + /// Returns true when both results and error_results are either empty or have a result that is not Success + pub fn is_success(&self) -> bool { + self.tree_results + .iter() + .all(|r| r.result == ResultStatus::Success) + && self + .error_results + .iter() + .all(|r| r.result == ResultStatus::Success) + } +} diff --git a/tests/tree_construction.rs b/tests/tree_construction.rs index 85005b784..45a1a3f99 100644 --- a/tests/tree_construction.rs +++ b/tests/tree_construction.rs @@ -1,6 +1,7 @@ -use gosub_engine::testing::tree_construction::fixture_from_filename; -use lazy_static::lazy_static; -use std::collections::HashSet; +use gosub_engine::testing::tree_construction::fixture::{ + get_fixture_root_path, read_fixture_from_path, +}; +use gosub_engine::testing::tree_construction::Harness; use test_case::test_case; const DISABLED_CASES: &[&str] = &[ @@ -8,20 +9,13 @@ const DISABLED_CASES: &[&str] = &[ "