Skip to content

Commit

Permalink
Cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
jaytaph committed Oct 2, 2023
1 parent 2b26150 commit d7e8e31
Show file tree
Hide file tree
Showing 3 changed files with 146 additions and 89 deletions.
83 changes: 54 additions & 29 deletions src/bin/parser_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,35 @@ use std::path::PathBuf;
use std::{env, fs, io};

pub struct TestResults {
tests: usize, // Number of tests (as defined in the suite)
assertions: usize, // Number of assertions (different combinations of input/output per test)
succeeded: usize, // How many succeeded assertions
failed: usize, // How many failed assertions
failed_position: usize, // How many failed assertions where position is not correct
/// Number of tests (as defined in the suite)
tests: usize,
/// Number of assertions (different combinations of input/output per test)
assertions: usize,
/// How many succeeded assertions
succeeded: usize,
/// How many failed assertions
failed: usize,
/// How many failed assertions where position is not correct
failed_position: usize,
}

struct Test {
file_path: String, // Filename of the test
line: usize, // Line number of the test
data: String, // input stream
errors: Vec<Error>, // errors
document: Vec<String>, // document tree
document_fragment: Vec<String>, // fragment
/// Filename of the test
file_path: String,
/// Line number of the test
line: usize,
/// input stream
data: String,
/// errors
errors: Vec<Error>,
/// document tree
document: Vec<String>,
/// fragment
document_fragment: Vec<String>,
}

fn main() -> io::Result<()> {
let default_dir = "./html5lib-tests";
let default_dir = "./tests/data/html5lib-tests";
let dir = env::args().nth(1).unwrap_or(default_dir.to_string());

let mut results = TestResults {
Expand All @@ -41,10 +52,12 @@ fn main() -> io::Result<()> {
let entry = entry?;
let path = entry.path();

// Only run the tests1.dat file for now
if !path.ends_with("tests1.dat") {
continue;
}

// Skip dirs and non-dat files
if !path.is_file() || path.extension().unwrap() != "dat" {
continue;
}
Expand All @@ -54,10 +67,7 @@ fn main() -> io::Result<()> {

let mut test_idx = 1;
for test in tests {
if test_idx == 23 {
run_tree_test(test_idx, &test, &mut results);
}

run_tree_test(test_idx, &test, &mut results);
test_idx += 1;
}
}
Expand All @@ -66,6 +76,7 @@ fn main() -> io::Result<()> {
Ok(())
}

/// Read given tests file and extract all test data
fn read_tests(file_path: PathBuf) -> io::Result<Vec<Test>> {
let file = File::open(file_path.clone())?;
let reader = BufReader::new(file);
Expand Down Expand Up @@ -149,6 +160,7 @@ fn run_tree_test(test_idx: usize, test: &Test, results: &mut TestResults) {

let old_failed = results.failed;

// Do the actual parsing
let mut is = InputStream::new();
is.read_from_str(test.data.as_str(), None);

Expand All @@ -169,18 +181,23 @@ fn run_tree_test(test_idx: usize, test: &Test, results: &mut TestResults) {
test.errors.len(),
parse_errors.len()
);
// for want_err in &test.errors {
// println!(" * Want: '{}' at {}:{}", want_err.code, want_err.line, want_err.col);
// }
// for got_err in &parse_errors {
// println!(" * Got: '{}' at {}:{}", got_err.message, got_err.line, got_err.col);
// }
// results.assertions += 1;
// results.failed += 1;

for want_err in &test.errors {
println!(" * Want: '{}' at {}:{}", want_err.code, want_err.line, want_err.col);
}
for got_err in &parse_errors {
println!(" * Got: '{}' at {}:{}", got_err.message, got_err.line, got_err.col);
}
results.assertions += 1;
results.failed += 1;
} else {
println!("✅ Found {} errors", parse_errors.len());
}
//

// For now, we skip the tests that check for errors, as most of the errors do not match
// the actual tests because these errors are specific to html5lib. Either we reuse them
// or have some kind of mapping to our own errors if we decide to use our custom errors.

// // Check each error messages
// let mut idx = 0;
// for error in &test.errors {
Expand Down Expand Up @@ -217,6 +234,7 @@ fn run_tree_test(test_idx: usize, test: &Test, results: &mut TestResults) {
// idx += 1;
// }

// Display additional data if a failure is found
if old_failed != results.failed {
println!("----------------------------------------");
println!("📄 Input stream: ");
Expand All @@ -230,17 +248,21 @@ fn run_tree_test(test_idx: usize, test: &Test, results: &mut TestResults) {
println!("{}", line);
}

std::process::exit(1);
// // End at the first failure
// std::process::exit(1);
}

println!("----------------------------------------");
}

#[derive(PartialEq)]
enum ErrorResult {
Success, // Found the correct error
Failure, // Didn't find the error (not even with incorrect position)
PositionFailure, // Found the error, but on an incorrect position
/// Found the correct error
Success,
/// Didn't find the error (not even with incorrect position)
Failure,
/// Found the error, but on an incorrect position
PositionFailure,
}

#[derive(PartialEq)]
Expand All @@ -251,6 +273,9 @@ pub struct Error {
}

/// Reports whether `document` matches the `expected` strings.
///
/// Returns `true` when `match_node(0, -1, -1, document, expected)` yields `Some`,
/// and `false` when it yields `None`.
fn match_document_tree(document: &Document, expected: &Vec<String>) -> bool {
// We need a better tree match system. Right now we match the tree based on the (debug) output
// of the tree. Instead, we should generate a document-tree from the expected output and compare
// it against the current generated tree.
match_node(0, -1, -1, document, expected).is_some()
}

Expand Down
87 changes: 54 additions & 33 deletions src/html5_parser/parser/adoption_agency.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ impl<'a> Html5Parser<'a> {
.any(|elem| elem == &ActiveElement::NodeId(current_node_id))
{
self.open_elements.pop();
return AdoptionResult::Completed
return AdoptionResult::Completed;
}

// Step 3
Expand All @@ -39,7 +39,7 @@ impl<'a> Html5Parser<'a> {
loop {
// Step 4.1
if outer_loop_counter >= ADOPTION_AGENCY_OUTER_LOOP_DEPTH {
return AdoptionResult::Completed
return AdoptionResult::Completed;
}

// Step 4.2
Expand All @@ -48,25 +48,31 @@ impl<'a> Html5Parser<'a> {
// Step 4.3
let formatting_element_idx = self.find_formatting_element(subject);
if formatting_element_idx.is_none() {
return AdoptionResult::ProcessAsAnyOther
return AdoptionResult::ProcessAsAnyOther;
}

let formatting_element_idx = formatting_element_idx.expect("formatting element not found");
let formatting_element_id = self.active_formatting_elements[formatting_element_idx].node_id().expect("formatting element not found");
let formatting_element_node= self.document.get_node_by_id(formatting_element_id).expect("formatting element not found").clone();
let formatting_element_idx =
formatting_element_idx.expect("formatting element not found");
let formatting_element_id = self.active_formatting_elements[formatting_element_idx]
.node_id()
.expect("formatting element not found");
let formatting_element_node = self
.document
.get_node_by_id(formatting_element_id)
.expect("formatting element not found")
.clone();

// Step 4.4
if !open_elements_has_id!(self, formatting_element_id) {
self.parse_error("formatting element not in open elements");
self.active_formatting_elements
.remove(formatting_element_idx);

return AdoptionResult::Completed
return AdoptionResult::Completed;
}

// Step 4.5
if !self.is_in_scope(&formatting_element_node.name, Scope::Regular)
{
if !self.is_in_scope(&formatting_element_node.name, Scope::Regular) {
self.parse_error("formatting element not in scope");
return AdoptionResult::Completed;
}
Expand All @@ -93,20 +99,34 @@ impl<'a> Html5Parser<'a> {
}

// Remove the formatting element from the list of active formatting elements
if let Some(pos) = self.active_formatting_elements.iter().position(|elem| elem == &ActiveElement::NodeId(formatting_element_id)) {
if let Some(pos) = self
.active_formatting_elements
.iter()
.position(|elem| elem == &ActiveElement::NodeId(formatting_element_id))
{
self.active_formatting_elements.remove(pos);
}

return AdoptionResult::Completed
return AdoptionResult::Completed;
}

let furthest_block_idx = furthest_block_idx.expect("furthest block not found");

let node_id = *self.open_elements.get(furthest_block_idx).expect("node not found");
let furthest_block = self.document.get_node_by_id(node_id).expect("node not found").clone();
let node_id = *self
.open_elements
.get(furthest_block_idx)
.expect("node not found");
let furthest_block = self
.document
.get_node_by_id(node_id)
.expect("node not found")
.clone();

// Step 4.9
let common_ancestor_id = *self.open_elements.get(formatting_element_idx + 1).expect("node not found");
let common_ancestor_id = *self
.open_elements
.get(formatting_element_idx + 1)
.expect("node not found");

// Step 4.10
let mut bookmark = formatting_element_idx;
Expand Down Expand Up @@ -184,14 +204,12 @@ impl<'a> Html5Parser<'a> {

// Step 4.15
let new_element = match formatting_element_node.data {
NodeData::Element { ref attributes, .. } => {
Node::new_element(
formatting_element_node.name.as_str(),
attributes.clone(),
HTML_NAMESPACE,
)
}
_ => panic!("formatting element is not an element")
NodeData::Element { ref attributes, .. } => Node::new_element(
formatting_element_node.name.as_str(),
attributes.clone(),
HTML_NAMESPACE,
),
_ => panic!("formatting element is not an element"),
};

// Step 4.16
Expand All @@ -211,7 +229,8 @@ impl<'a> Html5Parser<'a> {
// Step 4.19
// Remove formatting element from the stack of open elements, and insert the new element into the stack of open elements immediately below the position of furthest block in that stack.
self.open_elements.remove(formatting_element_idx);
self.open_elements.insert(furthest_block_idx - 1, new_element_id);
self.open_elements
.insert(furthest_block_idx - 1, new_element_id);
}
}

Expand Down Expand Up @@ -251,7 +270,10 @@ impl<'a> Html5Parser<'a> {
// Iterate
for idx in (index_of_formatting_element..self.open_elements.len()).rev() {
let element_id = self.open_elements[idx];
let element = self.document.get_node_by_id(element_id).expect("element not found");
let element = self
.document
.get_node_by_id(element_id)
.expect("element not found");

if element.is_special() {
return Some(idx);
Expand All @@ -261,7 +283,6 @@ impl<'a> Html5Parser<'a> {
None
}


// Find the formatting element with the given subject between the end of the list and the first marker (or start when there is no marker)
fn find_formatting_element(&self, subject: &str) -> Option<usize> {
if self.active_formatting_elements.is_empty() {
Expand All @@ -273,15 +294,15 @@ impl<'a> Html5Parser<'a> {
ActiveElement::Marker => {
// Marker found, do not continue
break;
},
}
ActiveElement::NodeId(node_id) => {
// Check if the given node is an element with the given subject
let node = self.document.get_node_by_id(node_id).expect("node not found").clone();
if let NodeData::Element {
ref name,
..
} = node.data
{
let node = self
.document
.get_node_by_id(node_id)
.expect("node not found")
.clone();
if let NodeData::Element { ref name, .. } = node.data {
if name == subject {
return Some(idx);
}
Expand All @@ -292,4 +313,4 @@ impl<'a> Html5Parser<'a> {

None
}
}
}
Loading

0 comments on commit d7e8e31

Please sign in to comment.