Cleanup

gosub-io · Oct 2, 2023 · d7e8e31 · d7e8e31
1 parent 2b26150
commit d7e8e31
Show file tree

Hide file tree

Showing 3 changed files with 146 additions and 89 deletions.
diff --git a/src/bin/parser_test.rs b/src/bin/parser_test.rs
@@ -9,24 +9,35 @@ use std::path::PathBuf;
 use std::{env, fs, io};
 
 pub struct TestResults {
-    tests: usize,           // Number of tests (as defined in the suite)
-    assertions: usize, // Number of assertions (different combinations of input/output per test)
-    succeeded: usize,  // How many succeeded assertions
-    failed: usize,     // How many failed assertions
-    failed_position: usize, // How many failed assertions where position is not correct
+    /// Number of tests (as defined in the suite)
+    tests: usize,
+    /// Number of assertions (different combinations of input/output per test)
+    assertions: usize,
+    /// How many succeeded assertions
+    succeeded: usize,
+    /// How many failed assertions
+    failed: usize,
+    /// How many failed assertions where position is not correct
+    failed_position: usize,
 }
 
 struct Test {
-    file_path: String,              // Filename of the test
-    line: usize,                    // Line number of the test
-    data: String,                   // input stream
-    errors: Vec<Error>,             // errors
-    document: Vec<String>,          // document tree
-    document_fragment: Vec<String>, // fragment
+    /// Filename of the test
+    file_path: String,
+    /// Line number of the test
+    line: usize,
+    /// input stream
+    data: String,
+    /// errors
+    errors: Vec<Error>,
+    /// document tree
+    document: Vec<String>,
+    /// fragment
+    document_fragment: Vec<String>,
 }
 
 fn main() -> io::Result<()> {
-    let default_dir = "./html5lib-tests";
+    let default_dir = "./tests/data/html5lib-tests";
     let dir = env::args().nth(1).unwrap_or(default_dir.to_string());
 
     let mut results = TestResults {
@@ -41,10 +52,12 @@ fn main() -> io::Result<()> {
         let entry = entry?;
         let path = entry.path();
 
+        // Only run the tests1.dat file for now
         if !path.ends_with("tests1.dat") {
             continue;
         }
 
+        // Skip dirs and non-dat files
         if !path.is_file() || path.extension().unwrap() != "dat" {
             continue;
         }
@@ -54,10 +67,7 @@ fn main() -> io::Result<()> {
 
         let mut test_idx = 1;
         for test in tests {
-            if test_idx == 23 {
-                run_tree_test(test_idx, &test, &mut results);
-            }
-
+            run_tree_test(test_idx, &test, &mut results);
             test_idx += 1;
         }
     }
@@ -66,6 +76,7 @@ fn main() -> io::Result<()> {
     Ok(())
 }
 
+/// Read given tests file and extract all test data
 fn read_tests(file_path: PathBuf) -> io::Result<Vec<Test>> {
     let file = File::open(file_path.clone())?;
     let reader = BufReader::new(file);
@@ -149,6 +160,7 @@ fn run_tree_test(test_idx: usize, test: &Test, results: &mut TestResults) {
 
     let old_failed = results.failed;
 
+    // Do the actual parsing
     let mut is = InputStream::new();
     is.read_from_str(test.data.as_str(), None);
 
@@ -169,18 +181,23 @@ fn run_tree_test(test_idx: usize, test: &Test, results: &mut TestResults) {
             test.errors.len(),
             parse_errors.len()
         );
-    //     for want_err in &test.errors {
-    //         println!("     * Want: '{}' at {}:{}", want_err.code, want_err.line, want_err.col);
-    //     }
-    //     for got_err in &parse_errors {
-    //         println!("     * Got: '{}' at {}:{}", got_err.message, got_err.line, got_err.col);
-    //     }
-    //     results.assertions += 1;
-    //     results.failed += 1;
+
+        for want_err in &test.errors {
+            println!("     * Want: '{}' at {}:{}", want_err.code, want_err.line, want_err.col);
+        }
+        for got_err in &parse_errors {
+            println!("     * Got: '{}' at {}:{}", got_err.message, got_err.line, got_err.col);
+        }
+        results.assertions += 1;
+        results.failed += 1;
     } else {
         println!("✅ Found {} errors", parse_errors.len());
     }
-    //
+
+    // For now, we skip the tests that check for errors as most of the errors do not match
+    // with the actual tests, as these errors are specific to html5lib. Either we reuse them
+    // or have some kind of mapping to our own errors if we decide to use our custom errors.
+
     // // Check each error messages
     // let mut idx = 0;
     // for error in &test.errors {
@@ -217,6 +234,7 @@ fn run_tree_test(test_idx: usize, test: &Test, results: &mut TestResults) {
     //     idx += 1;
     // }
 
+    // Display additional data if a failure is found
     if old_failed != results.failed {
         println!("----------------------------------------");
         println!("📄 Input stream: ");
@@ -230,17 +248,21 @@ fn run_tree_test(test_idx: usize, test: &Test, results: &mut TestResults) {
             println!("{}", line);
         }
 
-        std::process::exit(1);
+        // // End at the first failure
+        // std::process::exit(1);
     }
 
     println!("----------------------------------------");
 }
 
 #[derive(PartialEq)]
 enum ErrorResult {
-    Success,         // Found the correct error
-    Failure,         // Didn't find the error (not even with incorrect position)
-    PositionFailure, // Found the error, but on an incorrect position
+    /// Found the correct error
+    Success,
+    /// Didn't find the error (not even with incorrect position)
+    Failure,
+    /// Found the error, but on an incorrect position
+    PositionFailure,
 }
 
 #[derive(PartialEq)]
@@ -251,6 +273,9 @@ pub struct Error {
 }
 
 fn match_document_tree(document: &Document, expected: &Vec<String>) -> bool {
+    // We need a better tree match system. Right now we match the tree based on the (debug) output
+    // of the tree. Instead, we should generate a document-tree from the expected output and compare
+    // it against the current generated tree.
     match_node(0, -1, -1, document, expected).is_some()
 }
 

diff --git a/src/html5_parser/parser/adoption_agency.rs b/src/html5_parser/parser/adoption_agency.rs
@@ -29,7 +29,7 @@ impl<'a> Html5Parser<'a> {
                 .any(|elem| elem == &ActiveElement::NodeId(current_node_id))
         {
             self.open_elements.pop();
-            return AdoptionResult::Completed
+            return AdoptionResult::Completed;
         }
 
         // Step 3
@@ -39,7 +39,7 @@ impl<'a> Html5Parser<'a> {
         loop {
             // Step 4.1
             if outer_loop_counter >= ADOPTION_AGENCY_OUTER_LOOP_DEPTH {
-                return AdoptionResult::Completed
+                return AdoptionResult::Completed;
             }
 
             // Step 4.2
@@ -48,25 +48,31 @@ impl<'a> Html5Parser<'a> {
             // Step 4.3
             let formatting_element_idx = self.find_formatting_element(subject);
             if formatting_element_idx.is_none() {
-                return AdoptionResult::ProcessAsAnyOther
+                return AdoptionResult::ProcessAsAnyOther;
             }
 
-            let formatting_element_idx = formatting_element_idx.expect("formatting element not found");
-            let formatting_element_id = self.active_formatting_elements[formatting_element_idx].node_id().expect("formatting element not found");
-            let formatting_element_node= self.document.get_node_by_id(formatting_element_id).expect("formatting element not found").clone();
+            let formatting_element_idx =
+                formatting_element_idx.expect("formatting element not found");
+            let formatting_element_id = self.active_formatting_elements[formatting_element_idx]
+                .node_id()
+                .expect("formatting element not found");
+            let formatting_element_node = self
+                .document
+                .get_node_by_id(formatting_element_id)
+                .expect("formatting element not found")
+                .clone();
 
             // Step 4.4
             if !open_elements_has_id!(self, formatting_element_id) {
                 self.parse_error("formatting element not in open elements");
                 self.active_formatting_elements
                     .remove(formatting_element_idx);
 
-                return AdoptionResult::Completed
+                return AdoptionResult::Completed;
             }
 
             // Step 4.5
-            if !self.is_in_scope(&formatting_element_node.name, Scope::Regular)
-            {
+            if !self.is_in_scope(&formatting_element_node.name, Scope::Regular) {
                 self.parse_error("formatting element not in scope");
                 return AdoptionResult::Completed;
             }
@@ -93,20 +99,34 @@ impl<'a> Html5Parser<'a> {
                 }
 
                 // Remove the formatting element from the list of active formatting elements
-                if let Some(pos) = self.active_formatting_elements.iter().position(|elem| elem == &ActiveElement::NodeId(formatting_element_id)) {
+                if let Some(pos) = self
+                    .active_formatting_elements
+                    .iter()
+                    .position(|elem| elem == &ActiveElement::NodeId(formatting_element_id))
+                {
                     self.active_formatting_elements.remove(pos);
                 }
 
-                return AdoptionResult::Completed
+                return AdoptionResult::Completed;
             }
 
             let furthest_block_idx = furthest_block_idx.expect("furthest block not found");
 
-            let node_id = *self.open_elements.get(furthest_block_idx).expect("node not found");
-            let furthest_block = self.document.get_node_by_id(node_id).expect("node not found").clone();
+            let node_id = *self
+                .open_elements
+                .get(furthest_block_idx)
+                .expect("node not found");
+            let furthest_block = self
+                .document
+                .get_node_by_id(node_id)
+                .expect("node not found")
+                .clone();
 
             // Step 4.9
-            let common_ancestor_id = *self.open_elements.get(formatting_element_idx + 1).expect("node not found");
+            let common_ancestor_id = *self
+                .open_elements
+                .get(formatting_element_idx + 1)
+                .expect("node not found");
 
             // Step 4.10
             let mut bookmark = formatting_element_idx;
@@ -184,14 +204,12 @@ impl<'a> Html5Parser<'a> {
 
             // Step 4.15
             let new_element = match formatting_element_node.data {
-                NodeData::Element { ref attributes, .. } => {
-                     Node::new_element(
-                        formatting_element_node.name.as_str(),
-                        attributes.clone(),
-                        HTML_NAMESPACE,
-                    )
-                }
-                _ => panic!("formatting element is not an element")
+                NodeData::Element { ref attributes, .. } => Node::new_element(
+                    formatting_element_node.name.as_str(),
+                    attributes.clone(),
+                    HTML_NAMESPACE,
+                ),
+                _ => panic!("formatting element is not an element"),
             };
 
             // Step 4.16
@@ -211,7 +229,8 @@ impl<'a> Html5Parser<'a> {
             // Step 4.19
             // Remove formatting element from the stack of open elements, and insert the new element into the stack of open elements immediately below the position of furthest block in that stack.
             self.open_elements.remove(formatting_element_idx);
-            self.open_elements.insert(furthest_block_idx - 1, new_element_id);
+            self.open_elements
+                .insert(furthest_block_idx - 1, new_element_id);
         }
     }
 
@@ -251,7 +270,10 @@ impl<'a> Html5Parser<'a> {
         // Iterate
         for idx in (index_of_formatting_element..self.open_elements.len()).rev() {
             let element_id = self.open_elements[idx];
-            let element = self.document.get_node_by_id(element_id).expect("element not found");
+            let element = self
+                .document
+                .get_node_by_id(element_id)
+                .expect("element not found");
 
             if element.is_special() {
                 return Some(idx);
@@ -261,7 +283,6 @@ impl<'a> Html5Parser<'a> {
         None
     }
 
-
     // Find the formatting element with the given subject between the end of the list and the first marker (or start when there is no marker)
     fn find_formatting_element(&self, subject: &str) -> Option<usize> {
         if self.active_formatting_elements.is_empty() {
@@ -273,15 +294,15 @@ impl<'a> Html5Parser<'a> {
                 ActiveElement::Marker => {
                     // Marker found, do not continue
                     break;
-                },
+                }
                 ActiveElement::NodeId(node_id) => {
                     // Check if the given node is an element with the given subject
-                    let node = self.document.get_node_by_id(node_id).expect("node not found").clone();
-                    if let NodeData::Element {
-                        ref name,
-                        ..
-                    } = node.data
-                    {
+                    let node = self
+                        .document
+                        .get_node_by_id(node_id)
+                        .expect("node not found")
+                        .clone();
+                    if let NodeData::Element { ref name, .. } = node.data {
                         if name == subject {
                             return Some(idx);
                         }
@@ -292,4 +313,4 @@ impl<'a> Html5Parser<'a> {
 
         None
     }
-}
+}