From 4f3755f97f6f98613149ab641344b1a1c0cc6392 Mon Sep 17 00:00:00 2001 From: kiyoshika Date: Wed, 8 Nov 2023 21:24:02 -0500 Subject: [PATCH] add remaining conditions TODO: need to write tests for new conditions. Too tired right now. Also squash commits when final changes are ready --- src/html5/parser/document.rs | 80 +++++++++++++++++++++++++++++++----- 1 file changed, 69 insertions(+), 11 deletions(-) diff --git a/src/html5/parser/document.rs b/src/html5/parser/document.rs index 15384a0e..78dbbd8a 100755 --- a/src/html5/parser/document.rs +++ b/src/html5/parser/document.rs @@ -298,7 +298,7 @@ impl Document { self.arena.get_node_mut(*node_id) } - /// Retrieves the next sibling NodeId (to the right) of the current node or None. + /// Retrieves the next sibling NodeId (to the right) of the reference_node or None. pub fn get_next_sibling(&self, reference_node: NodeId) -> Option<NodeId> { let node = self.get_node_by_id(reference_node)?; let parent = self.get_node_by_id(node.parent?)?; @@ -312,6 +312,7 @@ impl Document { if parent.children.len() > idx + 1 { return Some(parent.children[idx + 1]); } + None } @@ -437,6 +438,21 @@ impl Document { pub fn has_cyclic_reference(&self, node_id: NodeId, parent_id: NodeId) -> bool { has_child_recursive(&self.arena, node_id, parent_id) } + + /// Check if a given node's children contain a certain tag name + pub fn contains_child_tag(&self, node_id: NodeId, tag: &str) -> bool { + if let Some(node) = self.get_node_by_id(node_id) { + for child_id in &node.children { + if let Some(child) = self.get_node_by_id(*child_id) { + if child.name == tag { + return true; + } + } + } + } + + false + } } /// Returns true when the parent node has the child node as a child, or if any of the children of @@ -712,6 +728,55 @@ impl DocumentHandle { Ok(()) } + fn check_query_predicate( + &self, + doc_read: &Document, + current_node: &Node, + condition: &Condition, + ) -> bool { + match condition { + Condition::EqualsTag(tag) => current_node.name == *tag, 
Condition::EqualsId(id) => { + let node_data = &current_node.data; + if let NodeData::Element(element) = node_data { + if let Some(id_attr) = element.attributes.get("id") { + return *id_attr == *id; + } + } + + false + } + Condition::ContainsClass(class) => { + let node_data = &current_node.data; + if let NodeData::Element(element) = node_data { + return element.classes.contains(class.as_str()); + } + + false + } + Condition::ContainsAttribute(attribute) => { + let node_data = &current_node.data; + if let NodeData::Element(element) = node_data { + return element.attributes.contains_key(attribute); + } + + false + } + Condition::ContainsChildTag(child_tag) => { + doc_read.contains_child_tag(current_node.id, child_tag) + } + Condition::HasParentTag(parent_tag) => { + if let Some(parent_id) = current_node.parent { + // making an assumption here that the parent node is actually valid + let parent = doc_read.get_node_by_id(parent_id).unwrap(); + return parent.name == *parent_tag; + } + + false + } + } + } + /// Perform a single query against the document. /// If query search type is uninitialized, returns an error. /// Otherwise, returns a vector of NodeIds that match the predicate in tree order (preorder depth-first.) @@ -743,16 +808,9 @@ impl DocumentHandle { let mut predicate_result: bool = true; for condition in &query.conditions { - match condition { - Condition::EqualsTag(tag) => { - if current_node.name != *tag { - predicate_result = false; - } - } - // TODO: implement remaining conditions - _ => { - predicate_result = false; - } + if !self.check_query_predicate(&doc_read, current_node, condition) { + predicate_result = false; + break; } }