From 89296b88a0acfa9ce68c6fde836c58faf30e6c32 Mon Sep 17 00:00:00 2001 From: Sam Atkins Date: Tue, 7 Jan 2025 13:50:19 +0000 Subject: [PATCH] LibWeb: Bring fragment parsing up to spec Corresponds to https://github.com/whatwg/html/pull/10874 Also, parse_fragment() returns ExceptionOr, so stop voiding the error from append_child(). --- Libraries/LibWeb/DOM/Element.cpp | 11 +++-- Libraries/LibWeb/HTML/Parser/HTMLParser.cpp | 48 +++++++++++---------- 2 files changed, 30 insertions(+), 29 deletions(-) diff --git a/Libraries/LibWeb/DOM/Element.cpp b/Libraries/LibWeb/DOM/Element.cpp index aaabbbae035c..1a77f1d89051 100644 --- a/Libraries/LibWeb/DOM/Element.cpp +++ b/Libraries/LibWeb/DOM/Element.cpp @@ -1545,18 +1545,17 @@ WebIDL::ExceptionOr> Element::parse_fragment(Stri dbgln("FIXME: Handle fragment parsing of XML documents"); } - // 3. Let new children be the result of invoking algorithm given markup, with context set to context. + // 3. Let newChildren be the result of invoking algorithm given context and markup. auto new_children = algorithm(*this, markup, HTML::HTMLParser::AllowDeclarativeShadowRoots::No); // 4. Let fragment be a new DocumentFragment whose node document is context's node document. auto fragment = realm().create(document()); - // 5. Append each Node in new children to fragment (in tree order). - for (auto& child : new_children) { - // I don't know if this can throw here, but let's be safe. - (void)TRY(fragment->append_child(*child)); - } + // 5. For each node of newChildren, in tree order: append node to fragment. + for (auto& child : new_children) + TRY(fragment->append_child(*child)); + // 6. Return fragment. return fragment; } diff --git a/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp b/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp index 923b945bc89e..30095350d72d 100644 --- a/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp +++ b/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp @@ -4453,7 +4453,7 @@ DOM::Document& HTMLParser::document() // https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments Vector> HTMLParser::parse_html_fragment(DOM::Element& context_element, StringView markup, AllowDeclarativeShadowRoots allow_declarative_shadow_roots) { - // 1. Create a new Document node, and mark it as being an HTML document. + // 1. Let document be a Document node whose type is "html". auto temp_document = DOM::Document::create_for_fragment_parsing(context_element.realm()); temp_document->set_document_type(DOM::Document::Type::HTML); @@ -4461,21 +4461,24 @@ Vector> HTMLParser::parse_html_fragment(DOM::Element& contex // This is required for Document::parse_url() to work inside iframe srcdoc documents. temp_document->set_about_base_url(context_element.document().about_base_url()); - // 2. If the node document of the context element is in quirks mode, then let the Document be in quirks mode. - // Otherwise, the node document of the context element is in limited-quirks mode, then let the Document be in limited-quirks mode. - // Otherwise, leave the Document in no-quirks mode. - temp_document->set_quirks_mode(context_element.document().mode()); + // 2. If context's node document is in quirks mode, then set document's mode to "quirks". + if (context_element.document().in_quirks_mode()) + temp_document->set_quirks_mode(DOM::QuirksMode::Yes); - // 3. If allowDeclarativeShadowRoots is true, then set Document's allow declarative shadow roots to true. + // 3. Otherwise, if context's node document is in limited-quirks mode, then set document's mode to "limited-quirks". + else if (context_element.document().in_limited_quirks_mode()) + temp_document->set_quirks_mode(DOM::QuirksMode::Limited); + + // 4. If allowDeclarativeShadowRoots is true, then set document's allow declarative shadow roots to true. if (allow_declarative_shadow_roots == AllowDeclarativeShadowRoots::Yes) temp_document->set_allow_declarative_shadow_roots(true); - // 4. Create a new HTML parser, and associate it with the just created Document node. + // 5. Create a new HTML parser, and associate it with document. auto parser = HTMLParser::create(*temp_document, markup, "utf-8"sv); parser->m_context_element = context_element; parser->m_parsing_fragment = true; - // 5. Set the state of the HTML parser's tokenization stage as follows, switching on the context element: + // 6. Set the state of the HTML parser's tokenization stage as follows, switching on the context element: // - title // - textarea if (context_element.local_name().is_one_of(HTML::TagNames::title, HTML::TagNames::textarea)) { @@ -4512,37 +4515,36 @@ Vector> HTMLParser::parse_html_fragment(DOM::Element& contex // Leave the tokenizer in the data state. } - // 6. Let root be a new html element with no attributes. - auto root = create_element(context_element.document(), HTML::TagNames::html, Namespace::HTML).release_value_but_fixme_should_propagate_errors(); + // 7. Let root be the result of creating an element given document, "html", and the HTML namespace. + auto root = MUST(create_element(context_element.document(), HTML::TagNames::html, Namespace::HTML)); - // 7. Append the element root to the Document node created above. + // 8. Append root to document. MUST(temp_document->append_child(root)); - // 8. Set up the parser's stack of open elements so that it contains just the single element root. + // 9. Set up the HTML parser's stack of open elements so that it contains just the single element root. parser->m_stack_of_open_elements.push(root); - // 9. If the context element is a template element, - if (context_element.local_name() == HTML::TagNames::template_) { - // push "in template" onto the stack of template insertion modes so that it is the new current template insertion mode. + // 10. If context is a template element, then push "in template" onto the stack of template insertion modes + // so that it is the new current template insertion mode. + if (context_element.local_name() == HTML::TagNames::template_) parser->m_stack_of_template_insertion_modes.append(InsertionMode::InTemplate); - } - // FIXME: 10. Create a start tag token whose name is the local name of context and whose attributes are the attributes of context. - // Let this start tag token be the start tag token of the context node, e.g. for the purposes of determining if it is an HTML integration point. + // FIXME: 11. Create a start tag token whose name is the local name of context and whose attributes are the attributes of context. + // Let this start tag token be the start tag token of context; e.g. for the purposes of determining if it is an HTML integration point. - // 11. Reset the parser's insertion mode appropriately. + // 12. Reset the parser's insertion mode appropriately. parser->reset_the_insertion_mode_appropriately(); - // 12. Set the parser's form element pointer to the nearest node to the context element that is a form element + // 13. Set the HTML parser's form element pointer to the nearest node to context that is a form element // (going straight up the ancestor chain, and including the element itself, if it is a form element), if any. // (If there is no such form element, the form element pointer keeps its initial value, null.) parser->m_form_element = context_element.first_ancestor_of_type(); - // 13. Place the input into the input stream for the HTML parser just created. The encoding confidence is irrelevant. - // 14. Start the parser and let it run until it has consumed all the characters just inserted into the input stream. + // 14. Place the input into the input stream for the HTML parser just created. The encoding confidence is irrelevant. + // 15. Start the HTML parser and let it run until it has consumed all the characters just inserted into the input stream. parser->run(context_element.document().url()); - // 15. Return the child nodes of root, in tree order. + // 16. Return root's children, in tree order. Vector> children; while (GC::Ptr child = root->first_child()) { MUST(root->remove_child(*child));