Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Block Processor for Multibyte Processing and Synced Patterns #876

Merged
merged 5 commits into from
Dec 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .changeset/happy-radios-move.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
"@headstartwp/headstartwp": patch
---

Added - Improved tests for the Gutenberg block attribute processing
Fixed - Gutenberg post content block attribute processing for Synced Patterns, and added support for multibyte characters.

221 changes: 159 additions & 62 deletions wp/headless-wp/includes/classes/Integrations/Gutenberg.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@
namespace HeadlessWP\Integrations;

use DOMDocument;
use DOMElement;
use Exception;
use WP_Block;
use WP_HTML_Tag_Processor;

/**
* The Gutenberg integration class
Expand All @@ -21,32 +24,75 @@ public function register() {
add_filter( 'render_block', [ $this, 'render_block' ], 10, 3 );
}

/**
 * Check if the current block will bypass block attribute processing
 *
 * Synced Pattern (core/block) blocks bypass processing by default; the
 * decision can be overridden through the
 * `tenup_headless_wp_render_block_bypass_block_attributes` filter.
 *
 * @param string   $block_name     The block name
 * @param WP_Block $block_instance The block instance
 *
 * @return bool
 */
protected function bypass_block_attributes( string $block_name, WP_Block $block_instance ): bool {
	$is_synced_pattern = 'core/block' === $block_name;

	/**
	 * Filter whether to bypass adding block attributes to the current block's HTML
	 * - Defaults to match Synced Pattern (core/block) blocks
	 *
	 * @param bool     $is_synced_pattern Whether the block is a synced pattern block
	 * @param string   $block_name        The block's name
	 * @param WP_Block $block_instance    The block's instance
	 */
	$bypass = apply_filters( 'tenup_headless_wp_render_block_bypass_block_attributes', $is_synced_pattern, $block_name, $block_instance );

	// Filter callbacks are untyped: a callback that returns null (or an array) would
	// otherwise violate the `bool` return type and raise a TypeError.
	return (bool) $bypass;
}

/**
 * Process the block with the DOMDocument api
 *
 * Blocks flagged by bypass_block_attributes() are re-serialized without block
 * attributes; every other block gets the data-wp-block attributes added to its
 * root node. When processing throws an Exception the original markup is returned.
 *
 * @param string   $html                   The block Markup
 * @param string   $block_name             The name of the block
 * @param string   $block_attrs_serialized The serialized block attributes
 * @param array    $block                  The block array
 * @param WP_Block $block_instance         The block instance
 *
 * @return string The processed html
 */
public function process_block_with_dom_document_api( $html, $block_name, $block_attrs_serialized, $block, $block_instance ) {
	// Nothing to parse. On PHP 8 DOMDocument::loadHTML() throws a ValueError for an
	// empty string, which is an Error and would escape the Exception fallback below.
	if ( '' === $html ) {
		return $html;
	}

	try {
		if ( $this->bypass_block_attributes( $block_name, $block_instance ) ) {
			return $this->process_dom_document_bypassed_block( $html );
		}

		return $this->process_dom_document_block( $html, $block_name, $block_attrs_serialized, $block, $block_instance );
	} catch ( Exception $e ) {
		// Best effort: fall back to the unprocessed markup.
		return $html;
	}
}

/**
* Process the block with the WP_HTML_Tag_Processor
*
* @param string $html The Block's Markup
* @param string $block_name The name of the block
* @param string $block_attrs_serialized The serialized block attributes
* @param array $block The block's array
* @param \WP_Block $block_instance The block instance
* @param string $html The block markup
* @param string $block_name The block name
* @param string $block_attrs_serialized The serialized block attributes
* @param array $block The block schema
* @param WP_Block $block_instance The block instance
*
* @return string The processed html
*/
public function process_block_with_html_tag_api( $html, $block_name, $block_attrs_serialized, $block, $block_instance ) {
try {
$doc = new \WP_HTML_Tag_Processor( $html );
$doc = new WP_HTML_Tag_Processor( $html );

if ( $doc->next_tag() ) {
if ( ! $this->bypass_block_attributes( $block_name, $block_instance ) && $doc->next_tag() ) {
$doc->set_attribute( 'data-wp-block-name', $block_name );
$doc->set_attribute( 'data-wp-block', $block_attrs_serialized );

/**
* Filter the block's before rendering
* Filter the block before rendering
*
* @param \WP_HTML_Tag_Processor $doc
* @param string $html The original block markup
* @param array $block The Block's schema
* @param \WP_Block $block_instance The block's instance
* @param WP_HTML_Tag_Processor $doc
* @param string $html The block markup
* @param array $block The block schema
* @param WP_Block $block_instance The block instance
*/
$doc = apply_filters( 'tenup_headless_wp_render_html_tag_processor_block_markup', $doc, $html, $block, $block_instance );

Expand All @@ -60,59 +106,110 @@ public function process_block_with_html_tag_api( $html, $block_name, $block_attr
}

/**
* Process the block with the DOMDocument api
* Process Standard blocks into output HTML
*
* @param string $html The Block's Markup
* @param string $block_name The name of the block
* @param string $block_attrs_serialized The serialized block attributes
* @param array $block The block's array
* @param \WP_Block $block_instance The block instance
* @param string $html The block markup
* @param string $block_name The block name
* @param string $serialized_attributes Serialized attributes
* @param array $block The block array
* @param WP_Block $block_instance The block instance
*
* @return string The processed html
* @return string
*/
public function process_block_with_dom_document_api( $html, $block_name, $block_attrs_serialized, $block, $block_instance ) {
try {
libxml_use_internal_errors( true );
$doc = new DomDocument( '1.0', 'UTF-8' );
$doc->loadHTML( htmlspecialchars_decode( htmlentities( $html ) ), LIBXML_HTML_NODEFDTD | LIBXML_HTML_NOIMPLIED );
public function process_dom_document_block(
string $html,
string $block_name,
string $serialized_attributes,
array $block,
WP_Block $block_instance
): string {
$document = $this->read_converted_dom_document( $html );

$root_node = $doc->documentElement; // phpcs:ignore
// phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase
$root_node = $document->documentElement;

if ( is_null( $root_node ) ) {
return $html;
}
$attrs = $document->createAttribute( 'data-wp-block' );
$attrs->value = $serialized_attributes;

$attrs = $doc->createAttribute( 'data-wp-block' );
$attrs->value = $block_attrs_serialized;
$block_name_obj = $document->createAttribute( 'data-wp-block-name' );
$block_name_obj->value = $block_name;

$block_name_obj = $doc->createAttribute( 'data-wp-block-name' );
$block_name_obj->value = $block_name;
$root_node->appendChild( $attrs );
$root_node->appendChild( $block_name_obj );

$root_node->appendChild( $attrs );
$root_node->appendChild( $block_name_obj );
/**
* Filter the block's DOMElement before rendering
*
* @param DOMElement $root_node Root node of the DOM document
* @param string $html The original block markup
* @param array $block The block schema
* @param WP_Block $block_instance The block instance
*/
$root_node = apply_filters( 'tenup_headless_wp_render_block_markup', $root_node, $html, $block, $block_instance );

/**
* Filter the block's DOMElement before rendering
*
* @param \DOMElement $root_node
* @param string $html The original block markup
* @param array $block The Block's schema
* @param \WP_Block $block_instance The block's instance
*/
$root_node = apply_filters( 'tenup_headless_wp_render_block_markup', $root_node, $html, $block, $block_instance );
return $document->saveHTML();
}

return $doc->saveHTML();
} catch ( Exception $e ) {
return $html;
/**
 * Process block as direct, multiple HTML nodes without adding block attributes
 * - Useful for Synced Block Patterns which return a set of already processed blocks with attributes
 *
 * The markup is wrapped in a temporary <body> element so several sibling nodes
 * can be parsed, then each child node is serialized on its own and the results
 * are concatenated.
 *
 * @param string $html The block markup
 *
 * @throws Exception When the markup cannot be parsed into a DOM document
 *
 * @return string
 */
public function process_dom_document_bypassed_block( string $html ): string {
	$document  = $this->read_converted_dom_document( "<body>{$html}</body>" );
	$body      = $document->getElementsByTagName( 'body' )->item( 0 );
	$node_html = [];

	// Without the wrapper there is nothing to iterate; throw rather than return an empty string.
	if ( null === $body ) {
		throw new Exception( 'Missing body wrapper, fallback to use provided HTML.' );
	}

	// phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase
	foreach ( $body->childNodes as $child ) {
		// Serialize each child in its own document so only that node's markup is output.
		$block = new DOMDocument( '1.0', 'UTF-8' );
		$block->appendChild( $block->importNode( $child, true ) );

		$child_html   = $block->saveHTML();
		$process_html = is_string( $child_html ) ? trim( $child_html ) : '';

		// Strict comparison: empty() would also discard a node that serializes to "0".
		if ( '' !== $process_html ) {
			$node_html[] = $process_html;
		}
	}

	return implode( '', $node_html );
}

/**
 * Read an HTML Entity Decoded DOM Document which allows multi-byte characters
 *
 * Non-ASCII characters are converted to numeric HTML entities before the markup
 * is handed to DOMDocument::loadHTML(). libxml parse errors are collected
 * internally and discarded, and the previous libxml error handling mode is
 * restored afterwards.
 *
 * @param string $html HTML markup to process
 *
 * @throws Exception Empty DOM exception
 *
 * @return DOMDocument
 */
protected function read_converted_dom_document( string $html ) {
	// DOMDocument::loadHTML() throws a ValueError for an empty string on PHP 8;
	// raise the documented Exception instead.
	if ( '' === $html ) {
		throw new Exception( 'Empty DOM document, fallback to use provided HTML.' );
	}

	// The 'HTML-ENTITIES' mbstring encoding is deprecated since PHP 8.2, so encode
	// every character above ASCII as a numeric entity instead.
	$entity_encoded = mb_encode_numericentity( $html, [ 0x80, 0x10FFFF, 0, 0x1FFFFF ], 'UTF-8' );
	$converted_html = htmlspecialchars_decode( htmlentities( $entity_encoded ) );
	$document       = new DOMDocument( '1.0', 'UTF-8' );

	// Silence libxml warnings for this parse only, then put the global flag back.
	$previous_error_mode = libxml_use_internal_errors( true );
	$document->loadHTML( $converted_html, LIBXML_HTML_NODEFDTD | LIBXML_HTML_NOIMPLIED );
	libxml_clear_errors();
	libxml_use_internal_errors( $previous_error_mode );

	// phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase
	if ( null === $document->documentElement ) {
		throw new Exception( 'Empty DOM document, fallback to use provided HTML.' );
	}

	return $document;
}

/**
* Filter rendered blocks to include a data-wp-blocks attribute with block's attrs
* Filter rendered blocks to include data-wp-blocks and data-wp-block-name attributes within the block attributes
*
* @param string $html Rendered block content.
* @param array $block Block data.
* @param \WP_Block $block_instance The block's instance
* @param string $html Rendered block content
* @param array $block The block schema
* @param WP_Block $block_instance The block instance
*
* @return string
*/
Expand All @@ -129,21 +226,21 @@ public function render_block( $html, $block, $block_instance ) {
$block_attrs = $block_instance->attributes;

/**
* Filter's out the block's attributes before serializing in the block markup.
* Filter out any of the block attributes before serializing in the block markup
*
* @param array $attrs The Block's Attributes
* @param array $block The Block's schema
* @param \WP_Block $block_instance The block's instance
* @param array $attrs The block attributes
* @param array $block The block schema
* @param WP_Block $block_instance The block instance
*/
$block_attrs = apply_filters( 'tenup_headless_wp_render_block_attrs', $block_attrs, $block, $block_instance );

/**
* Filter's out the block's attributes after serialization
* Filter out the block attributes after serialization
*
* @param string $encoded_attrs The serialized block's Attributes
* @param array $attrs The Block's Attributes
* @param array $block The Block's schema
* @param \WP_Block $block_instance The block's instance
* @param string $encoded_attrs The serialized block attributes
* @param array $attrs The block attributes
* @param array $block The block schema
* @param WP_Block $block_instance The block instance
*/
$block_attrs_serialized = apply_filters(
'tenup_headless_wp_render_blocks_attrs_serialized',
Expand All @@ -158,11 +255,11 @@ public function render_block( $html, $block, $block_instance ) {
/**
* Filter for enabling the use of the new HTML_Tag_Processor API
*
* @param boolean $enable Whether enable the new api. Defaults to false
* @param boolean $enable Whether enable the new HTML Tag API, defaults to off/false
*/
$parser_api = apply_filters( 'tenup_headless_wp_render_block_use_tag_processor', false );
$use_html_tag_api = apply_filters( 'tenup_headless_wp_render_block_use_tag_processor', false );

if ( class_exists( '\WP_HTML_Tag_Processor' ) && $parser_api ) {
if ( class_exists( WP_HTML_Tag_Processor::class ) && $use_html_tag_api ) {
return $this->process_block_with_html_tag_api(
$html,
$block_name,
Expand Down
Loading
Loading