diff --git a/.php-cs-fixer.php b/.php-cs-fixer.php old mode 100644 new mode 100755 diff --git a/CHANGELOG.md b/CHANGELOG.md index 4fa112b8f..8d5e7c505 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,11 @@ -## [Unreleased] - 2023-08-29 +## [Unreleased] - 2023-09-06 ### Added +- [#474](https://github.com/flow-php/flow/pull/474) - **XMLEntry** - [@norberttech](https://github.com/norberttech) +- [#474](https://github.com/flow-php/flow/pull/474) - **XMLNodeEntry** - [@norberttech](https://github.com/norberttech) +- [#474](https://github.com/flow-php/flow/pull/474) - **ref('...')->xpath('...') - for extracting specific nodes from XMLEntry** - [@norberttech](https://github.com/norberttech) +- [#474](https://github.com/flow-php/flow/pull/474) - **ref('...')->domNodeAttribute('...') - for extracting value of attribute** - [@norberttech](https://github.com/norberttech) +- [#474](https://github.com/flow-php/flow/pull/474) - **ref('...')->domNodeValue('...') - for extracting value of node** - [@norberttech](https://github.com/norberttech) - [#450](https://github.com/flow-php/flow/pull/450) - **Add new `ulid()` expression based on Symfony Uid** - [@stloyd](https://github.com/stloyd) - [#445](https://github.com/flow-php/flow/pull/445) - **Add `fig/log-test` package for mock logger** - [@stloyd](https://github.com/stloyd) - [#440](https://github.com/flow-php/flow/pull/440) - **Add MariaDB to supported platforms for Doctrine adapter** - [@stloyd](https://github.com/stloyd) @@ -58,6 +63,7 @@ - [#388](https://github.com/flow-php/flow/pull/388) - **Added `ext-hash` PHP extension as required for Flow** - [@stloyd](https://github.com/stloyd) ### Changed +- [#474](https://github.com/flow-php/flow/pull/474) - **XMLReaderExtractor is now returning XMLEntry type instead of casting XML's to array** - [@norberttech](https://github.com/norberttech) - [#445](https://github.com/flow-php/flow/pull/445) - **Allow usage of `psr/log` v2 & v3`** - [@stloyd](https://github.com/stloyd) - 
[#438](https://github.com/flow-php/flow/pull/438) - **Mark methods on DataFrame api as @lazy or @trigger** - [@norberttech](https://github.com/norberttech) - [#436](https://github.com/flow-php/flow/pull/436) - **Moved limit functionality into LimitingPipeline** - [@norberttech](https://github.com/norberttech) diff --git a/composer.lock b/composer.lock index 8f87e3c58..63cbf47e4 100644 --- a/composer.lock +++ b/composer.lock @@ -89,16 +89,16 @@ }, { "name": "amphp/byte-stream", - "version": "v2.0.1", + "version": "v2.0.2", "source": { "type": "git", "url": "https://github.com/amphp/byte-stream.git", - "reference": "7e7a77579f3e90c6fbd56e49628e6ace02d8f88a" + "reference": "408a3b4fc4f4c7604575dc8704f18c1bd91c3ceb" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/amphp/byte-stream/zipball/7e7a77579f3e90c6fbd56e49628e6ace02d8f88a", - "reference": "7e7a77579f3e90c6fbd56e49628e6ace02d8f88a", + "url": "https://api.github.com/repos/amphp/byte-stream/zipball/408a3b4fc4f4c7604575dc8704f18c1bd91c3ceb", + "reference": "408a3b4fc4f4c7604575dc8704f18c1bd91c3ceb", "shasum": "" }, "require": { @@ -119,7 +119,8 @@ "type": "library", "autoload": { "files": [ - "src/functions.php" + "src/functions.php", + "src/Internal/functions.php" ], "psr-4": { "Amp\\ByteStream\\": "src" @@ -151,7 +152,7 @@ ], "support": { "issues": "https://github.com/amphp/byte-stream/issues", - "source": "https://github.com/amphp/byte-stream/tree/v2.0.1" + "source": "https://github.com/amphp/byte-stream/tree/v2.0.2" }, "funding": [ { @@ -159,7 +160,7 @@ "type": "github" } ], - "time": "2023-02-03T04:06:20+00:00" + "time": "2023-09-01T04:41:26+00:00" }, { "name": "amphp/cache", @@ -1546,16 +1547,16 @@ }, { "name": "google/apiclient-services", - "version": "v0.313.0", + "version": "v0.314.0", "source": { "type": "git", "url": "https://github.com/googleapis/google-api-php-client-services.git", - "reference": "e41289c4488563af75bd291972f0fa00949e084a" + "reference": 
"fe2f7513dc5a4a6cf82715fd0edf7589423d6535" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/googleapis/google-api-php-client-services/zipball/e41289c4488563af75bd291972f0fa00949e084a", - "reference": "e41289c4488563af75bd291972f0fa00949e084a", + "url": "https://api.github.com/repos/googleapis/google-api-php-client-services/zipball/fe2f7513dc5a4a6cf82715fd0edf7589423d6535", + "reference": "fe2f7513dc5a4a6cf82715fd0edf7589423d6535", "shasum": "" }, "require": { @@ -1584,9 +1585,9 @@ ], "support": { "issues": "https://github.com/googleapis/google-api-php-client-services/issues", - "source": "https://github.com/googleapis/google-api-php-client-services/tree/v0.313.0" + "source": "https://github.com/googleapis/google-api-php-client-services/tree/v0.314.0" }, - "time": "2023-08-25T01:10:13+00:00" + "time": "2023-09-03T01:04:12+00:00" }, { "name": "google/auth", @@ -2293,20 +2294,20 @@ }, { "name": "league/uri", - "version": "7.1.0", + "version": "7.2.1", "source": { "type": "git", "url": "https://github.com/thephpleague/uri.git", - "reference": "c0bf6dfa86b7804fe870b3f3d9c653e35a2c9e3e" + "reference": "8b644f8ff80352530bbc0ea467d5b5a89b60d832" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/thephpleague/uri/zipball/c0bf6dfa86b7804fe870b3f3d9c653e35a2c9e3e", - "reference": "c0bf6dfa86b7804fe870b3f3d9c653e35a2c9e3e", + "url": "https://api.github.com/repos/thephpleague/uri/zipball/8b644f8ff80352530bbc0ea467d5b5a89b60d832", + "reference": "8b644f8ff80352530bbc0ea467d5b5a89b60d832", "shasum": "" }, "require": { - "league/uri-interfaces": "^7.1", + "league/uri-interfaces": "^7.2", "php": "^8.1" }, "conflict": { @@ -2371,7 +2372,7 @@ "docs": "https://uri.thephpleague.com", "forum": "https://thephpleague.slack.com", "issues": "https://github.com/thephpleague/uri-src/issues", - "source": "https://github.com/thephpleague/uri/tree/7.1.0" + "source": "https://github.com/thephpleague/uri/tree/7.2.1" }, "funding": [ { @@ -2379,20 +2380,20 @@ 
"type": "github" } ], - "time": "2023-08-21T20:15:03+00:00" + "time": "2023-08-30T21:06:57+00:00" }, { "name": "league/uri-interfaces", - "version": "7.1.0", + "version": "7.2.0", "source": { "type": "git", "url": "https://github.com/thephpleague/uri-interfaces.git", - "reference": "c3ea9306c67c9a1a72312705e8adfcb9cf167310" + "reference": "43fa071050fcba89aefb5d4789a4a5a73874c44b" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/thephpleague/uri-interfaces/zipball/c3ea9306c67c9a1a72312705e8adfcb9cf167310", - "reference": "c3ea9306c67c9a1a72312705e8adfcb9cf167310", + "url": "https://api.github.com/repos/thephpleague/uri-interfaces/zipball/43fa071050fcba89aefb5d4789a4a5a73874c44b", + "reference": "43fa071050fcba89aefb5d4789a4a5a73874c44b", "shasum": "" }, "require": { @@ -2455,7 +2456,7 @@ "docs": "https://uri.thephpleague.com", "forum": "https://thephpleague.slack.com", "issues": "https://github.com/thephpleague/uri-src/issues", - "source": "https://github.com/thephpleague/uri-interfaces/tree/7.1.0" + "source": "https://github.com/thephpleague/uri-interfaces/tree/7.2.0" }, "funding": [ { @@ -2463,7 +2464,7 @@ "type": "github" } ], - "time": "2023-08-21T20:15:03+00:00" + "time": "2023-08-30T19:43:38+00:00" }, { "name": "monolog/monolog", @@ -4207,16 +4208,16 @@ }, { "name": "aws/aws-sdk-php", - "version": "3.279.8", + "version": "3.280.2", "source": { "type": "git", "url": "https://github.com/aws/aws-sdk-php.git", - "reference": "47a454538ec6bf38cf658cf5573585c64915691a" + "reference": "d68b83b3bc39b70bf33e9b8b5166facbe3e4fe9b" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/aws/aws-sdk-php/zipball/47a454538ec6bf38cf658cf5573585c64915691a", - "reference": "47a454538ec6bf38cf658cf5573585c64915691a", + "url": "https://api.github.com/repos/aws/aws-sdk-php/zipball/d68b83b3bc39b70bf33e9b8b5166facbe3e4fe9b", + "reference": "d68b83b3bc39b70bf33e9b8b5166facbe3e4fe9b", "shasum": "" }, "require": { @@ -4296,9 +4297,9 @@ "support": { 
"forum": "https://forums.aws.amazon.com/forum.jspa?forumID=80", "issues": "https://github.com/aws/aws-sdk-php/issues", - "source": "https://github.com/aws/aws-sdk-php/tree/3.279.8" + "source": "https://github.com/aws/aws-sdk-php/tree/3.280.2" }, - "time": "2023-08-28T18:14:34+00:00" + "time": "2023-09-01T18:06:10+00:00" }, { "name": "brick/math", @@ -5510,16 +5511,16 @@ }, { "name": "phpstan/phpstan", - "version": "1.10.32", + "version": "1.10.33", "source": { "type": "git", "url": "https://github.com/phpstan/phpstan.git", - "reference": "c47e47d3ab03137c0e121e77c4d2cb58672f6d44" + "reference": "03b1cf9f814ba0863c4e9affea49a4d1ed9a2ed1" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/phpstan/phpstan/zipball/c47e47d3ab03137c0e121e77c4d2cb58672f6d44", - "reference": "c47e47d3ab03137c0e121e77c4d2cb58672f6d44", + "url": "https://api.github.com/repos/phpstan/phpstan/zipball/03b1cf9f814ba0863c4e9affea49a4d1ed9a2ed1", + "reference": "03b1cf9f814ba0863c4e9affea49a4d1ed9a2ed1", "shasum": "" }, "require": { @@ -5568,7 +5569,7 @@ "type": "tidelift" } ], - "time": "2023-08-24T21:54:50+00:00" + "time": "2023-09-04T12:20:53+00:00" }, { "name": "psr/container", diff --git a/examples/data/salaries.xml b/examples/data/salaries.xml new file mode 100644 index 000000000..ab637155d --- /dev/null +++ b/examples/data/salaries.xml @@ -0,0 +1,104 @@ + + + + 71883 + + + 192644 + + + 174187 + + + 179932 + + + 52056 + + + + + 102342 + + + 111102 + + + 81938 + + + 132202 + + + 173225 + + + + + 79619 + + + 99387 + + + 198847 + + + 50550 + + + 98212 + + + + + 69721 + + + 151826 + + + 158168 + + + 111872 + + + 172334 + + + + + 174220 + + + 164086 + + + 104257 + + + 105817 + + + 145490 + + + + + 127383 + + + 52592 + + + 71732 + + + 165083 + + + 85138 + + + \ No newline at end of file diff --git a/examples/data/simple_items.xml b/examples/data/simple_items.xml new file mode 100644 index 000000000..ed8dc7bbf --- /dev/null +++ b/examples/data/simple_items.xml @@ -0,0 +1,10 @@ + + + 
1 + 2 + 3 + 4 + 5 + 6 + + \ No newline at end of file diff --git a/examples/topics/types/xml/reading.php b/examples/topics/types/xml/reading.php new file mode 100644 index 000000000..3c27d7874 --- /dev/null +++ b/examples/topics/types/xml/reading.php @@ -0,0 +1,16 @@ +read(XML::from(__FLOW_DATA__ . '/simple_items.xml', 'root/items/item')) + ->write(To::output(false)) + ->run(); diff --git a/examples/topics/types/xml/salaries.php b/examples/topics/types/xml/salaries.php new file mode 100644 index 000000000..2efe1f867 --- /dev/null +++ b/examples/topics/types/xml/salaries.php @@ -0,0 +1,29 @@ +read(XML::from(__FLOW_DATA__ . '/salaries.xml')) + ->withEntry('months', ref('row')->xpath('/Salaries/Month')) + ->withEntry('month', ref('months')->expand()) + ->withEntry('month_name', ref('month')->domNodeAttribute('name')) + ->withEntry('departments', ref('month')->xpath('/Month/Department')) + ->withEntry('department', ref('departments')->expand()) + ->withEntry('department_name', ref('department')->domNodeAttribute('name')) + ->withEntry('department_salary', ref('department')->xpath('/Department/TotalSalary')->domNodeValue()) + ->drop('row', 'months', 'month', 'departments', 'department') + ->groupBy(ref('month_name')) + ->aggregate(Aggregation::sum(ref('department_salary'))) + ->rename('department_salary_sum', 'total_monthly_salaries') + ->write(To::output(false)) + ->run(); diff --git a/src/adapter/etl-adapter-xml/README.md b/src/adapter/etl-adapter-xml/README.md index 7790aec58..71ec42a60 100644 --- a/src/adapter/etl-adapter-xml/README.md +++ b/src/adapter/etl-adapter-xml/README.md @@ -40,22 +40,36 @@ Memory safe XML extractor read(XML::from_file(__DIR__ . '/xml/simple_items.xml', 'root/items/item')) - ->fetch() + ->read(XML::from(__FLOW_DATA__ . 
'/simple_items.xml', 'root/items/item')) + ->write(To::output(false)) + ->run() +; ``` Above code will generate Rows with 5 entries like the one below: -```php -1 | +| 2 | +| 3 | +| 4 | +| 5 | +| 6 | ++----------------------------------------------+ +``` + +Each entry will be an XMLEntry type. +From there you can use built-in expressions to extract data from XML. + +- `ref('row')->xpath('...');` +- `ref('row')->domNodeAttribute('...');` +- `ref('row')->domNodeValue('...');` + +When working with collections XPath will return a ListEntry with XMLEntries inside. +From there you can for example unpack or expand them. + +For more examples please look into `/examples/topics/types/xml` directory in [flow monorepo](https://github.com/flow-php/flow) -Row::create( - Entry::array('row', [ - 'item' => [ - 'id' => [ - '@value' => 1 - ] - ] - ]) -) -``` \ No newline at end of file diff --git a/src/adapter/etl-adapter-xml/src/Flow/ETL/Adapter/XML/XMLReaderExtractor.php b/src/adapter/etl-adapter-xml/src/Flow/ETL/Adapter/XML/XMLReaderExtractor.php index 82fc2e7bc..f2d2f972c 100644 --- a/src/adapter/etl-adapter-xml/src/Flow/ETL/Adapter/XML/XMLReaderExtractor.php +++ b/src/adapter/etl-adapter-xml/src/Flow/ETL/Adapter/XML/XMLReaderExtractor.php @@ -51,11 +51,11 @@ public function extract(FlowContext $context) : \Generator if ($xmlReader->nodeType === \XMLReader::ELEMENT) { if ($previousDepth === $xmlReader->depth) { \array_pop($currentPathBreadCrumbs); - \array_push($currentPathBreadCrumbs, $xmlReader->name); + $currentPathBreadCrumbs[] = $xmlReader->name; } if ($xmlReader->depth > $previousDepth) { - \array_push($currentPathBreadCrumbs, $xmlReader->name); + $currentPathBreadCrumbs[] = $xmlReader->name; } if ($xmlReader->depth < $previousDepth) { @@ -71,11 +71,11 @@ public function extract(FlowContext $context) : \Generator if ($context->config->shouldPutInputIntoRows()) { $rows[] = Row::create( - Entry::array($this->rowEntryName, $this->convertDOMDocument($node)), +
Entry::xml($this->rowEntryName, $node), Entry::string('input_file_uri', $filePath->uri()) ); } else { - $rows[] = Row::create(Entry::array($this->rowEntryName, $this->convertDOMDocument($node))); + $rows[] = Row::create(Entry::xml($this->rowEntryName, $node)); } if (\count($rows) >= $this->rowsInBatch) { @@ -95,90 +95,4 @@ public function extract(FlowContext $context) : \Generator } } } - - /** - * @param \DOMDocument $document - * - * @return array - */ - private function convertDOMDocument(\DOMDocument $document) : array - { - $xmlArray = []; - - if ($document->hasChildNodes()) { - $children = $document->childNodes; - - foreach ($children as $child) { - /** @psalm-suppress ArgumentTypeCoercion */ - $xmlArray[$child->nodeName] = $this->convertDOMElement($child); - } - } - - return $xmlArray; - } - - /** - * @psalm-suppress ArgumentTypeCoercion - * @psalm-suppress PossiblyNullArgument - * @psalm-suppress UnnecessaryVarAnnotation - * @psalm-suppress PossiblyNullIterator - * - * @return array - */ - private function convertDOMElement(\DOMElement|\DOMNode $element) : array - { - $xmlArray = []; - - if ($element->hasAttributes()) { - /** - * @var \DOMAttr $attribute - * - * @phpstan-ignore-next-line - */ - foreach ($element->attributes as $attribute) { - $xmlArray['@attributes'][$attribute->name] = $attribute->value; - } - } - - foreach ($element->childNodes as $childNode) { - if ($childNode->nodeType === XML_TEXT_NODE) { - /** @phpstan-ignore-next-line */ - if (\trim($childNode->nodeValue)) { - $xmlArray['@value'] = $childNode->nodeValue; - } - } - - if ($childNode->nodeType === XML_ELEMENT_NODE) { - if ($this->isElementCollection($element)) { - /** @phpstan-ignore-next-line */ - $xmlArray[$childNode->nodeName][] = $this->convertDOMElement($childNode); - } else { - $xmlArray[$childNode->nodeName] = $this->convertDOMElement($childNode); - } - } - } - - return $xmlArray; - } - - private function isElementCollection(\DOMElement|\DOMNode $element) : bool - { - if 
($element->childNodes->count() <= 1) { - return false; - } - - $nodeNames = []; - /** @var \DOMElement $childNode */ - foreach ($element->childNodes as $childNode) { - if ($childNode->nodeType === XML_ELEMENT_NODE) { - $nodeNames[] = $childNode->nodeName; - } - } - - if (!\count($nodeNames) || \count($nodeNames) === 1) { - return false; - } - - return \count(\array_unique($nodeNames)) === 1; - } } diff --git a/src/adapter/etl-adapter-xml/tests/Flow/ETL/Tests/Integration/Adapter/XML/XMLReaderExtractorTest.php b/src/adapter/etl-adapter-xml/tests/Flow/ETL/Tests/Integration/Adapter/XML/XMLReaderExtractorTest.php index 6e254d6f7..4da95a772 100644 --- a/src/adapter/etl-adapter-xml/tests/Flow/ETL/Tests/Integration/Adapter/XML/XMLReaderExtractorTest.php +++ b/src/adapter/etl-adapter-xml/tests/Flow/ETL/Tests/Integration/Adapter/XML/XMLReaderExtractorTest.php @@ -13,70 +13,15 @@ final class XMLReaderExtractorTest extends TestCase { - public function test_reading_xml_collection() : void + public function test_reading_xml() : void { + $xml = new \DOMDocument(); + $xml->load(__DIR__ . 
'/xml/simple_items.xml'); + $this->assertEquals( - new Rows( - Row::create( - Entry::array('row', [ - 'items' => [ - 'item' => [ - [ - 'id' => [ - '@value' => 1, - '@attributes' => [ - 'id_attribute_01' => '1', - ], - ], - '@attributes' => ['item_attribute_01' => '1'], - ], - [ - 'id' => [ - '@value' => 2, - '@attributes' => [ - 'id_attribute_01' => '2', - ], - ], - '@attributes' => ['item_attribute_01' => '2'], - ], - [ - 'id' => [ - '@value' => 3, - '@attributes' => [ - 'id_attribute_01' => '3', - ], - ], - '@attributes' => ['item_attribute_01' => '3'], - ], - [ - 'id' => [ - '@value' => 4, - '@attributes' => [ - 'id_attribute_01' => '4', - ], - ], - '@attributes' => ['item_attribute_01' => '4'], - ], - [ - 'id' => [ - '@value' => 5, - '@attributes' => [ - 'id_attribute_01' => '5', - ], - ], - '@attributes' => ['item_attribute_01' => '5'], - ], - ], - '@attributes' => [ - 'items_attribute_01' => '1', - 'items_attribute_02' => '2', - ], - ], - ]) - ) - ), + (new Rows(Row::create(Entry::xml('row', $xml)))), (new Flow()) - ->read(XML::from(__DIR__ . '/xml/simple_items.xml', 'root/items')) + ->read(XML::from(__DIR__ . 
'/xml/simple_items.xml')) ->fetch() ); } @@ -85,198 +30,46 @@ public function test_reading_xml_each_collection_item() : void { $this->assertEquals( new Rows( - Row::create( - Entry::array('row', [ - 'item' => [ - 'id' => [ - '@value' => 1, - '@attributes' => [ - 'id_attribute_01' => '1', - ], - ], - '@attributes' => ['item_attribute_01' => '1'], - ], - ]) - ), - Row::create( - Entry::array('row', [ - 'item' => [ - 'id' => [ - '@value' => 2, - '@attributes' => [ - 'id_attribute_01' => '2', - ], - ], - '@attributes' => ['item_attribute_01' => '2'], - ], - ]) - ), - Row::create( - Entry::array('row', [ - 'item' => [ - 'id' => [ - '@value' => 3, - '@attributes' => [ - 'id_attribute_01' => '3', - ], - ], - '@attributes' => ['item_attribute_01' => '3'], - ], - ]) - ), - Row::create( - Entry::array('row', [ - 'item' => [ - 'id' => [ - '@value' => 4, - '@attributes' => [ - 'id_attribute_01' => '4', - ], - ], - '@attributes' => ['item_attribute_01' => '4'], - ], - ]) - ), - Row::create( - Entry::array('row', [ - 'item' => [ - 'id' => [ - '@value' => 5, - '@attributes' => [ - 'id_attribute_01' => '5', - ], - ], - '@attributes' => ['item_attribute_01' => '5'], - ], - ]) - ) + Row::create(Entry::xml('row', '1')), + Row::create(Entry::xml('row', '2')), + Row::create(Entry::xml('row', '3')), + Row::create(Entry::xml('row', '4')), + Row::create(Entry::xml('row', '5')), ), (new Flow()) - ->read(XML::from(__DIR__ . '/xml/simple_items.xml', 'root/items/item')) + ->read(XML::from(__DIR__ . 
'/xml/simple_items_flat.xml', 'root/items/item')) ->fetch() ); } - public function test_reading_xml_each_collection_item_id() : void + public function test_reading_xml_from_path() : void { - $this->assertEquals( - new Rows( - Row::create( - Entry::array('row', [ - 'id' => [ - '@value' => '1', - '@attributes' => ['id_attribute_01' => '1'], - ], - ]) - ), - Row::create( - Entry::array('row', [ - 'id' => [ - '@value' => '2', - '@attributes' => ['id_attribute_01' => '2'], - ], - ]) - ), - Row::create( - Entry::array('row', [ - 'id' => [ - '@value' => '3', - '@attributes' => ['id_attribute_01' => '3'], - ], - ]) - ), - Row::create( - Entry::array('row', [ - 'id' => [ - '@value' => '4', - '@attributes' => ['id_attribute_01' => '4'], - ], - ]) - ), - Row::create( - Entry::array('row', [ - 'id' => [ - '@value' => '5', - '@attributes' => ['id_attribute_01' => '5'], - ], - ]) - ) - ), - (new Flow()) - ->read(XML::from(__DIR__ . '/xml/simple_items.xml', 'root/items/item/id')) - ->fetch() - ); - } + $xml = new \DOMDocument(); + $xml->loadXML(<<<'XML' + + + + 1 + + + 2 + + + 3 + + + 4 + + + 5 + + - public function test_reading_xml_root() : void - { +XML); $this->assertEquals( - new Rows( - Row::create( - Entry::array('row', [ - 'root' => [ - 'items' => [ - 'item' => [ - [ - 'id' => [ - '@value' => 1, - '@attributes' => [ - 'id_attribute_01' => '1', - ], - ], - '@attributes' => ['item_attribute_01' => '1'], - ], - [ - 'id' => [ - '@value' => 2, - '@attributes' => [ - 'id_attribute_01' => '2', - ], - ], - '@attributes' => ['item_attribute_01' => '2'], - ], - [ - 'id' => [ - '@value' => 3, - '@attributes' => [ - 'id_attribute_01' => '3', - ], - ], - '@attributes' => ['item_attribute_01' => '3'], - ], - [ - 'id' => [ - '@value' => 4, - '@attributes' => [ - 'id_attribute_01' => '4', - ], - ], - '@attributes' => ['item_attribute_01' => '4'], - ], - [ - 'id' => [ - '@value' => 5, - '@attributes' => [ - 'id_attribute_01' => '5', - ], - ], - '@attributes' => ['item_attribute_01' => 
'5'], - ], - ], - '@attributes' => [ - 'items_attribute_01' => '1', - 'items_attribute_02' => '2', - ], - ], - '@attributes' => [ - 'root_attribute_01' => '1', - ], - ], - ]) - ) - ), + new Rows(Row::create(Entry::xml('row', $xml))), (new Flow()) - ->read(XML::from(__DIR__ . '/xml/simple_items.xml')) + ->read(XML::from(__DIR__ . '/xml/simple_items.xml', 'root/items')) ->fetch() ); } diff --git a/src/adapter/etl-adapter-xml/tests/Flow/ETL/Tests/Integration/Adapter/XML/xml/simple_items_flat.xml b/src/adapter/etl-adapter-xml/tests/Flow/ETL/Tests/Integration/Adapter/XML/xml/simple_items_flat.xml new file mode 100644 index 000000000..8c5c2d66f --- /dev/null +++ b/src/adapter/etl-adapter-xml/tests/Flow/ETL/Tests/Integration/Adapter/XML/xml/simple_items_flat.xml @@ -0,0 +1 @@ +12345 \ No newline at end of file diff --git a/src/core/etl/src/Flow/ETL/DSL/Entry.php b/src/core/etl/src/Flow/ETL/DSL/Entry.php index 024baa3b0..6d06ab325 100644 --- a/src/core/etl/src/Flow/ETL/DSL/Entry.php +++ b/src/core/etl/src/Flow/ETL/DSL/Entry.php @@ -289,4 +289,17 @@ final public static function structure(string $name, RowEntry ...$entries) : Row { return new RowEntry\StructureEntry($name, ...$entries); } + + /** + * @return RowEntry\XMLEntry + */ + final public static function xml(string $name, \DOMDocument|string $data) : RowEntry + { + return new RowEntry\XMLEntry($name, $data); + } + + final public static function xml_node(string $name, \DOMNode $data) : RowEntry + { + return new RowEntry\XMLNodeEntry($name, $data); + } } diff --git a/src/core/etl/src/Flow/ETL/Formatter/ASCII/ASCIIValue.php b/src/core/etl/src/Flow/ETL/Formatter/ASCII/ASCIIValue.php index 30ce77822..96fac80b1 100644 --- a/src/core/etl/src/Flow/ETL/Formatter/ASCII/ASCIIValue.php +++ b/src/core/etl/src/Flow/ETL/Formatter/ASCII/ASCIIValue.php @@ -94,6 +94,10 @@ private function stringValue() : string if ($val instanceof Entry) { $this->stringValue = $val->toString(); + if ($val instanceof Entry\XMLEntry || $val instanceof 
Entry\XMLNodeEntry) { + $this->stringValue = \str_replace("\n", '', $this->stringValue); + } + return $this->stringValue; } diff --git a/src/core/etl/src/Flow/ETL/Row/Entry/XMLEntry.php b/src/core/etl/src/Flow/ETL/Row/Entry/XMLEntry.php new file mode 100644 index 000000000..eb0865d43 --- /dev/null +++ b/src/core/etl/src/Flow/ETL/Row/Entry/XMLEntry.php @@ -0,0 +1,106 @@ + + */ +final class XMLEntry implements \Stringable, Entry +{ + use EntryRef; + + private readonly \DOMDocument $value; + + public function __construct(private readonly string $name, \DOMDocument|string $value) + { + if (\is_string($value)) { + $doc = new \DOMDocument(); + + if (!@$doc->loadXML($value)) { + throw new InvalidArgumentException(\sprintf('Given string "%s" is not valid XML', $value)); + } + + $this->value = $doc; + } else { + $this->value = $value; + } + } + + public function __serialize() : array + { + return [ + 'name' => $this->name, + 'value' => $this->value, + ]; + } + + public function __toString() : string + { + /** @phpstan-ignore-next-line */ + return $this->value->saveXML(); + } + + public function __unserialize(array $data) : void + { + $this->name = $data['name']; + $this->value = $data['value']; + } + + public function definition() : Definition + { + return Definition::xml($this->ref(), false); + } + + public function is(Reference|string $name) : bool + { + if ($name instanceof Reference) { + return $this->name === $name->name(); + } + + return $this->name === $name; + } + + public function isEqual(Entry $entry) : bool + { + if (!$entry instanceof self || !$this->is($entry->name())) { + return false; + } + + if ($entry->value->documentElement === null && $this->value->documentElement === null) { + return true; + } + + return $entry->value()->C14N() === $this->value->C14N(); + } + + public function map(callable $mapper) : Entry + { + return new self($this->name, $mapper($this->value())); + } + + public function name() : string + { + return $this->name; + } + + public function 
rename(string $name) : Entry + { + return new self($name, $this->value); + } + + public function toString() : string + { + /** @phpstan-ignore-next-line */ + return $this->value->saveXML(); + } + + public function value() : \DOMDocument + { + return $this->value; + } +} diff --git a/src/core/etl/src/Flow/ETL/Row/Entry/XMLNodeEntry.php b/src/core/etl/src/Flow/ETL/Row/Entry/XMLNodeEntry.php new file mode 100644 index 000000000..28f0486e8 --- /dev/null +++ b/src/core/etl/src/Flow/ETL/Row/Entry/XMLNodeEntry.php @@ -0,0 +1,96 @@ + + */ +final class XMLNodeEntry implements \Stringable, Entry +{ + use EntryRef; + + public function __construct(private readonly string $name, private readonly \DOMNode $value) + { + } + + public function __serialize() : array + { + return [ + 'name' => $this->name, + 'value' => $this->value, + ]; + } + + public function __toString() : string + { + /** + * @psalm-suppress PossiblyNullReference + * + * @phpstan-ignore-next-line + */ + return $this->value->ownerDocument->saveXML($this->value); + } + + public function __unserialize(array $data) : void + { + $this->name = $data['name']; + $this->value = $data['value']; + } + + public function definition() : Definition + { + return Definition::xml_node($this->ref(), false); + } + + public function is(Reference|string $name) : bool + { + if ($name instanceof Reference) { + return $this->name === $name->name(); + } + + return $this->name === $name; + } + + public function isEqual(Entry $entry) : bool + { + if (!$entry instanceof self || !$this->is($entry->name())) { + return false; + } + + return $this->value->C14N() === $entry->value->C14N(); + } + + public function map(callable $mapper) : Entry + { + return new self($this->name, $mapper($this->value())); + } + + public function name() : string + { + return $this->name; + } + + public function rename(string $name) : Entry + { + return new self($name, $this->value); + } + + public function toString() : string + { + /** + * @psalm-suppress 
PossiblyNullReference + * + * @phpstan-ignore-next-line + */ + return $this->value->ownerDocument->saveXML($this->value); + } + + public function value() : \DOMNode + { + return $this->value; + } +} diff --git a/src/core/etl/src/Flow/ETL/Row/Factory/NativeEntryFactory.php b/src/core/etl/src/Flow/ETL/Row/Factory/NativeEntryFactory.php index a4c4679cf..ae86ed0fa 100644 --- a/src/core/etl/src/Flow/ETL/Row/Factory/NativeEntryFactory.php +++ b/src/core/etl/src/Flow/ETL/Row/Factory/NativeEntryFactory.php @@ -57,6 +57,10 @@ public function create(string $entryName, mixed $value) : Entry return Row\Entry\JsonEntry::fromJsonString($entryName, $value); } + if ($this->isXML($value)) { + return new Entry\XMLEntry($entryName, $value); + } + return new Row\Entry\StringEntry($entryName, $value); } @@ -73,6 +77,14 @@ public function create(string $entryName, mixed $value) : Entry } if (\is_object($value)) { + if ($value instanceof \DOMDocument) { + return new Row\Entry\XMLEntry($entryName, $value); + } + + if ($value instanceof \DOMNode) { + return new Row\Entry\XMLNodeEntry($entryName, $value); + } + if ($value instanceof \DateTimeImmutable) { return new Row\Entry\DateTimeEntry($entryName, $value); } @@ -121,6 +133,11 @@ public function create(string $entryName, mixed $value) : Entry if ($class === \DateTimeImmutable::class || $class === \DateTime::class) { $class = \DateTimeInterface::class; } + + if ($class === \DOMElement::class) { + $class = \DOMNode::class; + } + /** * @psalm-suppress PossiblyNullArgument */ @@ -174,6 +191,10 @@ private function fromDefinition(Schema\Definition $definition, mixed $value) : E } } + if ($type === Entry\XMLEntry::class && (\is_string($value) || $value instanceof \DOMDocument)) { + return EntryDSL::xml($definition->entry()->name(), $value); + } + if ($type === Entry\ObjectEntry::class && \is_object($value)) { return EntryDSL::object($definition->entry()->name(), $value); } @@ -267,4 +288,24 @@ private function isJson(string $string) : bool 
return false; } } + + private function isXML(string $string) : bool + { + try { + \libxml_use_internal_errors(true); + + $doc = new \DOMDocument(); + $result = $doc->loadXML($string); + \libxml_clear_errors(); // Clear any errors if needed + \libxml_use_internal_errors(false); // Restore standard error handling + + /** @psalm-suppress RedundantCastGivenDocblockType */ + return (bool) $result; + } catch (\Exception|\ValueError) { // loadXML('') throws \ValueError on PHP 8; an empty string is simply "not XML" + \libxml_clear_errors(); // Clear any errors if needed + \libxml_use_internal_errors(false); // Restore standard error handling + + return false; + } + } } diff --git a/src/core/etl/src/Flow/ETL/Row/Reference/EntryExpression.php b/src/core/etl/src/Flow/ETL/Row/Reference/EntryExpression.php index 1d82f3d59..d7fb670f5 100644 --- a/src/core/etl/src/Flow/ETL/Row/Reference/EntryExpression.php +++ b/src/core/etl/src/Flow/ETL/Row/Reference/EntryExpression.php @@ -96,6 +96,16 @@ public function divide(Expression $ref) : Expression|EntryReference return new Expressions(new Divide($this, $ref)); } + public function domNodeAttribute(string $attribute) : Expression|EntryReference + { + return new Expressions(new Expression\DOMNodeAttribute($this, $attribute)); + } + + public function domNodeValue() : Expression|EntryReference + { + return new Expressions(new Expression\DOMNodeValue($this)); + } + public function endsWith(Expression $needle) : Expression|EntryReference { return new Expressions(new EndsWith($this, $needle)); @@ -376,4 +386,9 @@ public function upper() : Expression|EntryReference { return new Expressions(new Expression\ToUpper($this)); } + + public function xpath(string $string) : Expression|EntryReference + { + return new Expressions(new Expression\XPath($this, $string)); + } } diff --git a/src/core/etl/src/Flow/ETL/Row/Reference/Expression/Cast.php b/src/core/etl/src/Flow/ETL/Row/Reference/Expression/Cast.php index 944bf4adf..a62b85206 100644 --- a/src/core/etl/src/Flow/ETL/Row/Reference/Expression/Cast.php +++
b/src/core/etl/src/Flow/ETL/Row/Reference/Expression/Cast.php @@ -46,12 +46,41 @@ public function eval(Row $row) : mixed default => (string) $value }, 'bool', 'boolean' => (bool) $value, - 'array' => (array) $value, + 'array' => $this->toArray($value), 'object' => (object) $value, 'null' => null, 'json' => \json_encode($value, JSON_THROW_ON_ERROR), 'json_pretty' => \json_encode($value, JSON_THROW_ON_ERROR | JSON_PRETTY_PRINT), + 'xml' => $this->toXML($value), default => throw new InvalidArgumentException("Unknown cast type '{$this->type}'") }; } + + private function toArray(mixed $data) : array + { + if ($data instanceof \DOMDocument) { + return (new Cast\XMLConverter())->toArray($data); + } + + return (array) $data; + } + + private function toXML(mixed $value) : \DOMDocument + { + if (\is_string($value)) { + $doc = new \DOMDocument(); + + if (!@$doc->loadXML($value)) { // parse the markup itself; load() would treat the string as a file path/URL + throw new InvalidArgumentException('Invalid XML string given: ' . $value); + } + + return $doc; + } + + if ($value instanceof \DOMDocument) { + return $value; + } + + throw new InvalidArgumentException(\sprintf('Cannot cast %s to XML', \gettype($value))); + } } diff --git a/src/core/etl/src/Flow/ETL/Row/Reference/Expression/Cast/XMLConverter.php b/src/core/etl/src/Flow/ETL/Row/Reference/Expression/Cast/XMLConverter.php new file mode 100644 index 000000000..5bfcd3453 --- /dev/null +++ b/src/core/etl/src/Flow/ETL/Row/Reference/Expression/Cast/XMLConverter.php @@ -0,0 +1,88 @@ + + */ + public function toArray(\DOMDocument $document) : array + { + $xmlArray = []; + + if ($document->hasChildNodes()) { + foreach ($document->childNodes as $child) { + /** @psalm-suppress ArgumentTypeCoercion */ + $xmlArray[$child->nodeName] = $this->convertDOMElement($child); + } + } + + return $xmlArray; + } + + /** + * @psalm-suppress ArgumentTypeCoercion + * @psalm-suppress PossiblyNullArgument + * @psalm-suppress UnnecessaryVarAnnotation + * @psalm-suppress PossiblyNullIterator + */ + private function
convertDOMElement(\DOMElement|\DOMNode $element) : array + { + $xmlArray = []; + + if ($element->hasAttributes()) { + /** + * @var \DOMAttr $attribute + * + * @phpstan-ignore-next-line + */ + foreach ($element->attributes as $attribute) { + $xmlArray['@attributes'][$attribute->name] = $attribute->value; + } + } + + foreach ($element->childNodes as $childNode) { + if ($childNode->nodeType === XML_TEXT_NODE) { + /** @phpstan-ignore-next-line */ + if (\trim($childNode->nodeValue)) { + $xmlArray['@value'] = $childNode->nodeValue; + } + } + + if ($childNode->nodeType === XML_ELEMENT_NODE) { + if ($this->isElementCollection($element)) { + /** @phpstan-ignore-next-line */ + $xmlArray[$childNode->nodeName][] = $this->convertDOMElement($childNode); + } else { + $xmlArray[$childNode->nodeName] = $this->convertDOMElement($childNode); + } + } + } + + return $xmlArray; + } + + private function isElementCollection(\DOMElement|\DOMNode $element) : bool + { + if ($element->childNodes->count() <= 1) { + return false; + } + + $nodeNames = []; + /** @var \DOMElement $childNode */ + foreach ($element->childNodes as $childNode) { + if ($childNode->nodeType === XML_ELEMENT_NODE) { + $nodeNames[] = $childNode->nodeName; + } + } + + if (\count($nodeNames) <= 1) { + return false; + } + + return \count(\array_unique($nodeNames)) === 1; + } +} diff --git a/src/core/etl/src/Flow/ETL/Row/Reference/Expression/DOMNodeAttribute.php b/src/core/etl/src/Flow/ETL/Row/Reference/Expression/DOMNodeAttribute.php new file mode 100644 index 000000000..a4ccdb27b --- /dev/null +++ b/src/core/etl/src/Flow/ETL/Row/Reference/Expression/DOMNodeAttribute.php @@ -0,0 +1,45 @@ +ref->eval($row); + + if (!$value instanceof \DOMNode) { + return null; + } + + if (!$value->hasAttributes()) { + return null; + } + + $attributes = $value->attributes; + + /** + * @psalm-suppress PossiblyNullReference + * + * @phpstan-ignore-next-line + */ + if (!$attributes->getNamedItem($this->attribute)) { + return null; + } + + /** + * 
@psalm-suppress PossiblyNullPropertyFetch + * + * @phpstan-ignore-next-line + */ + return $attributes->getNamedItem($this->attribute)->nodeValue; + } +} diff --git a/src/core/etl/src/Flow/ETL/Row/Reference/Expression/DOMNodeValue.php b/src/core/etl/src/Flow/ETL/Row/Reference/Expression/DOMNodeValue.php new file mode 100644 index 000000000..c1c636b58 --- /dev/null +++ b/src/core/etl/src/Flow/ETL/Row/Reference/Expression/DOMNodeValue.php @@ -0,0 +1,25 @@ +ref->eval($row); + + if (!$value instanceof \DOMNode) { + return null; + } + + return $value->nodeValue; + } +} diff --git a/src/core/etl/src/Flow/ETL/Row/Reference/Expression/XPath.php b/src/core/etl/src/Flow/ETL/Row/Reference/Expression/XPath.php new file mode 100644 index 000000000..d14417eec --- /dev/null +++ b/src/core/etl/src/Flow/ETL/Row/Reference/Expression/XPath.php @@ -0,0 +1,54 @@ +ref->eval($row); + + if ($value instanceof \DOMNode && !$value instanceof \DOMDocument) { + $newDom = new \DOMDocument(); + $newNode = $newDom->importNode($value, true); + $newDom->append($newNode); + + $value = $newDom; + } + + if (!$value instanceof \DOMDocument) { + return null; + } + + $xpath = new \DOMXPath($value); + $result = @$xpath->query($this->path); + + if ($result === false) { + return null; + } + + if ($result->length === 0) { + return null; + } + + if ($result->length === 1) { + return $result->item(0); + } + + $nodes = []; + + foreach ($result as $node) { + $nodes[] = $node; + } + + return $nodes; + } +} diff --git a/src/core/etl/src/Flow/ETL/Row/Schema/Definition.php b/src/core/etl/src/Flow/ETL/Row/Schema/Definition.php index 7d9c98d89..c132554cb 100644 --- a/src/core/etl/src/Flow/ETL/Row/Schema/Definition.php +++ b/src/core/etl/src/Flow/ETL/Row/Schema/Definition.php @@ -20,6 +20,7 @@ use Flow\ETL\Row\Entry\StringEntry; use Flow\ETL\Row\Entry\StructureEntry; use Flow\ETL\Row\Entry\TypedCollection\Type; +use Flow\ETL\Row\Entry\XMLEntry; use Flow\ETL\Row\EntryReference; use Flow\ETL\Row\Schema\Constraint\Any; use 
Flow\ETL\Row\Schema\Constraint\VoidConstraint; @@ -162,6 +163,16 @@ public static function union(string|EntryReference $entry, array $entryClasses, return new self($entry, $types, $constraint, $metadata); } + public static function xml(string|EntryReference $entry, bool $nullable = false, ?Constraint $constraint = null, ?Metadata $metadata = null) : self + { + return new self($entry, ($nullable) ? [XMLEntry::class, NullEntry::class] : [XMLEntry::class], $constraint, $metadata); + } + + public static function xml_node(string|EntryReference $entry, bool $nullable = false, ?Constraint $constraint = null, ?Metadata $metadata = null) : self + { + return new self($entry, ($nullable) ? [Entry\XMLNodeEntry::class, NullEntry::class] : [Entry\XMLNodeEntry::class], $constraint, $metadata); + } + // @codeCoverageIgnoreStart public function __serialize() : array { diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/DataFrameTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/DataFrameTest.php index d57d8bcd5..e5180ce65 100644 --- a/src/core/etl/tests/Flow/ETL/Tests/Unit/DataFrameTest.php +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/DataFrameTest.php @@ -326,7 +326,8 @@ public function extract(FlowContext $context) : \Generator new Row\Entries(new IntegerEntry('item-id', 3), new StringEntry('name', 'three')) ), new Row\Entry\ObjectEntry('object', new \ArrayIterator([1, 2, 3])), - new Row\Entry\EnumEntry('enum', BackedStringEnum::three) + new Row\Entry\EnumEntry('enum', BackedStringEnum::three), + new Row\Entry\XMLEntry('xml', 'testbar'), ), ); } @@ -336,15 +337,15 @@ public function extract(FlowContext $context) : \Generator $this->assertSame( <<<'ASCIITABLE' -+------+--------+-----+---------+----------------------+-------+----------------------+----------------------+----------------------+----------------------+-------+ -| id | price | 100 | deleted | created-at | phase | array | items | tags | object | enum | 
-+------+--------+-----+---------+----------------------+-------+----------------------+----------------------+----------------------+----------------------+-------+ -| 1234 | 123.45 | 100 | false | 2020-07-13T15:00:00+ | null | [{"id":1,"status":"N | {"item-id":"1","name | [{"item-id":"1","nam | ArrayIterator Object | three | -| 1234 | 123.45 | 100 | false | 2020-07-13T15:00:00+ | null | [{"id":1,"status":"N | {"item-id":"1","name | [{"item-id":"1","nam | ArrayIterator Object | three | -| 1234 | 123.45 | 100 | false | 2020-07-13T15:00:00+ | null | [{"id":1,"status":"N | {"item-id":"1","name | [{"item-id":"1","nam | ArrayIterator Object | three | -| 1234 | 123.45 | 100 | false | 2020-07-13T15:00:00+ | null | [{"id":1,"status":"N | {"item-id":"1","name | [{"item-id":"1","nam | ArrayIterator Object | three | -| 1234 | 123.45 | 100 | false | 2020-07-13T15:00:00+ | null | [{"id":1,"status":"N | {"item-id":"1","name | [{"item-id":"1","nam | ArrayIterator Object | three | -+------+--------+-----+---------+----------------------+-------+----------------------+----------------------+----------------------+----------------------+-------+ ++------+--------+-----+---------+----------------------+-------+----------------------+----------------------+----------------------+----------------------+-------+----------------------+ +| id | price | 100 | deleted | created-at | phase | array | items | tags | object | enum | xml | ++------+--------+-----+---------+----------------------+-------+----------------------+----------------------+----------------------+----------------------+-------+----------------------+ +| 1234 | 123.45 | 100 | false | 2020-07-13T15:00:00+ | null | [{"id":1,"status":"N | {"item-id":"1","name | [{"item-id":"1","nam | ArrayIterator Object | three | assertSame( <<<'ASCIITABLE' 
-+------+--------+-----+---------+---------------------------+-------+-------------------------------------------------------+------------------------------+--------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+-------+ -| id | price | 100 | deleted | created-at | phase | array | items | tags | object | enum | -+------+--------+-----+---------+---------------------------+-------+-------------------------------------------------------+------------------------------+--------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+-------+ -| 1234 | 123.45 | 100 | false | 2020-07-13T15:00:00+00:00 | null | [{"id":1,"status":"NEW"},{"id":2,"status":"PENDING"}] | {"item-id":"1","name":"one"} | [{"item-id":"1","name":"one"},{"item-id":"2","name":"two"},{"item-id":"3","name":"three"}] | ArrayIterator Object( [storage:ArrayIterator:private] => Array ( [0] => 1 [1] => 2 [2] => 3 )) | three | -| 1234 | 123.45 | 100 | false | 2020-07-13T15:00:00+00:00 | null | [{"id":1,"status":"NEW"},{"id":2,"status":"PENDING"}] | {"item-id":"1","name":"one"} | [{"item-id":"1","name":"one"},{"item-id":"2","name":"two"},{"item-id":"3","name":"three"}] | ArrayIterator Object( [storage:ArrayIterator:private] => Array ( [0] => 1 [1] => 2 [2] => 3 )) | three | -| 1234 | 123.45 | 100 | false | 2020-07-13T15:00:00+00:00 | null | [{"id":1,"status":"NEW"},{"id":2,"status":"PENDING"}] | {"item-id":"1","name":"one"} | [{"item-id":"1","name":"one"},{"item-id":"2","name":"two"},{"item-id":"3","name":"three"}] | ArrayIterator Object( [storage:ArrayIterator:private] => Array ( [0] => 1 [1] => 2 [2] => 3 )) | three | -| 1234 | 123.45 | 100 | false | 2020-07-13T15:00:00+00:00 | null | [{"id":1,"status":"NEW"},{"id":2,"status":"PENDING"}] | 
{"item-id":"1","name":"one"} | [{"item-id":"1","name":"one"},{"item-id":"2","name":"two"},{"item-id":"3","name":"three"}] | ArrayIterator Object( [storage:ArrayIterator:private] => Array ( [0] => 1 [1] => 2 [2] => 3 )) | three | -| 1234 | 123.45 | 100 | false | 2020-07-13T15:00:00+00:00 | null | [{"id":1,"status":"NEW"},{"id":2,"status":"PENDING"}] | {"item-id":"1","name":"one"} | [{"item-id":"1","name":"one"},{"item-id":"2","name":"two"},{"item-id":"3","name":"three"}] | ArrayIterator Object( [storage:ArrayIterator:private] => Array ( [0] => 1 [1] => 2 [2] => 3 )) | three | -| 1234 | 123.45 | 100 | false | 2020-07-13T15:00:00+00:00 | null | [{"id":1,"status":"NEW"},{"id":2,"status":"PENDING"}] | {"item-id":"1","name":"one"} | [{"item-id":"1","name":"one"},{"item-id":"2","name":"two"},{"item-id":"3","name":"three"}] | ArrayIterator Object( [storage:ArrayIterator:private] => Array ( [0] => 1 [1] => 2 [2] => 3 )) | three || id | price | 100 | deleted | created-at | phase | array | items | tags | object | enum | xml | ++------+--------+-----+---------+---------------------------+-------+-------------------------------------------------------+------------------------------+--------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+-------+--------------------------------------------------------------------------+ +| 1234 | 123.45 | 100 | false | 2020-07-13T15:00:00+00:00 | null | [{"id":1,"status":"NEW"},{"id":2,"status":"PENDING"}] | {"item-id":"1","name":"one"} | [{"item-id":"1","name":"one"},{"item-id":"2","name":"two"},{"item-id":"3","name":"three"}] | ArrayIterator Object( [storage:ArrayIterator:private] => Array ( [0] => 1 [1] => 2 [2] => 3 )) | three | testbar | +| 1234 | 123.45 | 100 | false | 2020-07-13T15:00:00+00:00 | null | [{"id":1,"status":"NEW"},{"id":2,"status":"PENDING"}] | {"item-id":"1","name":"one"} | 
[{"item-id":"1","name":"one"},{"item-id":"2","name":"two"},{"item-id":"3","name":"three"}] | ArrayIterator Object( [storage:ArrayIterator:private] => Array ( [0] => 1 [1] => 2 [2] => 3 )) | three | testbar | +| 1234 | 123.45 | 100 | false | 2020-07-13T15:00:00+00:00 | null | [{"id":1,"status":"NEW"},{"id":2,"status":"PENDING"}] | {"item-id":"1","name":"one"} | [{"item-id":"1","name":"one"},{"item-id":"2","name":"two"},{"item-id":"3","name":"three"}] | ArrayIterator Object( [storage:ArrayIterator:private] => Array ( [0] => 1 [1] => 2 [2] => 3 )) | three | testbar | +| 1234 | 123.45 | 100 | false | 2020-07-13T15:00:00+00:00 | null | [{"id":1,"status":"NEW"},{"id":2,"status":"PENDING"}] | {"item-id":"1","name":"one"} | [{"item-id":"1","name":"one"},{"item-id":"2","name":"two"},{"item-id":"3","name":"three"}] | ArrayIterator Object( [storage:ArrayIterator:private] => Array ( [0] => 1 [1] => 2 [2] => 3 )) | three | testbar | +| 1234 | 123.45 | 100 | false | 2020-07-13T15:00:00+00:00 | null | [{"id":1,"status":"NEW"},{"id":2,"status":"PENDING"}] | {"item-id":"1","name":"one"} | [{"item-id":"1","name":"one"},{"item-id":"2","name":"two"},{"item-id":"3","name":"three"}] | ArrayIterator Object( [storage:ArrayIterator:private] => Array ( [0] => 1 [1] => 2 [2] => 3 )) | three | testbar | +| 1234 | 123.45 | 100 | false | 2020-07-13T15:00:00+00:00 | null | [{"id":1,"status":"NEW"},{"id":2,"status":"PENDING"}] | {"item-id":"1","name":"one"} | [{"item-id":"1","name":"one"},{"item-id":"2","name":"two"},{"item-id":"3","name":"three"}] | ArrayIterator Object( [storage:ArrayIterator:private] => Array ( [0] => 1 [1] => 2 [2] => 3 )) | three | testbar | 
++------+--------+-----+---------+---------------------------+-------+-------------------------------------------------------+------------------------------+--------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------+-------+--------------------------------------------------------------------------+ 6 rows ASCIITABLE, diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Entry/XMLEntryTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Entry/XMLEntryTest.php new file mode 100644 index 000000000..fe1cb3885 --- /dev/null +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Entry/XMLEntryTest.php @@ -0,0 +1,136 @@ +loadXML('123'); + $doc2 = new \DOMDocument(); + $doc2->loadXML('123'); + + yield 'equal names and equal simple xml documents' => [ + true, + new XMLEntry('name', $doc1), + new XMLEntry('name', $doc2), + ]; + + $doc1 = new \DOMDocument(); + $doc1->loadXML('123'); + $doc2 = new \DOMDocument(); + $doc2->loadXML('123'); + + yield 'equal names and equal simple xml documents with different order of attributes' => [ + true, + new XMLEntry('name', $doc1), + new XMLEntry('name', $doc2), + ]; + + $doc1 = new \DOMDocument(); + $doc1->loadXML('123'); + $doc2 = new \DOMDocument(); + $doc2->loadXML('123'); + + yield 'equal nodes but different attributes' => [ + false, + new XMLEntry('name', $doc1), + new XMLEntry('name', $doc2), + ]; + + $doc1 = new \DOMDocument(); + $doc1->loadXML('123'); + $doc2 = new \DOMDocument(); + $doc2->loadXML('23'); + + yield 'equal attributes but different nodes' => [ + false, + new XMLEntry('name', $doc1), + new XMLEntry('name', $doc2), + ]; + + $doc1 = new \DOMDocument(); + $doc1->loadXML('123'); + $doc2 = new \DOMDocument(); + + yield 'compare with empty document' => [ + false, + new XMLEntry('name', $doc1), + new XMLEntry('name', $doc2), + ]; + + $doc1 = new \DOMDocument(); + $doc2 = new \DOMDocument(); + + 
yield 'compare twp empty documents' => [ + true, + new XMLEntry('name', $doc1), + new XMLEntry('name', $doc2), + ]; + } + + /** + * The C14N() method in PHP's DOMDocument class does not provide an option to remove all whitespace between nodes; + * it's designed to produce a canonical form of the XML document according to the Canonical XML standard, + * which generally preserves whitespace within text nodes. + */ + public function test_canonicalization() : void + { + $doc = new \DOMDocument(); + $doc->loadXML('1'); + + $doc2 = new \DOMDocument(); + $doc2->loadXML(<<<'XML' + + 1 + +XML); + + $this->assertNotEquals( + Entry::xml('row', $doc), + Entry::xml('row', $doc2), + ); + } + + public function test_creating_entry_from_invalid_xml_string() : void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Given string "foo" is not valid XML'); + + new XMLEntry('name', 'foo'); + } + + public function test_creating_entry_from_valid_xml_string() : void + { + $entry = new XMLEntry('name', '123'); + + $this->assertSame('name', $entry->name()); + $this->assertSame("\n123\n", $entry->__toString()); + } + + public function test_creating_xml_entry_with_empty_dom_document() : void + { + $doc = new \DOMDocument(); + $entry = new XMLEntry('name', $doc); + + $this->assertSame('name', $entry->name()); + $this->assertSame($doc, $entry->value()); + $this->assertSame("\n", $entry->__toString()); + } + + /** + * @dataProvider is_equal_data_provider + */ + public function test_is_equal(bool $equals, XMLEntry $entry, XMLEntry $nextEntry) : void + { + $this->assertSame($equals, $entry->isEqual($nextEntry)); + } +} diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Factory/NativeEntryFactoryTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Factory/NativeEntryFactoryTest.php index 37e593524..29942f58c 100644 --- a/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Factory/NativeEntryFactoryTest.php +++ 
b/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Factory/NativeEntryFactoryTest.php @@ -268,4 +268,30 @@ public function test_string_with_schema() : void (new NativeEntryFactory(new Schema(Schema\Definition::string('e'))))->create('e', 'string') ); } + + public function test_xml_from_dom_document() : void + { + $doc = new \DOMDocument(); + $doc->loadXML($xml = '123'); + $this->assertEquals( + Entry::xml('e', $xml), + (new NativeEntryFactory())->create('e', $doc) + ); + } + + public function test_xml_from_string() : void + { + $this->assertEquals( + Entry::xml('e', $xml = '123'), + (new NativeEntryFactory())->create('e', $xml) + ); + } + + public function test_xml_string_with_xml_definition_provided() : void + { + $this->assertEquals( + Entry::xml('e', $xml = '123'), + (new NativeEntryFactory(new Schema(Schema\Definition::xml('e'))))->create('e', $xml) + ); + } } diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Reference/Expression/CastTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Reference/Expression/CastTest.php index bec7d9164..a73a97fbb 100644 --- a/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Reference/Expression/CastTest.php +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Reference/Expression/CastTest.php @@ -6,6 +6,7 @@ use function Flow\ETL\DSL\cast; use function Flow\ETL\DSL\ref; +use Flow\ETL\Exception\InvalidArgumentException; use Flow\ETL\Row; use Flow\ETL\Row\Factory\NativeEntryFactory; use PHPUnit\Framework\TestCase; @@ -14,6 +15,9 @@ final class CastTest extends TestCase { public static function cast_provider() : array { + $xml = new \DOMDocument(); + $xml->loadXML($xmlString = 'bar'); + return [ 'int' => ['1', 'int', 1], 'integer' => ['1', 'integer', 1], @@ -28,6 +32,8 @@ public static function cast_provider() : array 'null' => ['1', 'null', null], 'json' => [[1], 'json', '[1]'], 'json_pretty' => [[1], 'json_pretty', "[\n 1\n]"], + 'xml_to_array' => [$xml, 'array', ['root' => ['foo' => ['@attributes' => ['baz' => 'buz'], '@value' => 
'bar']]]], + 'string_to_xml' => [$xmlString, 'xml', $xml], ]; } @@ -45,4 +51,20 @@ public function test_cast(mixed $from, string $to, mixed $expected) : void cast(ref('value'), $to)->eval(Row::create((new NativeEntryFactory())->create('value', $from))) ); } + + public function test_casting_integer_to_xml() : void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Cannot cast integer to XML'); + + ref('value')->cast('xml')->eval(Row::create((new NativeEntryFactory())->create('value', 1))); + } + + public function test_casting_non_xml_string_to_xml() : void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Invalid XML string given: foo'); + + ref('value')->cast('xml')->eval(Row::create((new NativeEntryFactory())->create('value', 'foo'))); + } } diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Reference/Expression/DOMNodeAttributeTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Reference/Expression/DOMNodeAttributeTest.php new file mode 100644 index 000000000..218f35c61 --- /dev/null +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Reference/Expression/DOMNodeAttributeTest.php @@ -0,0 +1,32 @@ +loadXML('bar'); + + $this->assertEquals( + 'buz', + ref('value')->domNodeAttribute('baz')->eval(Row::create((new NativeEntryFactory())->create('value', $xml->documentElement->firstChild))) + ); + } + + public function test_extracting_non_existing_attribute_from_dom_node_entry() : void + { + $xml = new \DOMDocument(); + $xml->loadXML('bar'); + + $this->assertNull( + ref('value')->domNodeAttribute('bar')->eval(Row::create((new NativeEntryFactory())->create('value', $xml->documentElement->firstChild))) + ); + } +} diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Reference/Expression/DOMNodeValueTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Reference/Expression/DOMNodeValueTest.php new file mode 100644 index 000000000..f31a09ee1 --- /dev/null +++ 
b/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Reference/Expression/DOMNodeValueTest.php @@ -0,0 +1,33 @@ +loadXML('baz'); + + $this->assertEquals( + 'baz', + ref('value')->domNodeValue()->eval(Row::create((new NativeEntryFactory())->create('value', $xml->documentElement->firstChild))) + ); + } + + public function test_getting_simple_node_value() : void + { + $xml = new \DOMDocument(); + $xml->loadXML('bar'); + + $this->assertEquals( + 'bar', + ref('value')->domNodeValue()->eval(Row::create((new NativeEntryFactory())->create('value', $xml->documentElement->firstChild))) + ); + } +} diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Reference/Expression/XPathTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Reference/Expression/XPathTest.php new file mode 100644 index 000000000..bea74775b --- /dev/null +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Reference/Expression/XPathTest.php @@ -0,0 +1,56 @@ +loadXML('bar'); + + $this->assertEquals( + $xml->documentElement->firstChild, + ref('value')->xpath('/root/foo')->eval(Row::create((new NativeEntryFactory())->create('value', $xml))) + ); + } + + public function test_xpath_when_there_are_more_than_one_elements_under_given_path() : void + { + $xml = new \DOMDocument(); + $xml->loadXML('barbar'); + + $this->assertEquals( + [ + $xml->documentElement->firstChild, + $xml->documentElement->lastChild, + ], + ref('value')->xpath('/root/foo')->eval(Row::create((new NativeEntryFactory())->create('value', $xml))) + ); + } + + public function test_xpath_with_invalid_path_syntax() : void + { + $xml = new \DOMDocument(); + $xml->loadXML('bar'); + + $this->assertNull( + ref('value')->xpath('/root/foo/@')->eval(Row::create((new NativeEntryFactory())->create('value', $xml))) + ); + } + + public function test_xpath_with_non_existing_path() : void + { + $xml = new \DOMDocument(); + $xml->loadXML('bar'); + + $this->assertNull( + ref('value')->xpath('/root/bar')->eval(Row::create((new NativeEntryFactory())->create('value', $xml))) 
+ ); + } +} diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Schema/Formatter/ASCIISchemaFormatterTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Schema/Formatter/ASCIISchemaFormatterTest.php index a63de95b0..af4a14d70 100644 --- a/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Schema/Formatter/ASCIISchemaFormatterTest.php +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Schema/Formatter/ASCIISchemaFormatterTest.php @@ -18,7 +18,8 @@ public function test_format_schema() : void Schema\Definition::union('number', [IntegerEntry::class, FloatEntry::class]), Schema\Definition::string('name', nullable: true), Schema\Definition::array('tags', nullable: false), - Schema\Definition::boolean('active', false) + Schema\Definition::boolean('active', false), + Schema\Definition::xml('xml', false) ); $this->assertSame( @@ -28,6 +29,7 @@ public function test_format_schema() : void |-- name: [Flow\ETL\Row\Entry\StringEntry, Flow\ETL\Row\Entry\NullEntry] (nullable = true) |-- number: [Flow\ETL\Row\Entry\IntegerEntry, Flow\ETL\Row\Entry\FloatEntry] (nullable = false) |-- tags: Flow\ETL\Row\Entry\ArrayEntry (nullable = false) +|-- xml: Flow\ETL\Row\Entry\XMLEntry (nullable = false) SCHEMA, (new ASCIISchemaFormatter())->format($schema) diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/Transformer/EntryExpressionEvalTransformerTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/Transformer/EntryExpressionEvalTransformerTest.php index bc1002c1a..b6f9223c5 100644 --- a/src/core/etl/tests/Flow/ETL/Tests/Unit/Transformer/EntryExpressionEvalTransformerTest.php +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/Transformer/EntryExpressionEvalTransformerTest.php @@ -81,4 +81,26 @@ public function test_plus_expression_on_non_existing_rows() : void ->toArray() ); } + + public function test_xml_xpath_expression_when_there_is_more_than_one_node_under_given_path() : void + { + $xml = 'barbaz'; + $document = new \DOMDocument(); + $document->loadXML($xml); + $xpath = new \DOMXPath($document); + + 
$this->assertEquals( + Entry::list_of_objects('xpath', \DOMNode::class, [ + $xpath->query('/root/foo')->item(0), + $xpath->query('/root/foo')->item(1), + ]), + (new EntryExpressionEvalTransformer('xpath', ref('xml')->xpath('/root/foo'))) + ->transform( + new Rows(Row::create(Entry::xml('xml', $xml))), + new FlowContext(Config::default()) + ) + ->first() + ->get(ref('xpath')) + ); + } } diff --git a/tools/phpstan/composer.lock b/tools/phpstan/composer.lock index d19e27b3b..83a1eeedd 100644 --- a/tools/phpstan/composer.lock +++ b/tools/phpstan/composer.lock @@ -9,16 +9,16 @@ "packages-dev": [ { "name": "phpstan/phpstan", - "version": "1.10.32", + "version": "1.10.33", "source": { "type": "git", "url": "https://github.com/phpstan/phpstan.git", - "reference": "c47e47d3ab03137c0e121e77c4d2cb58672f6d44" + "reference": "03b1cf9f814ba0863c4e9affea49a4d1ed9a2ed1" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/phpstan/phpstan/zipball/c47e47d3ab03137c0e121e77c4d2cb58672f6d44", - "reference": "c47e47d3ab03137c0e121e77c4d2cb58672f6d44", + "url": "https://api.github.com/repos/phpstan/phpstan/zipball/03b1cf9f814ba0863c4e9affea49a4d1ed9a2ed1", + "reference": "03b1cf9f814ba0863c4e9affea49a4d1ed9a2ed1", "shasum": "" }, "require": { @@ -67,7 +67,7 @@ "type": "tidelift" } ], - "time": "2023-08-24T21:54:50+00:00" + "time": "2023-09-04T12:20:53+00:00" } ], "aliases": [],