diff --git a/composer.json b/composer.json index 8ba23b716..d4f2a3b2e 100644 --- a/composer.json +++ b/composer.json @@ -36,6 +36,7 @@ "webmozart/glob": "^3.0 || ^4.0" }, "require-dev": { + "ext-xmlwriter": "*", "aeon-php/calendar": "^1.0", "fakerphp/faker": "^1.23", "fig/log-test": "^1.1", diff --git a/composer.lock b/composer.lock index e8d9d8376..f14c669d6 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "1d0b7c3ac2049a5f683028381583325f", + "content-hash": "659bb7868bd0d67111542fafa31303b7", "packages": [ { "name": "aeon-php/calendar", @@ -5462,6 +5462,8 @@ "ext-zlib": "*", "composer-runtime-api": "^2.1" }, - "platform-dev": [], + "platform-dev": { + "ext-xmlwriter": "*" + }, "plugin-api-version": "2.6.0" } diff --git a/src/adapter/etl-adapter-avro/src/Flow/ETL/Adapter/Avro/FlixTech/AvroLoader.php b/src/adapter/etl-adapter-avro/src/Flow/ETL/Adapter/Avro/FlixTech/AvroLoader.php index cdb57fbe5..c5e8049ce 100644 --- a/src/adapter/etl-adapter-avro/src/Flow/ETL/Adapter/Avro/FlixTech/AvroLoader.php +++ b/src/adapter/etl-adapter-avro/src/Flow/ETL/Adapter/Avro/FlixTech/AvroLoader.php @@ -64,6 +64,7 @@ public function load(Rows $rows, FlowContext $context) : void Row\Entry\DateTimeEntry::class => (int) $entry->value()->format('Uu'), Row\Entry\UuidEntry::class => $entry->value()->toString(), Row\Entry\EnumEntry::class => $entry->value()->name, + Row\Entry\XMLEntry::class => $entry->toString(), default => $entry->value(), }; } diff --git a/src/adapter/etl-adapter-parquet/src/Flow/ETL/Adapter/Parquet/RowsNormalizer.php b/src/adapter/etl-adapter-parquet/src/Flow/ETL/Adapter/Parquet/RowsNormalizer.php index 2b320bba9..ed9bf20fa 100644 --- a/src/adapter/etl-adapter-parquet/src/Flow/ETL/Adapter/Parquet/RowsNormalizer.php +++ b/src/adapter/etl-adapter-parquet/src/Flow/ETL/Adapter/Parquet/RowsNormalizer.php @@ -3,6 +3,7 @@ namespace Flow\ETL\Adapter\Parquet; use Flow\ETL\Row\Entry\UuidEntry; +use Flow\ETL\Row\Entry\XMLEntry; use Flow\ETL\Rows; final class RowsNormalizer @@ -20,6 +21,7 @@ public function normalize(Rows $rows) : array foreach ($row->entries() as $entry) { $columns[$entry->name()] = match ($entry::class) { UuidEntry::class => $entry->value()->toString(), + XMLEntry::class => $entry->toString(), default => $entry->value(), }; } diff --git a/src/adapter/etl-adapter-xml/composer.json b/src/adapter/etl-adapter-xml/composer.json index 782b06e9a..caa4ff7a4 100644 --- a/src/adapter/etl-adapter-xml/composer.json +++ b/src/adapter/etl-adapter-xml/composer.json @@ -16,6 +16,9 @@ "ext-xmlreader": "*", "flow-php/etl": "^0.5.0 || 1.x-dev" }, + "require-dev": { + "ext-xmlwriter": "*" + }, "config": { "optimize-autoloader": true, "sort-packages": true diff --git a/src/adapter/etl-adapter-xml/src/Flow/ETL/Adapter/XML/Loader/DomDocumentLoader.php b/src/adapter/etl-adapter-xml/src/Flow/ETL/Adapter/XML/Loader/DomDocumentLoader.php new file mode 100644 index 000000000..e761e9abc --- /dev/null +++ b/src/adapter/etl-adapter-xml/src/Flow/ETL/Adapter/XML/Loader/DomDocumentLoader.php @@ -0,0 +1,83 @@ +path->isPattern()) { + throw new \InvalidArgumentException("XMLLoader path can't be pattern, given: " . $this->path->path()); + } + } + + public function closure(FlowContext $context) : void + { + foreach ($context->streams() as $stream) { + if ($stream->path()->extension() === 'xml') { + \fwrite($stream->resource(), "collectionName}>"); + } + } + + $context->streams()->close($this->path); + } + + public function destination() : Path + { + return $this->path; + } + + public function load(Rows $rows, FlowContext $context) : void + { + $streams = $context->streams(); + + if (!$streams->isOpen($this->path, $rows->partitions()->toArray())) { + $stream = $streams->open($this->path, 'xml', $context->appendSafe(), $rows->partitions()->toArray()); + + \fwrite($stream->resource(), (new \DOMDocument('1.0', 'utf-8'))->saveXML() . "<{$this->collectionName}>"); + } else { + $stream = $streams->open($this->path, 'xml', $context->appendSafe(), $rows->partitions()->toArray()); + } + + $this->writeXML($rows, $stream); + } + + /** + * @throws RuntimeException + * @throws \DOMException + */ + private function writeXML(Rows $rows, FileStream $stream) : void + { + foreach ($this->normalizer->normalize($rows) as $row) { + $dom = new \DOMDocument('1.0', 'utf-8'); + + $rowElement = $dom->createElement($this->collectionElementName); + + foreach ($row as $name => $value) { + $rowItem = $dom->createElement($name); + $rowItem->appendChild($dom->createTextNode($value)); + + $rowElement->appendChild($rowItem); + } + + $dom->appendChild($rowElement); + + \fwrite($stream->resource(), $dom->saveXML($dom->documentElement) ?: ''); + } + } +} diff --git a/src/adapter/etl-adapter-xml/src/Flow/ETL/Adapter/XML/Loader/XMLWriterLoader.php b/src/adapter/etl-adapter-xml/src/Flow/ETL/Adapter/XML/Loader/XMLWriterLoader.php new file mode 100644 index 000000000..8001702ed --- /dev/null +++ b/src/adapter/etl-adapter-xml/src/Flow/ETL/Adapter/XML/Loader/XMLWriterLoader.php @@ -0,0 +1,87 @@ + + */ + private array $writers = []; + + public function __construct( + private readonly Path $path, + private readonly RowsNormalizer $normalizer = new RowsNormalizer(), + private readonly string $collectionName = 'rows', + private readonly string $collectionElementName = 'row', + ) { + if ($this->path->isPattern()) { + throw new \InvalidArgumentException("XMLLoader path can't be pattern, given: " . $this->path->path()); + } + } + + public function closure(FlowContext $context) : void + { + foreach ($context->streams() as $stream) { + if ($stream->path()->extension() === 'xml') { + $this->writers[$stream->path()->path()]->endDocument(); + $this->writers[$stream->path()->path()]->flush(); + } + } + + $context->streams()->close($this->path); + } + + public function destination() : Path + { + return $this->path; + } + + public function load(Rows $rows, FlowContext $context) : void + { + $this->write($rows, $rows->partitions()->toArray(), $context); + } + + /** + * @param array $partitions + */ + private function write(Rows $rows, array $partitions, FlowContext $context) : void + { + $streams = $context->streams(); + + $stream = $streams->open($this->path, 'xml', $context->appendSafe(), $partitions); + + if (!\array_key_exists($stream->path()->path(), $this->writers)) { + $writer = new \XMLWriter(); + $writer->openUri($stream->path()->path()); + $writer->startDocument('1.0', 'UTF-8'); + $writer->startElement($this->collectionName); + + $this->writers[$stream->path()->path()] = $writer; + } else { + $writer = $this->writers[$stream->path()->path()]; + } + + foreach ($this->normalizer->normalize($rows) as $row) { + $writer->startElement($this->collectionElementName); + + foreach ($row as $name => $value) { + $writer->writeElement($name, $value); + } + + $writer->endElement(); + } + + $writer->flush(); + } +} diff --git a/src/adapter/etl-adapter-xml/src/Flow/ETL/Adapter/XML/RowsNormalizer.php b/src/adapter/etl-adapter-xml/src/Flow/ETL/Adapter/XML/RowsNormalizer.php new file mode 100644 index 000000000..cec5c85fe --- /dev/null +++ b/src/adapter/etl-adapter-xml/src/Flow/ETL/Adapter/XML/RowsNormalizer.php @@ -0,0 +1,42 @@ +> + */ + public function normalize(Rows $rows) : \Generator + { + foreach ($rows as $row) { + $columns = []; + + /** @psalm-suppress InvalidCast */ + foreach ($row->entries() as $entry) { + $columns[$entry->name()] = match ($entry::class) { + ArrayEntry::class, + ListEntry::class, + MapEntry::class, + StructureEntry::class => throw new RuntimeException('Entry of type ' . $entry::class . ' cannot be normalized to XML values.'), + ObjectEntry::class => match ($entry->value() instanceof \Stringable) { + false => throw new RuntimeException('Entry of type ' . \get_class($entry->value()) . ' cannot be normalized to XML values.'), + /** @phpstan-ignore-next-line */ + true => (string) $entry->value(), + }, + default => $entry->toString(), + }; + } + + yield $columns; + } + } +} diff --git a/src/adapter/etl-adapter-xml/src/Flow/ETL/Adapter/XML/functions.php b/src/adapter/etl-adapter-xml/src/Flow/ETL/Adapter/XML/functions.php index 914d5784f..004a13e8e 100644 --- a/src/adapter/etl-adapter-xml/src/Flow/ETL/Adapter/XML/functions.php +++ b/src/adapter/etl-adapter-xml/src/Flow/ETL/Adapter/XML/functions.php @@ -5,14 +5,14 @@ namespace Flow\ETL\Adapter\XML; use function Flow\ETL\DSL\from_all; +use Flow\ETL\Adapter\XML\Loader\DomDocumentLoader; +use Flow\ETL\Adapter\XML\Loader\XMLWriterLoader; use Flow\ETL\Extractor; use Flow\ETL\Filesystem\Path; +use Flow\ETL\Loader; /** * @param array|Path|string $path - * @param string $xml_node_path - * - * @return Extractor */ function from_xml( string|Path|array $path, @@ -37,3 +37,12 @@ function from_xml( $xml_node_path ); } + +function to_xml(string|Path $path) : Loader +{ + if (\class_exists(\XMLWriter::class)) { + return new XMLWriterLoader(\is_string($path) ? Path::realpath($path) : $path); + } + + return new DomDocumentLoader(\is_string($path) ? Path::realpath($path) : $path); +} diff --git a/src/adapter/etl-adapter-xml/tests/Flow/ETL/Adapter/XML/Tests/Benchmark/Loader/DomDocumentLoaderBench.php b/src/adapter/etl-adapter-xml/tests/Flow/ETL/Adapter/XML/Tests/Benchmark/Loader/DomDocumentLoaderBench.php new file mode 100644 index 000000000..cb69cd230 --- /dev/null +++ b/src/adapter/etl-adapter-xml/tests/Flow/ETL/Adapter/XML/Tests/Benchmark/Loader/DomDocumentLoaderBench.php @@ -0,0 +1,47 @@ +context = new FlowContext(Config::default()); + $this->outputPath = \tempnam(\sys_get_temp_dir(), 'etl_xml_loader_bench') . '.xml'; + $this->rows = new Rows(); + + foreach (from_xml(__DIR__ . '/../Fixtures/flow_orders.xml')->extract($this->context) as $rows) { + $this->rows = $this->rows->merge($rows); + } + } + + public function __destruct() + { + if (!\file_exists($this->outputPath)) { + throw new \RuntimeException("Benchmark failed, \"{$this->outputPath}\" doesn't exist"); + } + + \unlink($this->outputPath); + } + + public function bench_load_10k() : void + { + $loader = new DomDocumentLoader(Path::realpath($this->outputPath)); + $loader->load($this->rows, $this->context); + } +} diff --git a/src/adapter/etl-adapter-xml/tests/Flow/ETL/Adapter/XML/Tests/Benchmark/Loader/XMLWriterLoaderBench.php b/src/adapter/etl-adapter-xml/tests/Flow/ETL/Adapter/XML/Tests/Benchmark/Loader/XMLWriterLoaderBench.php new file mode 100644 index 000000000..b5da14300 --- /dev/null +++ b/src/adapter/etl-adapter-xml/tests/Flow/ETL/Adapter/XML/Tests/Benchmark/Loader/XMLWriterLoaderBench.php @@ -0,0 +1,47 @@ +context = new FlowContext(Config::default()); + $this->outputPath = \tempnam(\sys_get_temp_dir(), 'etl_xml_loader_bench') . '.xml'; + $this->rows = new Rows(); + + foreach (from_xml(__DIR__ . '/../Fixtures/flow_orders.xml')->extract($this->context) as $rows) { + $this->rows = $this->rows->merge($rows); + } + } + + public function __destruct() + { + if (!\file_exists($this->outputPath)) { + throw new \RuntimeException("Benchmark failed, \"{$this->outputPath}\" doesn't exist"); + } + + \unlink($this->outputPath); + } + + public function bench_load_10k() : void + { + $loader = new XMLWriterLoader(Path::realpath($this->outputPath)); + $loader->load($this->rows, $this->context); + } +} diff --git a/src/adapter/etl-adapter-xml/tests/Flow/ETL/Adapter/XML/Tests/Fixtures/simple_items.xml b/src/adapter/etl-adapter-xml/tests/Flow/ETL/Adapter/XML/Tests/Fixtures/simple_items.xml index d865214b4..45dfb4c51 100644 --- a/src/adapter/etl-adapter-xml/tests/Flow/ETL/Adapter/XML/Tests/Fixtures/simple_items.xml +++ b/src/adapter/etl-adapter-xml/tests/Flow/ETL/Adapter/XML/Tests/Fixtures/simple_items.xml @@ -16,4 +16,4 @@ 5 - \ No newline at end of file + diff --git a/src/adapter/etl-adapter-xml/tests/Flow/ETL/Adapter/XML/Tests/Integration/Loader/DomDocumentLoaderTest.php b/src/adapter/etl-adapter-xml/tests/Flow/ETL/Adapter/XML/Tests/Integration/Loader/DomDocumentLoaderTest.php new file mode 100644 index 000000000..8d7230317 --- /dev/null +++ b/src/adapter/etl-adapter-xml/tests/Flow/ETL/Adapter/XML/Tests/Integration/Loader/DomDocumentLoaderTest.php @@ -0,0 +1,343 @@ +read(from_array([ + ['id' => 1, 'partition' => 'a'], + ['id' => 2, 'partition' => 'a'], + ['id' => 3, 'partition' => 'a'], + ['id' => 4, 'partition' => 'b'], + ['id' => 5, 'partition' => 'b'], + ])) + ->partitionBy(ref('partition')) + ->mode(exception_if_exists()) + ->write(new DomDocumentLoader(Path::realpath($path))) + ->run(); + + $this->expectExceptionMessage('Destination path "file:/' . $path . '/partition=b" already exists, please change path to different or set different SaveMode'); + + df() + ->read(from_array([ + ['id' => 8, 'partition' => 'b'], + ['id' => 10, 'partition' => 'b'], + ])) + ->partitionBy(ref('partition')) + ->mode(SaveMode::ExceptionIfExists) + ->write(new DomDocumentLoader(Path::realpath($path))) + ->run(); + } + + public function test_save_unsupported_entry_types() : void + { + $path = \sys_get_temp_dir() . '/' . \uniqid('flow_php_etl_xml_loader_exception_mode', true); + + if (\file_exists($path)) { + \unlink($path); + } + + $this->expectExceptionMessage('Entry of type Flow\ETL\Row\Entry\ListEntry cannot be normalized to XML values.'); + + df() + ->read(from_array([ + ['id' => 1, 'list' => ['a', 'b']], + ['id' => 2, 'list' => ['c', 'd']], + ])) + ->mode(exception_if_exists()) + ->write(new DomDocumentLoader(Path::realpath($path))) + ->run(); + } + + public function test_save_with_ignore_mode() : void + { + $path = \sys_get_temp_dir() . '/' . \uniqid('flow_php_etl_xml_loader_ignore_mode', true) . '.xml'; + + if (\file_exists($path)) { + \unlink($path); + } + + df() + ->read(from_array([ + ['id' => 1], + ['id' => 2], + ['id' => 3], + ])) + ->saveMode(ignore()) + ->write(new DomDocumentLoader(Path::realpath($path))) + ->run(); + + df() + ->read(from_array([ + ['id' => 4], + ['id' => 5], + ['id' => 6], + ])) + ->mode(SaveMode::Ignore) + ->write(new DomDocumentLoader(Path::realpath($path))) + ->run(); + + $this->assertEquals( + new Rows(Row::create(xml_entry('node', \file_get_contents($path)))), + df() + ->read(from_xml($path, 'rows')) + ->sortBy(ref('id')->asc()) + ->fetch() + ); + } + + public function test_xml_loader() : void + { + $stream = \sys_get_temp_dir() . '/' . \uniqid('flow_php_etl_xml_loader', true) . '.xml'; + + (new Flow()) + ->process( + new Rows( + ...\array_map( + fn (int $i) : Row => Row::create( + new Row\Entry\IntegerEntry('id', $i), + new Row\Entry\StringEntry('name', 'name_' . $i), + new Row\Entry\StringEntry('special', 'one, two & three') + ), + \range(0, 5) + ) + ) + ) + ->sortBy(ref('id')->asc()) + ->write(new DomDocumentLoader(Path::realpath($stream))) + ->run(); + + $this->assertXmlStringEqualsXmlString( + <<<'XML' + + + + 0 + name_0 + one, two & three + + + 1 + name_1 + one, two & three + + + 2 + name_2 + one, two & three + + + 3 + name_3 + one, two & three + + + 4 + name_4 + one, two & three + + + 5 + name_5 + one, two & three + + +XML, + \file_get_contents($stream) + ); + + if (\file_exists($stream)) { + \unlink($stream); + } + } + + public function test_xml_loader_loading_empty_string() : void + { + $stream = \sys_get_temp_dir() . '/' . \uniqid('flow_php_etl_xml_loader', true) . '.xml'; + + $loader = new DomDocumentLoader(Path::realpath($stream)); + $loader->load(new Rows(), $context = new FlowContext(Config::default())); + $loader->closure($context); + + $this->assertXmlStringEqualsXmlString( + <<<'XML' + + +XML, + \file_get_contents($stream) + ); + + if (\file_exists($stream)) { + \unlink($stream); + } + } + + public function test_xml_loader_with_a_thread_safe_and_overwrite() : void + { + $stream = \sys_get_temp_dir() . '/' . \uniqid('flow_php_etl_xml_loader', true) . '.xml'; + + $loader = new DomDocumentLoader(Path::realpath($stream)); + $loader->load( + new Rows( + ...\array_map( + fn (int $i) : Row => Row::create( + new Row\Entry\IntegerEntry('id', $i), + new Row\Entry\StringEntry('name', 'name_' . $i) + ), + \range(0, 5) + ) + ), + ($context = new FlowContext(Config::default()))->setAppendSafe() + ); + + $loader->load( + new Rows( + ...\array_map( + fn (int $i) : Row => Row::create( + new Row\Entry\IntegerEntry('id', $i), + new Row\Entry\StringEntry('name', 'name_' . $i) + ), + \range(6, 10) + ) + ), + $context = $context->setAppendSafe() + ); + + $loader->closure($context); + + $files = \array_values(\array_diff(\scandir($stream), ['..', '.'])); + + $this->assertXmlStringEqualsXmlString( + <<<'XML' + + + + 0 + name_0 + + + 1 + name_1 + + + 2 + name_2 + + + 3 + name_3 + + + 4 + name_4 + + + 5 + name_5 + + + 6 + name_6 + + + 7 + name_7 + + + 8 + name_8 + + + 9 + name_9 + + + 10 + name_10 + + +XML, + \file_get_contents($stream . DIRECTORY_SEPARATOR . $files[0]) + ); + + if (\file_exists($stream . DIRECTORY_SEPARATOR . $files[0])) { + \unlink($stream . DIRECTORY_SEPARATOR . $files[0]); + } + } + + public function test_xml_loader_with_partitioning() : void + { + $path = \sys_get_temp_dir() . '/' . \str_replace('.', '', \uniqid('partitioned_', true)); + + (new Flow()) + ->process( + new Rows( + Row::create(int_entry('id', 1), int_entry('group', 1)), + Row::create(int_entry('id', 2), int_entry('group', 1)), + Row::create(int_entry('id', 3), int_entry('group', 2)), + Row::create(int_entry('id', 4), int_entry('group', 2)), + ) + ) + ->partitionBy('group') + ->load(new DomDocumentLoader(Path::realpath($path), collectionName: 'items', collectionElementName: 'item')) + ->run(); + + $partitions = \array_values(\array_diff(\scandir($path), ['..', '.'])); + + $this->assertSame( + [ + 'group=1', + 'group=2', + ], + $partitions + ); + + $group1 = \array_values(\array_diff(\scandir($path . DIRECTORY_SEPARATOR . 'group=1'), ['..', '.']))[0]; + $group2 = \array_values(\array_diff(\scandir($path . DIRECTORY_SEPARATOR . 'group=2'), ['..', '.']))[0]; + + $this->assertXmlStringEqualsXmlString( + <<<'XML' + +1121 +XML, + \file_get_contents($path . DIRECTORY_SEPARATOR . 'group=1' . DIRECTORY_SEPARATOR . $group1) + ); + + $this->assertXmlStringEqualsXmlString( + <<<'XML' + +3242 +XML, + \file_get_contents($path . DIRECTORY_SEPARATOR . 'group=2' . DIRECTORY_SEPARATOR . $group2) + ); + } +} diff --git a/src/adapter/etl-adapter-xml/tests/Flow/ETL/Adapter/XML/Tests/Integration/Loader/XMLWriterLoaderTest.php b/src/adapter/etl-adapter-xml/tests/Flow/ETL/Adapter/XML/Tests/Integration/Loader/XMLWriterLoaderTest.php new file mode 100644 index 000000000..37b2020b2 --- /dev/null +++ b/src/adapter/etl-adapter-xml/tests/Flow/ETL/Adapter/XML/Tests/Integration/Loader/XMLWriterLoaderTest.php @@ -0,0 +1,367 @@ +read(from_array([ + ['id' => 1, 'partition' => 'a'], + ['id' => 2, 'partition' => 'a'], + ['id' => 3, 'partition' => 'a'], + ['id' => 4, 'partition' => 'b'], + ['id' => 5, 'partition' => 'b'], + ])) + ->partitionBy(ref('partition')) + ->mode(exception_if_exists()) + ->write(new XMLWriterLoader(Path::realpath($path))) + ->run(); + + $this->expectExceptionMessage('Destination path "file:/' . $path . '/partition=b" already exists, please change path to different or set different SaveMode'); + + df() + ->read(from_array([ + ['id' => 8, 'partition' => 'b'], + ['id' => 10, 'partition' => 'b'], + ])) + ->partitionBy(ref('partition')) + ->mode(SaveMode::ExceptionIfExists) + ->write(new XMLWriterLoader(Path::realpath($path))) + ->run(); + } + + public function test_save_unsupported_entry_types() : void + { + $path = \sys_get_temp_dir() . '/' . \uniqid('flow_php_etl_xml_loader_exception_mode', true); + + if (\file_exists($path)) { + \unlink($path); + } + + $this->expectExceptionMessage('Entry of type Flow\ETL\Row\Entry\ListEntry cannot be normalized to XML values.'); + + df() + ->read(from_array([ + ['id' => 1, 'list' => ['a', 'b']], + ['id' => 2, 'list' => ['c', 'd']], + ])) + ->mode(exception_if_exists()) + ->write(new XMLWriterLoader(Path::realpath($path))) + ->run(); + } + + public function test_save_with_ignore_mode() : void + { + $path = \sys_get_temp_dir() . '/' . \uniqid('flow_php_etl_xml_loader_ignore_mode', true) . '.xml'; + + if (\file_exists($path)) { + \unlink($path); + } + + df() + ->read(from_array([ + ['id' => 1], + ['id' => 2], + ['id' => 3], + ])) + ->saveMode(ignore()) + ->write(new XMLWriterLoader(Path::realpath($path))) + ->run(); + + df() + ->read(from_array([ + ['id' => 4], + ['id' => 5], + ['id' => 6], + ])) + ->mode(SaveMode::Ignore) + ->write(new XMLWriterLoader(Path::realpath($path))) + ->run(); + + $this->assertEquals( + new Rows(Row::create(xml_entry('node', \file_get_contents($path)))), + df() + ->read(from_xml($path, 'rows')) + ->sortBy(ref('id')->asc()) + ->fetch() + ); + } + + public function test_xml_loader() : void + { + $stream = \sys_get_temp_dir() . '/' . \uniqid('flow_php_etl_xml_loader', true) . '.xml'; + + (new Flow()) + ->process( + new Rows( + ...\array_map( + fn (int $i) : Row => Row::create( + new Row\Entry\IntegerEntry('id', $i), + new Row\Entry\StringEntry('name', 'name_' . $i), + new Row\Entry\StringEntry('special', 'one, two & three') + ), + \range(0, 10) + ) + ) + ) + ->sortBy(ref('id')->asc()) + ->write(new XMLWriterLoader(Path::realpath($stream))) + ->run(); + + $this->assertXmlStringEqualsXmlString( + <<<'XML' + + + + 0 + name_0 + one, two & three + + + 1 + name_1 + one, two & three + + + 2 + name_2 + one, two & three + + + 3 + name_3 + one, two & three + + + 4 + name_4 + one, two & three + + + 5 + name_5 + one, two & three + + + 6 + name_6 + one, two & three + + + 7 + name_7 + one, two & three + + + 8 + name_8 + one, two & three + + + 9 + name_9 + one, two & three + + + 10 + name_10 + one, two & three + + +XML, + \file_get_contents($stream) + ); + + if (\file_exists($stream)) { + \unlink($stream); + } + } + + public function test_xml_loader_loading_empty_string() : void + { + $stream = \sys_get_temp_dir() . '/' . \uniqid('flow_php_etl_xml_loader', true) . '.xml'; + + $loader = new XMLWriterLoader(Path::realpath($stream)); + $loader->load(new Rows(), $context = new FlowContext(Config::default())); + $loader->closure($context); + + $this->assertXmlStringEqualsXmlString( + <<<'XML' + + +XML, + \file_get_contents($stream) + ); + + if (\file_exists($stream)) { + \unlink($stream); + } + } + + public function test_xml_loader_with_a_thread_safe_and_overwrite() : void + { + $stream = \sys_get_temp_dir() . '/' . \uniqid('flow_php_etl_xml_loader', true) . '.xml'; + + $loader = new XMLWriterLoader(Path::realpath($stream)); + $loader->load( + new Rows( + ...\array_map( + fn (int $i) : Row => Row::create( + new Row\Entry\IntegerEntry('id', $i), + new Row\Entry\StringEntry('name', 'name_' . $i) + ), + \range(0, 5) + ) + ), + ($context = new FlowContext(Config::default()))->setAppendSafe() + ); + + $loader->load( + new Rows( + ...\array_map( + fn (int $i) : Row => Row::create( + new Row\Entry\IntegerEntry('id', $i), + new Row\Entry\StringEntry('name', 'name_' . $i) + ), + \range(6, 10) + ) + ), + $context = $context->setAppendSafe() + ); + + $loader->closure($context); + + $files = \array_values(\array_diff(\scandir($stream), ['..', '.'])); + + $this->assertXmlStringEqualsXmlString( + <<<'XML' + + + + 0 + name_0 + + + 1 + name_1 + + + 2 + name_2 + + + 3 + name_3 + + + 4 + name_4 + + + 5 + name_5 + + + 6 + name_6 + + + 7 + name_7 + + + 8 + name_8 + + + 9 + name_9 + + + 10 + name_10 + + +XML, + \file_get_contents($stream . DIRECTORY_SEPARATOR . $files[0]) + ); + + if (\file_exists($stream . DIRECTORY_SEPARATOR . $files[0])) { + \unlink($stream . DIRECTORY_SEPARATOR . $files[0]); + } + } + + public function test_xml_loader_with_partitioning() : void + { + $path = \sys_get_temp_dir() . '/' . \str_replace('.', '', \uniqid('partitioned_', true)); + + (new Flow()) + ->process( + new Rows( + Row::create(int_entry('id', 1), int_entry('group', 1)), + Row::create(int_entry('id', 2), int_entry('group', 1)), + Row::create(int_entry('id', 3), int_entry('group', 2)), + Row::create(int_entry('id', 4), int_entry('group', 2)), + ) + ) + ->partitionBy('group') + ->load(new XMLWriterLoader(Path::realpath($path), collectionName: 'items', collectionElementName: 'item')) + ->run(); + + $partitions = \array_values(\array_diff(\scandir($path), ['..', '.'])); + + $this->assertSame( + [ + 'group=1', + 'group=2', + ], + $partitions + ); + + $group1 = \array_values(\array_diff(\scandir($path . DIRECTORY_SEPARATOR . 'group=1'), ['..', '.']))[0]; + $group2 = \array_values(\array_diff(\scandir($path . DIRECTORY_SEPARATOR . 'group=2'), ['..', '.']))[0]; + + $this->assertXmlStringEqualsXmlString( + <<<'XML' + +1121 +XML, + \file_get_contents($path . DIRECTORY_SEPARATOR . 'group=1' . DIRECTORY_SEPARATOR . $group1) + ); + $this->assertXmlStringEqualsXmlString( + <<<'XML' + +3242 +XML, + \file_get_contents($path . DIRECTORY_SEPARATOR . 'group=2' . DIRECTORY_SEPARATOR . $group2) + ); + } +} diff --git a/src/core/etl/src/Flow/ETL/Row/Entry/XMLEntry.php b/src/core/etl/src/Flow/ETL/Row/Entry/XMLEntry.php index cbaed37b5..5289e98fc 100644 --- a/src/core/etl/src/Flow/ETL/Row/Entry/XMLEntry.php +++ b/src/core/etl/src/Flow/ETL/Row/Entry/XMLEntry.php @@ -43,15 +43,14 @@ public function __serialize() : array return [ 'name' => $this->name, /** @phpstan-ignore-next-line */ - 'value' => \base64_encode(\gzcompress($this->value->saveXML())), + 'value' => \base64_encode(\gzcompress($this->toString())), 'type' => $this->type, ]; } public function __toString() : string { - /** @phpstan-ignore-next-line */ - return $this->value->saveXML(); + return $this->toString(); } public function __unserialize(array $data) : void @@ -119,7 +118,7 @@ public function rename(string $name) : Entry public function toString() : string { /** @phpstan-ignore-next-line */ - return $this->value->saveXML(); + return $this->value->saveXML($this->value->documentElement); } public function type() : Type diff --git a/src/core/etl/tests/Flow/ETL/Tests/Integration/DataFrame/DisplayTest.php b/src/core/etl/tests/Flow/ETL/Tests/Integration/DataFrame/DisplayTest.php index ed1d2c6a4..45531e0df 100644 --- a/src/core/etl/tests/Flow/ETL/Tests/Integration/DataFrame/DisplayTest.php +++ b/src/core/etl/tests/Flow/ETL/Tests/Integration/DataFrame/DisplayTest.php @@ -94,11 +94,11 @@ enum_entry('enum', BackedStringEnum::three), +------+--------+-----+---------+----------------------+-------+----------------------+---------+-------------------+----------------------+----------------------+-------+----------------------+ | id | price | 100 | deleted | created-at | phase | array | list | map | items | object | enum | xml | +------+--------+-----+---------+----------------------+-------+----------------------+---------+-------------------+----------------------+----------------------+-------+----------------------+ -| 1234 | 123.45 | 100 | false | 2020-07-13T15:00:00+ | null | [{"id":1,"status":"N | [1,2,3] | ["NEW","PENDING"] | {"item-id":"1","name | ArrayIterator Object | three | | +| 1234 | 123.45 | 100 | false | 2020-07-13T15:00:00+ | null | [{"id":1,"status":"N | [1,2,3] | ["NEW","PENDING"] | {"item-id":"1","name | ArrayIterator Object | three | | +| 1234 | 123.45 | 100 | false | 2020-07-13T15:00:00+ | null | [{"id":1,"status":"N | [1,2,3] | ["NEW","PENDING"] | {"item-id":"1","name | ArrayIterator Object | three | | +| 1234 | 123.45 | 100 | false | 2020-07-13T15:00:00+ | null | [{"id":1,"status":"N | [1,2,3] | ["NEW","PENDING"] | {"item-id":"1","name | ArrayIterator Object | three | | +| 1234 | 123.45 | 100 | false | 2020-07-13T15:00:00+ | null | [{"id":1,"status":"N | [1,2,3] | ["NEW","PENDING"] | {"item-id":"1","name | ArrayIterator Object | three | | +------+--------+-----+---------+----------------------+-------+----------------------+---------+-------------------+----------------------+----------------------+-------+----------------------+ 5 rows diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Entry/XMLEntryTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Entry/XMLEntryTest.php index ec1392f1c..ea9e2bd07 100644 --- a/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Entry/XMLEntryTest.php +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Entry/XMLEntryTest.php @@ -113,7 +113,7 @@ public function test_creating_entry_from_valid_xml_string() : void $entry = new XMLEntry('name', '123'); $this->assertSame('name', $entry->name()); - $this->assertSame("\n123\n", $entry->__toString()); + $this->assertSame('123', $entry->__toString()); } public function test_creating_xml_entry_with_empty_dom_document() : void