From ebc64b8749b30ee2c54c29db02838e28751d7fc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Mleczko?= Date: Sat, 20 Jul 2024 07:46:39 +0200 Subject: [PATCH 1/6] Use generic hash algorithm --- .../EntryIdFactory/HashIdFactory.php | 16 ++++++---------- .../Unit/EntryIdFactory/HashIdFactoryTest.php | 12 ++---------- src/core/etl/src/Flow/ETL/DSL/functions.php | 5 +++-- src/core/etl/src/Flow/ETL/Function/Hash.php | 13 +++++-------- .../Flow/ETL/Function/ScalarFunctionChain.php | 5 +++-- src/core/etl/src/Flow/ETL/GroupBy.php | 3 ++- .../Flow/ETL/Pipeline/PartitioningPipeline.php | 7 +++++-- src/core/etl/src/Flow/ETL/Row.php | 9 ++++----- src/core/etl/src/Flow/ETL/Rows.php | 13 ++++++------- .../Transformer/DropDuplicatesTransformer.php | 7 +++++-- .../ETL/Tests/Integration/Function/HashTest.php | 3 ++- .../Flow/ETL/Tests/Unit/Function/HashTest.php | 5 +++-- 12 files changed, 46 insertions(+), 52 deletions(-) diff --git a/src/adapter/etl-adapter-elasticsearch/src/Flow/ETL/Adapter/Elasticsearch/EntryIdFactory/HashIdFactory.php b/src/adapter/etl-adapter-elasticsearch/src/Flow/ETL/Adapter/Elasticsearch/EntryIdFactory/HashIdFactory.php index 4742b31b5..926bd43b8 100644 --- a/src/adapter/etl-adapter-elasticsearch/src/Flow/ETL/Adapter/Elasticsearch/EntryIdFactory/HashIdFactory.php +++ b/src/adapter/etl-adapter-elasticsearch/src/Flow/ETL/Adapter/Elasticsearch/EntryIdFactory/HashIdFactory.php @@ -5,7 +5,7 @@ namespace Flow\ETL\Adapter\Elasticsearch\EntryIdFactory; use Flow\ETL\Adapter\Elasticsearch\IdFactory; -use Flow\ETL\Exception\InvalidArgumentException; +use Flow\ETL\Hash\{Algorithm, NativePHPHash}; use Flow\ETL\Row; use Flow\ETL\Row\Entry; @@ -16,32 +16,28 @@ final class HashIdFactory implements IdFactory */ private array $entryNames; - private string $hashName = 'xxh128'; + private Algorithm $hashAlgorithm; public function __construct(string ...$entryNames) { $this->entryNames = $entryNames; + $this->hashAlgorithm = new NativePHPHash(); } public function create(Row $row) : Entry { return new Entry\StringEntry( 'id', - \hash( - $this->hashName, + $this->hashAlgorithm->hash( \implode(':', \array_map(fn (string $name) : string => (string) $row->valueOf($name), $this->entryNames)) ) ); } - public function withAlgorithm(string $hashName) : self + public function withAlgorithm(Algorithm $algorithm) : self { - if (!\in_array($hashName, \hash_algos(), true)) { - throw InvalidArgumentException::because('Unsupported hash algorithm name provided: ' . $hashName . ', did you mean: ' . \implode(', ', \hash_algos())); - } - $factory = new self(...$this->entryNames); - $factory->hashName = $hashName; + $factory->hashAlgorithm = $algorithm; return $factory; } diff --git a/src/adapter/etl-adapter-elasticsearch/tests/Flow/ETL/Adapter/Elasticsearch/Tests/Unit/EntryIdFactory/HashIdFactoryTest.php b/src/adapter/etl-adapter-elasticsearch/tests/Flow/ETL/Adapter/Elasticsearch/Tests/Unit/EntryIdFactory/HashIdFactoryTest.php index 4b7025620..59b56e173 100644 --- a/src/adapter/etl-adapter-elasticsearch/tests/Flow/ETL/Adapter/Elasticsearch/Tests/Unit/EntryIdFactory/HashIdFactoryTest.php +++ b/src/adapter/etl-adapter-elasticsearch/tests/Flow/ETL/Adapter/Elasticsearch/Tests/Unit/EntryIdFactory/HashIdFactoryTest.php @@ -6,7 +6,7 @@ use function Flow\ETL\DSL\str_entry; use Flow\ETL\Adapter\Elasticsearch\EntryIdFactory\HashIdFactory; -use Flow\ETL\Exception\InvalidArgumentException; +use Flow\ETL\Hash\NativePHPHash; use Flow\ETL\Row; use PHPUnit\Framework\TestCase; @@ -29,7 +29,7 @@ public function test_create_row() : void public function test_create_row_with_different_hash() : void { - $factory = (new HashIdFactory('first_name', 'last_name'))->withAlgorithm('sha1'); + $factory = (new HashIdFactory('first_name', 'last_name'))->withAlgorithm(new NativePHPHash('sha1')); self::assertEquals( new Row\Entry\StringEntry( @@ -41,12 +41,4 @@ public function test_create_row_with_different_hash() : void ) ); } - - public function test_invalid_hash_algorithm_name() : void - { - $this->expectException(InvalidArgumentException::class); - $this->expectExceptionMessage('Unsupported hash algorithm name provided: whatever, did you mean: '); - - (new HashIdFactory('first_name'))->withAlgorithm('whatever'); - } } diff --git a/src/core/etl/src/Flow/ETL/DSL/functions.php b/src/core/etl/src/Flow/ETL/DSL/functions.php index 7d0402a15..005306453 100644 --- a/src/core/etl/src/Flow/ETL/DSL/functions.php +++ b/src/core/etl/src/Flow/ETL/DSL/functions.php @@ -42,6 +42,7 @@ Flow, FlowContext, Formatter, + Hash\Algorithm, Join\Comparison, Join\Comparison\Equal, Join\Comparison\Identical, @@ -593,9 +594,9 @@ function concat(ScalarFunction ...$functions) : Concat return new Concat(...$functions); } -function hash(ScalarFunction $function, string $algorithm = 'xxh128', bool $binary = false, array $options = []) : Hash +function hash(ScalarFunction $function, Algorithm $algorithm) : Hash { - return new Hash($function, $algorithm, $binary, $options); + return new Hash($function, $algorithm); } function cast(ScalarFunction $function, string|Type $type) : Cast diff --git a/src/core/etl/src/Flow/ETL/Function/Hash.php b/src/core/etl/src/Flow/ETL/Function/Hash.php index 96aa58395..2d43b0bf1 100644 --- a/src/core/etl/src/Flow/ETL/Function/Hash.php +++ b/src/core/etl/src/Flow/ETL/Function/Hash.php @@ -4,19 +4,16 @@ namespace Flow\ETL\Function; +use Flow\ETL\Hash\Algorithm; use Flow\ETL\Row; final class Hash extends ScalarFunctionChain { public function __construct( private readonly ScalarFunction $ref, - private readonly string $algorithm = 'xxh128', - private readonly bool $binary = false, - private readonly array $options = [] + private readonly Algorithm $algorithm, ) { - if (!\in_array($this->algorithm, \hash_algos(), true)) { - throw new \InvalidArgumentException(\sprintf('Hash algorithm "%s" is not supported', $this->algorithm)); - } + } public function eval(Row $row) : ?string @@ -27,8 +24,8 @@ public function eval(Row $row) : ?string return match ($value) { null => null, default => match (\gettype($value)) { - 'array', 'object' => \hash($this->algorithm, \serialize($value), $this->binary, $this->options), - default => \hash($this->algorithm, (string) $value, $this->binary, $this->options), + 'array', 'object' => $this->algorithm->hash(\serialize($value)), + default => $this->algorithm->hash((string) $value), } }; } diff --git a/src/core/etl/src/Flow/ETL/Function/ScalarFunctionChain.php b/src/core/etl/src/Flow/ETL/Function/ScalarFunctionChain.php index 7b50fe651..78145b145 100644 --- a/src/core/etl/src/Flow/ETL/Function/ScalarFunctionChain.php +++ b/src/core/etl/src/Flow/ETL/Function/ScalarFunctionChain.php @@ -10,6 +10,7 @@ use Flow\ETL\Function\ArrayExpand\ArrayExpand; use Flow\ETL\Function\ArraySort\Sort; use Flow\ETL\Function\Between\Boundary; +use Flow\ETL\Hash\{Algorithm, NativePHPHash}; use Flow\ETL\PHP\Type\Type; use Flow\ETL\Row\Entry; @@ -150,9 +151,9 @@ public function greaterThanEqual(ScalarFunction $ref) : self return new GreaterThanEqual($this, $ref); } - public function hash(string $algorithm = 'xxh128', bool $binary = false, array $options = []) : self + public function hash(?Algorithm $algorithm = null) : self { - return new Hash($this, $algorithm, $binary, $options); + return new Hash($this, $algorithm ?? new NativePHPHash()); } public function isEven() : self diff --git a/src/core/etl/src/Flow/ETL/GroupBy.php b/src/core/etl/src/Flow/ETL/GroupBy.php index e1be73f66..b322b49d0 100644 --- a/src/core/etl/src/Flow/ETL/GroupBy.php +++ b/src/core/etl/src/Flow/ETL/GroupBy.php @@ -7,6 +7,7 @@ use function Flow\ETL\DSL\array_to_rows; use Flow\ETL\Exception\{InvalidArgumentException, RuntimeException}; use Flow\ETL\Function\AggregatingFunction; +use Flow\ETL\Hash\NativePHPHash; use Flow\ETL\Row\{Reference, References}; final class GroupBy @@ -198,6 +199,6 @@ private function hash(array $values) : string } } - return \hash('xxh128', \implode('', $stringValues)); + return (new NativePHPHash('xxh128'))->hash(\implode('', $stringValues)); } } diff --git a/src/core/etl/src/Flow/ETL/Pipeline/PartitioningPipeline.php b/src/core/etl/src/Flow/ETL/Pipeline/PartitioningPipeline.php index 0628ddf88..81533e90a 100644 --- a/src/core/etl/src/Flow/ETL/Pipeline/PartitioningPipeline.php +++ b/src/core/etl/src/Flow/ETL/Pipeline/PartitioningPipeline.php @@ -8,11 +8,13 @@ use Flow\ETL\Exception\InvalidArgumentException; use Flow\ETL\Extractor\CollectingExtractor; use Flow\ETL\Row\Reference; -use Flow\ETL\{Extractor, FlowContext, Loader, Pipeline, Transformer}; +use Flow\ETL\{Extractor, FlowContext, Hash\Algorithm, Hash\NativePHPHash, Loader, Pipeline, Transformer}; use Flow\Filesystem\Partition; final class PartitioningPipeline implements Pipeline { + private readonly Algorithm $hashAlgorithm; + /** * @param Pipeline $pipeline * @param array $partitionBy @@ -28,6 +30,7 @@ public function __construct( if (!\count($this->partitionBy)) { throw new InvalidArgumentException('PartitioningPipeline requires at least one partitionBy entry'); } + $this->hashAlgorithm = new NativePHPHash(); } public function add(Loader|Transformer $pipe) : Pipeline @@ -56,7 +59,7 @@ public function process(FlowContext $context) : \Generator $rows = $partitionedRows->sortBy(...$this->orderBy); - $partitionId = \hash('xxh128', $context->config->id() . '_' . \implode('_', \array_map( + $partitionId = $this->hashAlgorithm->hash($context->config->id() . '_' . \implode('_', \array_map( static fn (Partition $partition) : string => $partition->id(), $partitionedRows->partitions()->toArray() ))); diff --git a/src/core/etl/src/Flow/ETL/Row.php b/src/core/etl/src/Flow/ETL/Row.php index fbbd1aaf1..5102d7e0a 100644 --- a/src/core/etl/src/Flow/ETL/Row.php +++ b/src/core/etl/src/Flow/ETL/Row.php @@ -5,6 +5,7 @@ namespace Flow\ETL; use Flow\ETL\Exception\InvalidArgumentException; +use Flow\ETL\Hash\{Algorithm, NativePHPHash}; use Flow\ETL\Row\{Entries, Entry, Reference, References, Schema}; final class Row @@ -54,11 +55,9 @@ public function has(string|Reference $ref) : bool return $this->entries->has($ref); } - public function hash(string $algorithm = 'xxh128', bool $binary = false, array $options = []) : string + public function hash(?Algorithm $algorithm = null) : string { - if (!\in_array($algorithm, \hash_algos(), true)) { - throw new \InvalidArgumentException(\sprintf('Hashing algorithm "%s" is not supported', $algorithm)); - } + $algorithm = $algorithm ?? new NativePHPHash(); $string = ''; @@ -66,7 +65,7 @@ public function hash(string $algorithm = 'xxh128', bool $binary = false, array $ $string .= $entry->name() . $entry->toString(); } - return \hash($algorithm, $string, $binary, $options); + return $algorithm->hash($string); } public function isEqual(self $row) : bool diff --git a/src/core/etl/src/Flow/ETL/Rows.php b/src/core/etl/src/Flow/ETL/Rows.php index 5ff8fd892..371c71d48 100644 --- a/src/core/etl/src/Flow/ETL/Rows.php +++ b/src/core/etl/src/Flow/ETL/Rows.php @@ -6,6 +6,7 @@ use function Flow\ETL\DSL\{array_to_rows, row}; use Flow\ETL\Exception\{DuplicatedEntriesException, InvalidArgumentException, RuntimeException}; +use Flow\ETL\Hash\{Algorithm, NativePHPHash}; use Flow\ETL\Join\Expression; use Flow\ETL\Row\CartesianProduct; use Flow\ETL\Row\Comparator\NativeComparator; @@ -267,19 +268,17 @@ public function getIterator() : \Iterator return new \ArrayIterator($this->rows); } - public function hash(string $algorithm = 'xxh128', bool $binary = false, array $options = []) : string + public function hash(?Algorithm $algorithm = null) : string { - $hashes = []; + $algorithm = $algorithm ?? new NativePHPHash(); - if (!\in_array($algorithm, \hash_algos(), true)) { - throw new \InvalidArgumentException(\sprintf('Hashing algorithm "%s" is not supported', $algorithm)); - } + $hashes = []; foreach ($this->rows as $row) { - $hashes[] = $row->hash($algorithm, $binary, $options); + $hashes[] = $row->hash($algorithm); } - return \hash($algorithm, \implode('', $hashes), $binary, $options); + return $algorithm->hash(\implode('', $hashes)); } public function isPartitioned() : bool diff --git a/src/core/etl/src/Flow/ETL/Transformer/DropDuplicatesTransformer.php b/src/core/etl/src/Flow/ETL/Transformer/DropDuplicatesTransformer.php index d996efa92..cab2fda19 100644 --- a/src/core/etl/src/Flow/ETL/Transformer/DropDuplicatesTransformer.php +++ b/src/core/etl/src/Flow/ETL/Transformer/DropDuplicatesTransformer.php @@ -7,7 +7,7 @@ use Flow\ETL\Exception\InvalidArgumentException; use Flow\ETL\Row\Reference; use Flow\ETL\Transformer\DropDuplicates\Hashes; -use Flow\ETL\{FlowContext, Rows, Transformer}; +use Flow\ETL\{FlowContext, Hash\Algorithm, Hash\NativePHPHash, Rows, Transformer}; final class DropDuplicatesTransformer implements Transformer { @@ -18,6 +18,8 @@ final class DropDuplicatesTransformer implements Transformer */ private array $entries; + private Algorithm $hashAlgorithm; + public function __construct(string|Reference ...$entries) { if ([] === $entries) { @@ -26,6 +28,7 @@ public function __construct(string|Reference ...$entries) $this->entries = $entries; $this->deduplication = new Hashes(); + $this->hashAlgorithm = new NativePHPHash(); } public function transform(Rows $rows, FlowContext $context) : Rows @@ -43,7 +46,7 @@ public function transform(Rows $rows, FlowContext $context) : Rows } } - $hash = \hash('xxh128', \serialize($values)); + $hash = $this->hashAlgorithm->hash(\serialize($values)); if (!$this->deduplication->exists($hash)) { $newRows[] = $row; diff --git a/src/core/etl/tests/Flow/ETL/Tests/Integration/Function/HashTest.php b/src/core/etl/tests/Flow/ETL/Tests/Integration/Function/HashTest.php index b0c0c3904..d8ddb534a 100644 --- a/src/core/etl/tests/Flow/ETL/Tests/Integration/Function/HashTest.php +++ b/src/core/etl/tests/Flow/ETL/Tests/Integration/Function/HashTest.php @@ -6,6 +6,7 @@ use function Flow\ETL\DSL\{from_array, ref, to_memory}; use Flow\ETL\Flow; +use Flow\ETL\Hash\NativePHPHash; use Flow\ETL\Memory\ArrayMemory; use PHPUnit\Framework\Attributes\DataProvider; use PHPUnit\Framework\TestCase; @@ -52,7 +53,7 @@ public function test_hash_with_different_algorithm() : void ] ) ) - ->withEntry('hash', ref('key')->hash('sha512')) + ->withEntry('hash', ref('key')->hash(new NativePHPHash('sha512'))) ->write(to_memory($memory = new ArrayMemory())) ->run(); diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/Function/HashTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/Function/HashTest.php index ee47f5259..e8d6786a2 100644 --- a/src/core/etl/tests/Flow/ETL/Tests/Unit/Function/HashTest.php +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/Function/HashTest.php @@ -5,6 +5,7 @@ namespace Flow\ETL\Tests\Unit\Function; use function Flow\ETL\DSL\{array_entry, concat, datetime_entry, hash, lit, ref, str_entry}; +use Flow\ETL\Hash\NativePHPHash; use Flow\ETL\Row; use PHPUnit\Framework\TestCase; @@ -21,8 +22,8 @@ public function test_hashing_array_value() : void public function test_hashing_concat() : void { self::assertSame( - \hash('xxh128', 'test_test'), - hash(concat(ref('value'), lit('_'), ref('value')))->eval(Row::create(str_entry('value', 'test'))) + (new NativePHPHash('xxh128'))->hash('test_test'), + hash(concat(ref('value'), lit('_'), ref('value')), new NativePHPHash('xxh128'))->eval(Row::create(str_entry('value', 'test'))) ); } From a4b64e5442bd40d688cafc98887ba071723423fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Mleczko?= Date: Sat, 20 Jul 2024 14:04:59 +0200 Subject: [PATCH 2/6] Fix nullability using php 8.1 syntax --- src/core/etl/src/Flow/ETL/Function/ScalarFunctionChain.php | 4 ++-- src/core/etl/src/Flow/ETL/Row.php | 4 +--- src/core/etl/src/Flow/ETL/Rows.php | 4 +--- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/core/etl/src/Flow/ETL/Function/ScalarFunctionChain.php b/src/core/etl/src/Flow/ETL/Function/ScalarFunctionChain.php index 78145b145..df8c7ad0d 100644 --- a/src/core/etl/src/Flow/ETL/Function/ScalarFunctionChain.php +++ b/src/core/etl/src/Flow/ETL/Function/ScalarFunctionChain.php @@ -151,9 +151,9 @@ public function greaterThanEqual(ScalarFunction $ref) : self return new GreaterThanEqual($this, $ref); } - public function hash(?Algorithm $algorithm = null) : self + public function hash(Algorithm $algorithm = new NativePHPHash()) : self { - return new Hash($this, $algorithm ?? new NativePHPHash()); + return new Hash($this, $algorithm); } public function isEven() : self diff --git a/src/core/etl/src/Flow/ETL/Row.php b/src/core/etl/src/Flow/ETL/Row.php index 5102d7e0a..697f7f542 100644 --- a/src/core/etl/src/Flow/ETL/Row.php +++ b/src/core/etl/src/Flow/ETL/Row.php @@ -55,10 +55,8 @@ public function has(string|Reference $ref) : bool return $this->entries->has($ref); } - public function hash(?Algorithm $algorithm = null) : string + public function hash(Algorithm $algorithm = new NativePHPHash()) : string { - $algorithm = $algorithm ?? new NativePHPHash(); - $string = ''; foreach ($this->entries->sort()->all() as $entry) { diff --git a/src/core/etl/src/Flow/ETL/Rows.php b/src/core/etl/src/Flow/ETL/Rows.php index 371c71d48..2fd5de2b6 100644 --- a/src/core/etl/src/Flow/ETL/Rows.php +++ b/src/core/etl/src/Flow/ETL/Rows.php @@ -268,10 +268,8 @@ public function getIterator() : \Iterator return new \ArrayIterator($this->rows); } - public function hash(?Algorithm $algorithm = null) : string + public function hash(Algorithm $algorithm = new NativePHPHash()) : string { - $algorithm = $algorithm ?? new NativePHPHash(); - $hashes = []; foreach ($this->rows as $row) { From 64ede42c87b7928f402122d0d428f9d063049355 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Mleczko?= Date: Sat, 20 Jul 2024 14:24:16 +0200 Subject: [PATCH 3/6] Add static helper function for hashing --- src/core/etl/src/Flow/ETL/Function/Hash.php | 5 ++- src/core/etl/src/Flow/ETL/GroupBy.php | 2 +- .../etl/src/Flow/ETL/Hash/NativePHPHash.php | 5 +++ .../Flow/ETL/Tests/Unit/Function/HashTest.php | 2 +- .../ETL/Tests/Unit/Hash/NativePHPHashTest.php | 36 +++++++++++++++++++ 5 files changed, 45 insertions(+), 5 deletions(-) create mode 100644 src/core/etl/tests/Flow/ETL/Tests/Unit/Hash/NativePHPHashTest.php diff --git a/src/core/etl/src/Flow/ETL/Function/Hash.php b/src/core/etl/src/Flow/ETL/Function/Hash.php index 2d43b0bf1..530d0fb1f 100644 --- a/src/core/etl/src/Flow/ETL/Function/Hash.php +++ b/src/core/etl/src/Flow/ETL/Function/Hash.php @@ -4,16 +4,15 @@ namespace Flow\ETL\Function; -use Flow\ETL\Hash\Algorithm; +use Flow\ETL\Hash\{Algorithm, NativePHPHash}; use Flow\ETL\Row; final class Hash extends ScalarFunctionChain { public function __construct( private readonly ScalarFunction $ref, - private readonly Algorithm $algorithm, + private readonly Algorithm $algorithm = new NativePHPHash(), ) { - } public function eval(Row $row) : ?string diff --git a/src/core/etl/src/Flow/ETL/GroupBy.php b/src/core/etl/src/Flow/ETL/GroupBy.php index b322b49d0..1251c532d 100644 --- a/src/core/etl/src/Flow/ETL/GroupBy.php +++ b/src/core/etl/src/Flow/ETL/GroupBy.php @@ -199,6 +199,6 @@ private function hash(array $values) : string } } - return (new NativePHPHash('xxh128'))->hash(\implode('', $stringValues)); + return NativePHPHash::xxh128(\implode('', $stringValues)); } } diff --git a/src/core/etl/src/Flow/ETL/Hash/NativePHPHash.php b/src/core/etl/src/Flow/ETL/Hash/NativePHPHash.php index 7e7ee2947..646368912 100644 --- a/src/core/etl/src/Flow/ETL/Hash/NativePHPHash.php +++ b/src/core/etl/src/Flow/ETL/Hash/NativePHPHash.php @@ -13,6 +13,11 @@ public function __construct(private string $algorithm = 'xxh128', private bool $ } } + public static function xxh128(string $string) : string + { + return (new self('xxh128'))->hash($string); + } + public function hash(string $value) : string { return \hash($this->algorithm, $value, $this->binary, $this->options); diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/Function/HashTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/Function/HashTest.php index e8d6786a2..44ac9a4d1 100644 --- a/src/core/etl/tests/Flow/ETL/Tests/Unit/Function/HashTest.php +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/Function/HashTest.php @@ -22,7 +22,7 @@ public function test_hashing_array_value() : void public function test_hashing_concat() : void { self::assertSame( - (new NativePHPHash('xxh128'))->hash('test_test'), + NativePHPHash::xxh128('test_test'), hash(concat(ref('value'), lit('_'), ref('value')), new NativePHPHash('xxh128'))->eval(Row::create(str_entry('value', 'test'))) ); } diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/Hash/NativePHPHashTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/Hash/NativePHPHashTest.php new file mode 100644 index 000000000..fa9bd11e8 --- /dev/null +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/Hash/NativePHPHashTest.php @@ -0,0 +1,36 @@ +hash('test') + ); + + } +} From 1c99c1fc186b9a8d302ce58fabcb3f401c35637e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Mleczko?= Date: Sat, 20 Jul 2024 14:36:14 +0200 Subject: [PATCH 4/6] Fix BC breaking change --- src/core/etl/src/Flow/ETL/DSL/functions.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/core/etl/src/Flow/ETL/DSL/functions.php b/src/core/etl/src/Flow/ETL/DSL/functions.php index 005306453..3d084942c 100644 --- a/src/core/etl/src/Flow/ETL/DSL/functions.php +++ b/src/core/etl/src/Flow/ETL/DSL/functions.php @@ -43,6 +43,7 @@ FlowContext, Formatter, Hash\Algorithm, + Hash\NativePHPHash, Join\Comparison, Join\Comparison\Equal, Join\Comparison\Identical, @@ -594,7 +595,7 @@ function concat(ScalarFunction ...$functions) : Concat return new Concat(...$functions); } -function hash(ScalarFunction $function, Algorithm $algorithm) : Hash +function hash(ScalarFunction $function, Algorithm $algorithm = new NativePHPHash()) : Hash { return new Hash($function, $algorithm); } From 1c90abc17b453142034761e51b85b266d6d9a074 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Mleczko?= Date: Sat, 20 Jul 2024 14:55:22 +0200 Subject: [PATCH 5/6] Code style improvement --- src/core/etl/src/Flow/ETL/Rows.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/etl/src/Flow/ETL/Rows.php b/src/core/etl/src/Flow/ETL/Rows.php index 2fd5de2b6..a5babc232 100644 --- a/src/core/etl/src/Flow/ETL/Rows.php +++ b/src/core/etl/src/Flow/ETL/Rows.php @@ -270,13 +270,13 @@ public function getIterator() : \Iterator public function hash(Algorithm $algorithm = new NativePHPHash()) : string { - $hashes = []; + $hash = ''; foreach ($this->rows as $row) { - $hashes[] = $row->hash($algorithm); + $hash .= $row->hash($algorithm); } - return $algorithm->hash(\implode('', $hashes)); + return $algorithm->hash($hash); } public function isPartitioned() : bool From b57dfc2753c6c66081f1153b6764812fb939fa46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Mleczko?= Date: Sat, 20 Jul 2024 14:55:39 +0200 Subject: [PATCH 6/6] Fix duplication --- .../etl/tests/Flow/ETL/Tests/Unit/Hash/NativePHPHashTest.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/Hash/NativePHPHashTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/Hash/NativePHPHashTest.php index fa9bd11e8..aa1bf3a24 100644 --- a/src/core/etl/tests/Flow/ETL/Tests/Unit/Hash/NativePHPHashTest.php +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/Hash/NativePHPHashTest.php @@ -21,7 +21,7 @@ public function test_hashing_string_using_xxh128_by_default() : void { static::assertSame( '6c78e0e3bd51d358d01e758642b85fb8', - NativePHPHash::xxh128('test'), + (new NativePHPHash())->hash('test'), ); }