From 5eb5fc85c06d10fda0ce472274b37946cb5a9f8e Mon Sep 17 00:00:00 2001 From: Norbert Orzechowicz <1921950+norberttech@users.noreply.github.com> Date: Wed, 21 Feb 2024 14:33:10 +0100 Subject: [PATCH] Allow to group by uuid/datetime objects without casting them to scalar first (#998) --- src/core/etl/src/Flow/ETL/GroupBy.php | 38 ++++++++++++------- .../Integration/DataFrame/GroupByTest.php | 24 ++++++++++++ .../tests/Flow/ETL/Tests/Unit/GroupByTest.php | 15 -------- 3 files changed, 48 insertions(+), 29 deletions(-) diff --git a/src/core/etl/src/Flow/ETL/GroupBy.php b/src/core/etl/src/Flow/ETL/GroupBy.php index cc65430fc..662f95b33 100644 --- a/src/core/etl/src/Flow/ETL/GroupBy.php +++ b/src/core/etl/src/Flow/ETL/GroupBy.php @@ -8,6 +8,7 @@ use Flow\ETL\Exception\InvalidArgumentException; use Flow\ETL\Exception\RuntimeException; use Flow\ETL\Function\AggregatingFunction; +use Flow\ETL\Row\Entry; use Flow\ETL\Row\Reference; use Flow\ETL\Row\References; @@ -59,13 +60,7 @@ public function group(Rows $rows) : void if ($this->pivot) { foreach ($rows as $row) { try { - $pivotValue = $row->valueOf($this->pivot); - - if (!\is_scalar($pivotValue) && null !== $pivotValue) { - throw new RuntimeException('Pivoting by non scalar values is not supported, given: ' . \gettype($pivotValue)); - } - - $this->pivotColumns[] = $pivotValue; + $this->pivotColumns[] = $this->toScalar($row->get($this->pivot)); } catch (InvalidArgumentException) { $this->pivotColumns[] = null; } @@ -99,13 +94,7 @@ public function group(Rows $rows) : void foreach ($this->refs as $ref) { try { - $value = $row->valueOf($ref); - - if (!\is_scalar($value) && null !== $value) { - throw new RuntimeException('Grouping by non scalar values is not supported, given: ' . \gettype($value)); - } - - $values[$ref->name()] = $value; + $values[$ref->name()] = $this->toScalar($row->get($ref)); } catch (InvalidArgumentException) { $values[$ref->name()] = null; } @@ -204,4 +193,25 @@ private function hash(array $values) : string return \hash('xxh128', \implode('', $stringValues)); } + + private function toScalar(Entry $entry) : int|string|float|null + { + if ($entry->value() === null) { + return null; + } + + if (\is_bool($entry->value())) { + return $entry->toString(); + } + + if (\is_scalar($entry->value())) { + return $entry->value(); + } + + return match ($entry::class) { + Entry\UuidEntry::class => $entry->value()->toString(), + Entry\DateTimeEntry::class => $entry->value()->format(\DateTimeImmutable::ATOM), + default => $entry->toString() + }; + } } diff --git a/src/core/etl/tests/Flow/ETL/Tests/Integration/DataFrame/GroupByTest.php b/src/core/etl/tests/Flow/ETL/Tests/Integration/DataFrame/GroupByTest.php index 86cf9baa0..0af343853 100644 --- a/src/core/etl/tests/Flow/ETL/Tests/Integration/DataFrame/GroupByTest.php +++ b/src/core/etl/tests/Flow/ETL/Tests/Integration/DataFrame/GroupByTest.php @@ -11,6 +11,7 @@ use function Flow\ETL\DSL\from_memory; use function Flow\ETL\DSL\from_rows; use function Flow\ETL\DSL\int_entry; +use function Flow\ETL\DSL\integer_entry; use function Flow\ETL\DSL\lit; use function Flow\ETL\DSL\max; use function Flow\ETL\DSL\null_entry; @@ -24,6 +25,7 @@ use Flow\ETL\Row; use Flow\ETL\Rows; use Flow\ETL\Tests\Integration\IntegrationTestCase; +use Ramsey\Uuid\Uuid; final class GroupByTest extends IntegrationTestCase { @@ -216,6 +218,28 @@ public function test_group_by_twice() : void ); } + public function test_group_by_uuid() : void + { + $rows = df() + ->read(from_array([ + ['id' => Uuid::uuid4()->toString()], + ['id' => Uuid::uuid4()->toString()], + ['id' => Uuid::uuid4()->toString()], + ['id' => Uuid::uuid4()->toString()], + ['id' => Uuid::uuid4()->toString()], + ['id' => Uuid::uuid4()->toString()], + ['id' => Uuid::uuid4()->toString()], + ['id' => Uuid::uuid4()->toString()], + ])) + ->aggregate(count(ref('id'))) + ->fetch(); + + $this->assertEquals( + new Rows(Row::create(integer_entry('id_count', 8))), + $rows + ); + } + public function test_pivot() : void { $dataset1 = [ diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/GroupByTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/GroupByTest.php index a800b0700..7ac6b575f 100644 --- a/src/core/etl/tests/Flow/ETL/Tests/Unit/GroupByTest.php +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/GroupByTest.php @@ -4,7 +4,6 @@ namespace Flow\ETL\Tests\Unit; -use function Flow\ETL\DSL\array_entry; use function Flow\ETL\DSL\int_entry; use function Flow\ETL\DSL\null_entry; use function Flow\ETL\DSL\ref; @@ -21,20 +20,6 @@ final class GroupByTest extends TestCase { - public function test_group_by_array_entry() : void - { - $this->expectExceptionMessage('Grouping by non scalar values is not supported, given: array'); - $this->expectException(RuntimeException::class); - - $groupBy = new GroupBy('array'); - - $groupBy->group(new Rows( - Row::create(array_entry('array', [1, 2, 3])), - Row::create(array_entry('array', [1, 2, 3])), - Row::create(array_entry('array', [4, 5, 6])) - )); - } - public function test_group_by_missing_entry() : void { $groupBy = new GroupBy('type');