From 42f03f7101f087a0fee68ea1335b3a7ed0f15221 Mon Sep 17 00:00:00 2001 From: Norbert Orzechowicz <1921950+norberttech@users.noreply.github.com> Date: Sun, 21 Jan 2024 22:59:00 +0100 Subject: [PATCH] Entry factory schema (#928) * Added type Caster * use Type in Scalar Cast function * Unified and reorganized casting/type detection logic * Allow to pass schema into CSV/Json extractors * Casting Lists/Maps/Structures * Microoptimization --- .../src/Flow/ETL/Adapter/CSV/CSVExtractor.php | 6 +- .../src/Flow/ETL/Adapter/CSV/functions.php | 6 +- .../Tests/Integration/CSVExtractorTest.php | 56 +++++++++ .../JSON/JSONMachine/JsonExtractor.php | 4 +- .../src/Flow/ETL/Adapter/JSON/functions.php | 4 + .../JSONMachine/JsonExtractorTest.php | 44 +++++++ src/core/etl/src/Flow/ETL/DSL/functions.php | 48 ++++++- src/core/etl/src/Flow/ETL/DataFrame.php | 4 +- .../Flow/ETL/Exception/CastingException.php | 19 +++ src/core/etl/src/Flow/ETL/Function/Cast.php | 117 ++++++------------ .../Flow/ETL/Function/ScalarFunctionChain.php | 3 +- .../ETL/PHP/Type/ArrayContentDetector.php | 16 +-- .../etl/src/Flow/ETL/PHP/Type/AutoCaster.php | 94 ++++++++++++++ src/core/etl/src/Flow/ETL/PHP/Type/Caster.php | 80 ++++++++++++ .../PHP/Type/Caster/ArrayCastingHandler.php | 44 +++++++ .../PHP/Type/Caster/BooleanCastingHandler.php | 42 +++++++ .../ETL/PHP/Type/Caster/CastingContext.php | 32 +++++ .../ETL/PHP/Type/Caster/CastingHandler.php | 15 +++ .../Type/Caster/DateTimeCastingHandler.php | 54 ++++++++ .../PHP/Type/Caster/EnumCastingHandler.php | 39 ++++++ .../PHP/Type/Caster/FloatCastingHandler.php | 43 +++++++ .../PHP/Type/Caster/IntegerCastingHandler.php | 43 +++++++ .../PHP/Type/Caster/JsonCastingHandler.php | 36 ++++++ .../PHP/Type/Caster/ListCastingHandler.php | 42 +++++++ .../ETL/PHP/Type/Caster/MapCastingHandler.php | 50 ++++++++ .../PHP/Type/Caster/NullCastingHandler.php | 22 ++++ .../PHP/Type/Caster/ObjectCastingHandler.php | 39 ++++++ .../PHP/Type/Caster/StringCastingHandler.php | 52 ++++++++ .../StringTypeChecker.php | 4 +- .../Type/Caster/StructureCastingHandler.php | 42 +++++++ .../PHP/Type/Caster/UuidCastingHandler.php | 40 ++++++ .../Type/Caster/XML}/XMLConverter.php | 2 +- .../ETL/PHP/Type/Caster/XMLCastingHandler.php | 51 ++++++++ .../Flow/ETL/PHP/Type/Logical/JsonType.php | 2 +- .../ETL/Row/Factory/NativeEntryFactory.php | 50 ++++---- .../src/Flow/ETL/Row/Schema/Definition.php | 28 ++++- .../ETL/Transformer/AutoCastTransformer.php | 55 ++------ .../ETL/Tests/Benchmark/TypeDetectorBench.php | 4 +- .../Tests/Integration/PHP/Type/CasterTest.php | 82 ++++++++++++ .../Tests/Unit/PHP/Type/AutoCasterTest.php | 20 +++ .../Type/Caster/ArrayCastingHandlerTest.php | 64 ++++++++++ .../Type/Caster/BooleanCastingHandlerTest.php | 40 ++++++ .../Caster/DateTimeCastingHandlerTest.php | 30 +++++ .../Type/Caster/EnumCastingHandlerTest.php | 31 +++++ .../PHP/Type/Caster/Fixtures/ColorsEnum.php | 12 ++ .../Type/Caster/FloatCastingHandlerTest.php | 31 +++++ .../Type/Caster/IntegerCastingHandlerTest.php | 31 +++++ .../Type/Caster/JsonCastingHandlerTest.php | 54 ++++++++ .../Type/Caster/ListCastingHandlerTest.php | 31 +++++ .../PHP/Type/Caster/MapCastingHandlerTest.php | 55 ++++++++ .../Type/Caster/ObjectCastingHandlerTest.php | 25 ++++ .../StringTypeCheckerTest.php | 8 +- .../Type/Caster/StringCastingHandlerTest.php | 46 +++++++ .../Caster/StructureCastingHandlerTest.php | 115 +++++++++++++++++ .../Type/Caster/UuidCastingHandlerTest.php | 39 ++++++ .../PHP/Type/Caster/XMLCastingHandlerTest.php | 30 +++++ .../Row/Factory/NativeEntryFactoryTest.php | 24 ++-- .../Tests/Unit/Row/Schema/DefinitionTest.php | 8 ++ .../Transformer/AutoCastTransformerTest.php | 4 +- 59 files changed, 1915 insertions(+), 197 deletions(-) create mode 100644 src/core/etl/src/Flow/ETL/Exception/CastingException.php create mode 100644 src/core/etl/src/Flow/ETL/PHP/Type/AutoCaster.php create mode 100644 src/core/etl/src/Flow/ETL/PHP/Type/Caster.php create mode 100644 src/core/etl/src/Flow/ETL/PHP/Type/Caster/ArrayCastingHandler.php create mode 100644 src/core/etl/src/Flow/ETL/PHP/Type/Caster/BooleanCastingHandler.php create mode 100644 src/core/etl/src/Flow/ETL/PHP/Type/Caster/CastingContext.php create mode 100644 src/core/etl/src/Flow/ETL/PHP/Type/Caster/CastingHandler.php create mode 100644 src/core/etl/src/Flow/ETL/PHP/Type/Caster/DateTimeCastingHandler.php create mode 100644 src/core/etl/src/Flow/ETL/PHP/Type/Caster/EnumCastingHandler.php create mode 100644 src/core/etl/src/Flow/ETL/PHP/Type/Caster/FloatCastingHandler.php create mode 100644 src/core/etl/src/Flow/ETL/PHP/Type/Caster/IntegerCastingHandler.php create mode 100644 src/core/etl/src/Flow/ETL/PHP/Type/Caster/JsonCastingHandler.php create mode 100644 src/core/etl/src/Flow/ETL/PHP/Type/Caster/ListCastingHandler.php create mode 100644 src/core/etl/src/Flow/ETL/PHP/Type/Caster/MapCastingHandler.php create mode 100644 src/core/etl/src/Flow/ETL/PHP/Type/Caster/NullCastingHandler.php create mode 100644 src/core/etl/src/Flow/ETL/PHP/Type/Caster/ObjectCastingHandler.php create mode 100644 src/core/etl/src/Flow/ETL/PHP/Type/Caster/StringCastingHandler.php rename src/core/etl/src/Flow/ETL/{Row/Factory => PHP/Type/Caster/StringCastingHandler}/StringTypeChecker.php (97%) create mode 100644 src/core/etl/src/Flow/ETL/PHP/Type/Caster/StructureCastingHandler.php create mode 100644 src/core/etl/src/Flow/ETL/PHP/Type/Caster/UuidCastingHandler.php rename src/core/etl/src/Flow/ETL/{Function/Cast => PHP/Type/Caster/XML}/XMLConverter.php (98%) create mode 100644 src/core/etl/src/Flow/ETL/PHP/Type/Caster/XMLCastingHandler.php create mode 100644 src/core/etl/tests/Flow/ETL/Tests/Integration/PHP/Type/CasterTest.php create mode 100644 src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/AutoCasterTest.php create mode 100644 src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/ArrayCastingHandlerTest.php create mode 100644 src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/BooleanCastingHandlerTest.php create mode 100644 src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/DateTimeCastingHandlerTest.php create mode 100644 src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/EnumCastingHandlerTest.php create mode 100644 src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/Fixtures/ColorsEnum.php create mode 100644 src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/FloatCastingHandlerTest.php create mode 100644 src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/IntegerCastingHandlerTest.php create mode 100644 src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/JsonCastingHandlerTest.php create mode 100644 src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/ListCastingHandlerTest.php create mode 100644 src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/MapCastingHandlerTest.php create mode 100644 src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/ObjectCastingHandlerTest.php rename src/core/etl/tests/Flow/ETL/Tests/Unit/{Row/Factory => PHP/Type/Caster/StringCastingHandler}/StringTypeCheckerTest.php (90%) create mode 100644 src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/StringCastingHandlerTest.php create mode 100644 src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/StructureCastingHandlerTest.php create mode 100644 src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/UuidCastingHandlerTest.php create mode 100644 src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/XMLCastingHandlerTest.php diff --git a/src/adapter/etl-adapter-csv/src/Flow/ETL/Adapter/CSV/CSVExtractor.php b/src/adapter/etl-adapter-csv/src/Flow/ETL/Adapter/CSV/CSVExtractor.php index acbdd5903..a9cab672a 100644 --- a/src/adapter/etl-adapter-csv/src/Flow/ETL/Adapter/CSV/CSVExtractor.php +++ b/src/adapter/etl-adapter-csv/src/Flow/ETL/Adapter/CSV/CSVExtractor.php @@ -15,6 +15,7 @@ use Flow\ETL\Filesystem\Path; use Flow\ETL\Filesystem\Stream\Mode; use Flow\ETL\FlowContext; +use Flow\ETL\Row\Schema; final class CSVExtractor implements Extractor, FileExtractor, LimitableExtractor, PartitionsExtractor { @@ -31,7 +32,8 @@ public function __construct( private readonly string|null $separator = null, private readonly string|null $enclosure = null, private readonly string|null $escape = null, - private readonly int $charactersReadInLine = 1000 + private readonly int $charactersReadInLine = 1000, + private readonly Schema|null $schema = null ) { $this->resetLimit(); } @@ -98,7 +100,7 @@ public function extract(FlowContext $context) : \Generator $row['_input_file_uri'] = $stream->path()->uri(); } - $signal = yield array_to_rows($row, $context->entryFactory(), $path->partitions()); + $signal = yield array_to_rows($row, $context->entryFactory(), $path->partitions(), $this->schema); $this->countRow(); if ($signal === Signal::STOP || $this->reachedLimit()) { diff --git a/src/adapter/etl-adapter-csv/src/Flow/ETL/Adapter/CSV/functions.php b/src/adapter/etl-adapter-csv/src/Flow/ETL/Adapter/CSV/functions.php index 7d0f05cc1..5900b0721 100644 --- a/src/adapter/etl-adapter-csv/src/Flow/ETL/Adapter/CSV/functions.php +++ b/src/adapter/etl-adapter-csv/src/Flow/ETL/Adapter/CSV/functions.php @@ -10,6 +10,7 @@ use Flow\ETL\Extractor; use Flow\ETL\Filesystem\Path; use Flow\ETL\Loader; +use Flow\ETL\Row\Schema; /** * @param int<0, max> $characters_read_in_line @@ -21,7 +22,8 @@ function from_csv( string|null $delimiter = null, string|null $enclosure = null, string|null $escape = null, - int $characters_read_in_line = 1000 + int $characters_read_in_line = 1000, + Schema|null $schema = null ) : Extractor { if (\is_array($path)) { $extractors = []; @@ -35,6 +37,7 @@ function from_csv( $enclosure, $escape, $characters_read_in_line, + $schema ); } @@ -49,6 +52,7 @@ function from_csv( $enclosure, $escape, $characters_read_in_line, + $schema ); } diff --git a/src/adapter/etl-adapter-csv/tests/Flow/ETL/Adapter/CSV/Tests/Integration/CSVExtractorTest.php b/src/adapter/etl-adapter-csv/tests/Flow/ETL/Adapter/CSV/Tests/Integration/CSVExtractorTest.php index 58adaf78e..69f766c30 100644 --- a/src/adapter/etl-adapter-csv/tests/Flow/ETL/Adapter/CSV/Tests/Integration/CSVExtractorTest.php +++ b/src/adapter/etl-adapter-csv/tests/Flow/ETL/Adapter/CSV/Tests/Integration/CSVExtractorTest.php @@ -8,6 +8,7 @@ use function Flow\ETL\Adapter\CSV\to_csv; use function Flow\ETL\DSL\df; use function Flow\ETL\DSL\from_array; +use function Flow\ETL\DSL\print_schema; use function Flow\ETL\DSL\ref; use Flow\ETL\Adapter\CSV\CSVExtractor; use Flow\ETL\Config; @@ -143,6 +144,61 @@ public function test_extracting_csv_files_with_header() : void $this->assertSame(998, $rows->count()); } + public function test_extracting_csv_files_with_schema() : void + { + $path = __DIR__ . '/../Fixtures/annual-enterprise-survey-2019-financial-year-provisional-csv.csv'; + + $rows = df() + ->read( + from_csv($path, schema: $schema = df() + ->read(from_csv($path)) + ->autoCast() + ->schema()) + ) + ->fetch(); + + foreach ($rows as $row) { + $this->assertSame( + [ + 'Year', + 'Industry_aggregation_NZSIOC', + 'Industry_code_NZSIOC', + 'Industry_name_NZSIOC', + 'Units', + 'Variable_code', + 'Variable_name', + 'Variable_category', + 'Value', + 'Industry_code_ANZSIC06', + + ], + \array_keys($row->toArray()) + ); + } + + $this->assertSame(998, $rows->count()); + $this->assertEquals($schema, $rows->schema()); + + $this->assertSame( + <<<'SCHEMA' +schema +|-- Year: integer +|-- Industry_aggregation_NZSIOC: string +|-- Industry_code_NZSIOC: string +|-- Industry_name_NZSIOC: string +|-- Units: string +|-- Variable_code: string +|-- Variable_name: string +|-- Variable_category: string +|-- Value: string +|-- Industry_code_ANZSIC06: string + +SCHEMA, + print_schema($rows->schema()) + ); + + } + public function test_extracting_csv_files_without_header() : void { $extractor = from_csv( diff --git a/src/adapter/etl-adapter-json/src/Flow/ETL/Adapter/JSON/JSONMachine/JsonExtractor.php b/src/adapter/etl-adapter-json/src/Flow/ETL/Adapter/JSON/JSONMachine/JsonExtractor.php index ae701b072..335e52db7 100644 --- a/src/adapter/etl-adapter-json/src/Flow/ETL/Adapter/JSON/JSONMachine/JsonExtractor.php +++ b/src/adapter/etl-adapter-json/src/Flow/ETL/Adapter/JSON/JSONMachine/JsonExtractor.php @@ -15,6 +15,7 @@ use Flow\ETL\Filesystem\Path; use Flow\ETL\Filesystem\Stream\Mode; use Flow\ETL\FlowContext; +use Flow\ETL\Row\Schema; use JsonMachine\Items; use JsonMachine\JsonDecoder\ExtJsonDecoder; @@ -26,6 +27,7 @@ final class JsonExtractor implements Extractor, FileExtractor, LimitableExtracto public function __construct( private readonly Path $path, private readonly ?string $pointer = null, + private readonly Schema|null $schema = null, ) { $this->resetLimit(); } @@ -46,7 +48,7 @@ public function extract(FlowContext $context) : \Generator $row['_input_file_uri'] = $filePath->uri(); } - $signal = yield array_to_rows($row, $context->entryFactory(), $filePath->partitions()); + $signal = yield array_to_rows($row, $context->entryFactory(), $filePath->partitions(), $this->schema); $this->countRow(); if ($signal === Signal::STOP || $this->reachedLimit()) { diff --git a/src/adapter/etl-adapter-json/src/Flow/ETL/Adapter/JSON/functions.php b/src/adapter/etl-adapter-json/src/Flow/ETL/Adapter/JSON/functions.php index c4509ee8a..9574e2a1e 100644 --- a/src/adapter/etl-adapter-json/src/Flow/ETL/Adapter/JSON/functions.php +++ b/src/adapter/etl-adapter-json/src/Flow/ETL/Adapter/JSON/functions.php @@ -9,6 +9,7 @@ use Flow\ETL\Extractor; use Flow\ETL\Filesystem\Path; use Flow\ETL\Loader; +use Flow\ETL\Row\Schema; /** * @param array|Path|string $path - string is internally turned into stream @@ -19,6 +20,7 @@ function from_json( string|Path|array $path, ?string $pointer = null, + Schema|null $schema = null, ) : Extractor { if (\is_array($path)) { $extractors = []; @@ -27,6 +29,7 @@ function from_json( $extractors[] = new JsonExtractor( \is_string($file) ? Path::realpath($file) : $file, $pointer, + $schema ); } @@ -36,6 +39,7 @@ function from_json( return new JsonExtractor( \is_string($path) ? Path::realpath($path) : $path, $pointer, + $schema ); } diff --git a/src/adapter/etl-adapter-json/tests/Flow/ETL/Adapter/JSON/Tests/Integration/JSONMachine/JsonExtractorTest.php b/src/adapter/etl-adapter-json/tests/Flow/ETL/Adapter/JSON/Tests/Integration/JSONMachine/JsonExtractorTest.php index ebb20e533..4ce4f2af0 100644 --- a/src/adapter/etl-adapter-json/tests/Flow/ETL/Adapter/JSON/Tests/Integration/JSONMachine/JsonExtractorTest.php +++ b/src/adapter/etl-adapter-json/tests/Flow/ETL/Adapter/JSON/Tests/Integration/JSONMachine/JsonExtractorTest.php @@ -6,7 +6,9 @@ use function Flow\ETL\Adapter\JSON\from_json; use function Flow\ETL\Adapter\JSON\to_json; +use function Flow\ETL\DSL\df; use function Flow\ETL\DSL\from_array; +use function Flow\ETL\DSL\print_schema; use Flow\ETL\Adapter\JSON\JSONMachine\JsonExtractor; use Flow\ETL\Config; use Flow\ETL\Extractor\Signal; @@ -65,6 +67,48 @@ public function test_extracting_json_from_local_file_stream_using_pointer() : vo $this->assertSame(247, $rows->count()); } + public function test_extracting_json_from_local_file_stream_with_schema() : void + { + $rows = df() + ->read(from_json( + __DIR__ . '/../../Fixtures/timezones.json', + schema: $schema = df() + ->read(from_json(__DIR__ . '/../../Fixtures/timezones.json')) + ->autoCast() + ->schema() + )) + ->fetch(); + + foreach ($rows as $row) { + $this->assertSame( + [ + 'timezones', + 'latlng', + 'name', + 'country_code', + 'capital', + ], + \array_keys($row->toArray()) + ); + } + + $this->assertSame(247, $rows->count()); + $this->assertEquals($schema, $rows->schema()); + $this->assertSame( + <<<'SCHEMA' +schema +|-- timezones: list +|-- latlng: list +|-- name: string +|-- country_code: string +|-- capital: ?string + +SCHEMA + , + print_schema($schema) + ); + } + public function test_extracting_json_from_local_file_string_uri() : void { $extractor = new JsonExtractor(Path::realpath(__DIR__ . '/../../Fixtures/timezones.json')); diff --git a/src/core/etl/src/Flow/ETL/DSL/functions.php b/src/core/etl/src/Flow/ETL/DSL/functions.php index bc635997a..9775b8f10 100644 --- a/src/core/etl/src/Flow/ETL/DSL/functions.php +++ b/src/core/etl/src/Flow/ETL/DSL/functions.php @@ -110,6 +110,7 @@ use Flow\ETL\PHP\Type\Native\ResourceType; use Flow\ETL\PHP\Type\Native\ScalarType; use Flow\ETL\PHP\Type\Type; +use Flow\ETL\PHP\Type\TypeDetector; use Flow\ETL\Pipeline; use Flow\ETL\Row; use Flow\ETL\Row\EntryFactory; @@ -357,6 +358,11 @@ function struct_entry(string $name, array $value, StructureType $type) : Row\Ent return new Row\Entry\StructureEntry($name, $value, $type); } +function structure_entry(string $name, array $value, StructureType $type) : Row\Entry\StructureEntry +{ + return new Row\Entry\StructureEntry($name, $value, $type); +} + /** * @param array $elements */ @@ -365,11 +371,21 @@ function struct_type(array $elements, bool $nullable = false) : StructureType return new StructureType($elements, $nullable); } +function structure_type(array $elements, bool $nullable = false) : StructureType +{ + return new StructureType($elements, $nullable); +} + function struct_element(string $name, Type $type) : StructureElement { return new StructureElement($name, $type); } +function structure_element(string $name, Type $type) : StructureElement +{ + return new StructureElement($name, $type); +} + function list_entry(string $name, array $value, ListType $type) : Row\Entry\ListEntry { return new Row\Entry\ListEntry($name, $value, $type); @@ -420,6 +436,11 @@ function type_int(bool $nullable = false) : ScalarType return ScalarType::integer($nullable); } +function type_integer(bool $nullable = false) : ScalarType +{ + return ScalarType::integer($nullable); +} + function type_string(bool $nullable = false) : ScalarType { return ScalarType::string($nullable); @@ -661,7 +682,7 @@ function hash(ScalarFunction $function, string $algorithm = 'xxh128', bool $bina return new Hash($function, $algorithm, $binary, $options); } -function cast(ScalarFunction $function, string $type) : Cast +function cast(ScalarFunction $function, string|Type $type) : Cast { return new Cast($function, $type); } @@ -862,7 +883,7 @@ function number_format(ScalarFunction $function, ?ScalarFunction $decimals = nul * @param array>|array $data * @param array|\Flow\ETL\Partitions $partitions */ -function array_to_rows(array $data, EntryFactory $entryFactory = new NativeEntryFactory(), array|\Flow\ETL\Partitions $partitions = []) : Rows +function array_to_rows(array $data, EntryFactory $entryFactory = new NativeEntryFactory(), array|\Flow\ETL\Partitions $partitions = [], ?Schema $schema = null) : Rows { $partitions = \is_array($partitions) ? new \Flow\ETL\Partitions(...$partitions) : $partitions; @@ -882,12 +903,12 @@ function array_to_rows(array $data, EntryFactory $entryFactory = new NativeEntry foreach ($data as $key => $value) { $name = \is_int($key) ? 'e' . \str_pad((string) $key, 2, '0', STR_PAD_LEFT) : $key; - $entries[$name] = $entryFactory->create($name, $value); + $entries[$name] = $entryFactory->create($name, $value, $schema); } foreach ($partitions as $partition) { if (!\array_key_exists($partition->name, $entries)) { - $entries[$partition->name] = $entryFactory->create($partition->name, $partition->value); + $entries[$partition->name] = $entryFactory->create($partition->name, $partition->value, $schema); } } @@ -901,12 +922,12 @@ function array_to_rows(array $data, EntryFactory $entryFactory = new NativeEntry foreach ($row as $column => $value) { $name = \is_int($column) ? 'e' . \str_pad((string) $column, 2, '0', STR_PAD_LEFT) : $column; - $entries[$name] = $entryFactory->create(\is_int($column) ? 'e' . \str_pad((string) $column, 2, '0', STR_PAD_LEFT) : $column, $value); + $entries[$name] = $entryFactory->create(\is_int($column) ? 'e' . \str_pad((string) $column, 2, '0', STR_PAD_LEFT) : $column, $value, $schema); } foreach ($partitions as $partition) { if (!\array_key_exists($partition->name, $entries)) { - $entries[$partition->name] = $entryFactory->create($partition->name, $partition->value); + $entries[$partition->name] = $entryFactory->create($partition->name, $partition->value, $schema); } } @@ -1108,3 +1129,18 @@ function append() : SaveMode { return SaveMode::Append; } + +function get_type(mixed $value) : Type +{ + return (new TypeDetector())->detectType($value); +} + +function print_schema(Schema $schema, ?SchemaFormatter $formatter = null) : string +{ + return ($formatter ?? new ASCIISchemaFormatter())->format($schema); +} + +function print_rows(Rows $rows, int|bool $truncate = false, ?Formatter $formatter = null) : string +{ + return ($formatter ?? new Formatter\AsciiTableFormatter())->format($rows, $truncate); +} diff --git a/src/core/etl/src/Flow/ETL/DataFrame.php b/src/core/etl/src/Flow/ETL/DataFrame.php index 7f2db4484..97998824f 100644 --- a/src/core/etl/src/Flow/ETL/DataFrame.php +++ b/src/core/etl/src/Flow/ETL/DataFrame.php @@ -21,6 +21,8 @@ use Flow\ETL\Loader\SchemaValidationLoader; use Flow\ETL\Loader\StreamLoader\Output; use Flow\ETL\Partition\ScalarFunctionFilter; +use Flow\ETL\PHP\Type\AutoCaster; +use Flow\ETL\PHP\Type\Caster; use Flow\ETL\Pipeline\BatchingPipeline; use Flow\ETL\Pipeline\CachingPipeline; use Flow\ETL\Pipeline\CollectingPipeline; @@ -151,7 +153,7 @@ public function appendSafe(bool $appendSafe = true) : self public function autoCast() : self { - $this->pipeline->add(new AutoCastTransformer()); + $this->pipeline->add(new AutoCastTransformer(new AutoCaster(Caster::default()))); return $this; } diff --git a/src/core/etl/src/Flow/ETL/Exception/CastingException.php b/src/core/etl/src/Flow/ETL/Exception/CastingException.php new file mode 100644 index 000000000..c4ca90900 --- /dev/null +++ b/src/core/etl/src/Flow/ETL/Exception/CastingException.php @@ -0,0 +1,19 @@ +toString()), + 0, + $previous + ); + } +} diff --git a/src/core/etl/src/Flow/ETL/Function/Cast.php b/src/core/etl/src/Flow/ETL/Function/Cast.php index 5195c8fee..e2966b3ea 100644 --- a/src/core/etl/src/Flow/ETL/Function/Cast.php +++ b/src/core/etl/src/Flow/ETL/Function/Cast.php @@ -4,14 +4,26 @@ namespace Flow\ETL\Function; +use function Flow\ETL\DSL\type_array; +use function Flow\ETL\DSL\type_boolean; +use function Flow\ETL\DSL\type_datetime; +use function Flow\ETL\DSL\type_float; +use function Flow\ETL\DSL\type_integer; +use function Flow\ETL\DSL\type_json; +use function Flow\ETL\DSL\type_object; +use function Flow\ETL\DSL\type_string; +use function Flow\ETL\DSL\type_xml; +use Flow\ETL\Exception\CastingException; use Flow\ETL\Exception\InvalidArgumentException; +use Flow\ETL\PHP\Type\Caster; +use Flow\ETL\PHP\Type\Type; use Flow\ETL\Row; final class Cast extends ScalarFunctionChain { public function __construct( private readonly ScalarFunction $ref, - private readonly string $type + private readonly string|Type $type ) { } @@ -27,88 +39,35 @@ public function eval(Row $row) : mixed return null; } - return match (\mb_strtolower($this->type)) { - 'datetime' => match (\gettype($value)) { - 'string' => new \DateTimeImmutable($value), - 'integer' => \DateTimeImmutable::createFromFormat('U', (string) $value), - default => null, - }, - 'date' => match (\gettype($value)) { - 'string' => (new \DateTimeImmutable($value))->setTime(0, 0, 0, 0), - 'integer' => \DateTimeImmutable::createFromFormat('U', (string) $value), - default => null, - }, - 'int', 'integer' => (int) $value, - 'float', 'double', 'real' => (float) $value, - 'string' => $this->toString($value), - 'bool', 'boolean' => (bool) $value, - 'array' => $this->toArray($value), - 'object' => (object) $value, - 'json' => \json_encode($value, JSON_THROW_ON_ERROR), - 'json_pretty' => \json_encode($value, JSON_THROW_ON_ERROR | JSON_PRETTY_PRINT), - 'xml' => $this->toXML($value), - default => null - }; - } - - private function toArray(mixed $data) : array - { - if ($data instanceof \DOMDocument) { - return (new Cast\XMLConverter())->toArray($data); - } - - return (array) $data; - } - - private function toString(mixed $value) : ?string - { - if ($value === null) { - return null; - } - - if (\is_string($value)) { - return $value; - } - - if (\is_bool($value)) { - return $value ? 'true' : 'false'; - } + $caster = Caster::default(); - if (\is_array($value)) { - return \json_encode($value, JSON_THROW_ON_ERROR); - } - - if ($value instanceof \DateTimeInterface) { - return $value->format(\DateTimeInterface::RFC3339); - } - - if ($value instanceof \Stringable) { - return (string) $value; - } - - if ($value instanceof \DOMDocument) { - return $value->saveXML() ?: null; - } + $type = $this->type; - return (string) $value; - } - - private function toXML(mixed $value) : null|\DOMDocument - { - if (\is_string($value)) { - $doc = new \DOMDocument(); - - if (!@$doc->load($value)) { - return null; - } - - return $doc; + if ($type instanceof Type) { + return $caster->to($type)->value($value); } - if ($value instanceof \DOMDocument) { - return $value; + try { + return match (\mb_strtolower($type)) { + 'datetime' => $caster->to(type_datetime())->value($value), + 'date' => match (\gettype($value)) { + 'string' => (new \DateTimeImmutable($value))->setTime(0, 0, 0, 0), + 'integer' => \DateTimeImmutable::createFromFormat('U', (string) $value), + default => null, + }, + 'int', 'integer' => $caster->to(type_integer())->value($value), + 'float', 'double', 'real' => $caster->to(type_float())->value($value), + 'string' => $caster->to(type_string())->value($value), + 'bool', 'boolean' => $caster->to(type_boolean())->value($value), + 'array' => $caster->to(type_array())->value($value), + 'object' => $caster->to(type_object(\stdClass::class))->value($value), + 'json' => $caster->to(type_json())->value($value), + 'json_pretty' => \json_encode($value, JSON_THROW_ON_ERROR | JSON_PRETTY_PRINT), + 'xml' => $caster->to(type_xml())->value($value), + default => null + }; + } catch (CastingException $e) { + return null; } - - return null; } } diff --git a/src/core/etl/src/Flow/ETL/Function/ScalarFunctionChain.php b/src/core/etl/src/Flow/ETL/Function/ScalarFunctionChain.php index f7a9850ab..66b593b00 100644 --- a/src/core/etl/src/Flow/ETL/Function/ScalarFunctionChain.php +++ b/src/core/etl/src/Flow/ETL/Function/ScalarFunctionChain.php @@ -10,6 +10,7 @@ use Flow\ETL\Function\ArrayExpand\ArrayExpand; use Flow\ETL\Function\ArraySort\Sort; use Flow\ETL\Function\Between\Boundary; +use Flow\ETL\PHP\Type\Type; use Flow\ETL\Row\Entry; abstract class ScalarFunctionChain implements ScalarFunction @@ -59,7 +60,7 @@ public function capitalize() : self return new Capitalize($this); } - public function cast(string $type) : self + public function cast(string|Type $type) : self { return new Cast($this, $type); } diff --git a/src/core/etl/src/Flow/ETL/PHP/Type/ArrayContentDetector.php b/src/core/etl/src/Flow/ETL/PHP/Type/ArrayContentDetector.php index 1debc23db..e53b5d3c9 100644 --- a/src/core/etl/src/Flow/ETL/PHP/Type/ArrayContentDetector.php +++ b/src/core/etl/src/Flow/ETL/PHP/Type/ArrayContentDetector.php @@ -15,16 +15,16 @@ final class ArrayContentDetector private readonly ?Type $firstValueType; - private readonly int $uniqueKeysCount; + private readonly int $uniqueKeysTypeCount; - private readonly int $uniqueValuesCount; + private readonly int $uniqueValuesTypeCount; public function __construct(Types $uniqueKeysType, Types $uniqueValuesType) { $this->firstKeyType = $uniqueKeysType->first(); $this->firstValueType = $uniqueValuesType->first(); - $this->uniqueKeysCount = $uniqueKeysType->count(); - $this->uniqueValuesCount = $uniqueValuesType->without(type_array(true), type_null())->count(); + $this->uniqueKeysTypeCount = $uniqueKeysType->count(); + $this->uniqueValuesTypeCount = $uniqueValuesType->without(type_array(true), type_null())->count(); } public function firstKeyType() : ?ScalarType @@ -43,12 +43,12 @@ public function firstValueType() : ?Type public function isList() : bool { - return 1 === $this->uniqueValuesCount && $this->firstKeyType()?->isInteger(); + return 1 === $this->uniqueValuesTypeCount && $this->firstKeyType()?->isInteger(); } public function isMap() : bool { - if (1 === $this->uniqueValuesCount && 1 === $this->uniqueKeysCount) { + if (1 === $this->uniqueValuesTypeCount && 1 === $this->uniqueKeysTypeCount) { return !$this->firstKeyType()?->isInteger(); } @@ -61,8 +61,8 @@ public function isStructure() : bool return false; } - return 0 !== $this->uniqueValuesCount - && 1 === $this->uniqueKeysCount + return 0 !== $this->uniqueValuesTypeCount + && 1 === $this->uniqueKeysTypeCount && $this->firstKeyType()?->isString(); } } diff --git a/src/core/etl/src/Flow/ETL/PHP/Type/AutoCaster.php b/src/core/etl/src/Flow/ETL/PHP/Type/AutoCaster.php new file mode 100644 index 000000000..99f1abdf4 --- /dev/null +++ b/src/core/etl/src/Flow/ETL/PHP/Type/AutoCaster.php @@ -0,0 +1,94 @@ +castToString($value); + } + + if (\is_array($value)) { + return $this->castArray($value); + } + + return $value; + } + + private function castArray(array $value) : array + { + $keyTypes = []; + $valueTypes = []; + + foreach ($value as $key => $item) { + $keyType = get_type($key); + $valueType = get_type($item); + $keyTypes[$keyType->toString()] = $keyType; + $valueTypes[$valueType->toString()] = $valueType; + } + + if (isset($valueTypes['integer'], $valueTypes['float']) && \count($valueTypes) === 2) { + $castedArray = []; + + foreach ($value as $key => $item) { + $castedArray[$key] = $this->caster->to(type_float())->value($item); + } + + return $castedArray; + } + + return $value; + } + + private function castToString(string $value) : mixed + { + $typeChecker = new StringTypeChecker($value); + + if ($typeChecker->isNull()) { + return null; + } + + if ($typeChecker->isInteger()) { + return $this->caster->to(type_integer())->value($value); + } + + if ($typeChecker->isFloat()) { + return $this->caster->to(type_float())->value($value); + } + + if ($typeChecker->isBoolean()) { + return $this->caster->to(type_boolean())->value($value); + } + + if ($typeChecker->isJson()) { + return $this->caster->to(type_json())->value($value); + } + + if ($typeChecker->isUuid()) { + return $this->caster->to(type_uuid())->value($value); + } + + if ($typeChecker->isDateTime()) { + return $this->caster->to(type_datetime())->value($value); + } + + return $value; + } +} diff --git a/src/core/etl/src/Flow/ETL/PHP/Type/Caster.php b/src/core/etl/src/Flow/ETL/PHP/Type/Caster.php new file mode 100644 index 000000000..c1670226b --- /dev/null +++ b/src/core/etl/src/Flow/ETL/PHP/Type/Caster.php @@ -0,0 +1,80 @@ + $handlers + */ + public function __construct(private readonly array $handlers) + { + } + + public static function default() : self + { + return new self([ + type_string()->toString() => new StringCastingHandler(), + type_integer()->toString() => new IntegerCastingHandler(), + type_boolean()->toString() => new BooleanCastingHandler(), + type_float()->toString() => new FloatCastingHandler(), + type_xml()->toString() => new XMLCastingHandler(), + type_uuid()->toString() => new UuidCastingHandler(), + 'object' => new ObjectCastingHandler(), + type_datetime()->toString() => new DateTimeCastingHandler(), + type_json()->toString() => new JsonCastingHandler(), + type_array()->toString() => new ArrayCastingHandler(), + 'list' => new ListCastingHandler(), + 'map' => new MapCastingHandler(), + 'structure' => new StructureCastingHandler(), + type_null()->toString() => new NullCastingHandler(), + 'enum' => new EnumCastingHandler(), + ]); + } + + public function to(Type $type) : CastingContext + { + if (\array_key_exists($type->toString(), $this->handlers)) { + return new CastingContext($this->handlers[$type->toString()], $type, $this); + } + + foreach ($this->handlers as $handler) { + if ($handler->supports($type)) { + return new CastingContext($handler, $type, $this); + } + } + + throw new RuntimeException("There is no casting handler for \"{$type->toString()}\" type"); + } +} diff --git a/src/core/etl/src/Flow/ETL/PHP/Type/Caster/ArrayCastingHandler.php b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/ArrayCastingHandler.php new file mode 100644 index 000000000..7ac885ced --- /dev/null +++ b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/ArrayCastingHandler.php @@ -0,0 +1,44 @@ +toArray($value); + } + + if (\is_object($value)) { + return \json_decode(\json_encode($value, JSON_THROW_ON_ERROR), true, 512, \JSON_THROW_ON_ERROR); + } + + return (array) $value; + } catch (\Throwable $e) { + throw new CastingException($value, $type); + } + } +} diff --git a/src/core/etl/src/Flow/ETL/PHP/Type/Caster/BooleanCastingHandler.php b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/BooleanCastingHandler.php new file mode 100644 index 000000000..3af29cbfe --- /dev/null +++ b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/BooleanCastingHandler.php @@ -0,0 +1,42 @@ +isBoolean(); + } + + public function value(mixed $value, Type $type, Caster $caster) : mixed + { + if (\is_bool($value)) { + return $value; + } + + if (\is_string($value)) { + if (\in_array(\mb_strtolower($value), ['true', '1', 'yes', 'on'], true)) { + return true; + } + + if (\in_array(\mb_strtolower($value), ['false', '0', 'no', 'off'], true)) { + return false; + } + } + + try { + return (bool) $value; + /* @phpstan-ignore-next-line */ + } catch (\Throwable $e) { + throw new CastingException($value, $type); + } + } +} diff --git a/src/core/etl/src/Flow/ETL/PHP/Type/Caster/CastingContext.php b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/CastingContext.php new file mode 100644 index 000000000..295512098 --- /dev/null +++ b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/CastingContext.php @@ -0,0 +1,32 @@ +type->nullable()) { + return null; + } + + if ($value === null && !$this->type->nullable()) { + throw new CastingException($value, $this->type); + } + + return $this->handler->value($value, $this->type, $this->caster); + } +} diff --git a/src/core/etl/src/Flow/ETL/PHP/Type/Caster/CastingHandler.php b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/CastingHandler.php new file mode 100644 index 000000000..0bee5a76d --- /dev/null +++ b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/CastingHandler.php @@ -0,0 +1,15 @@ +add($value); + + } + } catch (\Throwable $e) { + throw new CastingException($value, type_datetime()); + } + + throw new CastingException($value, $type); + } +} diff --git a/src/core/etl/src/Flow/ETL/PHP/Type/Caster/EnumCastingHandler.php b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/EnumCastingHandler.php new file mode 100644 index 000000000..44cc721dc --- /dev/null +++ b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/EnumCastingHandler.php @@ -0,0 +1,39 @@ +class) { + return $value; + } + + try { + /** @var EnumType $type */ + $enumClass = $type->class; + + if (\is_a($enumClass, \BackedEnum::class, true)) { + return $enumClass::from($value); + } + + throw new CastingException($value, $type); + } catch (\Throwable $e) { + throw new CastingException($value, $type); + } + } +} diff --git a/src/core/etl/src/Flow/ETL/PHP/Type/Caster/FloatCastingHandler.php b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/FloatCastingHandler.php new file mode 100644 index 000000000..48edae6a1 --- /dev/null +++ b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/FloatCastingHandler.php @@ -0,0 +1,43 @@ +isFloat(); + } + + public function value(mixed $value, Type $type, Caster $caster) : mixed + { + if (\is_float($value)) { + return $value; + } + + if ($value instanceof \DateTimeImmutable) { + return (float) $value->format('Uu'); + } + + if ($value instanceof \DateInterval) { + $reference = new \DateTimeImmutable(); + $endTime = $reference->add($value); + + return (float) ($endTime->format('Uu')) - (float) ($reference->format('Uu')); + } + + try { + return (float) $value; + /* @phpstan-ignore-next-line */ + } catch (\Throwable $e) { + throw new CastingException($value, $type); + } + } +} diff --git a/src/core/etl/src/Flow/ETL/PHP/Type/Caster/IntegerCastingHandler.php b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/IntegerCastingHandler.php new file mode 100644 index 000000000..59e79ac36 --- /dev/null +++ b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/IntegerCastingHandler.php @@ -0,0 +1,43 @@ +isInteger(); + } + + public function value(mixed $value, Type $type, Caster $caster) : mixed + { + if (\is_int($value)) { + return $value; + } + + if ($value instanceof \DateTimeImmutable) { + return (int) $value->format('Uu'); + } + + if ($value instanceof \DateInterval) { + $reference = new \DateTimeImmutable(); + $endTime = $reference->add($value); + + return (int) ($endTime->format('Uu')) - (int) ($reference->format('Uu')); + } + + try { + return (int) $value; + /* @phpstan-ignore-next-line */ + } catch (\Throwable $e) { + throw new CastingException($value, $type); + } + } +} diff --git a/src/core/etl/src/Flow/ETL/PHP/Type/Caster/JsonCastingHandler.php b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/JsonCastingHandler.php new file mode 100644 index 000000000..4c1a2bf1d --- /dev/null +++ b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/JsonCastingHandler.php @@ -0,0 +1,36 @@ +to($type->element()->type())->value($value)]; + } + + $castedList = []; + + foreach ($value as $key => $item) { + $castedList[$key] = $caster->to($type->element()->type())->value($item); + } + + return $castedList; + } catch (\Throwable $e) { + throw new CastingException($value, $type); + } + } +} diff --git a/src/core/etl/src/Flow/ETL/PHP/Type/Caster/MapCastingHandler.php b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/MapCastingHandler.php new file mode 100644 index 000000000..ce57f70b3 --- /dev/null +++ b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/MapCastingHandler.php @@ -0,0 +1,50 @@ +to($type->key()->type())->value(0) => $caster->to($type->value()->type())->value($value), + ]; + } + + $castedMap = []; + + foreach ($value as $key => $item) { + $castedKey = $caster->to($type->key()->type())->value($key); + + if (\array_key_exists($castedKey, $castedMap)) { + throw new CastingException($value, $type); + } + + $castedMap[$caster->to($type->key()->type())->value($key)] = $caster->to($type->value()->type())->value($item); + } + + return $castedMap; + } catch (\Throwable $e) { + throw new CastingException($value, $type, $e); + } + } +} diff --git a/src/core/etl/src/Flow/ETL/PHP/Type/Caster/NullCastingHandler.php b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/NullCastingHandler.php new file mode 100644 index 000000000..a4e6f8b98 --- /dev/null +++ b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/NullCastingHandler.php @@ -0,0 +1,22 @@ +class) { + throw new CastingException($value, type_object($type->class)); + } + + return $object; + } catch (\Throwable $e) { + throw new CastingException($value, $type); + } + } +} diff --git a/src/core/etl/src/Flow/ETL/PHP/Type/Caster/StringCastingHandler.php b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/StringCastingHandler.php new file mode 100644 index 000000000..190017b39 --- /dev/null +++ b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/StringCastingHandler.php @@ -0,0 +1,52 @@ +isString(); + } + + public function value(mixed $value, Type $type, Caster $caster) : mixed + { + if (\is_string($value)) { + return $value; + } + + if (\is_bool($value)) { + return $value ? 'true' : 'false'; + } + + if (\is_array($value)) { + return \json_encode($value, JSON_THROW_ON_ERROR); + } + + if ($value instanceof \DateTimeInterface) { + return $value->format(\DateTimeInterface::RFC3339); + } + + if ($value instanceof \Stringable) { + return (string) $value; + } + + if ($value instanceof \DOMDocument) { + return $value->saveXML() ?: null; + } + + try { + return (string) $value; + /* @phpstan-ignore-next-line */ + } catch (\Throwable $e) { + throw new CastingException($value, $type); + } + } +} diff --git a/src/core/etl/src/Flow/ETL/Row/Factory/StringTypeChecker.php b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/StringCastingHandler/StringTypeChecker.php similarity index 97% rename from src/core/etl/src/Flow/ETL/Row/Factory/StringTypeChecker.php rename to src/core/etl/src/Flow/ETL/PHP/Type/Caster/StringCastingHandler/StringTypeChecker.php index 9e3c1543b..f1e00a6eb 100644 --- a/src/core/etl/src/Flow/ETL/Row/Factory/StringTypeChecker.php +++ b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/StringCastingHandler/StringTypeChecker.php @@ -2,7 +2,7 @@ declare(strict_types=1); -namespace Flow\ETL\Row\Factory; +namespace Flow\ETL\PHP\Type\Caster\StringCastingHandler; use Flow\ETL\Row\Entry\Type\Uuid; @@ -21,7 +21,7 @@ public function isBoolean() : bool return false; } - return \in_array(\strtolower($this->string), ['true', 'false'], true); + return \in_array(\strtolower($this->string), ['true', 'false', 'yes', 'no', 'on', 'off'], true); } public function isDateTime() : bool diff --git a/src/core/etl/src/Flow/ETL/PHP/Type/Caster/StructureCastingHandler.php b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/StructureCastingHandler.php new file mode 100644 index 000000000..0b4c69d1c --- /dev/null +++ b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/StructureCastingHandler.php @@ -0,0 +1,42 @@ +elements() as $element) { + $elementName = $element->name(); + + $castedStructure[$elementName] = (\is_array($value) && \array_key_exists($elementName, $value)) + ? $caster->to($element->type())->value($value[$elementName]) + : $caster->to($element->type())->value(null); + } + + return $castedStructure; + } catch (\Throwable $e) { + throw new CastingException($value, $type, $e); + } + } +} diff --git a/src/core/etl/src/Flow/ETL/PHP/Type/Caster/UuidCastingHandler.php b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/UuidCastingHandler.php new file mode 100644 index 000000000..32fb9c635 --- /dev/null +++ b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/UuidCastingHandler.php @@ -0,0 +1,40 @@ +toRfc4122()); + } + + throw new CastingException($value, $type); + } +} diff --git a/src/core/etl/src/Flow/ETL/Function/Cast/XMLConverter.php b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/XML/XMLConverter.php similarity index 98% rename from src/core/etl/src/Flow/ETL/Function/Cast/XMLConverter.php rename to src/core/etl/src/Flow/ETL/PHP/Type/Caster/XML/XMLConverter.php index 7345e124d..7f0456fb7 100644 --- a/src/core/etl/src/Flow/ETL/Function/Cast/XMLConverter.php +++ b/src/core/etl/src/Flow/ETL/PHP/Type/Caster/XML/XMLConverter.php @@ -1,6 +1,6 @@ loadXML($value)) { + throw new CastingException($value, type_xml()); + } + + return $doc; + } + + try { + $stringValue = $caster->to(type_string())->value($value); + + $doc = new \DOMDocument(); + + if (!@$doc->loadXML($stringValue)) { + throw new CastingException($stringValue, type_xml()); + } + + return $doc; + } catch (CastingException $e) { + throw new CastingException($value, type_xml(), $e); + } + } +} diff --git a/src/core/etl/src/Flow/ETL/PHP/Type/Logical/JsonType.php b/src/core/etl/src/Flow/ETL/PHP/Type/Logical/JsonType.php index a422998a7..de3d56ae3 100644 --- a/src/core/etl/src/Flow/ETL/PHP/Type/Logical/JsonType.php +++ b/src/core/etl/src/Flow/ETL/PHP/Type/Logical/JsonType.php @@ -5,9 +5,9 @@ namespace Flow\ETL\PHP\Type\Logical; use Flow\ETL\Exception\InvalidArgumentException; +use Flow\ETL\PHP\Type\Caster\StringCastingHandler\StringTypeChecker; use Flow\ETL\PHP\Type\Native\NullType; use Flow\ETL\PHP\Type\Type; -use Flow\ETL\Row\Factory\StringTypeChecker; final class JsonType implements LogicalType { diff --git a/src/core/etl/src/Flow/ETL/Row/Factory/NativeEntryFactory.php b/src/core/etl/src/Flow/ETL/Row/Factory/NativeEntryFactory.php index d625b9232..7e288d56e 100644 --- a/src/core/etl/src/Flow/ETL/Row/Factory/NativeEntryFactory.php +++ b/src/core/etl/src/Flow/ETL/Row/Factory/NativeEntryFactory.php @@ -23,6 +23,8 @@ use function Flow\ETL\DSL\xml_node_entry; use Flow\ETL\Exception\InvalidArgumentException; use Flow\ETL\Exception\RuntimeException; +use Flow\ETL\PHP\Type\Caster; +use Flow\ETL\PHP\Type\Caster\StringCastingHandler\StringTypeChecker; use Flow\ETL\PHP\Type\Logical\DateTimeType; use Flow\ETL\PHP\Type\Logical\JsonType; use Flow\ETL\PHP\Type\Logical\ListType; @@ -44,6 +46,13 @@ final class NativeEntryFactory implements EntryFactory { + private readonly Caster $caster; + + public function __construct() + { + $this->caster = Caster::default(); + } + /** * @throws InvalidArgumentException * @throws RuntimeException @@ -168,67 +177,56 @@ private function fromDefinition(Schema\Definition $definition, mixed $value) : E try { if ($definition->type() instanceof ScalarType) { return match ($definition->type()->type()) { - ScalarType::STRING => str_entry($definition->entry()->name(), $value), - ScalarType::INTEGER => int_entry($definition->entry()->name(), $value), - ScalarType::FLOAT => float_entry($definition->entry()->name(), $value), - ScalarType::BOOLEAN => bool_entry($definition->entry()->name(), $value), + ScalarType::STRING => str_entry($definition->entry()->name(), $this->caster->to($definition->type())->value($value)), + ScalarType::INTEGER => int_entry($definition->entry()->name(), $this->caster->to($definition->type())->value($value)), + ScalarType::FLOAT => float_entry($definition->entry()->name(), $this->caster->to($definition->type())->value($value)), + ScalarType::BOOLEAN => bool_entry($definition->entry()->name(), $this->caster->to($definition->type())->value($value)), default => throw new InvalidArgumentException("Can't convert value into entry \"{$definition->entry()}\""), }; } if ($definition->type() instanceof XMLType) { - return xml_entry($definition->entry()->name(), $value); + return xml_entry($definition->entry()->name(), $this->caster->to($definition->type())->value($value)); } if ($definition->type() instanceof UuidType) { - return uuid_entry($definition->entry()->name(), $value); + return uuid_entry($definition->entry()->name(), $this->caster->to($definition->type())->value($value)); } if ($definition->type() instanceof ObjectType) { - return obj_entry($definition->entry()->name(), $value); + return obj_entry($definition->entry()->name(), $this->caster->to($definition->type())->value($value)); } if ($definition->type() instanceof DateTimeType) { - return datetime_entry($definition->entry()->name(), $value); + return datetime_entry($definition->entry()->name(), $this->caster->to($definition->type())->value($value)); } if ($definition->type() instanceof EnumType) { - /** @var class-string<\UnitEnum> $enumClass */ - $enumClass = $definition->type()->class; - /** @var array<\UnitEnum> $cases */ - $cases = $definition->type()->class::cases(); - - foreach ($cases as $case) { - if ($case->name === $value) { - return enum_entry($definition->entry()->name(), $case); - } - } - - throw new InvalidArgumentException("Value \"{$value}\" can't be converted to " . $enumClass . ' enum'); + return enum_entry($definition->entry()->name(), $this->caster->to($definition->type())->value($value)); } if ($definition->type() instanceof JsonType) { try { - return json_object_entry($definition->entry()->name(), $value); + return json_object_entry($definition->entry()->name(), $this->caster->to($definition->type())->value($value)); } catch (InvalidArgumentException) { - return json_entry($definition->entry()->name(), $value); + return json_entry($definition->entry()->name(), $this->caster->to($definition->type())->value($value)); } } if ($definition->type() instanceof ArrayType) { - return array_entry($definition->entry()->name(), $value); + return array_entry($definition->entry()->name(), $this->caster->to($definition->type())->value($value)); } if ($definition->type() instanceof MapType) { - return map_entry($definition->entry()->name(), $value, $definition->type()); + return map_entry($definition->entry()->name(), $this->caster->to($definition->type())->value($value), $definition->type()); } if ($definition->type() instanceof StructureType) { - return struct_entry($definition->entry()->name(), $value, $definition->type()); + return struct_entry($definition->entry()->name(), $this->caster->to($definition->type())->value($value), $definition->type()); } if ($definition->type() instanceof ListType) { - return new Entry\ListEntry($definition->entry()->name(), $value, $definition->type()); + return new Entry\ListEntry($definition->entry()->name(), $this->caster->to($definition->type())->value($value), $definition->type()); } } catch (InvalidArgumentException|\TypeError $e) { throw new InvalidArgumentException("Field \"{$definition->entry()}\" conversion exception. {$e->getMessage()}", previous: $e); diff --git a/src/core/etl/src/Flow/ETL/Row/Schema/Definition.php b/src/core/etl/src/Flow/ETL/Row/Schema/Definition.php index 6ddf008bc..c1fb7b349 100644 --- a/src/core/etl/src/Flow/ETL/Row/Schema/Definition.php +++ b/src/core/etl/src/Flow/ETL/Row/Schema/Definition.php @@ -11,6 +11,7 @@ use function Flow\ETL\DSL\type_float; use function Flow\ETL\DSL\type_int; use function Flow\ETL\DSL\type_json; +use function Flow\ETL\DSL\type_list; use function Flow\ETL\DSL\type_null; use function Flow\ETL\DSL\type_string; use function Flow\ETL\DSL\type_uuid; @@ -254,6 +255,21 @@ public function merge(self $definition) : self $constraint = $this->constraint; } + if ($this->type instanceof ListType && $definition->type instanceof ListType && !$this->type->isEqual($definition->type)) { + $thisTypeString = $this->type->element()->toString(); + $definitionTypeString = $definition->type->element()->toString(); + + if (\in_array($thisTypeString, ['integer', 'float', '?integer', '?float'], true) && \in_array($definitionTypeString, ['integer', 'float', '?integer', '?float'], true)) { + return new self( + $this->ref, + $this->entryClass, + type_list(type_float($this->type->element()->type()->nullable() || $definition->type->element()->type()->nullable())), + $constraint, + $this->metadata->merge($definition->metadata) + ); + } + } + if ($this->entryClass === $definition->entryClass && \in_array($this->entryClass, [ListEntry::class, MapEntry::class, StructureEntry::class], true)) { if (!$this->type->isEqual($definition->type)) { return new self( @@ -312,7 +328,17 @@ public function merge(self $definition) : self ); } - throw new RuntimeException(\sprintf('Cannot merge definitions for entries, "%s" and "%s"', $this->ref->name(), $definition->ref->name())); + if (\in_array(ArrayEntry::class, $entryClasses, true)) { + return new self( + $this->ref, + ArrayEntry::class, + type_array(false, $this->isNullable() || $definition->isNullable()), + $constraint, + $this->metadata->merge($definition->metadata) + ); + } + + throw new RuntimeException(\sprintf('Cannot merge definitions for entries, "%s (%s)" and "%s (%s)"', $this->ref->name(), $this->type->toString(), $definition->ref->name(), $definition->type->toString())); } public function metadata() : Metadata diff --git a/src/core/etl/src/Flow/ETL/Transformer/AutoCastTransformer.php b/src/core/etl/src/Flow/ETL/Transformer/AutoCastTransformer.php index 10d8d7fcd..202271545 100644 --- a/src/core/etl/src/Flow/ETL/Transformer/AutoCastTransformer.php +++ b/src/core/etl/src/Flow/ETL/Transformer/AutoCastTransformer.php @@ -4,14 +4,8 @@ namespace Flow\ETL\Transformer; -use function Flow\ETL\DSL\bool_entry; -use function Flow\ETL\DSL\datetime_entry; -use function Flow\ETL\DSL\float_entry; -use function Flow\ETL\DSL\int_entry; -use function Flow\ETL\DSL\json_entry; -use function Flow\ETL\DSL\null_entry; -use function Flow\ETL\DSL\uuid_entry; use Flow\ETL\FlowContext; +use Flow\ETL\PHP\Type\AutoCaster; use Flow\ETL\Row; use Flow\ETL\Row\Entry; use Flow\ETL\Row\Entry\StringEntry; @@ -20,50 +14,19 @@ final class AutoCastTransformer implements Transformer { - public function autoCast(Entry $entry) : Entry + public function __construct(private readonly AutoCaster $caster) { - if (!$entry instanceof StringEntry) { - return $entry; - } - - $typeChecker = new Row\Factory\StringTypeChecker($entry->value()); - - if ($typeChecker->isNull()) { - return null_entry($entry->name()); - } - - if ($typeChecker->isInteger()) { - return int_entry($entry->name(), (int) $entry->value()); - } - - if ($typeChecker->isFloat()) { - return float_entry($entry->name(), (float) $entry->value()); - } - - if ($typeChecker->isBoolean()) { - return bool_entry($entry->name(), (bool) $entry->value()); - } - - if ($typeChecker->isJson()) { - return json_entry($entry->name(), $entry->value()); - } - - if ($typeChecker->isUuid()) { - return uuid_entry($entry->name(), $entry->value()); - } - - if ($typeChecker->isDateTime()) { - return datetime_entry($entry->name(), $entry->value()); - } - - return $entry; } public function transform(Rows $rows, FlowContext $context) : Rows { - return $rows->map(function (Row $row) { - return $row->map(function (Entry $entry) { - return $this->autoCast($entry); + return $rows->map(function (Row $row) use ($context) { + return $row->map(function (Entry $entry) use ($context) { + // if (!$entry instanceof StringEntry) { + // return $entry; + // } + + return $context->entryFactory()->create($entry->name(), $this->caster->cast($entry->value())); }); }); } diff --git a/src/core/etl/tests/Flow/ETL/Tests/Benchmark/TypeDetectorBench.php b/src/core/etl/tests/Flow/ETL/Tests/Benchmark/TypeDetectorBench.php index a561d25fe..aee30619e 100644 --- a/src/core/etl/tests/Flow/ETL/Tests/Benchmark/TypeDetectorBench.php +++ b/src/core/etl/tests/Flow/ETL/Tests/Benchmark/TypeDetectorBench.php @@ -2,7 +2,7 @@ namespace Flow\ETL\Tests\Benchmark; -use Flow\ETL\PHP\Type\TypeDetector; +use function Flow\ETL\DSL\get_type; use PhpBench\Attributes\Groups; use PhpBench\Attributes\ParamProviders; @@ -12,7 +12,7 @@ final class TypeDetectorBench #[ParamProviders('provideRows')] public function bench_type_detector(array $params) : void { - (new TypeDetector())->detectType($params['data']); + get_type($params['data']); } public function provideRows() : \Generator diff --git a/src/core/etl/tests/Flow/ETL/Tests/Integration/PHP/Type/CasterTest.php b/src/core/etl/tests/Flow/ETL/Tests/Integration/PHP/Type/CasterTest.php new file mode 100644 index 000000000..20a490cdc --- /dev/null +++ b/src/core/etl/tests/Flow/ETL/Tests/Integration/PHP/Type/CasterTest.php @@ -0,0 +1,82 @@ +assertSame( + '{"items":{"item":1}}', + (Caster::default())->to(type_json())->value(['items' => ['item' => 1]]) + ); + } + + public function test_casting_string_to_datetime() : void + { + $this->assertSame( + '2021-01-01 00:00:00.000000', + (Caster::default())->to(type_datetime())->value('2021-01-01 00:00:00 UTC')->format('Y-m-d H:i:s.u') + ); + } + + public function test_casting_string_to_uuid() : void + { + $this->assertEquals( + new Uuid('6c2f6e0e-8d8e-4e9e-8f0e-5a2d9c1c4f6e'), + (Caster::default())->to(type_uuid())->value('6c2f6e0e-8d8e-4e9e-8f0e-5a2d9c1c4f6e') + ); + } + + public function test_casting_string_to_xml() : void + { + $this->assertSame( + '' . "\n" . '1' . "\n", + (Caster::default())->to(type_xml())->value('1')->saveXML() + ); + } + + public function test_casting_to_boolean() : void + { + $this->assertTrue( + (Caster::default())->to(type_boolean())->value('true') + ); + } + + public function test_casting_to_integer() : void + { + $this->assertSame( + 1, + (Caster::default())->to(type_integer())->value('1') + ); + } + + public function test_casting_to_string() : void + { + $this->assertSame( + '1', + (Caster::default())->to(type_string())->value(1) + ); + } + + public function test_casting_values_to_null() : void + { + $this->assertNull( + (Caster::default())->to(type_null())->value('qweqwqw') + ); + } +} diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/AutoCasterTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/AutoCasterTest.php new file mode 100644 index 000000000..b7de528b6 --- /dev/null +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/AutoCasterTest.php @@ -0,0 +1,20 @@ +assertSame( + [1.0, 2.0, 3.0], + (new AutoCaster(Caster::default()))->cast([1, 2, 3.0]) + ); + } +} diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/ArrayCastingHandlerTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/ArrayCastingHandlerTest.php new file mode 100644 index 000000000..c4c6f205a --- /dev/null +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/ArrayCastingHandlerTest.php @@ -0,0 +1,64 @@ +assertEquals( + [true], + (new ArrayCastingHandler())->value(true, type_array(), Caster::default()) + ); + } + + public function test_casting_datetime_to_array() : void + { + $this->assertEquals( + ['date' => '2021-01-01 00:00:00.000000', 'timezone_type' => 3, 'timezone' => 'UTC'], + (new ArrayCastingHandler())->value(new \DateTimeImmutable('2021-01-01 00:00:00 UTC'), type_array(), Caster::default()) + ); + } + + public function test_casting_float_to_array() : void + { + $this->assertEquals( + [1.1], + (new ArrayCastingHandler())->value(1.1, type_array(), Caster::default()) + ); + } + + public function test_casting_integer_to_array() : void + { + $this->assertEquals( + [1], + (new ArrayCastingHandler())->value(1, type_array(), Caster::default()) + ); + } + + public function test_casting_string_to_array() : void + { + $this->assertSame( + ['items' => ['item' => 1]], + (new ArrayCastingHandler())->value('{"items":{"item":1}}', type_array(), Caster::default()) + ); + } + + public function test_casting_xml_document_to_array() : void + { + $xml = new \DOMDocument(); + $xml->loadXML($xmlString = 'bar'); + + $this->assertSame( + ['root' => ['foo' => ['@attributes' => ['baz' => 'buz'], '@value' => 'bar']]], + (new ArrayCastingHandler())->value($xml, type_array(), Caster::default()) + ); + } +} diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/BooleanCastingHandlerTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/BooleanCastingHandlerTest.php new file mode 100644 index 000000000..b333fbe41 --- /dev/null +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/BooleanCastingHandlerTest.php @@ -0,0 +1,40 @@ + ['string', true]; + yield 'string true' => ['true', true]; + yield 'string 1' => ['1', true]; + yield 'string yes' => ['yes', true]; + yield 'string on' => ['on', true]; + yield 'string false' => ['false', false]; + yield 'string 0' => ['0', false]; + yield 'string no' => ['no', false]; + yield 'string off' => ['off', false]; + yield 'int' => [1, true]; + yield 'float' => [1.1, true]; + yield 'bool' => [true, true]; + yield 'array' => [[1, 2, 3], true]; + yield 'DateTimeInterface' => [new \DateTimeImmutable('2021-01-01 00:00:00'), true]; + yield 'DateInterval' => [new \DateInterval('P1D'), true]; + yield 'DOMDocument' => [new \DOMDocument(), true]; + } + + #[DataProvider('boolean_castable_data_provider')] + public function test_casting_different_data_types_to_integer(mixed $value, bool $expected) : void + { + $this->assertSame($expected, (new BooleanCastingHandler())->value($value, type_boolean(), Caster::default())); + } +} diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/DateTimeCastingHandlerTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/DateTimeCastingHandlerTest.php new file mode 100644 index 000000000..b2ce75276 --- /dev/null +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/DateTimeCastingHandlerTest.php @@ -0,0 +1,30 @@ + ['2021-01-01 00:00:00', new \DateTimeImmutable('2021-01-01 00:00:00')]; + yield 'int' => [1609459200, new \DateTimeImmutable('2021-01-01 00:00:00')]; + yield 'float' => [1609459200.0, new \DateTimeImmutable('2021-01-01 00:00:00')]; + yield 'bool' => [true, new \DateTimeImmutable('1970-01-01 00:00:01')]; + yield 'DateTimeInterface' => [new \DateTimeImmutable('2021-01-01 00:00:00'), new \DateTimeImmutable('2021-01-01 00:00:00')]; + yield 'DateInterval' => [new \DateInterval('P1D'), new \DateTimeImmutable('1970-01-02 00:00:00')]; + } + + #[DataProvider('datetime_castable_data_provider')] + public function test_casting_different_data_types_to_datetime(mixed $value, \DateTimeImmutable $expected) : void + { + $this->assertEquals($expected, (new DateTimeCastingHandler())->value($value, type_datetime(), Caster::default())); + } +} diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/EnumCastingHandlerTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/EnumCastingHandlerTest.php new file mode 100644 index 000000000..fccb309b1 --- /dev/null +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/EnumCastingHandlerTest.php @@ -0,0 +1,31 @@ +expectException(CastingException::class); + $this->expectExceptionMessage('Can\'t cast "integer" into "enum" type'); + + (new EnumCastingHandler())->value(1, type_enum(ColorsEnum::class), Caster::default()); + } + + public function test_casting_string_to_enum() : void + { + $this->assertEquals( + ColorsEnum::RED, + (new EnumCastingHandler())->value('red', type_enum(ColorsEnum::class), Caster::default()) + ); + } +} diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/Fixtures/ColorsEnum.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/Fixtures/ColorsEnum.php new file mode 100644 index 000000000..18a99a718 --- /dev/null +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/Fixtures/ColorsEnum.php @@ -0,0 +1,12 @@ + ['string', 0.0]; + yield 'int' => [1, 1.0]; + yield 'float' => [1.1, 1.1]; + yield 'bool' => [true, 1.0]; + yield 'array' => [[1, 2, 3], 1.0]; + yield 'DateTimeInterface' => [new \DateTimeImmutable('2021-01-01 00:00:00'), 1609459200000000.0]; + yield 'DateInterval' => [new \DateInterval('P1D'), 86400000000.0]; + } + + #[DataProvider('float_castable_data_provider')] + public function test_casting_different_data_types_to_float(mixed $value, float $expected) : void + { + $this->assertSame($expected, (new FloatCastingHandler())->value($value, type_float(), Caster::default())); + } +} diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/IntegerCastingHandlerTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/IntegerCastingHandlerTest.php new file mode 100644 index 000000000..ac1560d43 --- /dev/null +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/IntegerCastingHandlerTest.php @@ -0,0 +1,31 @@ + ['string', 0]; + yield 'int' => [1, 1]; + yield 'float' => [1.1, 1]; + yield 'bool' => [true, 1]; + yield 'array' => [[1, 2, 3], 1]; + yield 'DateTimeInterface' => [new \DateTimeImmutable('2021-01-01 00:00:00'), 1609459200000000]; + yield 'DateInterval' => [new \DateInterval('P1D'), 86400000000]; + } + + #[DataProvider('integer_castable_data_provider')] + public function test_casting_different_data_types_to_integer(mixed $value, int $expected) : void + { + $this->assertSame($expected, (new IntegerCastingHandler())->value($value, type_integer(), Caster::default())); + } +} diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/JsonCastingHandlerTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/JsonCastingHandlerTest.php new file mode 100644 index 000000000..ca02ed1e2 --- /dev/null +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/JsonCastingHandlerTest.php @@ -0,0 +1,54 @@ +assertSame( + '{"items":{"item":1}}', + (new JsonCastingHandler())->value(['items' => ['item' => 1]], type_json(), Caster::default()) + ); + } + + public function test_casting_datetime_to_json() : void + { + $this->assertSame( + '{"date":"2021-01-01 00:00:00.000000","timezone_type":3,"timezone":"UTC"}', + (new JsonCastingHandler())->value(new \DateTimeImmutable('2021-01-01 00:00:00 UTC'), type_json(), Caster::default()) + ); + } + + public function test_casting_integer_to_json() : void + { + $this->expectException(CastingException::class); + $this->expectExceptionMessage('Can\'t cast "integer" into "json" type'); + + (new JsonCastingHandler())->value(1, type_json(), Caster::default()); + } + + public function test_casting_json_string_to_json() : void + { + $this->assertSame( + '{"items":{"item":1}}', + (new JsonCastingHandler())->value('{"items":{"item":1}}', type_json(), Caster::default()) + ); + } + + public function test_casting_non_json_string_to_json() : void + { + $this->expectException(CastingException::class); + $this->expectExceptionMessage('Can\'t cast "string" into "json" type'); + + (new JsonCastingHandler())->value('string', type_json(), Caster::default()); + } +} diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/ListCastingHandlerTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/ListCastingHandlerTest.php new file mode 100644 index 000000000..e8a296a5a --- /dev/null +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/ListCastingHandlerTest.php @@ -0,0 +1,31 @@ +assertSame( + [1.0, 2.0, 3.0], + (new ListCastingHandler())->value([1, 2, 3], type_list(type_float()), Caster::default()) + ); + } + + public function test_casting_string_to_list_of_ints() : void + { + $this->assertSame( + [1], + (new ListCastingHandler())->value(['1'], type_list(type_int()), Caster::default()) + ); + } +} diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/MapCastingHandlerTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/MapCastingHandlerTest.php new file mode 100644 index 000000000..3537d4e8c --- /dev/null +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/MapCastingHandlerTest.php @@ -0,0 +1,55 @@ +assertSame( + [ + 'a' => 1.0, + 'b' => 2.0, + 'c' => 3.0, + ], + (new MapCastingHandler())->value(['a' => 1, 'b' => 2, 'c' => 3], type_map(type_string(), type_float()), Caster::default()) + ); + } + + public function test_casting_map_of_string_to_ints_into_map_of_int_to_float() : void + { + $this->expectException(CastingException::class); + $this->expectExceptionMessage('Can\'t cast "array" into "map"'); + + $this->assertSame( + [ + 'a' => 1.0, + 'b' => 2.0, + 'c' => 3.0, + ], + (new MapCastingHandler())->value(['a' => 1, 'b' => 2, 'c' => 3], type_map(type_int(), type_float()), Caster::default()) + ); + } + + public function test_casting_scalar_to_map() : void + { + $this->assertSame( + [ + '0' => 2, + ], + (new MapCastingHandler())->value('2', type_map(type_string(), type_integer()), Caster::default()) + ); + } +} diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/ObjectCastingHandlerTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/ObjectCastingHandlerTest.php new file mode 100644 index 000000000..343140b70 --- /dev/null +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/ObjectCastingHandlerTest.php @@ -0,0 +1,25 @@ +assertEquals( + (object) ['foo' => 'bar'], + (new ObjectCastingHandler())->value((object) ['foo' => 'bar'], type_object(\stdClass::class), Caster::default()) + ); + $this->assertInstanceOf( + \stdClass::class, + (new ObjectCastingHandler())->value((object) ['foo' => 'bar'], type_object(\stdClass::class), Caster::default()) + ); + } +} diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Factory/StringTypeCheckerTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/StringCastingHandler/StringTypeCheckerTest.php similarity index 90% rename from src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Factory/StringTypeCheckerTest.php rename to src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/StringCastingHandler/StringTypeCheckerTest.php index 0f02050e1..dd690feb2 100644 --- a/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Factory/StringTypeCheckerTest.php +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/StringCastingHandler/StringTypeCheckerTest.php @@ -2,9 +2,9 @@ declare(strict_types=1); -namespace Flow\ETL\Tests\Unit\Row\Factory; +namespace Flow\ETL\Tests\Unit\PHP\Type\Caster\StringCastingHandler; -use Flow\ETL\Row\Factory\StringTypeChecker; +use Flow\ETL\PHP\Type\Caster\StringCastingHandler\StringTypeChecker; use PHPUnit\Framework\TestCase; final class StringTypeCheckerTest extends TestCase @@ -13,6 +13,10 @@ public function test_detecting_boolean() : void { $this->assertTrue((new StringTypeChecker('true'))->isBoolean()); $this->assertTrue((new StringTypeChecker('false'))->isBoolean()); + $this->assertTrue((new StringTypeChecker('yes'))->isBoolean()); + $this->assertTrue((new StringTypeChecker('no'))->isBoolean()); + $this->assertTrue((new StringTypeChecker('on'))->isBoolean()); + $this->assertTrue((new StringTypeChecker('off'))->isBoolean()); $this->assertFalse((new StringTypeChecker('0'))->isBoolean()); $this->assertFalse((new StringTypeChecker('not bool'))->isBoolean()); } diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/StringCastingHandlerTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/StringCastingHandlerTest.php new file mode 100644 index 000000000..efe5a2287 --- /dev/null +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/StringCastingHandlerTest.php @@ -0,0 +1,46 @@ + ['string', 'string']; + yield 'int' => [1, '1']; + yield 'float' => [1.1, '1.1']; + yield 'bool' => [true, 'true']; + yield 'array' => [[1, 2, 3], '[1,2,3]']; + yield 'DateTimeInterface' => [new \DateTimeImmutable('2021-01-01 00:00:00'), '2021-01-01T00:00:00+00:00']; + yield 'Stringable' => [new class() implements \Stringable { + public function __toString() : string + { + return 'stringable'; + } + }, 'stringable']; + yield 'DOMDocument' => [new \DOMDocument(), '']; + } + + #[DataProvider('string_castable_data_provider')] + public function test_casting_different_data_types_to_string(mixed $value, string $expected) : void + { + $this->assertSame($expected, \trim((new StringCastingHandler())->value($value, type_string(), Caster::default()))); + } + + public function test_casting_object_to_string() : void + { + $this->expectException(CastingException::class); + $this->expectExceptionMessage('Can\'t cast "object" into "string" type'); + + (new StringCastingHandler())->value(new class() {}, type_string(), Caster::default()); + } +} diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/StructureCastingHandlerTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/StructureCastingHandlerTest.php new file mode 100644 index 000000000..e9728d63b --- /dev/null +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/StructureCastingHandlerTest.php @@ -0,0 +1,115 @@ +assertSame( + [ + 'name' => 'Norbert Orzechowicz', + 'age' => 30, + 'address' => [ + 'street' => 'Polna', + 'city' => 'Warsaw', + ], + ], + (new StructureCastingHandler())->value( + [ + 'name' => 'Norbert Orzechowicz', + 'age' => 30, + 'address' => [ + 'street' => 'Polna', + 'city' => 'Warsaw', + ], + ], + struct_type([ + structure_element('name', type_string()), + structure_element('age', type_integer()), + structure_element( + 'address', + structure_type([ + structure_element('street', type_string()), + structure_element('city', type_string()), + ]) + ), + ]), + Caster::default() + ) + ); + } + + public function test_casting_structure_with_empty_not_nullable_fields() : void + { + $this->assertSame( + [ + 'name' => 'Norbert Orzechowicz', + 'age' => 30, + 'address' => [ + 'street' => null, + 'city' => null, + ], + ], + (new StructureCastingHandler())->value( + [ + 'name' => 'Norbert Orzechowicz', + 'age' => 30, + 'address' => [], + ], + struct_type([ + structure_element('name', type_string()), + structure_element('age', type_integer()), + structure_element( + 'address', + structure_type([ + structure_element('street', type_string(true)), + structure_element('city', type_string(true)), + ]) + ), + ]), + Caster::default() + ) + ); + } + + public function test_casting_structure_with_missing_nullable_fields() : void + { + $this->assertSame( + [ + 'name' => 'Norbert Orzechowicz', + 'age' => 30, + 'address' => null, + ], + (new StructureCastingHandler())->value( + [ + 'name' => 'Norbert Orzechowicz', + 'age' => 30, + ], + struct_type([ + structure_element('name', type_string()), + structure_element('age', type_integer()), + structure_element( + 'address', + structure_type([ + structure_element('street', type_string()), + structure_element('city', type_string()), + ], true) + ), + ], true), + Caster::default() + ) + ); + } +} diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/UuidCastingHandlerTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/UuidCastingHandlerTest.php new file mode 100644 index 000000000..bff84cb81 --- /dev/null +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/UuidCastingHandlerTest.php @@ -0,0 +1,39 @@ +expectException(CastingException::class); + $this->expectExceptionMessage('Can\'t cast "integer" into "uuid" type'); + + (new UuidCastingHandler())->value(1, type_uuid(), Caster::default()); + } + + public function test_casting_ramsey_uuid_to_uuid() : void + { + $this->assertEquals( + new Uuid('6c2f6e0e-8d8e-4e9e-8f0e-5a2d9c1c4f6e'), + (new UuidCastingHandler())->value(\Ramsey\Uuid\Uuid::fromString('6c2f6e0e-8d8e-4e9e-8f0e-5a2d9c1c4f6e'), type_uuid(), Caster::default()) + ); + } + + public function test_casting_string_to_uuid() : void + { + $this->assertEquals( + new Uuid('6c2f6e0e-8d8e-4e9e-8f0e-5a2d9c1c4f6e'), + (new UuidCastingHandler())->value('6c2f6e0e-8d8e-4e9e-8f0e-5a2d9c1c4f6e', type_uuid(), Caster::default()) + ); + } +} diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/XMLCastingHandlerTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/XMLCastingHandlerTest.php new file mode 100644 index 000000000..8a19b8f48 --- /dev/null +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Caster/XMLCastingHandlerTest.php @@ -0,0 +1,30 @@ +expectException(CastingException::class); + $this->expectExceptionMessage('Can\'t cast "integer" into "xml" type'); + + (new XMLCastingHandler())->value(1, type_xml(), Caster::default())->saveXML(); + } + + public function test_casting_string_to_xml() : void + { + $this->assertSame( + '' . "\n" . '1' . "\n", + (new XMLCastingHandler())->value('1', type_xml(), Caster::default())->saveXML() + ); + } +} diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Factory/NativeEntryFactoryTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Factory/NativeEntryFactoryTest.php index 38972fe73..6a103f097 100644 --- a/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Factory/NativeEntryFactoryTest.php +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Factory/NativeEntryFactoryTest.php @@ -24,6 +24,7 @@ use function Flow\ETL\DSL\type_string; use function Flow\ETL\DSL\uuid_entry; use function Flow\ETL\DSL\xml_entry; +use Flow\ETL\Exception\CastingException; use Flow\ETL\Exception\InvalidArgumentException; use Flow\ETL\PHP\Type\Logical\List\ListElement; use Flow\ETL\PHP\Type\Logical\ListType; @@ -103,15 +104,6 @@ public function test_boolean_with_schema() : void ); } - public function test_conversion_to_different_type_with_schema() : void - { - $this->expectException(InvalidArgumentException::class); - $this->expectExceptionMessage("Field \"e\" conversion exception. Flow\ETL\DSL\str_entry(): Argument #2 (\$value) must be of type string, int given, called in"); - - (new NativeEntryFactory()) - ->create('e', 1, new Schema(Schema\Definition::string('e'))); - } - public function test_datetime() : void { $this->assertEquals( @@ -152,14 +144,14 @@ public function test_enum_from_string_with_schema() : void $this->assertEquals( enum_entry('e', BackedIntEnum::one), (new NativeEntryFactory()) - ->create('e', 'one', new Schema(Schema\Definition::enum('e', BackedIntEnum::class))) + ->create('e', 1, new Schema(Schema\Definition::enum('e', BackedIntEnum::class))) ); } public function test_enum_invalid_value_with_schema() : void { - $this->expectException(InvalidArgumentException::class); - $this->expectExceptionMessage("Value \"invalid\" can't be converted to " . BackedIntEnum::class . ' enum'); + $this->expectException(CastingException::class); + $this->expectExceptionMessage("Can't cast \"string\" into \"enum\" type"); (new NativeEntryFactory()) ->create('e', 'invalid', new Schema(Schema\Definition::enum('e', BackedIntEnum::class))); @@ -255,10 +247,10 @@ public function test_list_int_with_schema() : void public function test_list_int_with_schema_but_string_list() : void { - $this->expectException(InvalidArgumentException::class); - $this->expectExceptionMessage('Field "e" conversion exception. Expected list got different types: list'); - - (new NativeEntryFactory())->create('e', ['1', '2', '3'], new Schema(Schema\Definition::list('e', new ListType(ListElement::integer())))); + $this->assertEquals( + list_entry('e', ['false', 'true', 'true'], type_list(type_string())), + (new NativeEntryFactory())->create('e', [false, true, true], new Schema(Schema\Definition::list('e', new ListType(ListElement::string())))) + ); } public function test_list_of_datetime_with_schema() : void diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Schema/DefinitionTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Schema/DefinitionTest.php index aaa285be4..43ba6c415 100644 --- a/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Schema/DefinitionTest.php +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Schema/DefinitionTest.php @@ -179,6 +179,14 @@ public function test_merging_different_entries() : void Definition::integer('int')->merge(Definition::string('string')); } + public function test_merging_list_of_ints_and_floats() : void + { + $this->assertEquals( + Definition::list('list', type_list(type_float())), + Definition::list('list', type_list(type_int()))->merge(Definition::list('list', type_list(type_float()))) + ); + } + public function test_merging_numeric_types() : void { $this->assertEquals( diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/Transformer/AutoCastTransformerTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/Transformer/AutoCastTransformerTest.php index 1830c52da..56b8e5b84 100644 --- a/src/core/etl/tests/Flow/ETL/Tests/Unit/Transformer/AutoCastTransformerTest.php +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/Transformer/AutoCastTransformerTest.php @@ -6,6 +6,8 @@ use function Flow\ETL\DSL\array_to_rows; use function Flow\ETL\DSL\flow_context; +use Flow\ETL\PHP\Type\AutoCaster; +use Flow\ETL\PHP\Type\Caster; use Flow\ETL\Transformer\AutoCastTransformer; use PHPUnit\Framework\TestCase; @@ -13,7 +15,7 @@ final class AutoCastTransformerTest extends TestCase { public function test_transforming_row() : void { - $transformer = new AutoCastTransformer(); + $transformer = new AutoCastTransformer(new AutoCaster(Caster::default())); $rows = array_to_rows([ [