Skip to content

Commit

Permalink
Added Schema::match() with strict/evolving matchers (#1027)
Browse files Browse the repository at this point in the history
  • Loading branch information
norberttech authored Mar 28, 2024
1 parent fd4671d commit 8d16fcd
Show file tree
Hide file tree
Showing 9 changed files with 368 additions and 3 deletions.
12 changes: 11 additions & 1 deletion src/core/etl/src/Flow/ETL/DSL/functions.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
use Flow\ETL\PHP\Type\{Type, TypeDetector};
use Flow\ETL\Row\Factory\NativeEntryFactory;
use Flow\ETL\Row\Schema\Formatter\ASCIISchemaFormatter;
use Flow\ETL\Row\Schema\{Definition, SchemaFormatter};
use Flow\ETL\Row\Schema\{Definition, Matcher\EvolvingSchemaMatcher, Matcher\StrictSchemaMatcher, SchemaFormatter};
use Flow\ETL\Row\{EntryFactory, EntryReference, Reference, References, Schema};
use Flow\ETL\{Config, ConfigBuilder, DataFrame, Extractor, Flow, FlowContext, Formatter, Loader, Partition, Pipeline, Row, Rows, Transformer, Window};

Expand Down Expand Up @@ -974,6 +974,16 @@ function schema_from_json(string $schema) : Schema
return Schema::fromArray(\json_decode($schema, true, 512, JSON_THROW_ON_ERROR));
}

/**
 * DSL shortcut that builds a fresh StrictSchemaMatcher instance.
 *
 * The strict matcher is also what Schema::matches() falls back to when no matcher is given.
 */
function schema_strict_matcher() : StrictSchemaMatcher
{
    $matcher = new StrictSchemaMatcher();

    return $matcher;
}

/**
 * DSL shortcut that builds a fresh EvolvingSchemaMatcher instance.
 */
function schema_evolving_matcher() : EvolvingSchemaMatcher
{
    $matcher = new EvolvingSchemaMatcher();

    return $matcher;
}

function int_schema(string $name, bool $nullable = false, ?Schema\Metadata $metadata = null) : Definition
{
return Definition::integer($name, $nullable, $metadata);
Expand Down
7 changes: 6 additions & 1 deletion src/core/etl/src/Flow/ETL/Row/Schema.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
namespace Flow\ETL\Row;

use Flow\ETL\Exception\{InvalidArgumentException, SchemaDefinitionNotFoundException, SchemaDefinitionNotUniqueException};
use Flow\ETL\Row\Schema\Definition;
use Flow\ETL\Row\Schema\{Definition, Matcher\StrictSchemaMatcher, SchemaMatcher};

final class Schema implements \Countable
{
Expand Down Expand Up @@ -136,6 +136,11 @@ public function keep(string|Reference ...$entries) : self
return $this;
}

/**
 * Tells whether the given schema matches this one, as judged by the supplied matcher.
 *
 * This schema is handed to the matcher as the left side and $schema as the right side.
 * When no matcher is passed, a StrictSchemaMatcher is used.
 */
public function matches(self $schema, SchemaMatcher $matcher = new StrictSchemaMatcher()) : bool
{
    $isMatching = $matcher->match($this, $schema);

    return $isMatching;
}

public function merge(self $schema) : self
{
$newDefinitions = $this->definitions;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
<?php

declare(strict_types=1);

namespace Flow\ETL\Row\Schema\Matcher;

use Flow\ETL\Row\Schema;
use Flow\ETL\Row\Schema\SchemaMatcher;

final class EvolvingSchemaMatcher implements SchemaMatcher
{
    /**
     * Checks whether the right schema is a compatible evolution of the left schema.
     *
     * Rules of evolving schema matching:
     *  - if schemas are the same, return true
     *  - if right schema is missing any field of the left schema, return false
     *    (this also holds when right adds other fields at the same time)
     *  - if right schema is making a nullable field non-nullable, return false
     *  - if right schema is making a non-nullable field nullable, return true
     *  - if right schema is changing the type of a field, return false
     *  - if right schema is adding a field, return true
     *
     * @param Schema $left the schema being evolved from
     * @param Schema $right the candidate schema being evolved to
     *
     * @return bool true when $right keeps every field of $left with a compatible definition
     */
    public function match(Schema $left, Schema $right) : bool
    {
        // Fast path: fewer fields on the right means at least one left field was dropped.
        if ($right->count() < $left->count()) {
            return false;
        }

        // Walk the LEFT schema so that every existing field is verified. Iterating the right
        // schema and skipping unknown fields would let a dropped field go unnoticed whenever
        // the right schema adds enough new fields to exceed the left field count.
        foreach ($left->definitions() as $leftDefinition) {
            $rightDefinition = $right->findDefinition($leftDefinition->entry());

            // a field known to the left schema must not disappear
            if ($rightDefinition === null) {
                return false;
            }

            // tightening nullability (nullable -> non-nullable) is not allowed
            if ($leftDefinition->isNullable() && !$rightDefinition->isNullable()) {
                return false;
            }

            // making both sides nullable to compare just types of the fields
            if (!$rightDefinition->type()->makeNullable(true)->isEqual($leftDefinition->type()->makeNullable(true))) {
                return false;
            }
        }

        // Fields that exist only in the right schema are additions, which are always accepted.
        return true;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
<?php

declare(strict_types=1);

namespace Flow\ETL\Row\Schema\Matcher;

use Flow\ETL\Row\Schema;
use Flow\ETL\Row\Schema\SchemaMatcher;

final class StrictSchemaMatcher implements SchemaMatcher
{
    /**
     * Two schemas match strictly when they hold the same number of definitions and every
     * definition of the left schema has an equal counterpart (per Definition::isEqual)
     * under the same entry in the right schema.
     */
    public function match(Schema $left, Schema $right) : bool
    {
        $leftDefinitions = $left->definitions();

        if (\count($leftDefinitions) !== \count($right->definitions())) {
            return false;
        }

        foreach ($leftDefinitions as $expected) {
            $actual = $right->findDefinition($expected->entry());

            // either the entry is missing on the right side or its definition differs
            if ($actual === null || !$expected->isEqual($actual)) {
                return false;
            }
        }

        return true;
    }
}
12 changes: 12 additions & 0 deletions src/core/etl/src/Flow/ETL/Row/Schema/SchemaMatcher.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<?php

declare(strict_types=1);

namespace Flow\ETL\Row\Schema;

use Flow\ETL\Row\Schema;

/**
 * Strategy contract for comparing two schemas.
 *
 * What counts as a "match" is decided by the implementation; this file set ships
 * StrictSchemaMatcher and EvolvingSchemaMatcher as implementations.
 */
interface SchemaMatcher
{
/**
 * @param Schema $left the schema used as the point of reference
 * @param Schema $right the schema compared against $left
 *
 * @return bool true when $right matches $left under the rules of the implementation
 */
public function match(Schema $left, Schema $right) : bool;
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,22 @@ public function test_creating_definition_without_class() : void
new Definition('name', \DateTimeInterface::class, type_datetime());
}

/**
 * Definitions with the same name and type are equal only when their nullability agrees.
 */
public function test_equals_nullability() : void
{
    $nullableId = Definition::integer('id', nullable: true);

    $requiredId = Definition::integer('id', nullable: false);
    self::assertFalse($nullableId->isEqual($requiredId));

    $sameNullableId = Definition::integer('id', nullable: true);
    self::assertTrue($nullableId->isEqual($sameNullableId));
}

public function test_equals_types() : void
{
$def = Definition::list('list', new ListType(ListElement::integer()));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
<?php

declare(strict_types=1);

namespace Flow\ETL\Tests\Unit\Row\Schema\Matcher;

use function Flow\ETL\DSL\{bool_schema, int_schema, schema, str_schema};
use Flow\ETL\Row\Schema\Matcher\EvolvingSchemaMatcher;
use PHPUnit\Framework\TestCase;

/**
 * Unit tests covering the evolution rules implemented by EvolvingSchemaMatcher.
 */
final class EvolvingSchemaMatcherTest extends TestCase
{
    /**
     * Dropping a field on the right side is rejected.
     */
    public function test_right_having_less_definitions_than_left() : void
    {
        $matcher = new EvolvingSchemaMatcher();

        $matched = $matcher->match(
            schema(int_schema('id'), str_schema('name')),
            schema(int_schema('id')),
        );

        self::assertFalse($matched);
    }

    /**
     * Same field count but a renamed field is rejected.
     */
    public function test_right_having_same_number_of_definitions_but_different_names() : void
    {
        $matcher = new EvolvingSchemaMatcher();

        $matched = $matcher->match(
            schema(int_schema('id'), str_schema('name')),
            schema(int_schema('id'), str_schema('surname')),
        );

        self::assertFalse($matched);
    }

    /**
     * Adding a brand new field on the right side is accepted.
     */
    public function test_right_schema_adding_new_field() : void
    {
        $matcher = new EvolvingSchemaMatcher();

        $matched = $matcher->match(
            schema(int_schema('id'), str_schema('name')),
            schema(int_schema('id'), str_schema('name'), bool_schema('active')),
        );

        self::assertTrue($matched);
    }

    /**
     * Turning a nullable field into a non-nullable one is rejected.
     */
    public function test_right_schema_changing_nullable_field_to_non_nullable() : void
    {
        $matcher = new EvolvingSchemaMatcher();

        $matched = $matcher->match(
            schema(int_schema('id'), str_schema('name', nullable: true)),
            schema(int_schema('id'), str_schema('name')),
        );

        self::assertFalse($matched);
    }

    /**
     * Changing the type of an existing field is rejected.
     */
    public function test_right_schema_changing_type_of_field() : void
    {
        $matcher = new EvolvingSchemaMatcher();

        $matched = $matcher->match(
            schema(int_schema('id'), str_schema('name')),
            schema(int_schema('id'), bool_schema('name')),
        );

        self::assertFalse($matched);
    }

    /**
     * Identical schemas are accepted.
     */
    public function test_right_schema_is_the_same_as_left_schema() : void
    {
        $matcher = new EvolvingSchemaMatcher();

        $matched = $matcher->match(
            schema(int_schema('id'), str_schema('name')),
            schema(int_schema('id'), str_schema('name')),
        );

        self::assertTrue($matched);
    }

    /**
     * Relaxing a non-nullable field into a nullable one is accepted.
     */
    public function test_right_schema_making_non_nullable_field_into_nullable() : void
    {
        $matcher = new EvolvingSchemaMatcher();

        $matched = $matcher->match(
            schema(int_schema('id'), str_schema('name')),
            schema(int_schema('id'), str_schema('name', nullable: true)),
        );

        self::assertTrue($matched);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
<?php

declare(strict_types=1);

namespace Flow\ETL\Tests\Unit\Row\Schema\Matcher;

use function Flow\ETL\DSL\{int_schema, schema, str_schema};
use Flow\ETL\Row\Schema\Matcher\StrictSchemaMatcher;
use PHPUnit\Framework\TestCase;

/**
 * Unit tests covering the exact-equality rules implemented by StrictSchemaMatcher.
 */
final class StrictSchemaMatcherTest extends TestCase
{
    /**
     * An extra field on the right side makes the schemas differ.
     */
    public function test_matching_different_schemas() : void
    {
        $matcher = new StrictSchemaMatcher();

        $matched = $matcher->match(
            schema(str_schema('id'), str_schema('name')),
            schema(str_schema('id'), str_schema('name'), int_schema('age')),
        );

        self::assertFalse($matched);
    }

    /**
     * Equal field counts are not enough when the field names differ.
     */
    public function test_matching_same_number_of_definitions_but_different_names() : void
    {
        $matcher = new StrictSchemaMatcher();

        $matched = $matcher->match(
            schema(str_schema('id'), str_schema('name')),
            schema(str_schema('id'), str_schema('surname')),
        );

        self::assertFalse($matched);
    }

    /**
     * A difference in nullability alone makes the schemas differ.
     */
    public function test_matching_schemas_with_different_nullable_fields() : void
    {
        $matcher = new StrictSchemaMatcher();

        $matched = $matcher->match(
            schema(str_schema('id'), str_schema('name', nullable: true)),
            schema(str_schema('id'), str_schema('name')),
        );

        self::assertFalse($matched);
    }

    /**
     * Identical schemas match.
     */
    public function test_matching_the_same_schema() : void
    {
        $matcher = new StrictSchemaMatcher();

        $matched = $matcher->match(
            schema(str_schema('id'), str_schema('name')),
            schema(str_schema('id'), str_schema('name')),
        );

        self::assertTrue($matched);
    }
}
Loading

0 comments on commit 8d16fcd

Please sign in to comment.