Skip to content

Commit

Permalink
Add UniqueFactory for creating random string or int from given range (#…
Browse files Browse the repository at this point in the history
…1128)

* Add UniqueFactory for creating random string or int from given range

* CS fixes

Co-authored-by: Joseph Bielawski <[email protected]>

* Rename to RandomGenerator, introduce interface

* Fix missing import after move

* Use functions instead of static calls for tests requiring randomness

* Delete src/core/etl/tests/Flow/ETL/Tests/Integration/Cache/PSRSimpleCacheTest.php

* Use functions instead of static calls for tests requiring randomness

* Add scalar function for random string

---------

Co-authored-by: Joseph Bielawski <[email protected]>
  • Loading branch information
mleczakm and stloyd authored Jul 31, 2024
1 parent 042b5f8 commit 63ef619
Show file tree
Hide file tree
Showing 37 changed files with 421 additions and 166 deletions.
2 changes: 1 addition & 1 deletion docs/components/libs/snappy.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ $faker = \Faker\Factory::create();

$texts = [];
for ($i = 0; $i < 10_000; $i++) {
$textSize = \random_int(100, 5000);
$textSize = \Flow\ETL\DSL\generate_random_int(100, 5000);
$texts[] = $faker->text($textSize);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
namespace Flow\ETL\Adapter\Elasticsearch\Tests\Integration\ElasticsearchPHP;

use function Flow\ETL\Adapter\Elasticsearch\{es_hits_to_rows, from_es, to_es_bulk_index};
use function Flow\ETL\DSL\df;
use function Flow\ETL\DSL\{df, generate_random_int};
use Flow\ETL\Adapter\Elasticsearch\ElasticsearchPHP\DocumentDataSource;
use Flow\ETL\Adapter\Elasticsearch\EntryIdFactory\EntryIdFactory;
use Flow\ETL\Adapter\Elasticsearch\Tests\Integration\TestCase;
Expand Down Expand Up @@ -40,7 +40,7 @@ public function test_empty_extraction() : void
new Row\Entry\StringEntry('id', \sha1((string) $i)),
new Row\Entry\IntegerEntry('position', $i),
new Row\Entry\StringEntry('name', 'id_' . $i),
new Row\Entry\BooleanEntry('active', (bool) \random_int(0, 1))
new Row\Entry\BooleanEntry('active', (bool) generate_random_int(0, 1))
),
\range(1, 100)
),
Expand Down Expand Up @@ -80,7 +80,7 @@ public function test_extraction_index_with_from_and_size() : void
new Row\Entry\StringEntry('id', \sha1((string) $i)),
new Row\Entry\IntegerEntry('position', $i),
new Row\Entry\StringEntry('name', 'id_' . $i),
new Row\Entry\BooleanEntry('active', (bool) \random_int(0, 1))
new Row\Entry\BooleanEntry('active', (bool) generate_random_int(0, 1))
),
\range(1, 2000)
),
Expand Down Expand Up @@ -123,7 +123,7 @@ public function test_extraction_index_with_search_after() : void
new Row\Entry\StringEntry('id', \sha1((string) $i)),
new Row\Entry\IntegerEntry('position', $i),
new Row\Entry\StringEntry('name', 'id_' . $i),
new Row\Entry\BooleanEntry('active', (bool) \random_int(0, 1))
new Row\Entry\BooleanEntry('active', (bool) generate_random_int(0, 1))
),
\range(1, 2005)
),
Expand Down Expand Up @@ -159,7 +159,7 @@ public function test_extraction_index_with_search_after_with_point_in_time() : v
new Row\Entry\StringEntry('id', \sha1((string) $i)),
new Row\Entry\IntegerEntry('position', $i),
new Row\Entry\StringEntry('name', 'id_' . $i),
new Row\Entry\BooleanEntry('active', (bool) \random_int(0, 1))
new Row\Entry\BooleanEntry('active', (bool) generate_random_int(0, 1))
),
\range(1, 2005)
),
Expand Down Expand Up @@ -200,7 +200,7 @@ public function test_extraction_whole_index_with_point_in_time() : void
new Row\Entry\StringEntry('id', \sha1((string) $i)),
new Row\Entry\IntegerEntry('position', $i),
new Row\Entry\StringEntry('name', 'id_' . $i),
new Row\Entry\BooleanEntry('active', (bool) \random_int(0, 1))
new Row\Entry\BooleanEntry('active', (bool) generate_random_int(0, 1))
),
\range(1, 2005)
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
namespace Flow\ETL\Adapter\Elasticsearch\Tests\Integration\ElasticsearchPHP;

use function Flow\ETL\Adapter\Elasticsearch\{to_es_bulk_index, to_es_bulk_update};
use function Flow\ETL\DSL\generate_random_string;
use Flow\ETL\Adapter\Elasticsearch\EntryIdFactory\{EntryIdFactory, HashIdFactory};
use Flow\ETL\Adapter\Elasticsearch\Tests\Integration\TestCase;
use Flow\ETL\{Config, FlowContext, Row, Rows};
Expand Down Expand Up @@ -53,19 +54,19 @@ public function test_integration_with_entry_factory() : void

$loader->load(new Rows(
Row::create(
new Row\Entry\StringEntry('id', \sha1('id' . bin2hex(random_bytes(16)))),
new Row\Entry\StringEntry('id', \sha1('id' . generate_random_string())),
new Row\Entry\StringEntry('name', 'Łukasz')
),
Row::create(
new Row\Entry\StringEntry('id', \sha1('id' . bin2hex(random_bytes(16)))),
new Row\Entry\StringEntry('id', \sha1('id' . \Flow\ETL\DSL\generate_random_string())),
new Row\Entry\StringEntry('name', 'Norbert')
),
Row::create(
new Row\Entry\StringEntry('id', \sha1('id' . bin2hex(random_bytes(16)))),
new Row\Entry\StringEntry('id', \sha1('id' . \Flow\ETL\DSL\generate_random_string())),
new Row\Entry\StringEntry('name', 'Dawid')
),
Row::create(
new Row\Entry\StringEntry('id', \sha1('id' . bin2hex(random_bytes(16)))),
new Row\Entry\StringEntry('id', \sha1('id' . \Flow\ETL\DSL\generate_random_string())),
new Row\Entry\StringEntry('name', 'Tomek')
),
), new FlowContext(Config::default()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
namespace Flow\ETL\Adapter\Meilisearch\Tests\Integration\MeilisearchPHP;

use function Flow\ETL\Adapter\Meilisearch\{from_meilisearch, meilisearch_hits_to_rows, to_meilisearch_bulk_index};
use function Flow\ETL\DSL\generate_random_int;
use Flow\ETL\Adapter\Meilisearch\Tests\Context\MeilisearchContext;
use Flow\ETL\{Config, Flow, FlowContext, Row, Rows};
use PHPUnit\Framework\TestCase;
Expand Down Expand Up @@ -37,7 +38,7 @@ public function test_empty_extraction() : void
new Row\Entry\StringEntry('id', \sha1((string) $i)),
new Row\Entry\IntegerEntry('position', $i),
new Row\Entry\StringEntry('name', 'id_' . $i),
new Row\Entry\BooleanEntry('active', (bool) \random_int(0, 1))
new Row\Entry\BooleanEntry('active', (bool) generate_random_int(0, 1))
),
\range(1, 100)
),
Expand All @@ -63,7 +64,7 @@ public function test_extraction_index_with_from_and_size() : void
new Row\Entry\StringEntry('id', \sha1((string) $i)),
new Row\Entry\IntegerEntry('position', $i),
new Row\Entry\StringEntry('name', 'id_' . $i),
new Row\Entry\BooleanEntry('active', (bool) \random_int(0, 1))
new Row\Entry\BooleanEntry('active', (bool) generate_random_int(0, 1))
),
// Default limit for Meilisearch is 1000 documents: https://www.meilisearch.com/docs/reference/api/settings#pagination
\range(1, 999)
Expand Down Expand Up @@ -102,7 +103,7 @@ public function test_extraction_index_with_sort() : void
new Row\Entry\StringEntry('id', \sha1((string) $i)),
new Row\Entry\IntegerEntry('position', $i),
new Row\Entry\StringEntry('name', 'id_' . $i),
new Row\Entry\BooleanEntry('active', (bool) \random_int(0, 1))
new Row\Entry\BooleanEntry('active', (bool) generate_random_int(0, 1))
),
// Default limit for Meilisearch is 1000 documents: https://www.meilisearch.com/docs/reference/api/settings#pagination
\range(1, 999)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,19 +43,19 @@ public function test_integration_with_entry_factory() : void
$loader = to_meilisearch_bulk_index($this->meilisearchContext->clientConfig(), self::INDEX_NAME);
$loader->load(new Rows(
Row::create(
new Row\Entry\StringEntry('id', \sha1('id' . bin2hex(random_bytes(16)))),
new Row\Entry\StringEntry('id', \sha1('id' . \Flow\ETL\DSL\generate_random_string())),
new Row\Entry\StringEntry('name', 'Łukasz')
),
Row::create(
new Row\Entry\StringEntry('id', \sha1('id' . bin2hex(random_bytes(16)))),
new Row\Entry\StringEntry('id', \sha1('id' . \Flow\ETL\DSL\generate_random_string())),
new Row\Entry\StringEntry('name', 'Norbert')
),
Row::create(
new Row\Entry\StringEntry('id', \sha1('id' . bin2hex(random_bytes(16)))),
new Row\Entry\StringEntry('id', \sha1('id' . \Flow\ETL\DSL\generate_random_string())),
new Row\Entry\StringEntry('name', 'Dawid')
),
Row::create(
new Row\Entry\StringEntry('id', \sha1('id' . bin2hex(random_bytes(16)))),
new Row\Entry\StringEntry('id', \sha1('id' . \Flow\ETL\DSL\generate_random_string())),
new Row\Entry\StringEntry('name', 'Tomek')
),
), new FlowContext(Config::default()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ final class TextTest extends TestCase
{
public function test_loading_text_files() : void
{
$path = __DIR__ . '/var/flow_php_etl_csv_loader' . bin2hex(random_bytes(16)) . '.csv';
$path = __DIR__ . '/var/flow_php_etl_csv_loader' . \Flow\ETL\DSL\generate_random_string() . '.csv';

(new Flow())
->process(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public function test_using_put_blob_with_content_when_data_is_larger_than_block_
$blockFactory->method('create')
->willReturnCallback(
function () use ($blockSize) {
return new Block($id = \bin2hex(\random_bytes(16)), $blockSize, new Path(sys_get_temp_dir() . '/' . $id . '_block_01.txt'));
return new Block($id = \Flow\ETL\DSL\generate_random_string(), $blockSize, new Path(sys_get_temp_dir() . '/' . $id . '_block_01.txt'));
}
);

Expand Down Expand Up @@ -74,7 +74,7 @@ public function test_using_put_blob_with_content_when_data_is_smaller_than_block
$blockFactory->method('create')
->willReturnCallback(
function () use ($blockSize) {
return new Block($id = \bin2hex(\random_bytes(16)), $blockSize, new Path(sys_get_temp_dir() . '/' . $id . '_block_01.txt'));
return new Block($id = \Flow\ETL\DSL\generate_random_string(), $blockSize, new Path(sys_get_temp_dir() . '/' . $id . '_block_01.txt'));
}
);
$stream = AzureBlobDestinationStream::openBlank(
Expand Down
7 changes: 5 additions & 2 deletions src/core/etl/src/Flow/ETL/Config/ConfigBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
use Flow\ETL\PHP\Type\Caster;
use Flow\ETL\Pipeline\Optimizer;
use Flow\ETL\Row\Factory\NativeEntryFactory;
use Flow\ETL\{Cache, Config};
use Flow\ETL\{Cache, Config, NativePHPRandomValueGenerator, RandomValueGenerator};
use Flow\Filesystem\{Filesystem, FilesystemTable};
use Flow\Serializer\{Base64Serializer, NativePHPSerializer, Serializer};

Expand All @@ -33,6 +33,8 @@ final class ConfigBuilder

private bool $putInputIntoRows;

private RandomValueGenerator $randomValueGenerator;

private ?Serializer $serializer;

public function __construct()
Expand All @@ -45,11 +47,12 @@ public function __construct()
$this->caster = null;
$this->cache = new CacheConfigBuilder();
$this->sort = new SortConfigBuilder();
$this->randomValueGenerator = new NativePHPRandomValueGenerator();
}

public function build() : Config
{
$this->id ??= 'flow_php_' . bin2hex(random_bytes(16));
$this->id ??= 'flow_php' . $this->randomValueGenerator->string(32);
$entryFactory = new NativeEntryFactory();
$this->serializer ??= new Base64Serializer(new NativePHPSerializer());

Expand Down
79 changes: 78 additions & 1 deletion src/core/etl/src/Flow/ETL/DSL/functions.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,65 @@
use Flow\ETL\Function\ArraySort\Sort;
use Flow\ETL\Function\Between\Boundary;
use Flow\ETL\Function\StyleConverter\StringStyles;
use Flow\ETL\Function\{All, Any, ArrayExists, ArrayGet, ArrayGetCollection, ArrayKeyRename, ArrayKeysStyleConvert, ArrayMerge, ArrayMergeCollection, ArrayReverse, ArraySort, ArrayUnpack, Average, Between, CallMethod, Capitalize, Cast, Collect, CollectUnique, Combine, Concat, Count, DateTimeFormat, DenseRank, Exists, First, Hash, Last, ListFunctions, Literal, Max, Min, Not, Now, NumberFormat, Optional, PregMatch, PregMatchAll, PregReplace, Rank, Round, RowNumber, Sanitize, ScalarFunction, Size, Split, Sprintf, StructureFunctions, Sum, ToDate, ToDateTime, ToLower, ToMoney, ToTimeZone, ToUpper, Ulid, Uuid, When};
use Flow\ETL\Function\{All,
Any,
ArrayExists,
ArrayGet,
ArrayGetCollection,
ArrayKeyRename,
ArrayKeysStyleConvert,
ArrayMerge,
ArrayMergeCollection,
ArrayReverse,
ArraySort,
ArrayUnpack,
Average,
Between,
CallMethod,
Capitalize,
Cast,
Collect,
CollectUnique,
Combine,
Concat,
Count,
DateTimeFormat,
DenseRank,
Exists,
First,
Hash,
Last,
ListFunctions,
Literal,
Max,
Min,
Not,
Now,
NumberFormat,
Optional,
PregMatch,
PregMatchAll,
PregReplace,
RandomString,
Rank,
Round,
RowNumber,
Sanitize,
ScalarFunction,
Size,
Split,
Sprintf,
StructureFunctions,
Sum,
ToDate,
ToDateTime,
ToLower,
ToMoney,
ToTimeZone,
ToUpper,
Ulid,
Uuid,
When};
use Flow\ETL\Loader\StreamLoader\Output;
use Flow\ETL\Loader\{CallbackLoader, MemoryLoader, StreamLoader, TransformerLoader};
use Flow\ETL\Memory\Memory;
Expand Down Expand Up @@ -50,7 +108,9 @@
Join\Comparison\Identical,
Join\Expression,
Loader,
NativePHPRandomValueGenerator,
Pipeline,
RandomValueGenerator,
Row,
Rows,
Transformer,
Expand Down Expand Up @@ -1209,3 +1269,20 @@ function is_type(array $types, mixed $value) : bool

return false;
}

/**
 * Generate a random string using the given generator.
 *
 * The generator is typed against the RandomValueGenerator interface so that callers
 * (tests in particular) can supply a deterministic implementation.
 *
 * @param int $length requested length, forwarded unchanged to the generator (defaults to 32)
 * @param RandomValueGenerator $generator source of randomness; defaults to the native PHP implementation
 */
function generate_random_string(int $length = 32, RandomValueGenerator $generator = new NativePHPRandomValueGenerator()) : string
{
    return $generator->string($length);
}

/**
 * Generate a random integer using the given generator.
 *
 * The generator is typed against the RandomValueGenerator interface so that callers
 * (tests in particular) can supply a deterministic implementation.
 *
 * @param int $start lower bound forwarded to the generator (defaults to PHP_INT_MIN)
 * @param int $end upper bound forwarded to the generator (defaults to PHP_INT_MAX)
 * @param RandomValueGenerator $generator source of randomness; defaults to the native PHP implementation
 */
function generate_random_int(int $start = PHP_INT_MIN, int $end = PHP_INT_MAX, RandomValueGenerator $generator = new NativePHPRandomValueGenerator()) : int
{
    return $generator->int($start, $end);
}

/**
 * Build a RandomString scalar function.
 *
 * @param int|ScalarFunction $length fixed length, or a function evaluated against each row to obtain it
 * @param RandomValueGenerator $generator source of randomness; defaults to the native PHP implementation
 */
function random_string(
    int|ScalarFunction $length,
    RandomValueGenerator $generator = new NativePHPRandomValueGenerator()
) : RandomString {
    return new RandomString(length: $length, generator: $generator);
}
25 changes: 25 additions & 0 deletions src/core/etl/src/Flow/ETL/Function/RandomString.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<?php

declare(strict_types=1);

namespace Flow\ETL\Function;

use Flow\ETL\{NativePHPRandomValueGenerator, RandomValueGenerator, Row};

/**
 * Scalar function that returns a random string for a row.
 *
 * The length is either a fixed integer or another scalar function that is
 * evaluated against the row on every call.
 */
class RandomString implements ScalarFunction
{
    /**
     * @param int|ScalarFunction $length fixed length, or a function resolved per row
     * @param RandomValueGenerator $generator source of randomness; defaults to the native PHP implementation
     */
    public function __construct(
        private readonly ScalarFunction|int $length,
        private readonly RandomValueGenerator $generator = new NativePHPRandomValueGenerator(),
    ) {
    }

    /**
     * Resolve the length for the given row and ask the generator for a string of that length.
     *
     * NOTE(review): the resolved length is passed to RandomValueGenerator::string(int) as-is;
     * with strict_types enabled a non-integer result from the length function presumably
     * surfaces as a TypeError - confirm this is the desired contract.
     */
    public function eval(Row $row) : string
    {
        $length = $this->length instanceof ScalarFunction
            ? $this->length->eval($row)
            : $this->length;

        return $this->generator->string($length);
    }
}
21 changes: 21 additions & 0 deletions src/core/etl/src/Flow/ETL/NativePHPRandomValueGenerator.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<?php

declare(strict_types=1);

namespace Flow\ETL;

/**
 * RandomValueGenerator implementation backed by PHP's built-in
 * \random_int() and \random_bytes() functions.
 */
final class NativePHPRandomValueGenerator implements RandomValueGenerator
{
    /**
     * Return a random integer by delegating to \random_int() with the given bounds.
     */
    public function int(int $min, int $max) : int
    {
        return \random_int($min, $max);
    }

    /**
     * Return a random hexadecimal string of exactly $int characters.
     *
     * A non-positive $int yields an empty string.
     */
    public function string(int $int) : string
    {
        if ($int <= 0) {
            // Nothing to generate; skip the random source entirely.
            return '';
        }

        // One random byte encodes to two hex characters, so request half the
        // length rounded up and trim the surplus character for odd lengths.
        $bytes = \intdiv($int, 2) + ($int % 2);

        return \substr(\bin2hex(\random_bytes($bytes)), 0, $int);
    }
}
12 changes: 12 additions & 0 deletions src/core/etl/src/Flow/ETL/RandomValueGenerator.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<?php

declare(strict_types=1);

namespace Flow\ETL;

/**
 * Contract for objects that supply random integers and random strings.
 *
 * Depending on this interface rather than a concrete class allows the
 * source of randomness to be swapped out.
 */
interface RandomValueGenerator
{
/**
 * Return a random integer for the given bounds.
 *
 * NOTE(review): NativePHPRandomValueGenerator forwards both bounds to \random_int(),
 * which treats them as inclusive - confirm other implementations follow the same rule.
 */
public function int(int $min, int $max) : int;

/**
 * Return a random string; $int is the requested length of the result.
 */
public function string(int $int) : string;
}
3 changes: 2 additions & 1 deletion src/core/etl/tests/Flow/ETL/Tests/Double/FakeExtractor.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
datetime_entry,
enum_entry,
float_entry,
generate_random_int,
int_entry,
json_entry,
list_entry,
Expand Down Expand Up @@ -50,7 +51,7 @@ public function extract(FlowContext $context) : \Generator
yield rows(
row(
int_entry('int', $id),
float_entry('float', \random_int(100, 100000) / 100),
float_entry('float', generate_random_int(100, 100000) / 100),
bool_entry('bool', false),
datetime_entry('datetime', new \DateTimeImmutable('now')),
str_entry('null', null),
Expand Down
Loading

0 comments on commit 63ef619

Please sign in to comment.