Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add UniqueFactory for creating random string or int from given range #1128

Merged
merged 8 commits into from
Jul 31, 2024
2 changes: 1 addition & 1 deletion docs/components/libs/snappy.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ $faker = \Faker\Factory::create();

$texts = [];
for ($i = 0; $i < 10_000; $i++) {
$textSize = \random_int(100, 5000);
$textSize = \Flow\ETL\UniqueFactory::int(100, 5000);
$texts[] = $faker->text($textSize);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
namespace Flow\ETL\Adapter\Elasticsearch\Tests\Integration\ElasticsearchPHP;

use function Flow\ETL\Adapter\Elasticsearch\{es_hits_to_rows, from_es, to_es_bulk_index};
use function Flow\ETL\DSL\df;
use function Flow\ETL\DSL\{df, generate_random_int};
use Flow\ETL\Adapter\Elasticsearch\ElasticsearchPHP\DocumentDataSource;
use Flow\ETL\Adapter\Elasticsearch\EntryIdFactory\EntryIdFactory;
use Flow\ETL\Adapter\Elasticsearch\Tests\Integration\TestCase;
Expand Down Expand Up @@ -40,7 +40,7 @@ public function test_empty_extraction() : void
new Row\Entry\StringEntry('id', \sha1((string) $i)),
new Row\Entry\IntegerEntry('position', $i),
new Row\Entry\StringEntry('name', 'id_' . $i),
new Row\Entry\BooleanEntry('active', (bool) \random_int(0, 1))
new Row\Entry\BooleanEntry('active', (bool) generate_random_int(0, 1))
),
\range(1, 100)
),
Expand Down Expand Up @@ -80,7 +80,7 @@ public function test_extraction_index_with_from_and_size() : void
new Row\Entry\StringEntry('id', \sha1((string) $i)),
new Row\Entry\IntegerEntry('position', $i),
new Row\Entry\StringEntry('name', 'id_' . $i),
new Row\Entry\BooleanEntry('active', (bool) \random_int(0, 1))
new Row\Entry\BooleanEntry('active', (bool) generate_random_int(0, 1))
),
\range(1, 2000)
),
Expand Down Expand Up @@ -123,7 +123,7 @@ public function test_extraction_index_with_search_after() : void
new Row\Entry\StringEntry('id', \sha1((string) $i)),
new Row\Entry\IntegerEntry('position', $i),
new Row\Entry\StringEntry('name', 'id_' . $i),
new Row\Entry\BooleanEntry('active', (bool) \random_int(0, 1))
new Row\Entry\BooleanEntry('active', (bool) generate_random_int(0, 1))
),
\range(1, 2005)
),
Expand Down Expand Up @@ -159,7 +159,7 @@ public function test_extraction_index_with_search_after_with_point_in_time() : v
new Row\Entry\StringEntry('id', \sha1((string) $i)),
new Row\Entry\IntegerEntry('position', $i),
new Row\Entry\StringEntry('name', 'id_' . $i),
new Row\Entry\BooleanEntry('active', (bool) \random_int(0, 1))
new Row\Entry\BooleanEntry('active', (bool) generate_random_int(0, 1))
),
\range(1, 2005)
),
Expand Down Expand Up @@ -200,7 +200,7 @@ public function test_extraction_whole_index_with_point_in_time() : void
new Row\Entry\StringEntry('id', \sha1((string) $i)),
new Row\Entry\IntegerEntry('position', $i),
new Row\Entry\StringEntry('name', 'id_' . $i),
new Row\Entry\BooleanEntry('active', (bool) \random_int(0, 1))
new Row\Entry\BooleanEntry('active', (bool) generate_random_int(0, 1))
),
\range(1, 2005)
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
namespace Flow\ETL\Adapter\Elasticsearch\Tests\Integration\ElasticsearchPHP;

use function Flow\ETL\Adapter\Elasticsearch\{to_es_bulk_index, to_es_bulk_update};
use function Flow\ETL\DSL\generate_random_string;
use Flow\ETL\Adapter\Elasticsearch\EntryIdFactory\{EntryIdFactory, HashIdFactory};
use Flow\ETL\Adapter\Elasticsearch\Tests\Integration\TestCase;
use Flow\ETL\{Config, FlowContext, Row, Rows};
Expand Down Expand Up @@ -53,19 +54,19 @@ public function test_integration_with_entry_factory() : void

$loader->load(new Rows(
Row::create(
new Row\Entry\StringEntry('id', \sha1('id' . bin2hex(random_bytes(16)))),
new Row\Entry\StringEntry('id', \sha1('id' . generate_random_string())),
new Row\Entry\StringEntry('name', 'Łukasz')
),
Row::create(
new Row\Entry\StringEntry('id', \sha1('id' . bin2hex(random_bytes(16)))),
new Row\Entry\StringEntry('id', \sha1('id' . \Flow\ETL\DSL\generate_random_string())),
new Row\Entry\StringEntry('name', 'Norbert')
),
Row::create(
new Row\Entry\StringEntry('id', \sha1('id' . bin2hex(random_bytes(16)))),
new Row\Entry\StringEntry('id', \sha1('id' . \Flow\ETL\DSL\generate_random_string())),
new Row\Entry\StringEntry('name', 'Dawid')
),
Row::create(
new Row\Entry\StringEntry('id', \sha1('id' . bin2hex(random_bytes(16)))),
new Row\Entry\StringEntry('id', \sha1('id' . \Flow\ETL\DSL\generate_random_string())),
new Row\Entry\StringEntry('name', 'Tomek')
),
), new FlowContext(Config::default()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
namespace Flow\ETL\Adapter\Meilisearch\Tests\Integration\MeilisearchPHP;

use function Flow\ETL\Adapter\Meilisearch\{from_meilisearch, meilisearch_hits_to_rows, to_meilisearch_bulk_index};
use function Flow\ETL\DSL\generate_random_int;
use Flow\ETL\Adapter\Meilisearch\Tests\Context\MeilisearchContext;
use Flow\ETL\{Config, Flow, FlowContext, Row, Rows};
use PHPUnit\Framework\TestCase;
Expand Down Expand Up @@ -37,7 +38,7 @@ public function test_empty_extraction() : void
new Row\Entry\StringEntry('id', \sha1((string) $i)),
new Row\Entry\IntegerEntry('position', $i),
new Row\Entry\StringEntry('name', 'id_' . $i),
new Row\Entry\BooleanEntry('active', (bool) \random_int(0, 1))
new Row\Entry\BooleanEntry('active', (bool) generate_random_int(0, 1))
),
\range(1, 100)
),
Expand All @@ -63,7 +64,7 @@ public function test_extraction_index_with_from_and_size() : void
new Row\Entry\StringEntry('id', \sha1((string) $i)),
new Row\Entry\IntegerEntry('position', $i),
new Row\Entry\StringEntry('name', 'id_' . $i),
new Row\Entry\BooleanEntry('active', (bool) \random_int(0, 1))
new Row\Entry\BooleanEntry('active', (bool) generate_random_int(0, 1))
),
// Default limit for Meilisearch is 1000 documents: https://www.meilisearch.com/docs/reference/api/settings#pagination
\range(1, 999)
Expand Down Expand Up @@ -102,7 +103,7 @@ public function test_extraction_index_with_sort() : void
new Row\Entry\StringEntry('id', \sha1((string) $i)),
new Row\Entry\IntegerEntry('position', $i),
new Row\Entry\StringEntry('name', 'id_' . $i),
new Row\Entry\BooleanEntry('active', (bool) \random_int(0, 1))
new Row\Entry\BooleanEntry('active', (bool) generate_random_int(0, 1))
),
// Default limit for Meilisearch is 1000 documents: https://www.meilisearch.com/docs/reference/api/settings#pagination
\range(1, 999)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,19 +43,19 @@ public function test_integration_with_entry_factory() : void
$loader = to_meilisearch_bulk_index($this->meilisearchContext->clientConfig(), self::INDEX_NAME);
$loader->load(new Rows(
Row::create(
new Row\Entry\StringEntry('id', \sha1('id' . bin2hex(random_bytes(16)))),
new Row\Entry\StringEntry('id', \sha1('id' . \Flow\ETL\DSL\generate_random_string())),
new Row\Entry\StringEntry('name', 'Łukasz')
),
Row::create(
new Row\Entry\StringEntry('id', \sha1('id' . bin2hex(random_bytes(16)))),
new Row\Entry\StringEntry('id', \sha1('id' . \Flow\ETL\DSL\generate_random_string())),
new Row\Entry\StringEntry('name', 'Norbert')
),
Row::create(
new Row\Entry\StringEntry('id', \sha1('id' . bin2hex(random_bytes(16)))),
new Row\Entry\StringEntry('id', \sha1('id' . \Flow\ETL\DSL\generate_random_string())),
new Row\Entry\StringEntry('name', 'Dawid')
),
Row::create(
new Row\Entry\StringEntry('id', \sha1('id' . bin2hex(random_bytes(16)))),
new Row\Entry\StringEntry('id', \sha1('id' . \Flow\ETL\DSL\generate_random_string())),
new Row\Entry\StringEntry('name', 'Tomek')
),
), new FlowContext(Config::default()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ final class TextTest extends TestCase
{
public function test_loading_text_files() : void
{
$path = __DIR__ . '/var/flow_php_etl_csv_loader' . bin2hex(random_bytes(16)) . '.csv';
$path = __DIR__ . '/var/flow_php_etl_csv_loader' . \Flow\ETL\DSL\generate_random_string() . '.csv';

(new Flow())
->process(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public function test_using_put_blob_with_content_when_data_is_larger_than_block_
$blockFactory->method('create')
->willReturnCallback(
function () use ($blockSize) {
return new Block($id = \bin2hex(\random_bytes(16)), $blockSize, new Path(sys_get_temp_dir() . '/' . $id . '_block_01.txt'));
return new Block($id = \Flow\ETL\DSL\generate_random_string(), $blockSize, new Path(sys_get_temp_dir() . '/' . $id . '_block_01.txt'));
}
);

Expand Down Expand Up @@ -74,7 +74,7 @@ public function test_using_put_blob_with_content_when_data_is_smaller_than_block
$blockFactory->method('create')
->willReturnCallback(
function () use ($blockSize) {
return new Block($id = \bin2hex(\random_bytes(16)), $blockSize, new Path(sys_get_temp_dir() . '/' . $id . '_block_01.txt'));
return new Block($id = \Flow\ETL\DSL\generate_random_string(), $blockSize, new Path(sys_get_temp_dir() . '/' . $id . '_block_01.txt'));
}
);
$stream = AzureBlobDestinationStream::openBlank(
Expand Down
7 changes: 5 additions & 2 deletions src/core/etl/src/Flow/ETL/Config/ConfigBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
use Flow\ETL\PHP\Type\Caster;
use Flow\ETL\Pipeline\Optimizer;
use Flow\ETL\Row\Factory\NativeEntryFactory;
use Flow\ETL\{Cache, Config};
use Flow\ETL\{Cache, Config, NativePHPRandomValueGenerator, RandomValueGenerator};
use Flow\Filesystem\{Filesystem, FilesystemTable};
use Flow\Serializer\{Base64Serializer, NativePHPSerializer, Serializer};

Expand All @@ -33,6 +33,8 @@ final class ConfigBuilder

private bool $putInputIntoRows;

private RandomValueGenerator $randomValueGenerator;

private ?Serializer $serializer;

public function __construct()
Expand All @@ -45,11 +47,12 @@ public function __construct()
$this->caster = null;
$this->cache = new CacheConfigBuilder();
$this->sort = new SortConfigBuilder();
$this->randomValueGenerator = new NativePHPRandomValueGenerator();
}

public function build() : Config
{
$this->id ??= 'flow_php_' . bin2hex(random_bytes(16));
$this->id ??= 'flow_php' . $this->randomValueGenerator->string(32);
$entryFactory = new NativeEntryFactory();
$this->serializer ??= new Base64Serializer(new NativePHPSerializer());

Expand Down
79 changes: 78 additions & 1 deletion src/core/etl/src/Flow/ETL/DSL/functions.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,65 @@
use Flow\ETL\Function\ArraySort\Sort;
use Flow\ETL\Function\Between\Boundary;
use Flow\ETL\Function\StyleConverter\StringStyles;
use Flow\ETL\Function\{All, Any, ArrayExists, ArrayGet, ArrayGetCollection, ArrayKeyRename, ArrayKeysStyleConvert, ArrayMerge, ArrayMergeCollection, ArrayReverse, ArraySort, ArrayUnpack, Average, Between, CallMethod, Capitalize, Cast, Collect, CollectUnique, Combine, Concat, Count, DateTimeFormat, DenseRank, Exists, First, Hash, Last, ListFunctions, Literal, Max, Min, Not, Now, NumberFormat, Optional, PregMatch, PregMatchAll, PregReplace, Rank, Round, RowNumber, Sanitize, ScalarFunction, Size, Split, Sprintf, StructureFunctions, Sum, ToDate, ToDateTime, ToLower, ToMoney, ToTimeZone, ToUpper, Ulid, Uuid, When};
use Flow\ETL\Function\{All,
Any,
ArrayExists,
ArrayGet,
ArrayGetCollection,
ArrayKeyRename,
ArrayKeysStyleConvert,
ArrayMerge,
ArrayMergeCollection,
ArrayReverse,
ArraySort,
ArrayUnpack,
Average,
Between,
CallMethod,
Capitalize,
Cast,
Collect,
CollectUnique,
Combine,
Concat,
Count,
DateTimeFormat,
DenseRank,
Exists,
First,
Hash,
Last,
ListFunctions,
Literal,
Max,
Min,
Not,
Now,
NumberFormat,
Optional,
PregMatch,
PregMatchAll,
PregReplace,
RandomString,
Rank,
Round,
RowNumber,
Sanitize,
ScalarFunction,
Size,
Split,
Sprintf,
StructureFunctions,
Sum,
ToDate,
ToDateTime,
ToLower,
ToMoney,
ToTimeZone,
ToUpper,
Ulid,
Uuid,
When};
use Flow\ETL\Loader\StreamLoader\Output;
use Flow\ETL\Loader\{CallbackLoader, MemoryLoader, StreamLoader, TransformerLoader};
use Flow\ETL\Memory\Memory;
Expand Down Expand Up @@ -50,7 +108,9 @@
Join\Comparison\Identical,
Join\Expression,
Loader,
NativePHPRandomValueGenerator,
Pipeline,
RandomValueGenerator,
Row,
Rows,
Transformer,
Expand Down Expand Up @@ -1209,3 +1269,20 @@ function is_type(array $types, mixed $value) : bool

return false;
}

/**
 * Generate a random string by delegating to the given generator.
 *
 * The generator is typed against the RandomValueGenerator interface (not the
 * concrete native implementation) so callers can inject their own generator,
 * consistently with random_string().
 *
 * @param int $length length passed to the generator's string() method
 * @param RandomValueGenerator $generator source of random values, native PHP implementation by default
 */
function generate_random_string(int $length = 32, RandomValueGenerator $generator = new NativePHPRandomValueGenerator()) : string
{
    return $generator->string($length);
}

/**
 * Generate a random integer by delegating to the given generator.
 *
 * The generator is typed against the RandomValueGenerator interface (not the
 * concrete native implementation) so callers can inject their own generator,
 * consistently with random_string().
 *
 * @param int $start lower bound passed to the generator's int() method
 * @param int $end upper bound passed to the generator's int() method
 * @param RandomValueGenerator $generator source of random values, native PHP implementation by default
 */
function generate_random_int(int $start = PHP_INT_MIN, int $end = PHP_INT_MAX, RandomValueGenerator $generator = new NativePHPRandomValueGenerator()) : int
{
    return $generator->int($start, $end);
}

/**
 * Create a RandomString scalar function.
 *
 * @param int|ScalarFunction $length fixed length, or a scalar function handed to RandomString to resolve the length
 * @param RandomValueGenerator $generator source of random values, native PHP implementation by default
 */
function random_string(int|ScalarFunction $length, RandomValueGenerator $generator = new NativePHPRandomValueGenerator()) : RandomString
{
    $function = new RandomString($length, $generator);

    return $function;
}
25 changes: 25 additions & 0 deletions src/core/etl/src/Flow/ETL/Function/RandomString.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<?php

declare(strict_types=1);

namespace Flow\ETL\Function;

use Flow\ETL\{NativePHPRandomValueGenerator, RandomValueGenerator, Row};

/**
 * Scalar function that returns a random string for the evaluated row.
 *
 * The length is either a fixed integer or another scalar function that is
 * evaluated against the same row to obtain the length.
 */
class RandomString implements ScalarFunction
{
    /**
     * @param int|ScalarFunction $length fixed length, or a function evaluated per row to obtain it
     * @param RandomValueGenerator $generator source of random values, native PHP implementation by default
     */
    public function __construct(
        private readonly ScalarFunction|int $length,
        private readonly RandomValueGenerator $generator = new NativePHPRandomValueGenerator()
    ) {
    }

    /**
     * Resolve the length for the given row and ask the generator for a string of that length.
     */
    public function eval(Row $row) : string
    {
        // A nested function is evaluated against the same row; its result is passed
        // as-is to RandomValueGenerator::string(int), exactly like a fixed length.
        $length = $this->length instanceof ScalarFunction
            ? $this->length->eval($row)
            : $this->length;

        return $this->generator->string($length);
    }
}
21 changes: 21 additions & 0 deletions src/core/etl/src/Flow/ETL/NativePHPRandomValueGenerator.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<?php

declare(strict_types=1);

namespace Flow\ETL;

/**
 * RandomValueGenerator backed by PHP's native \random_int() and \random_bytes().
 */
final class NativePHPRandomValueGenerator implements RandomValueGenerator
{
    /**
     * Return a random integer obtained from \random_int($min, $max).
     */
    public function int(int $min, int $max) : int
    {
        return \random_int($min, $max);
    }

    /**
     * Return a random lowercase hexadecimal string of exactly $int characters.
     *
     * A non-positive $int yields an empty string.
     */
    public function string(int $int) : string
    {
        // Nothing to generate: return early instead of drawing a random byte
        // only to cut the result down to zero characters.
        if ($int <= 0) {
            return '';
        }

        // Every random byte is encoded as two hex characters, so round up and
        // trim the possible extra character for odd lengths.
        $bytes = (int) \ceil($int / 2);

        return \substr(\bin2hex(\random_bytes($bytes)), 0, $int);
    }
}
12 changes: 12 additions & 0 deletions src/core/etl/src/Flow/ETL/RandomValueGenerator.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<?php

declare(strict_types=1);

namespace Flow\ETL;

/**
 * Contract for objects that provide random integers and random strings.
 */
interface RandomValueGenerator
{
/**
 * Return a random integer for the requested bounds.
 *
 * NOTE(review): NativePHPRandomValueGenerator forwards both bounds to \random_int(),
 * which treats them as inclusive; presumably other implementations should match - confirm.
 */
public function int(int $min, int $max) : int;

/**
 * Return a random string.
 *
 * NOTE(review): $int appears to be the requested length in characters - that is how
 * NativePHPRandomValueGenerator interprets it; confirm for other implementations.
 */
public function string(int $int) : string;
}
3 changes: 2 additions & 1 deletion src/core/etl/tests/Flow/ETL/Tests/Double/FakeExtractor.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
datetime_entry,
enum_entry,
float_entry,
generate_random_int,
int_entry,
json_entry,
list_entry,
Expand Down Expand Up @@ -50,7 +51,7 @@ public function extract(FlowContext $context) : \Generator
yield rows(
row(
int_entry('int', $id),
float_entry('float', \random_int(100, 100000) / 100),
float_entry('float', generate_random_int(100, 100000) / 100),
bool_entry('bool', false),
datetime_entry('datetime', new \DateTimeImmutable('now')),
str_entry('null', null),
Expand Down
Loading