Skip to content

Commit

Permalink
Add UniqueFactory for creating random string or int from given range
Browse files Browse the repository at this point in the history
  • Loading branch information
mleczakm committed Jul 20, 2024
1 parent 8d4b8b3 commit 97efc3c
Show file tree
Hide file tree
Showing 32 changed files with 266 additions and 160 deletions.
2 changes: 1 addition & 1 deletion docs/components/libs/snappy.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ $faker = \Faker\Factory::create();

$texts = [];
for ($i = 0; $i < 10_000; $i++) {
$textSize = \random_int(100, 5000);
$textSize = \Flow\ETL\UniqueFactory::int(100, 5000);
$texts[] = $faker->text($textSize);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public function test_empty_extraction() : void
new Row\Entry\StringEntry('id', \sha1((string) $i)),
new Row\Entry\IntegerEntry('position', $i),
new Row\Entry\StringEntry('name', 'id_' . $i),
new Row\Entry\BooleanEntry('active', (bool) \random_int(0, 1))
new Row\Entry\BooleanEntry('active', (bool) \Flow\ETL\UniqueFactory::int(0, 1))
),
\range(1, 100)
),
Expand Down Expand Up @@ -80,7 +80,7 @@ public function test_extraction_index_with_from_and_size() : void
new Row\Entry\StringEntry('id', \sha1((string) $i)),
new Row\Entry\IntegerEntry('position', $i),
new Row\Entry\StringEntry('name', 'id_' . $i),
new Row\Entry\BooleanEntry('active', (bool) \random_int(0, 1))
new Row\Entry\BooleanEntry('active', (bool) \Flow\ETL\UniqueFactory::int(0, 1))
),
\range(1, 2000)
),
Expand Down Expand Up @@ -123,7 +123,7 @@ public function test_extraction_index_with_search_after() : void
new Row\Entry\StringEntry('id', \sha1((string) $i)),
new Row\Entry\IntegerEntry('position', $i),
new Row\Entry\StringEntry('name', 'id_' . $i),
new Row\Entry\BooleanEntry('active', (bool) \random_int(0, 1))
new Row\Entry\BooleanEntry('active', (bool) \Flow\ETL\UniqueFactory::int(0, 1))
),
\range(1, 2005)
),
Expand Down Expand Up @@ -159,7 +159,7 @@ public function test_extraction_index_with_search_after_with_point_in_time() : v
new Row\Entry\StringEntry('id', \sha1((string) $i)),
new Row\Entry\IntegerEntry('position', $i),
new Row\Entry\StringEntry('name', 'id_' . $i),
new Row\Entry\BooleanEntry('active', (bool) \random_int(0, 1))
new Row\Entry\BooleanEntry('active', (bool) \Flow\ETL\UniqueFactory::int(0, 1))
),
\range(1, 2005)
),
Expand Down Expand Up @@ -200,7 +200,7 @@ public function test_extraction_whole_index_with_point_in_time() : void
new Row\Entry\StringEntry('id', \sha1((string) $i)),
new Row\Entry\IntegerEntry('position', $i),
new Row\Entry\StringEntry('name', 'id_' . $i),
new Row\Entry\BooleanEntry('active', (bool) \random_int(0, 1))
new Row\Entry\BooleanEntry('active', (bool) \Flow\ETL\UniqueFactory::int(0, 1))
),
\range(1, 2005)
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,19 +53,19 @@ public function test_integration_with_entry_factory() : void

$loader->load(new Rows(
Row::create(
new Row\Entry\StringEntry('id', \sha1('id' . bin2hex(random_bytes(16)))),
new Row\Entry\StringEntry('id', \sha1('id' . \Flow\ETL\UniqueFactory::string(32))),
new Row\Entry\StringEntry('name', 'Łukasz')
),
Row::create(
new Row\Entry\StringEntry('id', \sha1('id' . bin2hex(random_bytes(16)))),
new Row\Entry\StringEntry('id', \sha1('id' . \Flow\ETL\UniqueFactory::string(32))),
new Row\Entry\StringEntry('name', 'Norbert')
),
Row::create(
new Row\Entry\StringEntry('id', \sha1('id' . bin2hex(random_bytes(16)))),
new Row\Entry\StringEntry('id', \sha1('id' . \Flow\ETL\UniqueFactory::string(32))),
new Row\Entry\StringEntry('name', 'Dawid')
),
Row::create(
new Row\Entry\StringEntry('id', \sha1('id' . bin2hex(random_bytes(16)))),
new Row\Entry\StringEntry('id', \sha1('id' . \Flow\ETL\UniqueFactory::string(32))),
new Row\Entry\StringEntry('name', 'Tomek')
),
), new FlowContext(Config::default()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public function test_empty_extraction() : void
new Row\Entry\StringEntry('id', \sha1((string) $i)),
new Row\Entry\IntegerEntry('position', $i),
new Row\Entry\StringEntry('name', 'id_' . $i),
new Row\Entry\BooleanEntry('active', (bool) \random_int(0, 1))
new Row\Entry\BooleanEntry('active', (bool) \Flow\ETL\UniqueFactory::int(0, 1))
),
\range(1, 100)
),
Expand All @@ -63,7 +63,7 @@ public function test_extraction_index_with_from_and_size() : void
new Row\Entry\StringEntry('id', \sha1((string) $i)),
new Row\Entry\IntegerEntry('position', $i),
new Row\Entry\StringEntry('name', 'id_' . $i),
new Row\Entry\BooleanEntry('active', (bool) \random_int(0, 1))
new Row\Entry\BooleanEntry('active', (bool) \Flow\ETL\UniqueFactory::int(0, 1))
),
// Default limit for Meilisearch is 1000 documents: https://www.meilisearch.com/docs/reference/api/settings#pagination
\range(1, 999)
Expand Down Expand Up @@ -102,7 +102,7 @@ public function test_extraction_index_with_sort() : void
new Row\Entry\StringEntry('id', \sha1((string) $i)),
new Row\Entry\IntegerEntry('position', $i),
new Row\Entry\StringEntry('name', 'id_' . $i),
new Row\Entry\BooleanEntry('active', (bool) \random_int(0, 1))
new Row\Entry\BooleanEntry('active', (bool) \Flow\ETL\UniqueFactory::int(0, 1))
),
// Default limit for Meilisearch is 1000 documents: https://www.meilisearch.com/docs/reference/api/settings#pagination
\range(1, 999)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,19 +43,19 @@ public function test_integration_with_entry_factory() : void
$loader = to_meilisearch_bulk_index($this->meilisearchContext->clientConfig(), self::INDEX_NAME);
$loader->load(new Rows(
Row::create(
new Row\Entry\StringEntry('id', \sha1('id' . bin2hex(random_bytes(16)))),
new Row\Entry\StringEntry('id', \sha1('id' . \Flow\ETL\UniqueFactory::string(32))),
new Row\Entry\StringEntry('name', 'Łukasz')
),
Row::create(
new Row\Entry\StringEntry('id', \sha1('id' . bin2hex(random_bytes(16)))),
new Row\Entry\StringEntry('id', \sha1('id' . \Flow\ETL\UniqueFactory::string(32))),
new Row\Entry\StringEntry('name', 'Norbert')
),
Row::create(
new Row\Entry\StringEntry('id', \sha1('id' . bin2hex(random_bytes(16)))),
new Row\Entry\StringEntry('id', \sha1('id' . \Flow\ETL\UniqueFactory::string(32))),
new Row\Entry\StringEntry('name', 'Dawid')
),
Row::create(
new Row\Entry\StringEntry('id', \sha1('id' . bin2hex(random_bytes(16)))),
new Row\Entry\StringEntry('id', \sha1('id' . \Flow\ETL\UniqueFactory::string(32))),
new Row\Entry\StringEntry('name', 'Tomek')
),
), new FlowContext(Config::default()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ final class TextTest extends TestCase
{
public function test_loading_text_files() : void
{
$path = __DIR__ . '/var/flow_php_etl_csv_loader' . bin2hex(random_bytes(16)) . '.csv';
$path = __DIR__ . '/var/flow_php_etl_csv_loader' . \Flow\ETL\UniqueFactory::string(32) . '.csv';

(new Flow())
->process(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public function test_using_put_blob_with_content_when_data_is_larger_than_block_
$blockFactory->method('create')
->willReturnCallback(
function () use ($blockSize) {
return new Block($id = \bin2hex(\random_bytes(16)), $blockSize, new Path(sys_get_temp_dir() . '/' . $id . '_block_01.txt'));
return new Block($id = \Flow\ETL\UniqueFactory::string(32), $blockSize, new Path(sys_get_temp_dir() . '/' . $id . '_block_01.txt'));
}
);

Expand Down Expand Up @@ -74,7 +74,7 @@ public function test_using_put_blob_with_content_when_data_is_smaller_than_block
$blockFactory->method('create')
->willReturnCallback(
function () use ($blockSize) {
return new Block($id = \bin2hex(\random_bytes(16)), $blockSize, new Path(sys_get_temp_dir() . '/' . $id . '_block_01.txt'));
return new Block($id = \Flow\ETL\UniqueFactory::string(32), $blockSize, new Path(sys_get_temp_dir() . '/' . $id . '_block_01.txt'));
}
);
$stream = AzureBlobDestinationStream::openBlank(
Expand Down
2 changes: 1 addition & 1 deletion src/core/etl/src/Flow/ETL/ConfigBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public function __construct()
*/
public function build() : Config
{
$this->id ??= 'flow_php' . bin2hex(random_bytes(16));
$this->id ??= 'flow_php' . UniqueFactory::string(32);
$entryFactory = new NativeEntryFactory();
$this->serializer ??= new Base64Serializer(new NativePHPSerializer());
$cachePath = \is_string(\getenv(Config::CACHE_DIR_ENV)) && \realpath(\getenv(Config::CACHE_DIR_ENV))
Expand Down
22 changes: 22 additions & 0 deletions src/core/etl/src/Flow/ETL/UniqueFactory.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<?php

declare(strict_types=1);

namespace Flow\ETL;

/**
 * Factory for random integers and random hexadecimal strings.
 */
class UniqueFactory
{
    /**
     * Returns a random integer from the inclusive range [$min, $max].
     *
     * Delegates directly to \random_int().
     *
     * @throws \ValueError when $min is greater than $max
     */
    public static function int(int $min, int $max) : int
    {
        return \random_int($min, $max);
    }

    /**
     * Returns a random lowercase hexadecimal string that is exactly $int characters long.
     *
     * @param int $int requested length in characters; any value below 1 yields an empty string
     */
    public static function string(int $int) : string
    {
        // Nothing to generate - return early instead of drawing random bytes that would be thrown away.
        if ($int < 1) {
            return '';
        }

        // Each random byte becomes two hex characters, so odd lengths need one extra byte
        // whose second character is trimmed off below. Integer math avoids float rounding.
        $bytes = \intdiv($int, 2) + ($int % 2);

        // bin2hex() output is pure ASCII, so byte-wise substr() is exact here.
        return \substr(\bin2hex(\random_bytes($bytes)), 0, $int);
    }
}
2 changes: 1 addition & 1 deletion src/core/etl/tests/Flow/ETL/Tests/Double/FakeExtractor.php
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public function extract(FlowContext $context) : \Generator
yield rows(
row(
int_entry('int', $id),
float_entry('float', \random_int(100, 100000) / 100),
float_entry('float', \Flow\ETL\UniqueFactory::int(100, 100000) / 100),
bool_entry('bool', false),
datetime_entry('datetime', new \DateTimeImmutable('now')),
str_entry('null', null),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ public function test_saving_to_psr_simple_cache_implementation() : void
{
$cache = new PSRSimpleCache(
new Psr16Cache(
new FilesystemAdapter(directory: __DIR__ . '/var/flow-etl-cache-' . bin2hex(random_bytes(16)))
new FilesystemAdapter(directory: __DIR__ . '/var/flow-etl-cache-' . \Flow\ETL\UniqueFactory::string(32))
),
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,13 +163,13 @@ public function test_partition_by_partitions_order() : void
function (int $i) : array {
$data = [];

$maxItems = \random_int(2, 10);
$maxItems = \Flow\ETL\UniqueFactory::int(2, 10);

for ($d = 0; $d < $maxItems; $d++) {
$data[] = [
'id' => bin2hex(random_bytes(16)),
'created_at' => (new \DateTimeImmutable('2020-01-01'))->add(new \DateInterval('P' . $i . 'D'))->setTime(\random_int(0, 23), \random_int(0, 59), \random_int(0, 59)),
'value' => \random_int(1, 1000),
'id' => \Flow\ETL\UniqueFactory::string(32),
'created_at' => (new \DateTimeImmutable('2020-01-01'))->add(new \DateInterval('P' . $i . 'D'))->setTime(\Flow\ETL\UniqueFactory::int(0, 23), \Flow\ETL\UniqueFactory::int(0, 59), \Flow\ETL\UniqueFactory::int(0, 59)),
'value' => \Flow\ETL\UniqueFactory::int(1, 1000),
];
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,27 @@
use Flow\ETL\Hash\NativePHPHash;
use PHPUnit\Framework\TestCase;

class NativePHPHashTest extends TestCase
final class NativePHPHashTest extends TestCase
{
public static function test_hashing_xxh128_by_static_call() : void
{
static::assertSame(
self::assertSame(
'6c78e0e3bd51d358d01e758642b85fb8',
NativePHPHash::xxh128('test'),
);
}

public function test_hashing_string_using_xxh128_by_default() : void
{
static::assertSame(
self::assertSame(
'6c78e0e3bd51d358d01e758642b85fb8',
(new NativePHPHash())->hash('test'),
);
}

public function test_support_sha512_hash() : void
{
static::assertSame(
self::assertSame(
'ee26b0dd4af7e749aa1a8ee3c10ae9923f618980772e473f8819a5d4940e0db27ac185f8a0e1d5f84f88bc887fd67b143732c304cc5fa9ad8e6f57f50028a8ff',
(new NativePHPHash('sha512'))->hash('test')
);
Expand Down
4 changes: 2 additions & 2 deletions src/core/etl/tests/Flow/ETL/Tests/Unit/RowTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ public static function is_equal_data_provider() : \Generator
public function test_getting_schema_from_row() : void
{
$row = row(
int_entry('id', \random_int(100, 100000)),
float_entry('price', \random_int(100, 100000) / 100),
int_entry('id', \Flow\ETL\UniqueFactory::int(100, 100000)),
float_entry('price', \Flow\ETL\UniqueFactory::int(100, 100000) / 100),
bool_entry('deleted', false),
datetime_entry('created-at', new \DateTimeImmutable('now')),
str_entry('phase', null),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ public function test_ordering_entries_by_name_and_type() : void
row(
int_entry('int_a', 1),
int_entry('int_b', 1),
float_entry('float_a', \random_int(100, 100000) / 100),
float_entry('float_b', \random_int(100, 100000) / 100),
float_entry('float_a', \Flow\ETL\UniqueFactory::int(100, 100000) / 100),
float_entry('float_b', \Flow\ETL\UniqueFactory::int(100, 100000) / 100),
bool_entry('bool', false),
bool_entry('bool_a', false),
bool_entry('bool_c', false),
Expand Down Expand Up @@ -141,7 +141,7 @@ public function test_ordering_entries_by_type() : void
$rows = rows(
row(
int_entry('int', 1),
float_entry('float', \random_int(100, 100000) / 100),
float_entry('float', \Flow\ETL\UniqueFactory::int(100, 100000) / 100),
bool_entry('bool', false),
datetime_entry('datetime', new \DateTimeImmutable('now')),
str_entry('null', null),
Expand Down
84 changes: 84 additions & 0 deletions src/core/etl/tests/Flow/ETL/Tests/Unit/UniqueFactoryTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
<?php

declare(strict_types=1);

namespace Flow\ETL\Tests\Unit;

use Flow\ETL\UniqueFactory;
use PHPUnit\Framework\TestCase;

/**
 * Unit tests for UniqueFactory: random integers from a range and random strings of a requested length.
 */
final class UniqueFactoryTest extends TestCase
{
    /**
     * Yields string lengths 1 through 10, covering both odd and even values.
     */
    public static function integers_provider() : \Generator
    {
        foreach (\range(1, 10) as $i) {
            yield [$i];
        }
    }

    /**
     * Ranges for which UniqueFactory::int() is expected to throw.
     */
    public static function invalid_range_provider() : array
    {
        return [
            'min greater than max' => [2, 1],
        ];
    }

    /**
     * Ranges for which UniqueFactory::int() is expected to return a value.
     */
    public static function valid_range_provider() : array
    {
        return [
            'min equal to max' => [1, 1],
            'min less than max' => [1, 2],
            'min less than zero' => [-1, 1],
            'max and min less than zero' => [-1, -1],
        ];
    }

    public function test_can_create_random_int_from_given_range() : void
    {
        // A single-value range leaves only one possible result.
        self::assertSame(1, UniqueFactory::int(1, 1));
        self::assertThat(
            UniqueFactory::int(1, 2),
            self::logicalOr(
                self::equalTo(1),
                self::equalTo(2)
            )
        );
    }

    /** @dataProvider integers_provider */
    public function test_can_create_random_string_with_given_length(int $expectedLength) : void
    {
        self::assertSame($expectedLength, \mb_strlen(UniqueFactory::string($expectedLength)));
    }

    public function test_empty_string_on_length_below_1() : void
    {
        self::assertSame(
            '',
            UniqueFactory::string(0)
        );
        self::assertSame(
            '',
            UniqueFactory::string(-1)
        );
    }

    /** @dataProvider invalid_range_provider */
    public function test_fail_on_invalid_range(int $min, int $max) : void
    {
        // expectException() is an instance method and must be registered before the failing call.
        $this->expectException(\ValueError::class);
        UniqueFactory::int($min, $max);
    }

    /** @dataProvider valid_range_provider */
    public function test_return_random_int_on_valid_range(int $min, int $max) : void
    {
        // Both bounds must hold at once; OR-ing them would accept every integer
        // and the test could never fail.
        self::assertThat(
            UniqueFactory::int($min, $max),
            self::logicalAnd(
                self::greaterThanOrEqual($min),
                self::lessThanOrEqual($max)
            )
        );
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ public function test_inserts_multiple_rows_at_once() : void
$this->databaseContext->connection(),
$table,
new BulkData([
['id' => $id1 = \bin2hex(\random_bytes(5)), 'age' => 20, 'name' => 'Name One', 'description' => 'Description One', 'active' => false, 'updated_at' => $date1 = new \DateTime(), 'tags' => \json_encode(['a', 'b', 'c'])],
['id' => $id2 = \bin2hex(\random_bytes(5)), 'age' => 30, 'name' => 'Name Two', 'description' => null, 'active' => true, 'updated_at' => $date2 = new \DateTime(), 'tags' => \json_encode(['a', 'b', 'c'])],
['id' => $id3 = \bin2hex(\random_bytes(5)), 'age' => 40, 'name' => 'Name Three', 'description' => 'Description Three', 'active' => false, 'updated_at' => $date3 = new \DateTime(), 'tags' => \json_encode(['a', 'b', 'c'])],
['id' => $id1 = \Flow\ETL\UniqueFactory::string(10), 'age' => 20, 'name' => 'Name One', 'description' => 'Description One', 'active' => false, 'updated_at' => $date1 = new \DateTime(), 'tags' => \json_encode(['a', 'b', 'c'])],
['id' => $id2 = \Flow\ETL\UniqueFactory::string(10), 'age' => 30, 'name' => 'Name Two', 'description' => null, 'active' => true, 'updated_at' => $date2 = new \DateTime(), 'tags' => \json_encode(['a', 'b', 'c'])],
['id' => $id3 = \Flow\ETL\UniqueFactory::string(10), 'age' => 40, 'name' => 'Name Three', 'description' => 'Description Three', 'active' => false, 'updated_at' => $date3 = new \DateTime(), 'tags' => \json_encode(['a', 'b', 'c'])],
])
);

Expand Down
Loading

0 comments on commit 97efc3c

Please sign in to comment.