From 14935736fe2a9d919a3646a8c6b7bf6941e77a0c Mon Sep 17 00:00:00 2001 From: Joseph Bielawski Date: Wed, 11 Oct 2023 15:05:51 +0200 Subject: [PATCH] Add way to run all examples --- composer.json | 1 + composer.lock | 66 ++++++++++++++++++- examples/bootstrap.php | 12 ++-- examples/data/extractor.php | 2 +- examples/run_examples.php | 60 +++++++++++++++++ .../csv => setup}/php_to_csv_and_json.php | 2 +- .../csv => setup}/php_to_csv_partition.php | 2 +- examples/topics/types/csv/README.md | 3 - examples/topics/types/csv/csv_read.php | 6 ++ examples/topics/types/csv/csv_to_avro.php | 6 ++ .../topics/types/csv/csv_to_parquet_100k.php | 6 ++ .../topics/types/csv/csv_to_parquet_10k.php | 6 ++ examples/topics/types/csv/php_to_csv.php | 38 ----------- 13 files changed, 160 insertions(+), 50 deletions(-) create mode 100755 examples/run_examples.php rename examples/{topics/types/csv => setup}/php_to_csv_and_json.php (95%) rename examples/{topics/types/csv => setup}/php_to_csv_partition.php (94%) delete mode 100644 examples/topics/types/csv/php_to_csv.php diff --git a/composer.json b/composer.json index 6afc5d6a5..d118326ae 100644 --- a/composer.json +++ b/composer.json @@ -49,6 +49,7 @@ "ramsey/uuid": "^4.5", "symfony/cache": "^6.2", "symfony/dotenv": "^6.2", + "symfony/finder": "^6.3", "symfony/uid": "^6.3" }, "autoload": { diff --git a/composer.lock b/composer.lock index 1ddfaf484..0f3a24573 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "4af5e1bc3dd5f51f026ef03f08e2d7f7", + "content-hash": "6de5d25741b658ad50f4794e782cdab8", "packages": [ { "name": "amphp/amp", @@ -6443,6 +6443,70 @@ ], "time": "2023-04-21T14:41:17+00:00" }, + { + "name": "symfony/finder", + "version": "v6.3.5", + "source": { + "type": "git", + "url": "https://github.com/symfony/finder.git", + "reference": "a1b31d88c0e998168ca7792f222cbecee47428c4" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/finder/zipball/a1b31d88c0e998168ca7792f222cbecee47428c4", + "reference": "a1b31d88c0e998168ca7792f222cbecee47428c4", + "shasum": "" + }, + "require": { + "php": ">=8.1" + }, + "require-dev": { + "symfony/filesystem": "^6.0" + }, + "type": "library", + "autoload": { + "psr-4": { + "Symfony\\Component\\Finder\\": "" + }, + "exclude-from-classmap": [ + "/Tests/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Fabien Potencier", + "email": "fabien@symfony.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Finds files and directories via an intuitive fluent interface", + "homepage": "https://symfony.com", + "support": { + "source": "https://github.com/symfony/finder/tree/v6.3.5" + }, + "funding": [ + { + "url": "https://symfony.com/sponsor", + "type": "custom" + }, + { + "url": "https://github.com/fabpot", + "type": "github" + }, + { + "url": "https://tidelift.com/funding/github/packagist/symfony/symfony", + "type": "tidelift" + } + ], + "time": "2023-09-26T12:56:25+00:00" + }, { "name": "symfony/options-resolver", "version": "v6.3.0", diff --git a/examples/bootstrap.php b/examples/bootstrap.php index fcd170acf..1f5978e36 100644 --- a/examples/bootstrap.php +++ b/examples/bootstrap.php @@ -6,11 +6,13 @@ \ini_set('memory_limit', -1); -const __FLOW_DATA__ = __DIR__ . '/data'; -const __FLOW_OUTPUT__ = __DIR__ . '/output'; -const __FLOW_VAR__ = __DIR__ . '/var'; -const __FLOW_VAR_RUN__ = __DIR__ . '/var/run'; -const __FLOW_SRC__ = __DIR__ . '/../src'; +if (!\defined('__FLOW_DATA__')) { + \define('__FLOW_DATA__', __DIR__ . '/data'); + \define('__FLOW_OUTPUT__', __DIR__ . '/output'); + \define('__FLOW_VAR__', __DIR__ . '/var'); + \define('__FLOW_VAR_RUN__', __DIR__ . '/var/run'); + \define('__FLOW_SRC__', __DIR__ . '/../src'); +} if (!\is_dir(__FLOW_VAR__)) { \mkdir(__FLOW_VAR__); diff --git a/examples/data/extractor.php b/examples/data/extractor.php index e4c5edb12..0ed741a14 100644 --- a/examples/data/extractor.php +++ b/examples/data/extractor.php @@ -39,7 +39,7 @@ public function extract(FlowContext $context) : Generator } } - if (\count($rows) >= 0) { + if ([] !== $rows) { yield new Rows(...$rows); } } diff --git a/examples/run_examples.php b/examples/run_examples.php new file mode 100755 index 000000000..1e7ed4062 --- /dev/null +++ b/examples/run_examples.php @@ -0,0 +1,60 @@ +#!/usr/bin/env php +in(__DIR__ . '/output')->exclude('.gitkeep') as $file) { + @\unlink($file->getRealPath()); +} + +print "Running setup scripts.\n"; + +$finder = new Finder(); +$finder->in(__DIR__ . '/setup') + ->files() + ->name('*.php'); + +foreach ($finder as $file) { + print "\nScript: {$file->getRelativePathname()}\n"; + + include $file->getRealPath(); +} + +$finder = new Finder(); +$finder->in(__DIR__ . '/topics') + // async, database & remote filesystem examples require additional manual setup to be run properly + ->exclude(['async', 'db', 'fs']) + ->files() + ->name('*.php'); + +foreach ($finder as $file) { + print "\nExample: {$file->getRelativePathname()}\n"; + + try { + include $file->getRealPath(); + } catch (\Exception $e) { + print "Example failed: {$e->getMessage()}\n"; + } +} diff --git a/examples/topics/types/csv/php_to_csv_and_json.php b/examples/setup/php_to_csv_and_json.php similarity index 95% rename from examples/topics/types/csv/php_to_csv_and_json.php rename to examples/setup/php_to_csv_and_json.php index be6f04e79..9eae3c859 100644 --- a/examples/topics/types/csv/php_to_csv_and_json.php +++ b/examples/setup/php_to_csv_and_json.php @@ -10,7 +10,7 @@ use Flow\ETL\Flow; use Flow\ETL\Monitoring\Memory\Consumption; -require __DIR__ . '/../../../bootstrap.php'; +require __DIR__ . '/../bootstrap.php'; $extractor = require __FLOW_DATA__ . '/extractor.php'; diff --git a/examples/topics/types/csv/php_to_csv_partition.php b/examples/setup/php_to_csv_partition.php similarity index 94% rename from examples/topics/types/csv/php_to_csv_partition.php rename to examples/setup/php_to_csv_partition.php index b09064182..b1f44f8c0 100644 --- a/examples/topics/types/csv/php_to_csv_partition.php +++ b/examples/setup/php_to_csv_partition.php @@ -9,7 +9,7 @@ use Flow\ETL\Filesystem\SaveMode; use Flow\ETL\Flow; -require __DIR__ . '/../../../bootstrap.php'; +require __DIR__ . '/../bootstrap.php'; $extractor = require __FLOW_DATA__ . '/extractor.php'; diff --git a/examples/topics/types/csv/README.md b/examples/topics/types/csv/README.md index 0af0cfdef..df7ef23fd 100644 --- a/examples/topics/types/csv/README.md +++ b/examples/topics/types/csv/README.md @@ -6,6 +6,3 @@ Examples: - [CSV to Json](csv_to_json.php) - [CSV to Parquet - 10k per group](csv_to_parquet_10k.php) - [CSV to Parquet - 100k per group](csv_to_parquet_100k.php) -- [PHP to CSV & Json](php_to_csv_and_json.php) -- [PHP to CSV partitioned](php_to_csv_partition.php) -- [CSV read partitioned](php_to_csv_partition.php) \ No newline at end of file diff --git a/examples/topics/types/csv/csv_read.php b/examples/topics/types/csv/csv_read.php index d0f321478..523fb9910 100644 --- a/examples/topics/types/csv/csv_read.php +++ b/examples/topics/types/csv/csv_read.php @@ -10,6 +10,12 @@ require __DIR__ . '/../../../bootstrap.php'; +if (!\file_exists(__FLOW_OUTPUT__ . '/dataset.csv')) { + print "Data file is missing, please first run: php examples/setup/php_to_csv_and_json.php\n"; + + exit(1); +} + $flow = (new Flow()) ->read(CSV::from(__FLOW_OUTPUT__ . '/dataset.csv', 1000)) ->withEntry('unpacked', ref('row')->unpack()) diff --git a/examples/topics/types/csv/csv_to_avro.php b/examples/topics/types/csv/csv_to_avro.php index 0da65948f..8284749f9 100644 --- a/examples/topics/types/csv/csv_to_avro.php +++ b/examples/topics/types/csv/csv_to_avro.php @@ -11,6 +11,12 @@ require __DIR__ . '/../../../bootstrap.php'; +if (!\file_exists(__FLOW_OUTPUT__ . '/dataset.csv')) { + print "Data file is missing, please first run: php examples/setup/php_to_csv_and_json.php\n"; + + exit(1); +} + $flow = (new Flow()) ->read(CSV::from(__FLOW_OUTPUT__ . '/dataset.csv', 10_000)) ->withEntry('unpacked', ref('row')->unpack()) diff --git a/examples/topics/types/csv/csv_to_parquet_100k.php b/examples/topics/types/csv/csv_to_parquet_100k.php index 18e1e95b2..2af86e855 100644 --- a/examples/topics/types/csv/csv_to_parquet_100k.php +++ b/examples/topics/types/csv/csv_to_parquet_100k.php @@ -11,6 +11,12 @@ require __DIR__ . '/../../../bootstrap.php'; +if (!\file_exists(__FLOW_OUTPUT__ . '/dataset.csv')) { + print "Data file is missing, please first run: php examples/setup/php_to_csv_and_json.php\n"; + + exit(1); +} + $flow = (new Flow()) ->read(CSV::from(__FLOW_OUTPUT__ . '/dataset.csv', 10_000)) ->withEntry('unpacked', ref('row')->unpack()) diff --git a/examples/topics/types/csv/csv_to_parquet_10k.php b/examples/topics/types/csv/csv_to_parquet_10k.php index 5f56ee45d..cc854847c 100644 --- a/examples/topics/types/csv/csv_to_parquet_10k.php +++ b/examples/topics/types/csv/csv_to_parquet_10k.php @@ -11,6 +11,12 @@ require __DIR__ . '/../../../bootstrap.php'; +if (!\file_exists(__FLOW_OUTPUT__ . '/dataset.csv')) { + print "Data file is missing, please first run: php examples/setup/php_to_csv_and_json.php\n"; + + exit(1); +} + $flow = (new Flow()) ->read(CSV::from(__FLOW_OUTPUT__ . '/dataset.csv', 10_000)) ->withEntry('unpacked', ref('row')->unpack()) diff --git a/examples/topics/types/csv/php_to_csv.php b/examples/topics/types/csv/php_to_csv.php deleted file mode 100644 index 67d9d8def..000000000 --- a/examples/topics/types/csv/php_to_csv.php +++ /dev/null @@ -1,38 +0,0 @@ -read($extractor) - ->withEntry('unpacked', ref('row')->unpack()) - ->renameAll('unpacked.', '') - ->drop(col('row')) - ->write(CSV::to(__FLOW_OUTPUT__ . '/dataset.csv')); - -if ($_ENV['FLOW_PHAR_APP'] ?? false) { - return $flow; -} - -$stopwatch = new Stopwatch(); -$stopwatch->start(); -$memory = new Consumption(); -$memory->current(); - -$flow->run(); - -$memory->current(); -$stopwatch->stop(); - -print "Memory consumption, max: {$memory->max()->inMb()}Mb\n"; -print "Total writing CSV: {$stopwatch->totalElapsedTime()->inSecondsPrecise()}s\n\n";