From 595d5f5a19961a4fe2d288dfdf2f35f81a3f60ff Mon Sep 17 00:00:00 2001 From: Norbert Orzechowicz <1921950+norberttech@users.noreply.github.com> Date: Sat, 10 Feb 2024 22:44:55 +0100 Subject: [PATCH] Added partitioning examples (#982) * Added partitioning examples * CS fixes * Filter out duplicated contributor --- .../partitioning/partition_pruning/code.php | 29 +++++++++++ .../sku=PRODUCT01/65c7e9bc4460a568233195.csv | 2 + .../sku=PRODUCT02/65c7e9bc446c2326068326.csv | 2 + .../sku=PRODUCT01/65c7e9bc44305321518126.csv | 2 + .../sku=PRODUCT02/65c7e9bc44421020940545.csv | 2 + .../sku=PRODUCT03/65c7e9bc44515031584752.csv | 2 + .../sku=PRODUCT01/65c7e9bc4386f958078278.csv | 2 + .../sku=PRODUCT02/65c7e9bc440fa083889144.csv | 2 + .../sku=PRODUCT03/65c7e9bc44209401416287.csv | 2 + .../topics/partitioning/partitioning/code.php | 50 +++++++++++++++++++ .../partitioning/output/.gitignore | 2 + examples/topics/partitioning/reading/code.php | 29 +++++++++++ .../sku=PRODUCT01/65c7e9bc4460a568233195.csv | 2 + .../sku=PRODUCT02/65c7e9bc446c2326068326.csv | 2 + .../sku=PRODUCT01/65c7e9bc44305321518126.csv | 2 + .../sku=PRODUCT02/65c7e9bc44421020940545.csv | 2 + .../sku=PRODUCT03/65c7e9bc44515031584752.csv | 2 + .../sku=PRODUCT01/65c7e9bc4386f958078278.csv | 2 + .../sku=PRODUCT02/65c7e9bc440fa083889144.csv | 2 + .../sku=PRODUCT03/65c7e9bc44209401416287.csv | 2 + .../src/Flow/Website/Service/Github.php | 1 + 21 files changed, 143 insertions(+) create mode 100644 examples/topics/partitioning/partition_pruning/code.php create mode 100644 examples/topics/partitioning/partition_pruning/input/color=blue/sku=PRODUCT01/65c7e9bc4460a568233195.csv create mode 100644 examples/topics/partitioning/partition_pruning/input/color=blue/sku=PRODUCT02/65c7e9bc446c2326068326.csv create mode 100644 examples/topics/partitioning/partition_pruning/input/color=green/sku=PRODUCT01/65c7e9bc44305321518126.csv create mode 100644 
examples/topics/partitioning/partition_pruning/input/color=green/sku=PRODUCT02/65c7e9bc44421020940545.csv create mode 100644 examples/topics/partitioning/partition_pruning/input/color=green/sku=PRODUCT03/65c7e9bc44515031584752.csv create mode 100644 examples/topics/partitioning/partition_pruning/input/color=red/sku=PRODUCT01/65c7e9bc4386f958078278.csv create mode 100644 examples/topics/partitioning/partition_pruning/input/color=red/sku=PRODUCT02/65c7e9bc440fa083889144.csv create mode 100644 examples/topics/partitioning/partition_pruning/input/color=red/sku=PRODUCT03/65c7e9bc44209401416287.csv create mode 100644 examples/topics/partitioning/partitioning/code.php create mode 100644 examples/topics/partitioning/partitioning/output/.gitignore create mode 100644 examples/topics/partitioning/reading/code.php create mode 100644 examples/topics/partitioning/reading/input/color=blue/sku=PRODUCT01/65c7e9bc4460a568233195.csv create mode 100644 examples/topics/partitioning/reading/input/color=blue/sku=PRODUCT02/65c7e9bc446c2326068326.csv create mode 100644 examples/topics/partitioning/reading/input/color=green/sku=PRODUCT01/65c7e9bc44305321518126.csv create mode 100644 examples/topics/partitioning/reading/input/color=green/sku=PRODUCT02/65c7e9bc44421020940545.csv create mode 100644 examples/topics/partitioning/reading/input/color=green/sku=PRODUCT03/65c7e9bc44515031584752.csv create mode 100644 examples/topics/partitioning/reading/input/color=red/sku=PRODUCT01/65c7e9bc4386f958078278.csv create mode 100644 examples/topics/partitioning/reading/input/color=red/sku=PRODUCT02/65c7e9bc440fa083889144.csv create mode 100644 examples/topics/partitioning/reading/input/color=red/sku=PRODUCT03/65c7e9bc44209401416287.csv diff --git a/examples/topics/partitioning/partition_pruning/code.php b/examples/topics/partitioning/partition_pruning/code.php new file mode 100644 index 000000000..d51b7351d --- /dev/null +++ b/examples/topics/partitioning/partition_pruning/code.php @@ -0,0 +1,29 @@ 
+read(from_csv(__DIR__ . '/input/color=*/sku=*/*.csv')) + ->filterPartitions(ref('color')->notEquals(lit('green'))) + ->collect() + ->write(to_output(false)) + ->run(); + +// +----+-------+-----------+ +// | id | color | sku | +// +----+-------+-----------+ +// | 2 | red | PRODUCT02 | +// | 3 | red | PRODUCT03 | +// | 1 | red | PRODUCT01 | +// | 8 | blue | PRODUCT02 | +// | 7 | blue | PRODUCT01 | +// +----+-------+-----------+ +// 5 rows diff --git a/examples/topics/partitioning/partition_pruning/input/color=blue/sku=PRODUCT01/65c7e9bc4460a568233195.csv b/examples/topics/partitioning/partition_pruning/input/color=blue/sku=PRODUCT01/65c7e9bc4460a568233195.csv new file mode 100644 index 000000000..545a74d47 --- /dev/null +++ b/examples/topics/partitioning/partition_pruning/input/color=blue/sku=PRODUCT01/65c7e9bc4460a568233195.csv @@ -0,0 +1,2 @@ +id,color,sku +7,blue,PRODUCT01 diff --git a/examples/topics/partitioning/partition_pruning/input/color=blue/sku=PRODUCT02/65c7e9bc446c2326068326.csv b/examples/topics/partitioning/partition_pruning/input/color=blue/sku=PRODUCT02/65c7e9bc446c2326068326.csv new file mode 100644 index 000000000..33873a6e6 --- /dev/null +++ b/examples/topics/partitioning/partition_pruning/input/color=blue/sku=PRODUCT02/65c7e9bc446c2326068326.csv @@ -0,0 +1,2 @@ +id,color,sku +8,blue,PRODUCT02 diff --git a/examples/topics/partitioning/partition_pruning/input/color=green/sku=PRODUCT01/65c7e9bc44305321518126.csv b/examples/topics/partitioning/partition_pruning/input/color=green/sku=PRODUCT01/65c7e9bc44305321518126.csv new file mode 100644 index 000000000..342be95a4 --- /dev/null +++ b/examples/topics/partitioning/partition_pruning/input/color=green/sku=PRODUCT01/65c7e9bc44305321518126.csv @@ -0,0 +1,2 @@ +id,color,sku +4,green,PRODUCT01 diff --git a/examples/topics/partitioning/partition_pruning/input/color=green/sku=PRODUCT02/65c7e9bc44421020940545.csv 
b/examples/topics/partitioning/partition_pruning/input/color=green/sku=PRODUCT02/65c7e9bc44421020940545.csv new file mode 100644 index 000000000..df1fb1e1a --- /dev/null +++ b/examples/topics/partitioning/partition_pruning/input/color=green/sku=PRODUCT02/65c7e9bc44421020940545.csv @@ -0,0 +1,2 @@ +id,color,sku +5,green,PRODUCT02 diff --git a/examples/topics/partitioning/partition_pruning/input/color=green/sku=PRODUCT03/65c7e9bc44515031584752.csv b/examples/topics/partitioning/partition_pruning/input/color=green/sku=PRODUCT03/65c7e9bc44515031584752.csv new file mode 100644 index 000000000..ae63c7bde --- /dev/null +++ b/examples/topics/partitioning/partition_pruning/input/color=green/sku=PRODUCT03/65c7e9bc44515031584752.csv @@ -0,0 +1,2 @@ +id,color,sku +6,green,PRODUCT03 diff --git a/examples/topics/partitioning/partition_pruning/input/color=red/sku=PRODUCT01/65c7e9bc4386f958078278.csv b/examples/topics/partitioning/partition_pruning/input/color=red/sku=PRODUCT01/65c7e9bc4386f958078278.csv new file mode 100644 index 000000000..46a4be882 --- /dev/null +++ b/examples/topics/partitioning/partition_pruning/input/color=red/sku=PRODUCT01/65c7e9bc4386f958078278.csv @@ -0,0 +1,2 @@ +id,color,sku +1,red,PRODUCT01 diff --git a/examples/topics/partitioning/partition_pruning/input/color=red/sku=PRODUCT02/65c7e9bc440fa083889144.csv b/examples/topics/partitioning/partition_pruning/input/color=red/sku=PRODUCT02/65c7e9bc440fa083889144.csv new file mode 100644 index 000000000..308dda75c --- /dev/null +++ b/examples/topics/partitioning/partition_pruning/input/color=red/sku=PRODUCT02/65c7e9bc440fa083889144.csv @@ -0,0 +1,2 @@ +id,color,sku +2,red,PRODUCT02 diff --git a/examples/topics/partitioning/partition_pruning/input/color=red/sku=PRODUCT03/65c7e9bc44209401416287.csv b/examples/topics/partitioning/partition_pruning/input/color=red/sku=PRODUCT03/65c7e9bc44209401416287.csv new file mode 100644 index 000000000..31b889e3e --- /dev/null +++ 
b/examples/topics/partitioning/partition_pruning/input/color=red/sku=PRODUCT03/65c7e9bc44209401416287.csv @@ -0,0 +1,2 @@ +id,color,sku +3,red,PRODUCT03 diff --git a/examples/topics/partitioning/partitioning/code.php b/examples/topics/partitioning/partitioning/code.php new file mode 100644 index 000000000..d69db9c52 --- /dev/null +++ b/examples/topics/partitioning/partitioning/code.php @@ -0,0 +1,50 @@ +read(from_array( + [ + ['id' => 1, 'color' => 'red', 'sku' => 'PRODUCT01'], + ['id' => 2, 'color' => 'red', 'sku' => 'PRODUCT02'], + ['id' => 3, 'color' => 'red', 'sku' => 'PRODUCT03'], + ['id' => 4, 'color' => 'green', 'sku' => 'PRODUCT01'], + ['id' => 5, 'color' => 'green', 'sku' => 'PRODUCT02'], + ['id' => 6, 'color' => 'green', 'sku' => 'PRODUCT03'], + ['id' => 7, 'color' => 'blue', 'sku' => 'PRODUCT01'], + ['id' => 8, 'color' => 'blue', 'sku' => 'PRODUCT02'], + ] + )) + ->partitionBy(ref('color'), ref('sku')) + ->write(to_csv(__DIR__ . '/output')) // do not provide a file extension; partitions are written to separate folders anyway + ->run(); + +// output +// ├── color=blue +// │ ├── sku=PRODUCT01 +// │ │ └── 65c7e9bc4460a568233195.csv +// │ └── sku=PRODUCT02 +// │ └── 65c7e9bc446c2326068326.csv +// ├── color=green +// │ ├── sku=PRODUCT01 +// │ │ └── 65c7e9bc44305321518126.csv +// │ ├── sku=PRODUCT02 +// │ │ └── 65c7e9bc44421020940545.csv +// │ └── sku=PRODUCT03 +// │ └── 65c7e9bc44515031584752.csv +// └── color=red +// ├── sku=PRODUCT01 +// │ └── 65c7e9bc4386f958078278.csv +// ├── sku=PRODUCT02 +// │ └── 65c7e9bc440fa083889144.csv +// └── sku=PRODUCT03 +// └── 65c7e9bc44209401416287.csv +// +// 12 directories, 8 files diff --git a/examples/topics/partitioning/partitioning/output/.gitignore b/examples/topics/partitioning/partitioning/output/.gitignore new file mode 100644 index 000000000..d6b7ef32c --- /dev/null +++ b/examples/topics/partitioning/partitioning/output/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/examples/topics/partitioning/reading/code.php 
b/examples/topics/partitioning/reading/code.php new file mode 100644 index 000000000..ffc39f6e3 --- /dev/null +++ b/examples/topics/partitioning/reading/code.php @@ -0,0 +1,29 @@ +read(from_csv(__DIR__ . '/input/color=*/sku=*/*.csv')) + ->collect() + ->write(to_output(false)) + ->run(); + +// +----+-------+-----------+ +// | id | color | sku | +// +----+-------+-----------+ +// | 5 | green | PRODUCT02 | +// | 6 | green | PRODUCT03 | +// | 4 | green | PRODUCT01 | +// | 2 | red | PRODUCT02 | +// | 3 | red | PRODUCT03 | +// | 1 | red | PRODUCT01 | +// | 8 | blue | PRODUCT02 | +// | 7 | blue | PRODUCT01 | +// +----+-------+-----------+ +// 8 rows diff --git a/examples/topics/partitioning/reading/input/color=blue/sku=PRODUCT01/65c7e9bc4460a568233195.csv b/examples/topics/partitioning/reading/input/color=blue/sku=PRODUCT01/65c7e9bc4460a568233195.csv new file mode 100644 index 000000000..545a74d47 --- /dev/null +++ b/examples/topics/partitioning/reading/input/color=blue/sku=PRODUCT01/65c7e9bc4460a568233195.csv @@ -0,0 +1,2 @@ +id,color,sku +7,blue,PRODUCT01 diff --git a/examples/topics/partitioning/reading/input/color=blue/sku=PRODUCT02/65c7e9bc446c2326068326.csv b/examples/topics/partitioning/reading/input/color=blue/sku=PRODUCT02/65c7e9bc446c2326068326.csv new file mode 100644 index 000000000..33873a6e6 --- /dev/null +++ b/examples/topics/partitioning/reading/input/color=blue/sku=PRODUCT02/65c7e9bc446c2326068326.csv @@ -0,0 +1,2 @@ +id,color,sku +8,blue,PRODUCT02 diff --git a/examples/topics/partitioning/reading/input/color=green/sku=PRODUCT01/65c7e9bc44305321518126.csv b/examples/topics/partitioning/reading/input/color=green/sku=PRODUCT01/65c7e9bc44305321518126.csv new file mode 100644 index 000000000..342be95a4 --- /dev/null +++ b/examples/topics/partitioning/reading/input/color=green/sku=PRODUCT01/65c7e9bc44305321518126.csv @@ -0,0 +1,2 @@ +id,color,sku +4,green,PRODUCT01 diff --git 
a/examples/topics/partitioning/reading/input/color=green/sku=PRODUCT02/65c7e9bc44421020940545.csv b/examples/topics/partitioning/reading/input/color=green/sku=PRODUCT02/65c7e9bc44421020940545.csv new file mode 100644 index 000000000..df1fb1e1a --- /dev/null +++ b/examples/topics/partitioning/reading/input/color=green/sku=PRODUCT02/65c7e9bc44421020940545.csv @@ -0,0 +1,2 @@ +id,color,sku +5,green,PRODUCT02 diff --git a/examples/topics/partitioning/reading/input/color=green/sku=PRODUCT03/65c7e9bc44515031584752.csv b/examples/topics/partitioning/reading/input/color=green/sku=PRODUCT03/65c7e9bc44515031584752.csv new file mode 100644 index 000000000..ae63c7bde --- /dev/null +++ b/examples/topics/partitioning/reading/input/color=green/sku=PRODUCT03/65c7e9bc44515031584752.csv @@ -0,0 +1,2 @@ +id,color,sku +6,green,PRODUCT03 diff --git a/examples/topics/partitioning/reading/input/color=red/sku=PRODUCT01/65c7e9bc4386f958078278.csv b/examples/topics/partitioning/reading/input/color=red/sku=PRODUCT01/65c7e9bc4386f958078278.csv new file mode 100644 index 000000000..46a4be882 --- /dev/null +++ b/examples/topics/partitioning/reading/input/color=red/sku=PRODUCT01/65c7e9bc4386f958078278.csv @@ -0,0 +1,2 @@ +id,color,sku +1,red,PRODUCT01 diff --git a/examples/topics/partitioning/reading/input/color=red/sku=PRODUCT02/65c7e9bc440fa083889144.csv b/examples/topics/partitioning/reading/input/color=red/sku=PRODUCT02/65c7e9bc440fa083889144.csv new file mode 100644 index 000000000..308dda75c --- /dev/null +++ b/examples/topics/partitioning/reading/input/color=red/sku=PRODUCT02/65c7e9bc440fa083889144.csv @@ -0,0 +1,2 @@ +id,color,sku +2,red,PRODUCT02 diff --git a/examples/topics/partitioning/reading/input/color=red/sku=PRODUCT03/65c7e9bc44209401416287.csv b/examples/topics/partitioning/reading/input/color=red/sku=PRODUCT03/65c7e9bc44209401416287.csv new file mode 100644 index 000000000..31b889e3e --- /dev/null +++ 
b/examples/topics/partitioning/reading/input/color=red/sku=PRODUCT03/65c7e9bc44209401416287.csv @@ -0,0 +1,2 @@ +id,color,sku +3,red,PRODUCT03 diff --git a/web/landing/src/Flow/Website/Service/Github.php b/web/landing/src/Flow/Website/Service/Github.php index 031f9288d..a089d50ca 100644 --- a/web/landing/src/Flow/Website/Service/Github.php +++ b/web/landing/src/Flow/Website/Service/Github.php @@ -63,6 +63,7 @@ public function contributors() : array ->drop('unpacked', 'data') ->filter(not(ref('login')->endsWith(lit('[bot]')))) ->filter(not(ref('login')->equals(lit('aeon-automation')))) + ->filter(not(ref('login')->equals(lit('norbertmwk')))) ->withEntry('avatar_url', ref('avatar_url')->concat(lit('&s=128'))) ->limit(24) ->write(to_memory($memory = new ArrayMemory()))