Skip to content

Commit

Permalink
Added partitioning examples (#982)
Browse files Browse the repository at this point in the history
* Added partitioning examples

* CS fixes

* Filter out duplicated contributor
  • Loading branch information
norberttech authored Feb 10, 2024
1 parent 415d2c6 commit 595d5f5
Show file tree
Hide file tree
Showing 21 changed files with 143 additions and 0 deletions.
29 changes: 29 additions & 0 deletions examples/topics/partitioning/partition_pruning/code.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<?php

declare(strict_types=1);

use function Flow\ETL\Adapter\CSV\from_csv;
use function Flow\ETL\DSL\data_frame;
use function Flow\ETL\DSL\lit;
use function Flow\ETL\DSL\ref;
use function Flow\ETL\DSL\to_output;

require __DIR__ . '/../../../autoload.php';

// Read every CSV file matching the partitioned layout input/color=<value>/sku=<value>/.
$pipeline = data_frame()->read(from_csv(__DIR__ . '/input/color=*/sku=*/*.csv'));

// Keep only the partitions whose "color" value is not "green".
$pipeline = $pipeline->filterPartitions(ref('color')->notEquals(lit('green')));

// Collect the rows and write them to the output; the expected table is shown in the comment below.
$pipeline
    ->collect()
    ->write(to_output(false))
    ->run();

// +----+-------+-----------+
// | id | color | sku |
// +----+-------+-----------+
// | 2 | red | PRODUCT02 |
// | 3 | red | PRODUCT03 |
// | 1 | red | PRODUCT01 |
// | 8 | blue | PRODUCT02 |
// | 7 | blue | PRODUCT01 |
// +----+-------+-----------+
// 5 rows
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
id,color,sku
7,blue,PRODUCT01
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
id,color,sku
8,blue,PRODUCT02
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
id,color,sku
4,green,PRODUCT01
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
id,color,sku
5,green,PRODUCT02
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
id,color,sku
6,green,PRODUCT03
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
id,color,sku
1,red,PRODUCT01
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
id,color,sku
2,red,PRODUCT02
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
id,color,sku
3,red,PRODUCT03
50 changes: 50 additions & 0 deletions examples/topics/partitioning/partitioning/code.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
<?php

declare(strict_types=1);

use function Flow\ETL\Adapter\CSV\to_csv;
use function Flow\ETL\DSL\data_frame;
use function Flow\ETL\DSL\from_array;
use function Flow\ETL\DSL\ref;

require __DIR__ . '/../../../autoload.php';

// Sample product rows; "color" and "sku" are the columns used for partitioning below.
$products = [
    ['id' => 1, 'color' => 'red', 'sku' => 'PRODUCT01'],
    ['id' => 2, 'color' => 'red', 'sku' => 'PRODUCT02'],
    ['id' => 3, 'color' => 'red', 'sku' => 'PRODUCT03'],
    ['id' => 4, 'color' => 'green', 'sku' => 'PRODUCT01'],
    ['id' => 5, 'color' => 'green', 'sku' => 'PRODUCT02'],
    ['id' => 6, 'color' => 'green', 'sku' => 'PRODUCT03'],
    ['id' => 7, 'color' => 'blue', 'sku' => 'PRODUCT01'],
    ['id' => 8, 'color' => 'blue', 'sku' => 'PRODUCT02'],
];

// Partition the rows by color, then by sku, and write them as CSV under ./output.
// Do not provide a file extension in the path: partitions are written to separate folders anyway
// (see the directory tree in the comment below).
$partitioned = data_frame()
    ->read(from_array($products))
    ->partitionBy(ref('color'), ref('sku'));

$partitioned
    ->write(to_csv(__DIR__ . '/output'))
    ->run();

// output
// ├── color=blue
// │ ├── sku=PRODUCT01
// │ │ └── 65c7e9bc4460a568233195.csv
// │ └── sku=PRODUCT02
// │ └── 65c7e9bc446c2326068326.csv
// ├── color=green
// │ ├── sku=PRODUCT01
// │ │ └── 65c7e9bc44305321518126.csv
// │ ├── sku=PRODUCT02
// │ │ └── 65c7e9bc44421020940545.csv
// │ └── sku=PRODUCT03
// │ └── 65c7e9bc44515031584752.csv
// └── color=red
// ├── sku=PRODUCT01
// │ └── 65c7e9bc4386f958078278.csv
// ├── sku=PRODUCT02
// │ └── 65c7e9bc440fa083889144.csv
// └── sku=PRODUCT03
// └── 65c7e9bc44209401416287.csv
//
// 12 directories, 8 files
2 changes: 2 additions & 0 deletions examples/topics/partitioning/partitioning/output/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*
!.gitignore
29 changes: 29 additions & 0 deletions examples/topics/partitioning/reading/code.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<?php

declare(strict_types=1);

use function Flow\ETL\Adapter\CSV\from_csv;
use function Flow\ETL\DSL\data_frame;
use function Flow\ETL\DSL\to_output;

require __DIR__ . '/../../../autoload.php';

// Read every CSV file matching the partitioned layout input/color=<value>/sku=<value>/.
$pipeline = data_frame()->read(from_csv(__DIR__ . '/input/color=*/sku=*/*.csv'));

// Collect the rows and write them to the output; the expected table is shown in the comment below.
$pipeline
    ->collect()
    ->write(to_output(false))
    ->run();

// +----+-------+-----------+
// | id | color | sku |
// +----+-------+-----------+
// | 5 | green | PRODUCT02 |
// | 6 | green | PRODUCT03 |
// | 4 | green | PRODUCT01 |
// | 2 | red | PRODUCT02 |
// | 3 | red | PRODUCT03 |
// | 1 | red | PRODUCT01 |
// | 8 | blue | PRODUCT02 |
// | 7 | blue | PRODUCT01 |
// +----+-------+-----------+
// 8 rows
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
id,color,sku
7,blue,PRODUCT01
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
id,color,sku
8,blue,PRODUCT02
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
id,color,sku
4,green,PRODUCT01
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
id,color,sku
5,green,PRODUCT02
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
id,color,sku
6,green,PRODUCT03
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
id,color,sku
1,red,PRODUCT01
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
id,color,sku
2,red,PRODUCT02
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
id,color,sku
3,red,PRODUCT03
1 change: 1 addition & 0 deletions web/landing/src/Flow/Website/Service/Github.php
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ public function contributors() : array
->drop('unpacked', 'data')
->filter(not(ref('login')->endsWith(lit('[bot]'))))
->filter(not(ref('login')->equals(lit('aeon-automation'))))
->filter(not(ref('login')->equals(lit('norbertmwk'))))
->withEntry('avatar_url', ref('avatar_url')->concat(lit('&s=128')))
->limit(24)
->write(to_memory($memory = new ArrayMemory()))
Expand Down

0 comments on commit 595d5f5

Please sign in to comment.