-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
811264c
commit 5fb70b4
Showing
6 changed files
with
65 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
use function Flow\ETL\Adapter\CSV\from_csv; | ||
use function Flow\ETL\DSL\df; | ||
use function Flow\ETL\DSL\schema_from_json; | ||
use function Flow\ETL\DSL\schema_to_json; | ||
use function Flow\ETL\DSL\to_output; | ||
use Flow\ETL\Loader\StreamLoader\Output; | ||
|
||
require __DIR__ . '/../../../autoload.php'; | ||
|
||
// The inferred schema is cached as JSON next to this script so that it only
// has to be computed when the cache file does not exist yet.
$schemaPath = __DIR__ . '/output/schema.json';

if (!\file_exists($schemaPath)) {
    $schema = df()
        ->read(from_csv(__DIR__ . '/input/dataset.csv'))
        ->limit(100) // Limiting the number of rows to read will speed up the process but might bring less accurate results
        ->autoCast()
        ->schema();

    // A failed write only raises a PHP warning; the schema held in memory is still used below.
    \file_put_contents($schemaPath, schema_to_json($schema));
} else {
    $schemaJson = \file_get_contents($schemaPath);

    // file_get_contents() returns false on failure; fail with a clear message
    // instead of handing a non-string value to schema_from_json().
    if ($schemaJson === false) {
        throw new \RuntimeException("Unable to read cached schema from {$schemaPath}");
    }

    $schema = schema_from_json($schemaJson);
}

// Reading schemaless data formats with predefined schema can significantly improve performance
df()
    ->read(from_csv(__DIR__ . '/input/dataset.csv', schema: $schema))
    ->collect()
    ->write(to_output(truncate: false, output: Output::rows_and_schema))
    ->run();
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
Index,Organization Id,Name,Website,Country,Description,Founded,Industry,Number of employees | ||
1,8cC6B5992C0309c,Acevedo LLC,https://www.donovan.com/,Holy See (Vatican City State),Multi-channeled bottom-line core,2019,Graphic Design / Web Design,7070 | ||
2,ec094061FeaF7Bc,Walls-Mcdonald,http://arias-willis.net/,Lithuania,Compatible encompassing groupware,2005,Utilities,8156 | ||
3,DAcC5dbc58946A7,Gregory PLC,http://www.lynch-hoover.net/,Tokelau,Multi-channeled intangible help-desk,2019,Leisure / Travel,6121 | ||
4,8Dd7beDa37FbeD0,"Byrd, Patterson and Knox",https://www.james-velez.net/,Netherlands,Pre-emptive national function,1982,Furniture,3494 | ||
5,a3b5c54AEC163e4,Mcdowell-Hopkins,http://fuentes.com/,Mayotte,Cloned bifurcated solution,2016,Online Publishing,36 | ||
6,fDfEBeFDaEb59Af,Hayden and Sons,https://www.shaw-mooney.info/,Belize,Persistent mobile task-force,1978,Insurance,7010 | ||
7,752ef90Eae1f7f5,Castro LLC,http://wilkinson.com/,Jamaica,Advanced value-added definition,2008,Outsourcing / Offshoring,2526 | ||
8,B1D4c5CA34f9992,"Barajas, Baird and Shaw",http://www.jordan-harvey.com/,United States of America,Stand-alone bandwidth-monitored algorithm,2000,Wholesale,4478 | ||
9,Cfa1a44106faD4B,"Lucas, Galloway and Benjamin",http://silva.info/,Western Sahara,Persevering leadingedge ability,1990,Retail Industry,8223 | ||
10,C08fcf292AB17DF,"Barker, Hubbard and Bennett",http://www.allen.biz/,Mauritania,Decentralized fault-tolerant functionalities,2014,Museums / Institutions,7716 | ||
11,94B9bEedc626820,Underwood-Mitchell,https://www.leonard.com/,Italy,Compatible dynamic support,1992,Fine Art,4564 | ||
12,FE42dEd40f5DfD8,"Lester, Ochoa and Franco",http://www.munoz.com/,Timor-Leste,Vision-oriented dynamic conglomeration,2014,Motion Pictures / Film,8075 | ||
13,1F861fAbeDdCFea,"Arias, Jackson and Hester",https://hardin-thompson.com/,Algeria,Switchable maximized synergy,1980,Utilities,1319 | ||
14,456de7dE1ab18ca,Riggs and Sons,http://klein-benton.info/,Czech Republic,Object-based discrete orchestration,2012,Law Enforcement,4946 | ||
15,457bcfFF18A7DD2,Stanley LLC,https://bowman.com/,Eritrea,Self-enabling 24/7 groupware,1984,Executive Office,4980 | ||
16,5B5ea5aea34dc5F,Page-Ware,http://lam-soto.com/,Togo,Realigned mobile groupware,1991,Entertainment / Movie Production,1307 | ||
17,A66F35C298Dfd82,"Garner, Melton and Burgess",https://mathews-knox.com/,Guinea-Bissau,Automated 5thgeneration complexity,2003,E - Learning,9038 | ||
18,EdAC2EF13734E0B,Andersen-Fuentes,http://www.mann.com/,Oman,Ameliorated coherent database,1991,Textiles,6436 | ||
19,dD1612190b24B12,Ford-Rice,https://peterson-irwin.com/,Turks and Caicos Islands,Sharable intangible leverage,1971,Computer / Network Security,3038 | ||
20,992CAdffccEebEa,Collins-Figueroa,http://www.holt-bartlett.info/,Mongolia,Realigned multi-state installation,1985,Aviation / Aerospace,9420 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
* | ||
!.gitignore |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters