diff --git a/.github/workflows/test-extensions.yml b/.github/workflows/test-extensions.yml new file mode 100644 index 000000000..29573c565 --- /dev/null +++ b/.github/workflows/test-extensions.yml @@ -0,0 +1,91 @@ +name: Extensions Tests + +on: + pull_request: + paths: + - '.github/workflows/**' + - 'src/adapter/**' + - 'src/core/**' + - 'src/lib/**' + - 'tools/**' + - 'examples/**' + - 'composer.lock' + push: + branches: [ 1.x ] + paths-ignore: + - 'CHANGELOG.md' + +# See https://stackoverflow.com/a/72408109 +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + tests: + name: "Tests" + + runs-on: ${{ matrix.operating-system }} + + strategy: + fail-fast: false + matrix: + dependencies: + - "locked" + - "lowest" + - "highest" + php-version: + - "8.1" + - "8.2" + - "8.3" + operating-system: + - "ubuntu-latest" + + steps: + - name: "Checkout" + uses: "actions/checkout@v4" + + - name: "Install dependencies" + run: | + sudo apt-get update && sudo apt-get install libzstd1 --assume-yes + + - name: "Install PHP" + uses: "shivammathur/setup-php@v2" + with: + tools: composer:v2 + php-version: "${{ matrix.php-version }}" + ini-values: memory_limit=-1 + extensions: :psr, zstd # zstd is what the "Test ZSTD" step exercises + + - name: "List PHP Extensions" + run: php -m + + - name: "List PHP configuration" + run: php -i + + - name: "Get Composer Cache Directory" + id: composer-cache + run: | + echo "dir=$(composer config cache-files-dir)" >> "$GITHUB_OUTPUT" + + - name: "Cache Composer dependencies" + uses: "actions/cache@v4" + with: + path: "${{ steps.composer-cache.outputs.dir }}" + key: "php-${{ matrix.php-version }}-${{ matrix.dependencies }}-composer-${{ hashFiles('**/composer.lock') }}" + restore-keys: | + php-${{ matrix.php-version }}-${{ matrix.dependencies }}-composer- + + - name: "Install lowest dependencies" + if: ${{ matrix.dependencies == 'lowest' }} + run: "composer update --prefer-lowest --no-interaction --no-progress"
+ + - name: "Install highest dependencies" + if: ${{ matrix.dependencies == 'highest' }} + run: "composer update --no-interaction --no-progress" + + - name: "Install locked dependencies" + if: ${{ matrix.dependencies == 'locked' }} + run: "composer install --no-interaction --no-progress" + + - name: "Test ZSTD" + run: "composer test -- --group zstd-extension" diff --git a/composer.json b/composer.json index 16fb15b85..c2adab9a2 100644 --- a/composer.json +++ b/composer.json @@ -70,6 +70,7 @@ "src/lib/azure-sdk/src/Flow/Azure/SDK/DSL/functions.php", "src/lib/filesystem/src/Flow/Filesystem/DSL/functions.php", "src/lib/parquet/src/Flow/Parquet/functions.php", + "src/lib/parquet/src/stubs.php", "src/lib/snappy/polyfill.php" ], "psr-4": { diff --git a/src/lib/parquet/composer.json b/src/lib/parquet/composer.json index b35967998..5a3846ab0 100644 --- a/src/lib/parquet/composer.json +++ b/src/lib/parquet/composer.json @@ -31,7 +31,8 @@ ] }, "files": [ - "src/Flow/Parquet/functions.php" + "src/Flow/Parquet/functions.php", + "src/stubs.php" ] }, "autoload-dev": { diff --git a/src/lib/parquet/src/Flow/Parquet/Option.php b/src/lib/parquet/src/Flow/Parquet/Option.php index 3a7b0cb72..21fa542e7 100644 --- a/src/lib/parquet/src/Flow/Parquet/Option.php +++ b/src/lib/parquet/src/Flow/Parquet/Option.php @@ -107,4 +107,12 @@ enum Option * Default 1 */ case WRITER_VERSION; + + /** + * Compression level for the ZSTD codec. This option is passed to the zstd_compress function when compression is set to ZSTD. + * A value smaller than 0 selects a faster compression level (requires Zstandard library 1.3.4 or later).
+ * + * Default value is 3 + */ + case ZSTD_COMPRESSION_LEVEL; } diff --git a/src/lib/parquet/src/Flow/Parquet/Options.php b/src/lib/parquet/src/Flow/Parquet/Options.php index f5835f28e..111b994b3 100644 --- a/src/lib/parquet/src/Flow/Parquet/Options.php +++ b/src/lib/parquet/src/Flow/Parquet/Options.php @@ -26,6 +26,7 @@ public function __construct() Option::DICTIONARY_PAGE_SIZE->name => SizeUnits::MiB_SIZE, Option::DICTIONARY_PAGE_MIN_CARDINALITY_RATION->name => 0.4, Option::GZIP_COMPRESSION_LEVEL->name => 9, + Option::ZSTD_COMPRESSION_LEVEL->name => 3, Option::WRITER_VERSION->name => 1, Option::VALIDATE_DATA->name => true, ]; diff --git a/src/lib/parquet/src/Flow/Parquet/ParquetFile/Codec.php b/src/lib/parquet/src/Flow/Parquet/ParquetFile/Codec.php index eb9d296e4..da00b70a8 100644 --- a/src/lib/parquet/src/Flow/Parquet/ParquetFile/Codec.php +++ b/src/lib/parquet/src/Flow/Parquet/ParquetFile/Codec.php @@ -26,6 +26,7 @@ public function compress(string $data, Compressions $compression) : string Compressions::SNAPPY => \snappy_compress($data), /** @phpstan-ignore-next-line */ Compressions::GZIP => \gzencode($data, $this->options->get(Option::GZIP_COMPRESSION_LEVEL)), + Compressions::ZSTD => \zstd_compress($data, $this->options->getInt(Option::ZSTD_COMPRESSION_LEVEL)), default => throw new RuntimeException('Compression ' . $compression->name . ' is not supported yet') }; @@ -43,6 +44,7 @@ public function decompress(string $data, Compressions $compression) : string Compressions::UNCOMPRESSED => $data, Compressions::SNAPPY => \snappy_uncompress($data), Compressions::GZIP => \gzdecode($data), + Compressions::ZSTD => \zstd_uncompress($data), default => throw new RuntimeException('Compression ' . $compression->name . 
' is not supported yet') }; diff --git a/src/lib/parquet/src/Flow/Parquet/Writer.php b/src/lib/parquet/src/Flow/Parquet/Writer.php index 4ca67d783..8d9b0e4c8 100644 --- a/src/lib/parquet/src/Flow/Parquet/Writer.php +++ b/src/lib/parquet/src/Flow/Parquet/Writer.php @@ -32,6 +32,7 @@ public function __construct( case Compressions::UNCOMPRESSED: case Compressions::SNAPPY: case Compressions::GZIP: + case Compressions::ZSTD: break; default: diff --git a/src/lib/parquet/src/stubs.php b/src/lib/parquet/src/stubs.php new file mode 100644 index 000000000..05ee60d17 --- /dev/null +++ b/src/lib/parquet/src/stubs.php @@ -0,0 +1,17 @@ + [ + 'int64' => $faker->numberBetween(0, Consts::PHP_INT64_MAX), + 'boolean' => $faker->boolean, + 'string' => $faker->text(150), + 'int32' => $faker->numberBetween(0, Consts::PHP_INT32_MAX), + 'list_of_int' => \array_map( + static fn ($i) => $faker->numberBetween(0, Consts::PHP_INT32_MAX), + \range(1, \random_int(2, 10)) + ), + 'list_of_string' => \array_map( + static fn ($i) => $faker->text(10), + \range(1, \random_int(2, 10)) + ), + ], + ], + ]; + }, \range(1, 100))); + + $writer->write($path, $schema, $inputData); + + self::assertSame( + $inputData, + \iterator_to_array((new Reader())->read($path)->values()) + ); + self::assertFileExists($path); + \unlink($path); + } } diff --git a/src/lib/parquet/tests/Flow/Parquet/Tests/Unit/ParquetFile/CodecTest.php b/src/lib/parquet/tests/Flow/Parquet/Tests/Unit/ParquetFile/CodecTest.php index 712c9dce1..c78e9f392 100644 --- a/src/lib/parquet/tests/Flow/Parquet/Tests/Unit/ParquetFile/CodecTest.php +++ b/src/lib/parquet/tests/Flow/Parquet/Tests/Unit/ParquetFile/CodecTest.php @@ -6,6 +6,7 @@ use Flow\Parquet\ParquetFile\{Codec, Compressions}; use Flow\Parquet\{Option, Options}; +use PHPUnit\Framework\Attributes\Group; use PHPUnit\Framework\TestCase; final class CodecTest extends TestCase @@ -45,4 +46,21 @@ public function test_uncompressed() : void $codec->decompress($codec->compress($data, 
Compressions::UNCOMPRESSED), Compressions::UNCOMPRESSED) ); } + + #[Group('zstd-extension')] + public function test_zstd() : void + { + // Skip unless the zstd extension is loaded. + if (!\extension_loaded('zstd')) { + self::markTestSkipped('The Zstd extension is not available'); + } + + $payload = 'this is some test data to be compressed'; + $codec = new Codec(new Options()); + + $compressed = $codec->compress($payload, Compressions::ZSTD); + $restored = $codec->decompress($compressed, Compressions::ZSTD); + + self::assertSame($payload, $restored); + } }