diff --git a/composer.json b/composer.json index abffd4263..4f62b4de3 100644 --- a/composer.json +++ b/composer.json @@ -262,6 +262,12 @@ "build:docs": [ "bin/docs.php dsl:dump web/landing/resources/dsl.json" ], + "build:parquet:thrift": [ + "grep -q 'namespace php Flow.Parquet.Thrift' src/lib/parquet/src/Flow/Parquet/Resources/Thrift/parquet.thrift || { echo \"Flow php namespace not found in thrift definition!\"; exit 1; }\n", + "rm src/lib/parquet/src/Flow/Parquet/Thrift/*.php", + "thrift --gen php --out src/lib/parquet/src src/lib/parquet/src/Flow/Parquet/Resources/Thrift/parquet.thrift", + "@cs:php:fix" + ], "pre-autoload-dump": [ "Google\\Task\\Composer::cleanup" ], diff --git a/src/adapter/etl-adapter-parquet/src/Flow/ETL/Adapter/Parquet/ParquetLoader.php b/src/adapter/etl-adapter-parquet/src/Flow/ETL/Adapter/Parquet/ParquetLoader.php index ebd4d8d30..6eb6f3c6b 100644 --- a/src/adapter/etl-adapter-parquet/src/Flow/ETL/Adapter/Parquet/ParquetLoader.php +++ b/src/adapter/etl-adapter-parquet/src/Flow/ETL/Adapter/Parquet/ParquetLoader.php @@ -117,9 +117,9 @@ public function withSchema(Schema $schema) : self private function inferSchema(Rows $rows) : void { if ($this->inferredSchema === null) { - $this->inferredSchema = $rows->schema(); + $this->inferredSchema = $rows->schema()->makeNullable(); } else { - $this->inferredSchema = $this->inferredSchema->merge($rows->schema()); + $this->inferredSchema = $this->inferredSchema->merge($rows->schema())->makeNullable(); } } diff --git a/src/adapter/etl-adapter-parquet/src/Flow/ETL/Adapter/Parquet/SchemaConverter.php b/src/adapter/etl-adapter-parquet/src/Flow/ETL/Adapter/Parquet/SchemaConverter.php index 3b93ed48b..745edcde4 100644 --- a/src/adapter/etl-adapter-parquet/src/Flow/ETL/Adapter/Parquet/SchemaConverter.php +++ b/src/adapter/etl-adapter-parquet/src/Flow/ETL/Adapter/Parquet/SchemaConverter.php @@ -26,6 +26,7 @@ use Flow\ETL\PHP\Type\Logical\{DateTimeType, JsonType, ListType, MapType, StructureType, UuidType, 
XMLElementType, XMLType}; use Flow\ETL\PHP\Type\Native\{ObjectType, ScalarType}; use Flow\ETL\PHP\Type\Type; +use Flow\ETL\PHP\Value\Uuid; use Flow\ETL\Row\{Schema}; use Flow\Parquet\ParquetFile\Schema as ParquetSchema; use Flow\Parquet\ParquetFile\Schema\{Column, FlatColumn, ListElement, NestedColumn}; @@ -65,42 +66,43 @@ private function flowListToParquetList(ListType $type) : ListElement case ScalarType::class: switch ($element->type()) { case ScalarType::FLOAT: - return ListElement::float(); + return ListElement::float(!$type->nullable()); case ScalarType::INTEGER: - return ListElement::int64(); + return ListElement::int64(!$type->nullable()); case ScalarType::STRING: - return ListElement::string(); + return ListElement::string(!$type->nullable()); case ScalarType::BOOLEAN: - return ListElement::boolean(); + return ListElement::boolean(!$type->nullable()); } break; case DateTimeType::class: - return ListElement::datetime(); + return ListElement::datetime(!$type->nullable()); case UuidType::class: - return ListElement::uuid(); + return ListElement::uuid(!$type->nullable()); case JsonType::class: - return ListElement::json(); + return ListElement::json(!$type->nullable()); case XMLType::class: case XMLElementType::class: - return ListElement::string(); + return ListElement::string(!$type->nullable()); case ObjectType::class: $class = $element->class; if ($class === \DateInterval::class) { - return ListElement::time(); + return ListElement::time(!$type->nullable()); } throw new \Flow\Parquet\Exception\RuntimeException($class . 
' can\'t be converted to any parquet columns.'); case ListType::class: - return ListElement::list($this->flowListToParquetList($element)); + return ListElement::list($this->flowListToParquetList($element), !$type->nullable()); case MapType::class: return ListElement::map( $this->flowMapKeyToParquetMapKey($element->key()), - $this->flowMapValueToParquetMapValue($element->value()) + $this->flowMapValueToParquetMapValue($element->value()), + !$type->nullable() ); case StructureType::class: - return ListElement::structure($this->flowStructureToParquetStructureElements($element)); + return ListElement::structure($this->flowStructureToParquetStructureElements($element), !$type->nullable()); } throw new RuntimeException($element::class . ' is not supported.'); @@ -141,50 +143,51 @@ private function flowMapValueToParquetMapValue(MapValue $mapValue) : ParquetSche case ScalarType::class: switch ($mapValueType->type()) { case ScalarType::FLOAT: - return ParquetSchema\MapValue::float(); + return ParquetSchema\MapValue::float(!$mapValueType->nullable()); case ScalarType::INTEGER: - return ParquetSchema\MapValue::int64(); + return ParquetSchema\MapValue::int64(!$mapValueType->nullable()); case ScalarType::STRING: - return ParquetSchema\MapValue::string(); + return ParquetSchema\MapValue::string(!$mapValueType->nullable()); case ScalarType::BOOLEAN: - return ParquetSchema\MapValue::boolean(); + return ParquetSchema\MapValue::boolean(!$mapValueType->nullable()); } break; case UuidType::class: - return ParquetSchema\MapValue::uuid(); + return ParquetSchema\MapValue::uuid(!$mapValueType->nullable()); case DateTimeType::class: - return ParquetSchema\MapValue::datetime(); + return ParquetSchema\MapValue::datetime(!$mapValueType->nullable()); case JsonType::class: - return ParquetSchema\MapValue::json(); + return ParquetSchema\MapValue::json(!$mapValueType->nullable()); case XMLType::class: case XMLElementType::class: - return ParquetSchema\MapValue::string(); + return 
ParquetSchema\MapValue::string(!$mapValueType->nullable()); case ObjectType::class: $class = $mapValueType->class; if (\is_a($class, \DateTimeInterface::class, true)) { - return ParquetSchema\MapValue::datetime(); + return ParquetSchema\MapValue::datetime(!$mapValueType->nullable()); } - if ($class === \Flow\ETL\PHP\Value\Uuid::class) { - return ParquetSchema\MapValue::string(); + if ($class === Uuid::class) { + return ParquetSchema\MapValue::string(!$mapValueType->nullable()); } if ($class === \DateInterval::class) { - return ParquetSchema\MapValue::time(); + return ParquetSchema\MapValue::time(!$mapValueType->nullable()); } throw new \Flow\Parquet\Exception\RuntimeException($class . ' can\'t be converted to any parquet columns.'); case ListType::class: - return ParquetSchema\MapValue::list($this->flowListToParquetList($mapValueType)); + return ParquetSchema\MapValue::list($this->flowListToParquetList($mapValueType), !$mapValueType->nullable()); case MapType::class: return ParquetSchema\MapValue::map( $this->flowMapKeyToParquetMapKey($mapValueType->key()), - $this->flowMapValueToParquetMapValue($mapValueType->value()) + $this->flowMapValueToParquetMapValue($mapValueType->value()), + !$mapValueType->nullable() ); case StructureType::class: - return ParquetSchema\MapValue::structure(...$this->flowStructureToParquetStructureElements($mapValueType)); + return ParquetSchema\MapValue::structure($this->flowStructureToParquetStructureElements($mapValueType), !$mapValueType->nullable()); } throw new RuntimeException($mapValueType::class . ' is not supported.'); @@ -195,7 +198,7 @@ private function flowObjectToParquetFlat(ObjectType $type, string $name) : FlatC $class = $type->class; if ($class === \DateInterval::class) { - return FlatColumn::time($name); + return FlatColumn::time($name, $type->nullable() ? ParquetSchema\Repetition::OPTIONAL : ParquetSchema\Repetition::REQUIRED); } throw new RuntimeException($type->toString() . 
' can\'t be converted to any parquet columns.'); @@ -205,13 +208,13 @@ private function flowScalarToParquetFlat(ScalarType $type, string $name) : FlatC { switch ($type->type()) { case ScalarType::FLOAT: - return FlatColumn::float($name); + return FlatColumn::float($name, $type->nullable() ? ParquetSchema\Repetition::OPTIONAL : ParquetSchema\Repetition::REQUIRED); case ScalarType::INTEGER: - return FlatColumn::int64($name); + return FlatColumn::int64($name, $type->nullable() ? ParquetSchema\Repetition::OPTIONAL : ParquetSchema\Repetition::REQUIRED); case ScalarType::STRING: - return FlatColumn::string($name); + return FlatColumn::string($name, $type->nullable() ? ParquetSchema\Repetition::OPTIONAL : ParquetSchema\Repetition::REQUIRED); case ScalarType::BOOLEAN: - return FlatColumn::boolean($name); + return FlatColumn::boolean($name, $type->nullable() ? ParquetSchema\Repetition::OPTIONAL : ParquetSchema\Repetition::REQUIRED); default: throw new RuntimeException($type->type() . ' is not supported.'); @@ -235,26 +238,27 @@ private function flowTypeToParquetType(string $name, Type $type) : Column case ScalarType::class: return $this->flowScalarToParquetFlat($type, $name); case DateTimeType::class: - return FlatColumn::datetime($name); + return FlatColumn::datetime($name, $type->nullable() ? ParquetSchema\Repetition::OPTIONAL : ParquetSchema\Repetition::REQUIRED); case UuidType::class: - return FlatColumn::uuid($name); + return FlatColumn::uuid($name, $type->nullable() ? ParquetSchema\Repetition::OPTIONAL : ParquetSchema\Repetition::REQUIRED); case JsonType::class: - return FlatColumn::json($name); + return FlatColumn::json($name, $type->nullable() ? ParquetSchema\Repetition::OPTIONAL : ParquetSchema\Repetition::REQUIRED); case XMLType::class: case XMLElementType::class: - return FlatColumn::string($name); + return FlatColumn::string($name, $type->nullable() ? 
ParquetSchema\Repetition::OPTIONAL : ParquetSchema\Repetition::REQUIRED); case ObjectType::class: return $this->flowObjectToParquetFlat($type, $name); case ListType::class: - return NestedColumn::list($name, $this->flowListToParquetList($type)); + return NestedColumn::list($name, $this->flowListToParquetList($type), $type->nullable() ? ParquetSchema\Repetition::OPTIONAL : ParquetSchema\Repetition::REQUIRED); case MapType::class: return NestedColumn::map( $name, $this->flowMapKeyToParquetMapKey($type->key()), - $this->flowMapValueToParquetMapValue($type->value()) + $this->flowMapValueToParquetMapValue($type->value()), + $type->nullable() ? ParquetSchema\Repetition::OPTIONAL : ParquetSchema\Repetition::REQUIRED ); case StructureType::class: - return NestedColumn::struct($name, $this->flowStructureToParquetStructureElements($type)); + return NestedColumn::struct($name, $this->flowStructureToParquetStructureElements($type), $type->nullable() ? ParquetSchema\Repetition::OPTIONAL : ParquetSchema\Repetition::REQUIRED); } throw new RuntimeException($type::class . 
' is not supported.'); diff --git a/src/adapter/etl-adapter-parquet/tests/Flow/ETL/Adapter/Parquet/Tests/Unit/FlowToParquetSchemaTest.php b/src/adapter/etl-adapter-parquet/tests/Flow/ETL/Adapter/Parquet/Tests/Unit/FlowToParquetSchemaTest.php index ec589c639..d8ac98df3 100644 --- a/src/adapter/etl-adapter-parquet/tests/Flow/ETL/Adapter/Parquet/Tests/Unit/FlowToParquetSchemaTest.php +++ b/src/adapter/etl-adapter-parquet/tests/Flow/ETL/Adapter/Parquet/Tests/Unit/FlowToParquetSchemaTest.php @@ -32,22 +32,23 @@ public function test_convert_etl_entries_to_parquet_fields() : void { self::assertEquals( ParquetSchema::with( - FlatColumn::int64('integer'), - FlatColumn::boolean('boolean'), - FlatColumn::string('string'), - FlatColumn::float('float'), - FlatColumn::dateTime('datetime'), - FlatColumn::json('json'), - NestedColumn::list('list', ParquetSchema\ListElement::string()), + FlatColumn::int64('integer', ParquetSchema\Repetition::REQUIRED), + FlatColumn::boolean('boolean', ParquetSchema\Repetition::REQUIRED), + FlatColumn::string('string', ParquetSchema\Repetition::REQUIRED), + FlatColumn::float('float', ParquetSchema\Repetition::REQUIRED), + FlatColumn::dateTime('datetime', ParquetSchema\Repetition::REQUIRED), + FlatColumn::json('json', ParquetSchema\Repetition::REQUIRED), + NestedColumn::list('list', ParquetSchema\ListElement::string(true), ParquetSchema\Repetition::REQUIRED), NestedColumn::list('list_of_structs', ParquetSchema\ListElement::structure( [ - FlatColumn::int64('integer'), - FlatColumn::boolean('boolean'), - ] - )), - NestedColumn::struct('structure', [FlatColumn::string('a')]), - NestedColumn::map('map', ParquetSchema\MapKey::string(), ParquetSchema\MapValue::int64()), - FlatColumn::time('time') + FlatColumn::int64('integer', ParquetSchema\Repetition::REQUIRED), + FlatColumn::boolean('boolean', ParquetSchema\Repetition::REQUIRED), + ], + true + ), ParquetSchema\Repetition::REQUIRED), + NestedColumn::struct('structure', [FlatColumn::string('a', 
ParquetSchema\Repetition::REQUIRED)], ParquetSchema\Repetition::REQUIRED), + NestedColumn::map('map', ParquetSchema\MapKey::string(), ParquetSchema\MapValue::int64(true), ParquetSchema\Repetition::REQUIRED), + FlatColumn::time('time', ParquetSchema\Repetition::REQUIRED) ), (new SchemaConverter())->toParquet(new Schema( Schema\Definition::integer('integer'), @@ -65,7 +66,7 @@ public function test_convert_etl_entries_to_parquet_fields() : void ))), Schema\Definition::structure('structure', new StructureType([new StructureElement('a', type_string())])), Schema\Definition::map('map', new MapType(MapKey::string(), MapValue::integer())), - Schema\Definition::object('time', type_object(\DateInterval::class, false)) + Schema\Definition::object('time', type_object(\DateInterval::class)) )) ); } @@ -76,7 +77,7 @@ public function test_convert_object_entry_to_parquet_array() : void $this->expectExceptionMessage("object can't be converted to any parquet columns."); (new SchemaConverter())->toParquet(new Schema( - Schema\Definition::object('object', type_object(\stdClass::class, false)) + Schema\Definition::object('object', type_object(\stdClass::class)) )); } } diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Logical/StructureTypeTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Logical/StructureTypeTest.php index 271694e9f..a34a2ac24 100644 --- a/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Logical/StructureTypeTest.php +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/PHP/Type/Logical/StructureTypeTest.php @@ -107,7 +107,7 @@ public function test_merging_nested_structures() : void struct_element('id', type_string()), struct_element('name', type_float()), ], - true + nullable: true )), ]), struct_type([ diff --git a/src/lib/parquet-viewer/src/Flow/ParquetViewer/Command/ReadMetadataCommand.php b/src/lib/parquet-viewer/src/Flow/ParquetViewer/Command/ReadMetadataCommand.php index cbc8393d3..dca698482 100644 --- 
a/src/lib/parquet-viewer/src/Flow/ParquetViewer/Command/ReadMetadataCommand.php +++ b/src/lib/parquet-viewer/src/Flow/ParquetViewer/Command/ReadMetadataCommand.php @@ -78,13 +78,13 @@ protected function execute(InputInterface $input, OutputInterface $output) : int if ($displayColumns) { $columnsTable = $style->createTable(); $columnsTable->setStyle('box'); - $columnsTable->setHeaderTitle('Flat Columns'); + $columnsTable->setHeaderTitle('Columns'); $columnsTable->setHeaders(['path', 'type', 'logical type', 'repetition', 'max repetition', 'max definition']); foreach ($parquetFile->schema()->columnsFlat() as $column) { $columnsTable->addRow([ $column->flatPath(), - $column->type() ? $column->type()->name : 'group', + ($column->type() ? $column->type()->name : 'group') . ($column->typeLength() ? '(' . $column->typeLength() . ')' : ''), $column->logicalType() ? $column->logicalType()->name() : '-', $column->repetition()?->name ?? 'N/A', $column->maxRepetitionsLevel(), diff --git a/src/lib/parquet/src/Flow/Parquet/ParquetFile.php b/src/lib/parquet/src/Flow/Parquet/ParquetFile.php index 8aaadb60d..78f646fba 100644 --- a/src/lib/parquet/src/Flow/Parquet/ParquetFile.php +++ b/src/lib/parquet/src/Flow/Parquet/ParquetFile.php @@ -54,7 +54,11 @@ public function metadata() : Metadata $metadata = $this->stream->read($metadataLength, -($metadataLength + 8)); $thriftMetadata = new FileMetaData(); - $thriftMetadata->read(new TCompactProtocol(new TMemoryBuffer($metadata))); + $thriftMetadata->read( + new TCompactProtocol( + new TMemoryBuffer($metadata) + ) + ); $this->metadata = Metadata::fromThrift($thriftMetadata, $this->options); diff --git a/src/lib/parquet/src/Flow/Parquet/ParquetFile/Data/PlainValueUnpacker.php b/src/lib/parquet/src/Flow/Parquet/ParquetFile/Data/PlainValueUnpacker.php index 7d87dda60..61556452f 100644 --- a/src/lib/parquet/src/Flow/Parquet/ParquetFile/Data/PlainValueUnpacker.php +++ b/src/lib/parquet/src/Flow/Parquet/ParquetFile/Data/PlainValueUnpacker.php @@ 
-42,6 +42,7 @@ public function unpack(FlatColumn $column, int $total) : array PhysicalType::FIXED_LEN_BYTE_ARRAY => match ($column->logicalType()?->name()) { /** @phpstan-ignore-next-line */ LogicalType::DECIMAL => $this->reader->readDecimals($total, $column->typeLength(), $column->logicalType()?->decimalData()?->precision(), $column->logicalType()?->decimalData()?->scale()), + LogicalType::UUID => $this->reader->readStrings($total), default => throw new RuntimeException('Unsupported logical type ' . ($column->logicalType()?->name() ?: 'null') . ' for FIXED_LEN_BYTE_ARRAY'), }, PhysicalType::BOOLEAN => $this->reader->readBooleans($total), diff --git a/src/lib/parquet/src/Flow/Parquet/ParquetFile/Data/PlainValuesPacker.php b/src/lib/parquet/src/Flow/Parquet/ParquetFile/Data/PlainValuesPacker.php index ce59536d0..41e75bee3 100644 --- a/src/lib/parquet/src/Flow/Parquet/ParquetFile/Data/PlainValuesPacker.php +++ b/src/lib/parquet/src/Flow/Parquet/ParquetFile/Data/PlainValuesPacker.php @@ -70,6 +70,10 @@ public function packValues(FlatColumn $column, array $values) : void break; case PhysicalType::FIXED_LEN_BYTE_ARRAY: switch ($column->logicalType()?->name()) { + case LogicalType::UUID: + $this->writer->writeStrings($parquetValues); + + break; case LogicalType::DECIMAL: /** * @phpstan-ignore-next-line @@ -87,7 +91,6 @@ public function packValues(FlatColumn $column, array $values) : void break; case PhysicalType::BYTE_ARRAY: switch ($column->logicalType()?->name()) { - case LogicalType::UUID: case LogicalType::JSON: case LogicalType::STRING: $this->writer->writeStrings($parquetValues); diff --git a/src/lib/parquet/src/Flow/Parquet/ParquetFile/Schema/FlatColumn.php b/src/lib/parquet/src/Flow/Parquet/ParquetFile/Schema/FlatColumn.php index 52e6fe069..3824dcb67 100644 --- a/src/lib/parquet/src/Flow/Parquet/ParquetFile/Schema/FlatColumn.php +++ b/src/lib/parquet/src/Flow/Parquet/ParquetFile/Schema/FlatColumn.php @@ -137,9 +137,9 @@ public static function time(string $name, 
Repetition $repetition = Repetition::O return new self($name, PhysicalType::INT64, ConvertedType::TIME_MICROS, LogicalType::time(), $repetition); } - public static function uuid(string $string, Repetition $repetition = Repetition::OPTIONAL) : self + public static function uuid(string $uuid, Repetition $repetition = Repetition::OPTIONAL) : self { - return new self($string, PhysicalType::BYTE_ARRAY, null, LogicalType::uuid(), $repetition); + return new self($uuid, PhysicalType::FIXED_LEN_BYTE_ARRAY, null, LogicalType::uuid(), $repetition, typeLength: 16); } public function __debugInfo() : ?array diff --git a/src/lib/parquet/src/Flow/Parquet/Resources/Thrift/parquet.thrift b/src/lib/parquet/src/Flow/Parquet/Resources/Thrift/parquet.thrift index a468302e3..d76a9ca0f 100644 --- a/src/lib/parquet/src/Flow/Parquet/Resources/Thrift/parquet.thrift +++ b/src/lib/parquet/src/Flow/Parquet/Resources/Thrift/parquet.thrift @@ -61,14 +61,14 @@ enum ConvertedType { * values */ LIST = 3; - /** an enum is converted into a binary field */ + /** an enum is converted into a BYTE_ARRAY field */ ENUM = 4; /** * A decimal value. * - * This may be used to annotate binary or fixed primitive types. The - * underlying byte array stores the unscaled value encoded as two's + * This may be used to annotate BYTE_ARRAY or FIXED_LEN_BYTE_ARRAY primitive + * types. The underlying byte array stores the unscaled value encoded as two's * complement using big-endian byte order (the most significant byte is the * zeroth element). The value of the decimal is the value * 10^{-scale}. * @@ -159,7 +159,7 @@ enum ConvertedType { /** * An embedded BSON document * - * A BSON document embedded within a single BINARY column. + * A BSON document embedded within a single BYTE_ARRAY column. */ BSON = 20; @@ -182,16 +182,62 @@ enum ConvertedType { * Representation of Schemas */ enum FieldRepetitionType { - /** This field is required (can not be null) and each record has exactly 1 value. 
*/ + /** This field is required (can not be null) and each row has exactly 1 value. */ REQUIRED = 0; - /** The field is optional (can be null) and each record has 0 or 1 values. */ + /** The field is optional (can be null) and each row has 0 or 1 values. */ OPTIONAL = 1; /** The field is repeated and can contain 0 or more values */ REPEATED = 2; } +/** + * A structure for capturing metadata for estimating the unencoded, + * uncompressed size of data written. This is useful for readers to estimate + * how much memory is needed to reconstruct data in their memory model and for + * fine grained filter pushdown on nested structures (the histograms contained + * in this structure can help determine the number of nulls at a particular + * nesting level and maximum length of lists). + */ +struct SizeStatistics { + /** + * The number of physical bytes stored for BYTE_ARRAY data values assuming + * no encoding. This is exclusive of the bytes needed to store the length of + * each byte array. In other words, this field is equivalent to the `(size + * of PLAIN-ENCODING the byte array values) - (4 bytes * number of values + * written)`. To determine unencoded sizes of other types readers can use + * schema information multiplied by the number of non-null and null values. + * The number of null/non-null values can be inferred from the histograms + * below. + * + * For example, if a column chunk is dictionary-encoded with dictionary + * ["a", "bc", "cde"], and a data page contains the indices [0, 0, 1, 2], + * then this value for that data page should be 7 (1 + 1 + 2 + 3). + * + * This field should only be set for types that use BYTE_ARRAY as their + * physical type. + */ + 1: optional i64 unencoded_byte_array_data_bytes; + /** + * When present, there is expected to be one element corresponding to each + * repetition (i.e. size=max repetition_level+1) where each element + * represents the number of times the repetition level was observed in the + * data. 
+ * This field may be omitted if max_repetition_level is 0 without loss + * of information. + **/ + 2: optional list<i64> repetition_level_histogram; + /** + * Same as repetition_level_histogram except for definition levels. + * + * This field may be omitted if max_definition_level is 0 or 1 without + * loss of information. + **/ + 3: optional list<i64> definition_level_histogram; +} + /** * Statistics per row group and per page * All fields are optional. @@ -212,27 +258,45 @@ struct Statistics { */ 1: optional binary max; 2: optional binary min; - /** count of null value in the column */ + /** + * Count of null values in the column. + * + * Writers SHOULD always write this field even if it is zero (i.e. no null value) + * or the column is not nullable. + * Readers MUST distinguish between null_count not being present and null_count == 0. + * If null_count is not present, readers MUST NOT assume null_count == 0. + */ 3: optional i64 null_count; /** count of distinct values occurring */ 4: optional i64 distinct_count; /** - * Min and max values for the column, determined by its ColumnOrder. + * Lower and upper bound values for the column, determined by its ColumnOrder. + * + * These may be the actual minimum and maximum values found on a page or column + * chunk, but can also be (more compact) values that do not exist on a page or + * column chunk. For example, instead of storing "Blart Versenwald III", a writer + * may set min_value="B", max_value="C". Such more compact values must still be + * valid values within the column's logical type. * * Values are encoded using PLAIN encoding, except that variable-length byte * arrays do not include a length prefix. 
*/ 5: optional binary max_value; 6: optional binary min_value; + /** If true, max_value is the actual maximum value for a column */ + 7: optional bool is_max_value_exact; + /** If true, min_value is the actual minimum value for a column */ + 8: optional bool is_min_value_exact; } /** Empty structs to use as logical type annotations */ -struct StringType {} // allowed for BINARY, must be encoded with UTF-8 +struct StringType {} // allowed for BYTE_ARRAY, must be encoded with UTF-8 struct UUIDType {} // allowed for FIXED[16], must encoded raw UUID bytes struct MapType {} // see LogicalTypes.md struct ListType {} // see LogicalTypes.md -struct EnumType {} // allowed for BINARY, must be encoded with UTF-8 +struct EnumType {} // allowed for BYTE_ARRAY, must be encoded with UTF-8 struct DateType {} // allowed for INT32 +struct Float16Type {} // allowed for FIXED[2], must encode raw FLOAT16 bytes /** * Logical type to annotate a column that is always null. @@ -252,7 +316,7 @@ struct NullType {} // allowed for any physical type, only null values stored * To maintain forward-compatibility in v1, implementations using this logical * type must also set scale and precision on the annotated SchemaElement. * - * Allowed for physical types: INT32, INT64, FIXED, and BINARY + * Allowed for physical types: INT32, INT64, FIXED_LEN_BYTE_ARRAY, and BYTE_ARRAY. 
*/ struct DecimalType { 1: required i32 scale @@ -304,7 +368,7 @@ struct IntType { /** * Embedded JSON logical type annotation * - * Allowed for physical types: BINARY + * Allowed for physical types: BYTE_ARRAY */ struct JsonType { } @@ -312,7 +376,7 @@ struct JsonType { /** * Embedded BSON logical type annotation * - * Allowed for physical types: BINARY + * Allowed for physical types: BYTE_ARRAY */ struct BsonType { } @@ -346,6 +410,7 @@ union LogicalType { 12: JsonType JSON // use ConvertedType JSON 13: BsonType BSON // use ConvertedType BSON 14: UUIDType UUID // no compatible ConvertedType + 15: Float16Type FLOAT16 // no compatible ConvertedType } /** @@ -469,12 +534,15 @@ enum Encoding { */ RLE_DICTIONARY = 8; - /** Encoding for floating-point data. + /** Encoding for fixed-width data (FLOAT, DOUBLE, INT32, INT64, FIXED_LEN_BYTE_ARRAY). K byte-streams are created where K is the size in bytes of the data type. - The individual bytes of an FP value are scattered to the corresponding stream and + The individual bytes of a value are scattered to the corresponding stream and the streams are concatenated. This itself does not reduce the size of the data but can lead to better compression afterwards. + + Added in 2.8 for FLOAT and DOUBLE. + Support for INT32, INT64 and FIXED_LEN_BYTE_ARRAY added in 2.11. */ BYTE_STREAM_SPLIT = 9; } @@ -518,7 +586,13 @@ enum BoundaryOrder { /** Data page header */ struct DataPageHeader { - /** Number of values, including NULLs, in this data page. **/ + /** + * Number of values, including NULLs, in this data page. + * + * If a OffsetIndex is present, a page must begin at a row + * boundary (repetition_level = 0). Otherwise, pages may begin + * within a row (repetition_level > 0). 
+ **/ 1: required i32 num_values /** Encoding used for this data page **/ @@ -530,7 +604,7 @@ struct DataPageHeader { /** Encoding used for repetition levels **/ 4: required Encoding repetition_level_encoding; - /** Optional statistics for the data in this page**/ + /** Optional statistics for the data in this page **/ 5: optional Statistics statistics; } @@ -565,26 +639,30 @@ struct DataPageHeaderV2 { /** Number of NULL values, in this data page. Number of non-null = num_values - num_nulls which is also the number of values in the data section **/ 2: required i32 num_nulls - /** Number of rows in this data page. which means pages change on record boundaries (r = 0) **/ + /** + * Number of rows in this data page. Every page must begin at a + * row boundary (repetition_level = 0): rows must **not** be + * split across page boundaries when using V2 data pages. + **/ 3: required i32 num_rows /** Encoding used for data in this page **/ 4: required Encoding encoding // repetition levels and definition levels are always using RLE (without size in it) - /** length of the definition levels */ + /** Length of the definition levels */ 5: required i32 definition_levels_byte_length; - /** length of the repetition levels */ + /** Length of the repetition levels */ 6: required i32 repetition_levels_byte_length; - /** whether the values are compressed. + /** Whether the values are compressed. Which means the section of the page between definition_levels_byte_length + repetition_levels_byte_length + 1 and compressed_page_size (included) is compressed with the compression_codec. 
If missing it is considered compressed */ 7: optional bool is_compressed = true; - /** optional statistics for the data in this page **/ + /** Optional statistics for the data in this page **/ 8: optional Statistics statistics; } @@ -678,10 +756,10 @@ struct PageHeader { } /** - * Wrapper struct to specify sort order + * Sort order within a RowGroup of a leaf column */ struct SortingColumn { - /** The column index (in this row group) **/ + /** The ordinal position of the column (in this row group) **/ 1: required i32 column_idx /** If true, indicates this column is sorted in descending order. **/ @@ -765,6 +843,14 @@ struct ColumnMetaData { * in a single I/O. */ 15: optional i32 bloom_filter_length; + + /** + * Optional statistics to help estimate total memory when converted to in-memory + * representations. The histograms contained in these statistics can + * also be useful in some cases for more fine-grained nullability/list length + * filter pushdown. + */ + 16: optional SizeStatistics size_statistics; } struct EncryptionWithFooterKey { @@ -789,12 +875,21 @@ struct ColumnChunk { **/ 1: optional string file_path - /** Byte offset in file_path to the ColumnMetaData **/ - 2: required i64 file_offset + /** Deprecated: Byte offset in file_path to the ColumnMetaData + * + * Past use of this field has been inconsistent, with some implementations + * using it to point to the ColumnMetaData and some using it to point to + * the first page in the column chunk. In many cases, the ColumnMetaData at this + * location is wrong. This field is now deprecated and should not be used. + * Writers should set this field to 0 if no ColumnMetaData has been written outside + * the footer. + */ + 2: required i64 file_offset = 0 - /** Column metadata for this chunk. This is the same content as what is at - * file_path/file_offset. Having it here has it replicated in the file - * metadata. + /** Column metadata for this chunk. 
Some writers may also replicate this at the + * location pointed to by file_path/file_offset. + * Note: while marked as optional, this field is in fact required by most major + * Parquet implementations. As such, writers MUST populate this field. **/ 3: optional ColumnMetaData meta_data @@ -880,7 +975,7 @@ union ColumnOrder { * TIME_MICROS - signed comparison * TIMESTAMP_MILLIS - signed comparison * TIMESTAMP_MICROS - signed comparison - * INTERVAL - unsigned comparison + * INTERVAL - undefined * JSON - unsigned byte-wise comparison * BSON - unsigned byte-wise comparison * ENUM - unsigned byte-wise comparison @@ -927,23 +1022,44 @@ struct PageLocation { 2: required i32 compressed_page_size /** - * Index within the RowGroup of the first row of the page; this means pages - * change on record boundaries (r = 0). + * Index within the RowGroup of the first row of the page. When an + * OffsetIndex is present, pages must begin on row boundaries + * (repetition_level = 0). */ 3: required i64 first_row_index } +/** + * Optional offsets for each data page in a ColumnChunk. + * + * Forms part of the page index, along with ColumnIndex. + * + * OffsetIndex may be present even if ColumnIndex is not. + */ struct OffsetIndex { /** * PageLocations, ordered by increasing PageLocation.offset. It is required * that page_locations[i].first_row_index < page_locations[i+1].first_row_index. */ 1: required list<PageLocation> page_locations + /** + * Unencoded/uncompressed size for BYTE_ARRAY types. + * + * See documentation for unencoded_byte_array_data_bytes in SizeStatistics for + * more details on this field. + */ + 2: optional list<i64> unencoded_byte_array_data_bytes } /** - * Description for ColumnIndex. - * Each [i] refers to the page at OffsetIndex.page_locations[i] + * Optional statistics for each data page in a ColumnChunk. + * + * Forms part of the page index, along with OffsetIndex. + * + * If this structure is present, OffsetIndex must also be present. 
+ * + * For each field in this structure, [i] refers to the page at + * OffsetIndex.page_locations[i] */ struct ColumnIndex { /** @@ -976,8 +1092,35 @@ struct ColumnIndex { */ 4: required BoundaryOrder boundary_order - /** A list containing the number of null values for each page **/ + /** + * A list containing the number of null values for each page + * + * Writers SHOULD always write this field even if no null values + * are present or the column is not nullable. + * Readers MUST distinguish between null_counts not being present + * and null_count being 0. + * If null_counts are not present, readers MUST NOT assume all + * null counts are 0. + */ 5: optional list null_counts + + /** + * Contains repetition level histograms for each page + * concatenated together. The repetition_level_histogram field on + * SizeStatistics contains more details. + * + * When present the length should always be (number of pages * + * (max_repetition_level + 1)) elements. + * + * Element 0 is the first element of the histogram for the first page. + * Element (max_repetition_level + 1) is the first element of the histogram + * for the second page. + **/ + 6: optional list repetition_level_histograms; + /** + * Same as repetition_level_histograms except for definitions levels. + **/ + 7: optional list definition_level_histograms; } struct AesGcmV1 { @@ -1083,5 +1226,4 @@ struct FileCryptoMetaData { /** Retrieval metadata of key used for encryption of footer, * and (possibly) columns **/ 2: optional binary key_metadata -} - +} \ No newline at end of file diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/AesGcmCtrV1.php b/src/lib/parquet/src/Flow/Parquet/Thrift/AesGcmCtrV1.php index 5e7b70abe..9b6d9d8ee 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/AesGcmCtrV1.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/AesGcmCtrV1.php @@ -4,16 +4,15 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). 
* * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Type\{TType}; -class AesGcmCtrV1 extends TBase +class AesGcmCtrV1 { public static $_TSPEC = [ 1 => [ @@ -59,8 +58,18 @@ class AesGcmCtrV1 extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['aad_prefix'])) { + $this->aad_prefix = $vals['aad_prefix']; + } + + if (isset($vals['aad_file_unique'])) { + $this->aad_file_unique = $vals['aad_file_unique']; + } + + if (isset($vals['supply_aad_prefix'])) { + $this->supply_aad_prefix = $vals['supply_aad_prefix']; + } } } @@ -71,11 +80,82 @@ public function getName() public function read($input) { - return $this->_read('AesGcmCtrV1', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::STRING) { + $xfer += $input->readString($this->aad_prefix); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::STRING) { + $xfer += $input->readString($this->aad_file_unique); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 3: + if ($ftype == TType::BOOL) { + $xfer += $input->readBool($this->supply_aad_prefix); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('AesGcmCtrV1', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('AesGcmCtrV1'); + + if ($this->aad_prefix !== null) { + $xfer += $output->writeFieldBegin('aad_prefix', TType::STRING, 
1); + $xfer += $output->writeString($this->aad_prefix); + $xfer += $output->writeFieldEnd(); + } + + if ($this->aad_file_unique !== null) { + $xfer += $output->writeFieldBegin('aad_file_unique', TType::STRING, 2); + $xfer += $output->writeString($this->aad_file_unique); + $xfer += $output->writeFieldEnd(); + } + + if ($this->supply_aad_prefix !== null) { + $xfer += $output->writeFieldBegin('supply_aad_prefix', TType::BOOL, 3); + $xfer += $output->writeBool($this->supply_aad_prefix); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/AesGcmV1.php b/src/lib/parquet/src/Flow/Parquet/Thrift/AesGcmV1.php index 9a76c76a6..d549aa87f 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/AesGcmV1.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/AesGcmV1.php @@ -4,16 +4,15 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). 
* * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Type\{TType}; -class AesGcmV1 extends TBase +class AesGcmV1 { public static $_TSPEC = [ 1 => [ @@ -59,8 +58,18 @@ class AesGcmV1 extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['aad_prefix'])) { + $this->aad_prefix = $vals['aad_prefix']; + } + + if (isset($vals['aad_file_unique'])) { + $this->aad_file_unique = $vals['aad_file_unique']; + } + + if (isset($vals['supply_aad_prefix'])) { + $this->supply_aad_prefix = $vals['supply_aad_prefix']; + } } } @@ -71,11 +80,82 @@ public function getName() public function read($input) { - return $this->_read('AesGcmV1', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::STRING) { + $xfer += $input->readString($this->aad_prefix); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::STRING) { + $xfer += $input->readString($this->aad_file_unique); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 3: + if ($ftype == TType::BOOL) { + $xfer += $input->readBool($this->supply_aad_prefix); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('AesGcmV1', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('AesGcmV1'); + + if ($this->aad_prefix !== null) { + $xfer += $output->writeFieldBegin('aad_prefix', TType::STRING, 1); + $xfer += 
$output->writeString($this->aad_prefix); + $xfer += $output->writeFieldEnd(); + } + + if ($this->aad_file_unique !== null) { + $xfer += $output->writeFieldBegin('aad_file_unique', TType::STRING, 2); + $xfer += $output->writeString($this->aad_file_unique); + $xfer += $output->writeFieldEnd(); + } + + if ($this->supply_aad_prefix !== null) { + $xfer += $output->writeFieldBegin('supply_aad_prefix', TType::BOOL, 3); + $xfer += $output->writeBool($this->supply_aad_prefix); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/BloomFilterAlgorithm.php b/src/lib/parquet/src/Flow/Parquet/Thrift/BloomFilterAlgorithm.php index 47151e44b..826eb1004 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/BloomFilterAlgorithm.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/BloomFilterAlgorithm.php @@ -4,19 +4,19 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Exception\{TProtocolException}; +use Thrift\Type\{TType}; /** * The algorithm used in Bloom filter. *. 
*/ -class BloomFilterAlgorithm extends TBase +class BloomFilterAlgorithm { public static $_TSPEC = [ 1 => [ @@ -38,8 +38,10 @@ class BloomFilterAlgorithm extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['BLOCK'])) { + $this->BLOCK = $vals['BLOCK']; + } } } @@ -50,11 +52,58 @@ public function getName() public function read($input) { - return $this->_read('BloomFilterAlgorithm', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::STRUCT) { + $this->BLOCK = new SplitBlockAlgorithm(); + $xfer += $this->BLOCK->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('BloomFilterAlgorithm', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('BloomFilterAlgorithm'); + + if ($this->BLOCK !== null) { + if (!is_object($this->BLOCK)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('BLOCK', TType::STRUCT, 1); + $xfer += $this->BLOCK->write($output); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/BloomFilterCompression.php b/src/lib/parquet/src/Flow/Parquet/Thrift/BloomFilterCompression.php index 383e6b933..b8cd6b665 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/BloomFilterCompression.php +++ 
b/src/lib/parquet/src/Flow/Parquet/Thrift/BloomFilterCompression.php @@ -4,16 +4,16 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Exception\{TProtocolException}; +use Thrift\Type\{TType}; -class BloomFilterCompression extends TBase +class BloomFilterCompression { public static $_TSPEC = [ 1 => [ @@ -33,8 +33,10 @@ class BloomFilterCompression extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['UNCOMPRESSED'])) { + $this->UNCOMPRESSED = $vals['UNCOMPRESSED']; + } } } @@ -45,11 +47,58 @@ public function getName() public function read($input) { - return $this->_read('BloomFilterCompression', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::STRUCT) { + $this->UNCOMPRESSED = new Uncompressed(); + $xfer += $this->UNCOMPRESSED->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('BloomFilterCompression', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('BloomFilterCompression'); + + if ($this->UNCOMPRESSED !== null) { + if (!is_object($this->UNCOMPRESSED)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('UNCOMPRESSED', TType::STRUCT, 1); + 
$xfer += $this->UNCOMPRESSED->write($output); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/BloomFilterHash.php b/src/lib/parquet/src/Flow/Parquet/Thrift/BloomFilterHash.php index e0febc1c1..8b746a6ea 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/BloomFilterHash.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/BloomFilterHash.php @@ -4,20 +4,20 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Exception\{TProtocolException}; +use Thrift\Type\{TType}; /** * The hash function used in Bloom filter. This function takes the hash of a column value * using plain encoding. */ -class BloomFilterHash extends TBase +class BloomFilterHash { public static $_TSPEC = [ 1 => [ @@ -39,8 +39,10 @@ class BloomFilterHash extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['XXHASH'])) { + $this->XXHASH = $vals['XXHASH']; + } } } @@ -51,11 +53,58 @@ public function getName() public function read($input) { - return $this->_read('BloomFilterHash', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::STRUCT) { + $this->XXHASH = new XxHash(); + $xfer += $this->XXHASH->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += 
$input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('BloomFilterHash', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('BloomFilterHash'); + + if ($this->XXHASH !== null) { + if (!is_object($this->XXHASH)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('XXHASH', TType::STRUCT, 1); + $xfer += $this->XXHASH->write($output); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/BloomFilterHeader.php b/src/lib/parquet/src/Flow/Parquet/Thrift/BloomFilterHeader.php index 45262c142..f53ad612a 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/BloomFilterHeader.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/BloomFilterHeader.php @@ -4,20 +4,20 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Exception\{TProtocolException}; +use Thrift\Type\{TType}; /** * Bloom filter header is stored at beginning of Bloom filter data of each column * and followed by its bitset. 
*/ -class BloomFilterHeader extends TBase +class BloomFilterHeader { public static $_TSPEC = [ 1 => [ @@ -77,8 +77,22 @@ class BloomFilterHeader extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['numBytes'])) { + $this->numBytes = $vals['numBytes']; + } + + if (isset($vals['algorithm'])) { + $this->algorithm = $vals['algorithm']; + } + + if (isset($vals['hash'])) { + $this->hash = $vals['hash']; + } + + if (isset($vals['compression'])) { + $this->compression = $vals['compression']; + } } } @@ -89,11 +103,108 @@ public function getName() public function read($input) { - return $this->_read('BloomFilterHeader', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->numBytes); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::STRUCT) { + $this->algorithm = new BloomFilterAlgorithm(); + $xfer += $this->algorithm->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 3: + if ($ftype == TType::STRUCT) { + $this->hash = new BloomFilterHash(); + $xfer += $this->hash->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 4: + if ($ftype == TType::STRUCT) { + $this->compression = new BloomFilterCompression(); + $xfer += $this->compression->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('BloomFilterHeader', self::$_TSPEC, $output); + $xfer = 0; + 
$xfer += $output->writeStructBegin('BloomFilterHeader'); + + if ($this->numBytes !== null) { + $xfer += $output->writeFieldBegin('numBytes', TType::I32, 1); + $xfer += $output->writeI32($this->numBytes); + $xfer += $output->writeFieldEnd(); + } + + if ($this->algorithm !== null) { + if (!is_object($this->algorithm)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('algorithm', TType::STRUCT, 2); + $xfer += $this->algorithm->write($output); + $xfer += $output->writeFieldEnd(); + } + + if ($this->hash !== null) { + if (!is_object($this->hash)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('hash', TType::STRUCT, 3); + $xfer += $this->hash->write($output); + $xfer += $output->writeFieldEnd(); + } + + if ($this->compression !== null) { + if (!is_object($this->compression)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('compression', TType::STRUCT, 4); + $xfer += $this->compression->write($output); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/BoundaryOrder.php b/src/lib/parquet/src/Flow/Parquet/Thrift/BoundaryOrder.php index e8d53a391..e987a6d1e 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/BoundaryOrder.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/BoundaryOrder.php @@ -4,13 +4,12 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ - /** * Enum to annotate whether lists of min/max elements inside ColumnIndex * are ordered and if so, in which direction. 
diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/BsonType.php b/src/lib/parquet/src/Flow/Parquet/Thrift/BsonType.php index f8181c9b7..b30530eb9 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/BsonType.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/BsonType.php @@ -4,20 +4,20 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; +use Thrift\Type\{TType}; /** * Embedded BSON logical type annotation. * - * Allowed for physical types: BINARY + * Allowed for physical types: BYTE_ARRAY */ -class BsonType extends TBase +class BsonType { public static $_TSPEC = [ ]; @@ -35,11 +35,39 @@ public function getName() public function read($input) { - return $this->_read('BsonType', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('BsonType', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('BsonType'); + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/ColumnChunk.php b/src/lib/parquet/src/Flow/Parquet/Thrift/ColumnChunk.php index bfe004832..0fb3dce7d 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/ColumnChunk.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/ColumnChunk.php @@ -4,16 +4,16 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). 
* * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Exception\{TProtocolException}; +use Thrift\Type\{TType}; -class ColumnChunk extends TBase +class ColumnChunk { public static $_TSPEC = [ 1 => [ @@ -96,11 +96,18 @@ class ColumnChunk extends TBase public $encrypted_column_metadata; /** - * Byte offset in file_path to the ColumnMetaData *. + * Deprecated: Byte offset in file_path to the ColumnMetaData. + * + * Past use of this field has been inconsistent, with some implementations + * using it to point to the ColumnMetaData and some using it to point to + * the first page in the column chunk. In many cases, the ColumnMetaData at this + * location is wrong. This field is now deprecated and should not be used. + * Writers should set this field to 0 if no ColumnMetaData has been written outside + * the footer. * * @var int */ - public $file_offset; + public $file_offset = 0; /** * File where column data is stored. If not set, assumed to be same file as @@ -111,9 +118,10 @@ class ColumnChunk extends TBase public $file_path; /** - * Column metadata for this chunk. This is the same content as what is at - * file_path/file_offset. Having it here has it replicated in the file - * metadata. + * Column metadata for this chunk. Some writers may also replicate this at the + * location pointed to by file_path/file_offset. + * Note: while marked as optional, this field is in fact required by most major + * Parquet implementations. As such, writers MUST populate this field. 
* * @var ColumnMetaData */ @@ -135,8 +143,42 @@ class ColumnChunk extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['file_path'])) { + $this->file_path = $vals['file_path']; + } + + if (isset($vals['file_offset'])) { + $this->file_offset = $vals['file_offset']; + } + + if (isset($vals['meta_data'])) { + $this->meta_data = $vals['meta_data']; + } + + if (isset($vals['offset_index_offset'])) { + $this->offset_index_offset = $vals['offset_index_offset']; + } + + if (isset($vals['offset_index_length'])) { + $this->offset_index_length = $vals['offset_index_length']; + } + + if (isset($vals['column_index_offset'])) { + $this->column_index_offset = $vals['column_index_offset']; + } + + if (isset($vals['column_index_length'])) { + $this->column_index_length = $vals['column_index_length']; + } + + if (isset($vals['crypto_metadata'])) { + $this->crypto_metadata = $vals['crypto_metadata']; + } + + if (isset($vals['encrypted_column_metadata'])) { + $this->encrypted_column_metadata = $vals['encrypted_column_metadata']; + } } } @@ -147,11 +189,174 @@ public function getName() public function read($input) { - return $this->_read('ColumnChunk', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::STRING) { + $xfer += $input->readString($this->file_path); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::I64) { + $xfer += $input->readI64($this->file_offset); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 3: + if ($ftype == TType::STRUCT) { + $this->meta_data = new ColumnMetaData(); + $xfer += $this->meta_data->read($input); + } else { + $xfer += 
$input->skip($ftype); + } + + break; + case 4: + if ($ftype == TType::I64) { + $xfer += $input->readI64($this->offset_index_offset); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 5: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->offset_index_length); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 6: + if ($ftype == TType::I64) { + $xfer += $input->readI64($this->column_index_offset); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 7: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->column_index_length); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 8: + if ($ftype == TType::STRUCT) { + $this->crypto_metadata = new ColumnCryptoMetaData(); + $xfer += $this->crypto_metadata->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 9: + if ($ftype == TType::STRING) { + $xfer += $input->readString($this->encrypted_column_metadata); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('ColumnChunk', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('ColumnChunk'); + + if ($this->file_path !== null) { + $xfer += $output->writeFieldBegin('file_path', TType::STRING, 1); + $xfer += $output->writeString($this->file_path); + $xfer += $output->writeFieldEnd(); + } + + if ($this->file_offset !== null) { + $xfer += $output->writeFieldBegin('file_offset', TType::I64, 2); + $xfer += $output->writeI64($this->file_offset); + $xfer += $output->writeFieldEnd(); + } + + if ($this->meta_data !== null) { + if (!is_object($this->meta_data)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('meta_data', TType::STRUCT, 3); + $xfer += 
$this->meta_data->write($output); + $xfer += $output->writeFieldEnd(); + } + + if ($this->offset_index_offset !== null) { + $xfer += $output->writeFieldBegin('offset_index_offset', TType::I64, 4); + $xfer += $output->writeI64($this->offset_index_offset); + $xfer += $output->writeFieldEnd(); + } + + if ($this->offset_index_length !== null) { + $xfer += $output->writeFieldBegin('offset_index_length', TType::I32, 5); + $xfer += $output->writeI32($this->offset_index_length); + $xfer += $output->writeFieldEnd(); + } + + if ($this->column_index_offset !== null) { + $xfer += $output->writeFieldBegin('column_index_offset', TType::I64, 6); + $xfer += $output->writeI64($this->column_index_offset); + $xfer += $output->writeFieldEnd(); + } + + if ($this->column_index_length !== null) { + $xfer += $output->writeFieldBegin('column_index_length', TType::I32, 7); + $xfer += $output->writeI32($this->column_index_length); + $xfer += $output->writeFieldEnd(); + } + + if ($this->crypto_metadata !== null) { + if (!is_object($this->crypto_metadata)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('crypto_metadata', TType::STRUCT, 8); + $xfer += $this->crypto_metadata->write($output); + $xfer += $output->writeFieldEnd(); + } + + if ($this->encrypted_column_metadata !== null) { + $xfer += $output->writeFieldBegin('encrypted_column_metadata', TType::STRING, 9); + $xfer += $output->writeString($this->encrypted_column_metadata); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/ColumnCryptoMetaData.php b/src/lib/parquet/src/Flow/Parquet/Thrift/ColumnCryptoMetaData.php index 173c0935e..2447a35d0 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/ColumnCryptoMetaData.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/ColumnCryptoMetaData.php @@ -4,16 +4,16 @@ namespace 
Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Exception\{TProtocolException}; +use Thrift\Type\{TType}; -class ColumnCryptoMetaData extends TBase +class ColumnCryptoMetaData { public static $_TSPEC = [ 1 => [ @@ -44,8 +44,14 @@ class ColumnCryptoMetaData extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['ENCRYPTION_WITH_FOOTER_KEY'])) { + $this->ENCRYPTION_WITH_FOOTER_KEY = $vals['ENCRYPTION_WITH_FOOTER_KEY']; + } + + if (isset($vals['ENCRYPTION_WITH_COLUMN_KEY'])) { + $this->ENCRYPTION_WITH_COLUMN_KEY = $vals['ENCRYPTION_WITH_COLUMN_KEY']; + } } } @@ -56,11 +62,76 @@ public function getName() public function read($input) { - return $this->_read('ColumnCryptoMetaData', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::STRUCT) { + $this->ENCRYPTION_WITH_FOOTER_KEY = new EncryptionWithFooterKey(); + $xfer += $this->ENCRYPTION_WITH_FOOTER_KEY->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::STRUCT) { + $this->ENCRYPTION_WITH_COLUMN_KEY = new EncryptionWithColumnKey(); + $xfer += $this->ENCRYPTION_WITH_COLUMN_KEY->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('ColumnCryptoMetaData', self::$_TSPEC, 
$output); + $xfer = 0; + $xfer += $output->writeStructBegin('ColumnCryptoMetaData'); + + if ($this->ENCRYPTION_WITH_FOOTER_KEY !== null) { + if (!is_object($this->ENCRYPTION_WITH_FOOTER_KEY)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('ENCRYPTION_WITH_FOOTER_KEY', TType::STRUCT, 1); + $xfer += $this->ENCRYPTION_WITH_FOOTER_KEY->write($output); + $xfer += $output->writeFieldEnd(); + } + + if ($this->ENCRYPTION_WITH_COLUMN_KEY !== null) { + if (!is_object($this->ENCRYPTION_WITH_COLUMN_KEY)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('ENCRYPTION_WITH_COLUMN_KEY', TType::STRUCT, 2); + $xfer += $this->ENCRYPTION_WITH_COLUMN_KEY->write($output); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/ColumnIndex.php b/src/lib/parquet/src/Flow/Parquet/Thrift/ColumnIndex.php index 7e6fc7aaa..7d094ea56 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/ColumnIndex.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/ColumnIndex.php @@ -4,20 +4,26 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Exception\{TProtocolException}; +use Thrift\Type\{TType}; /** - * Description for ColumnIndex. - * Each [i] refers to the page at OffsetIndex.page_locations[i]. + * Optional statistics for each data page in a ColumnChunk. + * + * Forms part the page index, along with OffsetIndex. + * + * If this structure is present, OffsetIndex must also be present. 
+ * + * For each field in this structure, [i] refers to the page at + * OffsetIndex.page_locations[i] */ -class ColumnIndex extends TBase +class ColumnIndex { public static $_TSPEC = [ 1 => [ @@ -62,6 +68,24 @@ class ColumnIndex extends TBase 'type' => TType::I64, ], ], + 6 => [ + 'var' => 'repetition_level_histograms', + 'isRequired' => false, + 'type' => TType::LST, + 'etype' => TType::I64, + 'elem' => [ + 'type' => TType::I64, + ], + ], + 7 => [ + 'var' => 'definition_level_histograms', + 'isRequired' => false, + 'type' => TType::LST, + 'etype' => TType::I64, + 'elem' => [ + 'type' => TType::I64, + ], + ], ]; public static $isValidate = false; @@ -76,6 +100,13 @@ class ColumnIndex extends TBase */ public $boundary_order; + /** + * Same as repetition_level_histograms except for definitions levels. + * + * @var int[] + */ + public $definition_level_histograms; + /** * @var string[] */ @@ -96,7 +127,14 @@ class ColumnIndex extends TBase public $min_values; /** - * A list containing the number of null values for each page *. + * A list containing the number of null values for each page. + * + * Writers SHOULD always write this field even if no null values + * are present or the column is not nullable. + * Readers MUST distinguish between null_counts not being present + * and null_count being 0. + * If null_counts are not present, readers MUST NOT assume all + * null counts are 0. * * @var int[] */ @@ -113,10 +151,52 @@ class ColumnIndex extends TBase */ public $null_pages; + /** + * Contains repetition level histograms for each page + * concatenated together. The repetition_level_histogram field on + * SizeStatistics contains more details. + * + * When present the length should always be (number of pages * + * (max_repetition_level + 1)) elements. + * + * Element 0 is the first element of the histogram for the first page. + * Element (max_repetition_level + 1) is the first element of the histogram + * for the second page. 
+ * + * @var int[] + */ + public $repetition_level_histograms; + public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['null_pages'])) { + $this->null_pages = $vals['null_pages']; + } + + if (isset($vals['min_values'])) { + $this->min_values = $vals['min_values']; + } + + if (isset($vals['max_values'])) { + $this->max_values = $vals['max_values']; + } + + if (isset($vals['boundary_order'])) { + $this->boundary_order = $vals['boundary_order']; + } + + if (isset($vals['null_counts'])) { + $this->null_counts = $vals['null_counts']; + } + + if (isset($vals['repetition_level_histograms'])) { + $this->repetition_level_histograms = $vals['repetition_level_histograms']; + } + + if (isset($vals['definition_level_histograms'])) { + $this->definition_level_histograms = $vals['definition_level_histograms']; + } } } @@ -127,11 +207,246 @@ public function getName() public function read($input) { - return $this->_read('ColumnIndex', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::LST) { + $this->null_pages = []; + $_size77 = 0; + $_etype80 = 0; + $xfer += $input->readListBegin($_etype80, $_size77); + + for ($_i81 = 0; $_i81 < $_size77; $_i81++) { + $elem82 = null; + $xfer += $input->readBool($elem82); + $this->null_pages[] = $elem82; + } + $xfer += $input->readListEnd(); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::LST) { + $this->min_values = []; + $_size83 = 0; + $_etype86 = 0; + $xfer += $input->readListBegin($_etype86, $_size83); + + for ($_i87 = 0; $_i87 < $_size83; $_i87++) { + $elem88 = null; + $xfer += $input->readString($elem88); + $this->min_values[] = $elem88; + } + $xfer += 
$input->readListEnd(); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 3: + if ($ftype == TType::LST) { + $this->max_values = []; + $_size89 = 0; + $_etype92 = 0; + $xfer += $input->readListBegin($_etype92, $_size89); + + for ($_i93 = 0; $_i93 < $_size89; $_i93++) { + $elem94 = null; + $xfer += $input->readString($elem94); + $this->max_values[] = $elem94; + } + $xfer += $input->readListEnd(); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 4: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->boundary_order); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 5: + if ($ftype == TType::LST) { + $this->null_counts = []; + $_size95 = 0; + $_etype98 = 0; + $xfer += $input->readListBegin($_etype98, $_size95); + + for ($_i99 = 0; $_i99 < $_size95; $_i99++) { + $elem100 = null; + $xfer += $input->readI64($elem100); + $this->null_counts[] = $elem100; + } + $xfer += $input->readListEnd(); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 6: + if ($ftype == TType::LST) { + $this->repetition_level_histograms = []; + $_size101 = 0; + $_etype104 = 0; + $xfer += $input->readListBegin($_etype104, $_size101); + + for ($_i105 = 0; $_i105 < $_size101; $_i105++) { + $elem106 = null; + $xfer += $input->readI64($elem106); + $this->repetition_level_histograms[] = $elem106; + } + $xfer += $input->readListEnd(); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 7: + if ($ftype == TType::LST) { + $this->definition_level_histograms = []; + $_size107 = 0; + $_etype110 = 0; + $xfer += $input->readListBegin($_etype110, $_size107); + + for ($_i111 = 0; $_i111 < $_size107; $_i111++) { + $elem112 = null; + $xfer += $input->readI64($elem112); + $this->definition_level_histograms[] = $elem112; + } + $xfer += $input->readListEnd(); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += 
$input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('ColumnIndex', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('ColumnIndex'); + + if ($this->null_pages !== null) { + if (!is_array($this->null_pages)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('null_pages', TType::LST, 1); + $output->writeListBegin(TType::BOOL, count($this->null_pages)); + + foreach ($this->null_pages as $iter113) { + $xfer += $output->writeBool($iter113); + } + $output->writeListEnd(); + $xfer += $output->writeFieldEnd(); + } + + if ($this->min_values !== null) { + if (!is_array($this->min_values)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('min_values', TType::LST, 2); + $output->writeListBegin(TType::STRING, count($this->min_values)); + + foreach ($this->min_values as $iter114) { + $xfer += $output->writeString($iter114); + } + $output->writeListEnd(); + $xfer += $output->writeFieldEnd(); + } + + if ($this->max_values !== null) { + if (!is_array($this->max_values)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('max_values', TType::LST, 3); + $output->writeListBegin(TType::STRING, count($this->max_values)); + + foreach ($this->max_values as $iter115) { + $xfer += $output->writeString($iter115); + } + $output->writeListEnd(); + $xfer += $output->writeFieldEnd(); + } + + if ($this->boundary_order !== null) { + $xfer += $output->writeFieldBegin('boundary_order', TType::I32, 4); + $xfer += $output->writeI32($this->boundary_order); + $xfer += $output->writeFieldEnd(); + } + + if ($this->null_counts !== null) { + if (!is_array($this->null_counts)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += 
$output->writeFieldBegin('null_counts', TType::LST, 5); + $output->writeListBegin(TType::I64, count($this->null_counts)); + + foreach ($this->null_counts as $iter116) { + $xfer += $output->writeI64($iter116); + } + $output->writeListEnd(); + $xfer += $output->writeFieldEnd(); + } + + if ($this->repetition_level_histograms !== null) { + if (!is_array($this->repetition_level_histograms)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('repetition_level_histograms', TType::LST, 6); + $output->writeListBegin(TType::I64, count($this->repetition_level_histograms)); + + foreach ($this->repetition_level_histograms as $iter117) { + $xfer += $output->writeI64($iter117); + } + $output->writeListEnd(); + $xfer += $output->writeFieldEnd(); + } + + if ($this->definition_level_histograms !== null) { + if (!is_array($this->definition_level_histograms)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('definition_level_histograms', TType::LST, 7); + $output->writeListBegin(TType::I64, count($this->definition_level_histograms)); + + foreach ($this->definition_level_histograms as $iter118) { + $xfer += $output->writeI64($iter118); + } + $output->writeListEnd(); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/ColumnMetaData.php b/src/lib/parquet/src/Flow/Parquet/Thrift/ColumnMetaData.php index 6162a173b..c2e4eff84 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/ColumnMetaData.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/ColumnMetaData.php @@ -4,19 +4,19 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). 
* * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Exception\{TProtocolException}; +use Thrift\Type\{TType}; /** * Description for column metadata. */ -class ColumnMetaData extends TBase +class ColumnMetaData { public static $_TSPEC = [ 1 => [ @@ -116,6 +116,12 @@ class ColumnMetaData extends TBase 'isRequired' => false, 'type' => TType::I32, ], + 16 => [ + 'var' => 'size_statistics', + 'isRequired' => false, + 'type' => TType::STRUCT, + 'class' => '\Flow\Parquet\Thrift\SizeStatistics', + ], ]; public static $isValidate = false; @@ -164,7 +170,7 @@ class ColumnMetaData extends TBase * This information can be used to determine if all data pages are * dictionary encoded for example *. * - * @var PageEncodingStats[] + * @var \Flow\Parquet\Thrift\PageEncodingStats[] */ public $encoding_stats; @@ -186,7 +192,7 @@ class ColumnMetaData extends TBase /** * Optional key/value metadata *. * - * @var KeyValue[] + * @var \Flow\Parquet\Thrift\KeyValue[] */ public $key_value_metadata; @@ -204,6 +210,16 @@ class ColumnMetaData extends TBase */ public $path_in_schema; + /** + * Optional statistics to help estimate total memory when converted to in-memory + * representations. The histograms contained in these statistics can + * also be useful in some cases for more fine-grained nullability/list length + * filter pushdown. + * + * @var SizeStatistics + */ + public $size_statistics; + /** * optional statistics for this column chunk. 
* @@ -235,8 +251,70 @@ class ColumnMetaData extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['type'])) { + $this->type = $vals['type']; + } + + if (isset($vals['encodings'])) { + $this->encodings = $vals['encodings']; + } + + if (isset($vals['path_in_schema'])) { + $this->path_in_schema = $vals['path_in_schema']; + } + + if (isset($vals['codec'])) { + $this->codec = $vals['codec']; + } + + if (isset($vals['num_values'])) { + $this->num_values = $vals['num_values']; + } + + if (isset($vals['total_uncompressed_size'])) { + $this->total_uncompressed_size = $vals['total_uncompressed_size']; + } + + if (isset($vals['total_compressed_size'])) { + $this->total_compressed_size = $vals['total_compressed_size']; + } + + if (isset($vals['key_value_metadata'])) { + $this->key_value_metadata = $vals['key_value_metadata']; + } + + if (isset($vals['data_page_offset'])) { + $this->data_page_offset = $vals['data_page_offset']; + } + + if (isset($vals['index_page_offset'])) { + $this->index_page_offset = $vals['index_page_offset']; + } + + if (isset($vals['dictionary_page_offset'])) { + $this->dictionary_page_offset = $vals['dictionary_page_offset']; + } + + if (isset($vals['statistics'])) { + $this->statistics = $vals['statistics']; + } + + if (isset($vals['encoding_stats'])) { + $this->encoding_stats = $vals['encoding_stats']; + } + + if (isset($vals['bloom_filter_offset'])) { + $this->bloom_filter_offset = $vals['bloom_filter_offset']; + } + + if (isset($vals['bloom_filter_length'])) { + $this->bloom_filter_length = $vals['bloom_filter_length']; + } + + if (isset($vals['size_statistics'])) { + $this->size_statistics = $vals['size_statistics']; + } } } @@ -247,11 +325,346 @@ public function getName() public function read($input) { - return $this->_read('ColumnMetaData', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += 
$input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->type); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::LST) { + $this->encodings = []; + $_size14 = 0; + $_etype17 = 0; + $xfer += $input->readListBegin($_etype17, $_size14); + + for ($_i18 = 0; $_i18 < $_size14; $_i18++) { + $elem19 = null; + $xfer += $input->readI32($elem19); + $this->encodings[] = $elem19; + } + $xfer += $input->readListEnd(); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 3: + if ($ftype == TType::LST) { + $this->path_in_schema = []; + $_size20 = 0; + $_etype23 = 0; + $xfer += $input->readListBegin($_etype23, $_size20); + + for ($_i24 = 0; $_i24 < $_size20; $_i24++) { + $elem25 = null; + $xfer += $input->readString($elem25); + $this->path_in_schema[] = $elem25; + } + $xfer += $input->readListEnd(); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 4: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->codec); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 5: + if ($ftype == TType::I64) { + $xfer += $input->readI64($this->num_values); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 6: + if ($ftype == TType::I64) { + $xfer += $input->readI64($this->total_uncompressed_size); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 7: + if ($ftype == TType::I64) { + $xfer += $input->readI64($this->total_compressed_size); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 8: + if ($ftype == TType::LST) { + $this->key_value_metadata = []; + $_size26 = 0; + $_etype29 = 0; + $xfer += $input->readListBegin($_etype29, $_size26); + + for ($_i30 = 0; $_i30 < $_size26; $_i30++) { + $elem31 = null; + $elem31 = new KeyValue(); + $xfer += $elem31->read($input); + 
$this->key_value_metadata[] = $elem31; + } + $xfer += $input->readListEnd(); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 9: + if ($ftype == TType::I64) { + $xfer += $input->readI64($this->data_page_offset); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 10: + if ($ftype == TType::I64) { + $xfer += $input->readI64($this->index_page_offset); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 11: + if ($ftype == TType::I64) { + $xfer += $input->readI64($this->dictionary_page_offset); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 12: + if ($ftype == TType::STRUCT) { + $this->statistics = new Statistics(); + $xfer += $this->statistics->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 13: + if ($ftype == TType::LST) { + $this->encoding_stats = []; + $_size32 = 0; + $_etype35 = 0; + $xfer += $input->readListBegin($_etype35, $_size32); + + for ($_i36 = 0; $_i36 < $_size32; $_i36++) { + $elem37 = null; + $elem37 = new PageEncodingStats(); + $xfer += $elem37->read($input); + $this->encoding_stats[] = $elem37; + } + $xfer += $input->readListEnd(); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 14: + if ($ftype == TType::I64) { + $xfer += $input->readI64($this->bloom_filter_offset); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 15: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->bloom_filter_length); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 16: + if ($ftype == TType::STRUCT) { + $this->size_statistics = new SizeStatistics(); + $xfer += $this->size_statistics->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('ColumnMetaData', self::$_TSPEC, $output); + $xfer = 0; 
+ $xfer += $output->writeStructBegin('ColumnMetaData'); + + if ($this->type !== null) { + $xfer += $output->writeFieldBegin('type', TType::I32, 1); + $xfer += $output->writeI32($this->type); + $xfer += $output->writeFieldEnd(); + } + + if ($this->encodings !== null) { + if (!is_array($this->encodings)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('encodings', TType::LST, 2); + $output->writeListBegin(TType::I32, count($this->encodings)); + + foreach ($this->encodings as $iter38) { + $xfer += $output->writeI32($iter38); + } + $output->writeListEnd(); + $xfer += $output->writeFieldEnd(); + } + + if ($this->path_in_schema !== null) { + if (!is_array($this->path_in_schema)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('path_in_schema', TType::LST, 3); + $output->writeListBegin(TType::STRING, count($this->path_in_schema)); + + foreach ($this->path_in_schema as $iter39) { + $xfer += $output->writeString($iter39); + } + $output->writeListEnd(); + $xfer += $output->writeFieldEnd(); + } + + if ($this->codec !== null) { + $xfer += $output->writeFieldBegin('codec', TType::I32, 4); + $xfer += $output->writeI32($this->codec); + $xfer += $output->writeFieldEnd(); + } + + if ($this->num_values !== null) { + $xfer += $output->writeFieldBegin('num_values', TType::I64, 5); + $xfer += $output->writeI64($this->num_values); + $xfer += $output->writeFieldEnd(); + } + + if ($this->total_uncompressed_size !== null) { + $xfer += $output->writeFieldBegin('total_uncompressed_size', TType::I64, 6); + $xfer += $output->writeI64($this->total_uncompressed_size); + $xfer += $output->writeFieldEnd(); + } + + if ($this->total_compressed_size !== null) { + $xfer += $output->writeFieldBegin('total_compressed_size', TType::I64, 7); + $xfer += $output->writeI64($this->total_compressed_size); + $xfer += $output->writeFieldEnd(); 
+ } + + if ($this->key_value_metadata !== null) { + if (!is_array($this->key_value_metadata)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('key_value_metadata', TType::LST, 8); + $output->writeListBegin(TType::STRUCT, count($this->key_value_metadata)); + + foreach ($this->key_value_metadata as $iter40) { + $xfer += $iter40->write($output); + } + $output->writeListEnd(); + $xfer += $output->writeFieldEnd(); + } + + if ($this->data_page_offset !== null) { + $xfer += $output->writeFieldBegin('data_page_offset', TType::I64, 9); + $xfer += $output->writeI64($this->data_page_offset); + $xfer += $output->writeFieldEnd(); + } + + if ($this->index_page_offset !== null) { + $xfer += $output->writeFieldBegin('index_page_offset', TType::I64, 10); + $xfer += $output->writeI64($this->index_page_offset); + $xfer += $output->writeFieldEnd(); + } + + if ($this->dictionary_page_offset !== null) { + $xfer += $output->writeFieldBegin('dictionary_page_offset', TType::I64, 11); + $xfer += $output->writeI64($this->dictionary_page_offset); + $xfer += $output->writeFieldEnd(); + } + + if ($this->statistics !== null) { + if (!is_object($this->statistics)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('statistics', TType::STRUCT, 12); + $xfer += $this->statistics->write($output); + $xfer += $output->writeFieldEnd(); + } + + if ($this->encoding_stats !== null) { + if (!is_array($this->encoding_stats)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('encoding_stats', TType::LST, 13); + $output->writeListBegin(TType::STRUCT, count($this->encoding_stats)); + + foreach ($this->encoding_stats as $iter41) { + $xfer += $iter41->write($output); + } + $output->writeListEnd(); + $xfer += $output->writeFieldEnd(); + } + + if 
($this->bloom_filter_offset !== null) { + $xfer += $output->writeFieldBegin('bloom_filter_offset', TType::I64, 14); + $xfer += $output->writeI64($this->bloom_filter_offset); + $xfer += $output->writeFieldEnd(); + } + + if ($this->bloom_filter_length !== null) { + $xfer += $output->writeFieldBegin('bloom_filter_length', TType::I32, 15); + $xfer += $output->writeI32($this->bloom_filter_length); + $xfer += $output->writeFieldEnd(); + } + + if ($this->size_statistics !== null) { + if (!is_object($this->size_statistics)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('size_statistics', TType::STRUCT, 16); + $xfer += $this->size_statistics->write($output); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/ColumnOrder.php b/src/lib/parquet/src/Flow/Parquet/Thrift/ColumnOrder.php index e4bbd7800..d2cd636e6 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/ColumnOrder.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/ColumnOrder.php @@ -4,14 +4,14 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Exception\{TProtocolException}; +use Thrift\Type\{TType}; /** * Union to specify the order used for the min_value and max_value fields for a @@ -25,7 +25,7 @@ * If the reader does not support the value of this union, min and max stats * for this column should be ignored. 
*/ -class ColumnOrder extends TBase +class ColumnOrder { public static $_TSPEC = [ 1 => [ @@ -55,12 +55,12 @@ class ColumnOrder extends TBase * TIME_MICROS - signed comparison * TIMESTAMP_MILLIS - signed comparison * TIMESTAMP_MICROS - signed comparison - * INTERVAL - unsigned comparison + * INTERVAL - undefined * JSON - unsigned byte-wise comparison * BSON - unsigned byte-wise comparison * ENUM - unsigned byte-wise comparison * LIST - undefined - * MAP - undefined. + * MAP - undefined * * In the absence of logical types, the sort order is determined by the physical type: * BOOLEAN - false, true @@ -94,8 +94,10 @@ class ColumnOrder extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['TYPE_ORDER'])) { + $this->TYPE_ORDER = $vals['TYPE_ORDER']; + } } } @@ -106,11 +108,58 @@ public function getName() public function read($input) { - return $this->_read('ColumnOrder', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::STRUCT) { + $this->TYPE_ORDER = new TypeDefinedOrder(); + $xfer += $this->TYPE_ORDER->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('ColumnOrder', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('ColumnOrder'); + + if ($this->TYPE_ORDER !== null) { + if (!is_object($this->TYPE_ORDER)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('TYPE_ORDER', 
TType::STRUCT, 1); + $xfer += $this->TYPE_ORDER->write($output); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/CompressionCodec.php b/src/lib/parquet/src/Flow/Parquet/Thrift/CompressionCodec.php index 4fb3feeec..5ae9b9e89 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/CompressionCodec.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/CompressionCodec.php @@ -4,13 +4,12 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ - /** * Supported compression algorithms. * diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/ConvertedType.php b/src/lib/parquet/src/Flow/Parquet/Thrift/ConvertedType.php index 9a42f6101..1c0bb6108 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/ConvertedType.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/ConvertedType.php @@ -4,13 +4,12 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ - /** * DEPRECATED: Common types used by frameworks(e.g. hive, pig) using parquet. * ConvertedType is superseded by LogicalType. This enum should not be extended. @@ -22,7 +21,7 @@ final class ConvertedType /** * An embedded BSON document. * - * A BSON document embedded within a single BINARY column. + * A BSON document embedded within a single BYTE_ARRAY column. */ public const BSON = 20; @@ -36,8 +35,8 @@ final class ConvertedType /** * A decimal value. * - * This may be used to annotate binary or fixed primitive types. 
The - * underlying byte array stores the unscaled value encoded as two's + * This may be used to annotate BYTE_ARRAY or FIXED_LEN_BYTE_ARRAY primitive + * types. The underlying byte array stores the unscaled value encoded as two's * complement using big-endian byte order (the most significant byte is the * zeroth element). The value of the decimal is the value * 10^{-scale}. * @@ -50,7 +49,7 @@ final class ConvertedType public const DECIMAL = 5; /** - * an enum is converted into a binary field. + * an enum is converted into a BYTE_ARRAY field. */ public const ENUM = 4; diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/DataPageHeader.php b/src/lib/parquet/src/Flow/Parquet/Thrift/DataPageHeader.php index d1d4cc008..6b259bba5 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/DataPageHeader.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/DataPageHeader.php @@ -4,19 +4,19 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Exception\{TProtocolException}; +use Thrift\Type\{TType}; /** * Data page header. */ -class DataPageHeader extends TBase +class DataPageHeader { public static $_TSPEC = [ 1 => [ @@ -67,7 +67,11 @@ class DataPageHeader extends TBase public $encoding; /** - * Number of values, including NULLs, in this data page. *. + * Number of values, including NULLs, in this data page. + * + * If a OffsetIndex is present, a page must begin at a row + * boundary (repetition_level = 0). Otherwise, pages may begin + * within a row (repetition_level > 0). * * @var int */ @@ -81,7 +85,7 @@ class DataPageHeader extends TBase public $repetition_level_encoding; /** - * Optional statistics for the data in this page*. + * Optional statistics for the data in this page *. 
* * @var Statistics */ @@ -89,8 +93,26 @@ class DataPageHeader extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['num_values'])) { + $this->num_values = $vals['num_values']; + } + + if (isset($vals['encoding'])) { + $this->encoding = $vals['encoding']; + } + + if (isset($vals['definition_level_encoding'])) { + $this->definition_level_encoding = $vals['definition_level_encoding']; + } + + if (isset($vals['repetition_level_encoding'])) { + $this->repetition_level_encoding = $vals['repetition_level_encoding']; + } + + if (isset($vals['statistics'])) { + $this->statistics = $vals['statistics']; + } } } @@ -101,11 +123,114 @@ public function getName() public function read($input) { - return $this->_read('DataPageHeader', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->num_values); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->encoding); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 3: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->definition_level_encoding); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 4: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->repetition_level_encoding); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 5: + if ($ftype == TType::STRUCT) { + $this->statistics = new Statistics(); + $xfer += $this->statistics->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += 
$input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('DataPageHeader', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('DataPageHeader'); + + if ($this->num_values !== null) { + $xfer += $output->writeFieldBegin('num_values', TType::I32, 1); + $xfer += $output->writeI32($this->num_values); + $xfer += $output->writeFieldEnd(); + } + + if ($this->encoding !== null) { + $xfer += $output->writeFieldBegin('encoding', TType::I32, 2); + $xfer += $output->writeI32($this->encoding); + $xfer += $output->writeFieldEnd(); + } + + if ($this->definition_level_encoding !== null) { + $xfer += $output->writeFieldBegin('definition_level_encoding', TType::I32, 3); + $xfer += $output->writeI32($this->definition_level_encoding); + $xfer += $output->writeFieldEnd(); + } + + if ($this->repetition_level_encoding !== null) { + $xfer += $output->writeFieldBegin('repetition_level_encoding', TType::I32, 4); + $xfer += $output->writeI32($this->repetition_level_encoding); + $xfer += $output->writeFieldEnd(); + } + + if ($this->statistics !== null) { + if (!is_object($this->statistics)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('statistics', TType::STRUCT, 5); + $xfer += $this->statistics->write($output); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/DataPageHeaderV2.php b/src/lib/parquet/src/Flow/Parquet/Thrift/DataPageHeaderV2.php index c4305146c..d0723b383 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/DataPageHeaderV2.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/DataPageHeaderV2.php @@ -4,21 +4,21 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). 
* * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Exception\{TProtocolException}; +use Thrift\Type\{TType}; /** * New page format allowing reading levels without decompressing the data * Repetition and definition levels are uncompressed * The remaining section containing the data is compressed if is_compressed is true. */ -class DataPageHeaderV2 extends TBase +class DataPageHeaderV2 { public static $_TSPEC = [ 1 => [ @@ -68,7 +68,7 @@ class DataPageHeaderV2 extends TBase public static $isValidate = false; /** - * length of the definition levels. + * Length of the definition levels. * * @var int */ @@ -82,7 +82,7 @@ class DataPageHeaderV2 extends TBase public $encoding; /** - * whether the values are compressed. + * Whether the values are compressed. * Which means the section of the page between * definition_levels_byte_length + repetition_levels_byte_length + 1 and compressed_page_size (included) * is compressed with the compression_codec. @@ -101,7 +101,9 @@ class DataPageHeaderV2 extends TBase public $num_nulls; /** - * Number of rows in this data page. which means pages change on record boundaries (r = 0) *. + * Number of rows in this data page. Every page must begin at a + * row boundary (repetition_level = 0): rows must **not** be + * split across page boundaries when using V2 data pages. * * @var int */ @@ -115,14 +117,14 @@ class DataPageHeaderV2 extends TBase public $num_values; /** - * length of the repetition levels. + * Length of the repetition levels. * * @var int */ public $repetition_levels_byte_length; /** - * optional statistics for the data in this page *. + * Optional statistics for the data in this page *. 
* * @var Statistics */ @@ -130,8 +132,38 @@ class DataPageHeaderV2 extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['num_values'])) { + $this->num_values = $vals['num_values']; + } + + if (isset($vals['num_nulls'])) { + $this->num_nulls = $vals['num_nulls']; + } + + if (isset($vals['num_rows'])) { + $this->num_rows = $vals['num_rows']; + } + + if (isset($vals['encoding'])) { + $this->encoding = $vals['encoding']; + } + + if (isset($vals['definition_levels_byte_length'])) { + $this->definition_levels_byte_length = $vals['definition_levels_byte_length']; + } + + if (isset($vals['repetition_levels_byte_length'])) { + $this->repetition_levels_byte_length = $vals['repetition_levels_byte_length']; + } + + if (isset($vals['is_compressed'])) { + $this->is_compressed = $vals['is_compressed']; + } + + if (isset($vals['statistics'])) { + $this->statistics = $vals['statistics']; + } } } @@ -142,11 +174,156 @@ public function getName() public function read($input) { - return $this->_read('DataPageHeaderV2', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->num_values); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->num_nulls); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 3: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->num_rows); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 4: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->encoding); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 5: + if 
($ftype == TType::I32) { + $xfer += $input->readI32($this->definition_levels_byte_length); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 6: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->repetition_levels_byte_length); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 7: + if ($ftype == TType::BOOL) { + $xfer += $input->readBool($this->is_compressed); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 8: + if ($ftype == TType::STRUCT) { + $this->statistics = new Statistics(); + $xfer += $this->statistics->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('DataPageHeaderV2', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('DataPageHeaderV2'); + + if ($this->num_values !== null) { + $xfer += $output->writeFieldBegin('num_values', TType::I32, 1); + $xfer += $output->writeI32($this->num_values); + $xfer += $output->writeFieldEnd(); + } + + if ($this->num_nulls !== null) { + $xfer += $output->writeFieldBegin('num_nulls', TType::I32, 2); + $xfer += $output->writeI32($this->num_nulls); + $xfer += $output->writeFieldEnd(); + } + + if ($this->num_rows !== null) { + $xfer += $output->writeFieldBegin('num_rows', TType::I32, 3); + $xfer += $output->writeI32($this->num_rows); + $xfer += $output->writeFieldEnd(); + } + + if ($this->encoding !== null) { + $xfer += $output->writeFieldBegin('encoding', TType::I32, 4); + $xfer += $output->writeI32($this->encoding); + $xfer += $output->writeFieldEnd(); + } + + if ($this->definition_levels_byte_length !== null) { + $xfer += $output->writeFieldBegin('definition_levels_byte_length', TType::I32, 5); + $xfer += $output->writeI32($this->definition_levels_byte_length); + $xfer += $output->writeFieldEnd(); + } + + if 
($this->repetition_levels_byte_length !== null) { + $xfer += $output->writeFieldBegin('repetition_levels_byte_length', TType::I32, 6); + $xfer += $output->writeI32($this->repetition_levels_byte_length); + $xfer += $output->writeFieldEnd(); + } + + if ($this->is_compressed !== null) { + $xfer += $output->writeFieldBegin('is_compressed', TType::BOOL, 7); + $xfer += $output->writeBool($this->is_compressed); + $xfer += $output->writeFieldEnd(); + } + + if ($this->statistics !== null) { + if (!is_object($this->statistics)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('statistics', TType::STRUCT, 8); + $xfer += $this->statistics->write($output); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/DateType.php b/src/lib/parquet/src/Flow/Parquet/Thrift/DateType.php index 2a174fcbc..242da2efb 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/DateType.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/DateType.php @@ -4,15 +4,15 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). 
* * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; +use Thrift\Type\{TType}; -class DateType extends TBase +class DateType { public static $_TSPEC = [ ]; @@ -30,11 +30,39 @@ public function getName() public function read($input) { - return $this->_read('DateType', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('DateType', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('DateType'); + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/DecimalType.php b/src/lib/parquet/src/Flow/Parquet/Thrift/DecimalType.php index fa922c8fb..5ea6f5cb0 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/DecimalType.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/DecimalType.php @@ -4,14 +4,13 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Type\{TType}; /** * Decimal logical type annotation. @@ -22,9 +21,9 @@ * To maintain forward-compatibility in v1, implementations using this logical * type must also set scale and precision on the annotated SchemaElement. * - * Allowed for physical types: INT32, INT64, FIXED, and BINARY + * Allowed for physical types: INT32, INT64, FIXED_LEN_BYTE_ARRAY, and BYTE_ARRAY. 
*/ -class DecimalType extends TBase +class DecimalType { public static $_TSPEC = [ 1 => [ @@ -53,8 +52,14 @@ class DecimalType extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['scale'])) { + $this->scale = $vals['scale']; + } + + if (isset($vals['precision'])) { + $this->precision = $vals['precision']; + } } } @@ -65,11 +70,68 @@ public function getName() public function read($input) { - return $this->_read('DecimalType', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->scale); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->precision); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('DecimalType', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('DecimalType'); + + if ($this->scale !== null) { + $xfer += $output->writeFieldBegin('scale', TType::I32, 1); + $xfer += $output->writeI32($this->scale); + $xfer += $output->writeFieldEnd(); + } + + if ($this->precision !== null) { + $xfer += $output->writeFieldBegin('precision', TType::I32, 2); + $xfer += $output->writeI32($this->precision); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/DictionaryPageHeader.php 
b/src/lib/parquet/src/Flow/Parquet/Thrift/DictionaryPageHeader.php index 0483b7889..0b847a591 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/DictionaryPageHeader.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/DictionaryPageHeader.php @@ -4,21 +4,20 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Type\{TType}; /** * The dictionary page must be placed at the first position of the column chunk * if it is partly or completely dictionary encoded. At most one dictionary page * can be placed in a column chunk. */ -class DictionaryPageHeader extends TBase +class DictionaryPageHeader { public static $_TSPEC = [ 1 => [ @@ -64,8 +63,18 @@ class DictionaryPageHeader extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['num_values'])) { + $this->num_values = $vals['num_values']; + } + + if (isset($vals['encoding'])) { + $this->encoding = $vals['encoding']; + } + + if (isset($vals['is_sorted'])) { + $this->is_sorted = $vals['is_sorted']; + } } } @@ -76,11 +85,82 @@ public function getName() public function read($input) { - return $this->_read('DictionaryPageHeader', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->num_values); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->encoding); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 3: + if 
($ftype == TType::BOOL) { + $xfer += $input->readBool($this->is_sorted); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('DictionaryPageHeader', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('DictionaryPageHeader'); + + if ($this->num_values !== null) { + $xfer += $output->writeFieldBegin('num_values', TType::I32, 1); + $xfer += $output->writeI32($this->num_values); + $xfer += $output->writeFieldEnd(); + } + + if ($this->encoding !== null) { + $xfer += $output->writeFieldBegin('encoding', TType::I32, 2); + $xfer += $output->writeI32($this->encoding); + $xfer += $output->writeFieldEnd(); + } + + if ($this->is_sorted !== null) { + $xfer += $output->writeFieldBegin('is_sorted', TType::BOOL, 3); + $xfer += $output->writeBool($this->is_sorted); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/Encoding.php b/src/lib/parquet/src/Flow/Parquet/Thrift/Encoding.php index 7ff21a4ee..31986a2a3 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/Encoding.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/Encoding.php @@ -4,13 +4,12 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ - /** * Encodings supported by Parquet. Not all encodings are valid for all types. These * enums are also used to specify the encoding of definition and repetition levels. @@ -25,12 +24,15 @@ final class Encoding public const BIT_PACKED = 4; /** - * Encoding for floating-point data. 
+ * Encoding for fixed-width data (FLOAT, DOUBLE, INT32, INT64, FIXED_LEN_BYTE_ARRAY). * K byte-streams are created where K is the size in bytes of the data type. - * The individual bytes of an FP value are scattered to the corresponding stream and + * The individual bytes of a value are scattered to the corresponding stream and * the streams are concatenated. * This itself does not reduce the size of the data but can lead to better compression * afterwards. + * + * Added in 2.8 for FLOAT and DOUBLE. + * Support for INT32, INT64 and FIXED_LEN_BYTE_ARRAY added in 2.11. */ public const BYTE_STREAM_SPLIT = 9; diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/EncryptionAlgorithm.php b/src/lib/parquet/src/Flow/Parquet/Thrift/EncryptionAlgorithm.php index 7332c62cb..2bde8579f 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/EncryptionAlgorithm.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/EncryptionAlgorithm.php @@ -4,16 +4,16 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). 
* * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Exception\{TProtocolException}; +use Thrift\Type\{TType}; -class EncryptionAlgorithm extends TBase +class EncryptionAlgorithm { public static $_TSPEC = [ 1 => [ @@ -44,8 +44,14 @@ class EncryptionAlgorithm extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['AES_GCM_V1'])) { + $this->AES_GCM_V1 = $vals['AES_GCM_V1']; + } + + if (isset($vals['AES_GCM_CTR_V1'])) { + $this->AES_GCM_CTR_V1 = $vals['AES_GCM_CTR_V1']; + } } } @@ -56,11 +62,76 @@ public function getName() public function read($input) { - return $this->_read('EncryptionAlgorithm', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::STRUCT) { + $this->AES_GCM_V1 = new AesGcmV1(); + $xfer += $this->AES_GCM_V1->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::STRUCT) { + $this->AES_GCM_CTR_V1 = new AesGcmCtrV1(); + $xfer += $this->AES_GCM_CTR_V1->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('EncryptionAlgorithm', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('EncryptionAlgorithm'); + + if ($this->AES_GCM_V1 !== null) { + if (!is_object($this->AES_GCM_V1)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += 
$output->writeFieldBegin('AES_GCM_V1', TType::STRUCT, 1); + $xfer += $this->AES_GCM_V1->write($output); + $xfer += $output->writeFieldEnd(); + } + + if ($this->AES_GCM_CTR_V1 !== null) { + if (!is_object($this->AES_GCM_CTR_V1)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('AES_GCM_CTR_V1', TType::STRUCT, 2); + $xfer += $this->AES_GCM_CTR_V1->write($output); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/EncryptionWithColumnKey.php b/src/lib/parquet/src/Flow/Parquet/Thrift/EncryptionWithColumnKey.php index 28ebdeee2..e53dfd23a 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/EncryptionWithColumnKey.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/EncryptionWithColumnKey.php @@ -4,16 +4,16 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). 
* * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Exception\{TProtocolException}; +use Thrift\Type\{TType}; -class EncryptionWithColumnKey extends TBase +class EncryptionWithColumnKey { public static $_TSPEC = [ 1 => [ @@ -50,8 +50,14 @@ class EncryptionWithColumnKey extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['path_in_schema'])) { + $this->path_in_schema = $vals['path_in_schema']; + } + + if (isset($vals['key_metadata'])) { + $this->key_metadata = $vals['key_metadata']; + } } } @@ -62,11 +68,86 @@ public function getName() public function read($input) { - return $this->_read('EncryptionWithColumnKey', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::LST) { + $this->path_in_schema = []; + $_size42 = 0; + $_etype45 = 0; + $xfer += $input->readListBegin($_etype45, $_size42); + + for ($_i46 = 0; $_i46 < $_size42; $_i46++) { + $elem47 = null; + $xfer += $input->readString($elem47); + $this->path_in_schema[] = $elem47; + } + $xfer += $input->readListEnd(); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::STRING) { + $xfer += $input->readString($this->key_metadata); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('EncryptionWithColumnKey', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('EncryptionWithColumnKey'); + + 
if ($this->path_in_schema !== null) { + if (!is_array($this->path_in_schema)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('path_in_schema', TType::LST, 1); + $output->writeListBegin(TType::STRING, count($this->path_in_schema)); + + foreach ($this->path_in_schema as $iter48) { + $xfer += $output->writeString($iter48); + } + $output->writeListEnd(); + $xfer += $output->writeFieldEnd(); + } + + if ($this->key_metadata !== null) { + $xfer += $output->writeFieldBegin('key_metadata', TType::STRING, 2); + $xfer += $output->writeString($this->key_metadata); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/EncryptionWithFooterKey.php b/src/lib/parquet/src/Flow/Parquet/Thrift/EncryptionWithFooterKey.php index 3c88eebd8..e9297a64a 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/EncryptionWithFooterKey.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/EncryptionWithFooterKey.php @@ -4,15 +4,15 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). 
* * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; +use Thrift\Type\{TType}; -class EncryptionWithFooterKey extends TBase +class EncryptionWithFooterKey { public static $_TSPEC = [ ]; @@ -30,11 +30,39 @@ public function getName() public function read($input) { - return $this->_read('EncryptionWithFooterKey', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('EncryptionWithFooterKey', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('EncryptionWithFooterKey'); + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/EnumType.php b/src/lib/parquet/src/Flow/Parquet/Thrift/EnumType.php index a5082e295..844604ac9 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/EnumType.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/EnumType.php @@ -4,15 +4,15 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). 
* * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; +use Thrift\Type\{TType}; -class EnumType extends TBase +class EnumType { public static $_TSPEC = [ ]; @@ -30,11 +30,39 @@ public function getName() public function read($input) { - return $this->_read('EnumType', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('EnumType', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('EnumType'); + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/FieldRepetitionType.php b/src/lib/parquet/src/Flow/Parquet/Thrift/FieldRepetitionType.php index 12faea3d1..4e79cfba1 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/FieldRepetitionType.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/FieldRepetitionType.php @@ -4,20 +4,19 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ - /** * Representation of Schemas. */ final class FieldRepetitionType { /** - * The field is optional (can be null) and each record has 0 or 1 values. + * The field is optional (can be null) and each row has 0 or 1 values. */ public const OPTIONAL = 1; @@ -27,7 +26,7 @@ final class FieldRepetitionType public const REPEATED = 2; /** - * This field is required (can not be null) and each record has exactly 1 value. 
+ * This field is required (can not be null) and each row has exactly 1 value. */ public const REQUIRED = 0; diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/FileCryptoMetaData.php b/src/lib/parquet/src/Flow/Parquet/Thrift/FileCryptoMetaData.php index 663d23578..f505c6ff6 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/FileCryptoMetaData.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/FileCryptoMetaData.php @@ -4,19 +4,19 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Exception\{TProtocolException}; +use Thrift\Type\{TType}; /** * Crypto metadata for files with encrypted footer *. */ -class FileCryptoMetaData extends TBase +class FileCryptoMetaData { public static $_TSPEC = [ 1 => [ @@ -53,8 +53,14 @@ class FileCryptoMetaData extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['encryption_algorithm'])) { + $this->encryption_algorithm = $vals['encryption_algorithm']; + } + + if (isset($vals['key_metadata'])) { + $this->key_metadata = $vals['key_metadata']; + } } } @@ -65,11 +71,72 @@ public function getName() public function read($input) { - return $this->_read('FileCryptoMetaData', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::STRUCT) { + $this->encryption_algorithm = new EncryptionAlgorithm(); + $xfer += $this->encryption_algorithm->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::STRING) { + $xfer += 
$input->readString($this->key_metadata); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('FileCryptoMetaData', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('FileCryptoMetaData'); + + if ($this->encryption_algorithm !== null) { + if (!is_object($this->encryption_algorithm)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('encryption_algorithm', TType::STRUCT, 1); + $xfer += $this->encryption_algorithm->write($output); + $xfer += $output->writeFieldEnd(); + } + + if ($this->key_metadata !== null) { + $xfer += $output->writeFieldBegin('key_metadata', TType::STRING, 2); + $xfer += $output->writeString($this->key_metadata); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/FileMetaData.php b/src/lib/parquet/src/Flow/Parquet/Thrift/FileMetaData.php index 3abe7d12a..8340e3a5f 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/FileMetaData.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/FileMetaData.php @@ -4,19 +4,19 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Exception\{TProtocolException}; +use Thrift\Type\{TType}; /** * Description for file metadata. 
*/ -class FileMetaData extends TBase +class FileMetaData { public static $_TSPEC = [ 1 => [ @@ -105,7 +105,7 @@ class FileMetaData extends TBase * The obsolete min and max fields in the Statistics object are always sorted * by signed comparison regardless of column_orders. * - * @var ColumnOrder[] + * @var \Flow\Parquet\Thrift\ColumnOrder[] */ public $column_orders; @@ -138,7 +138,7 @@ class FileMetaData extends TBase /** * Optional key/value metadata *. * - * @var KeyValue[] + * @var \Flow\Parquet\Thrift\KeyValue[] */ public $key_value_metadata; @@ -152,7 +152,7 @@ class FileMetaData extends TBase /** * Row groups in this file *. * - * @var RowGroup[] + * @var \Flow\Parquet\Thrift\RowGroup[] */ public $row_groups; @@ -164,7 +164,7 @@ class FileMetaData extends TBase * used to map columns to nodes in the schema. * The first element is the root *. * - * @var SchemaElement[] + * @var \Flow\Parquet\Thrift\SchemaElement[] */ public $schema; @@ -177,8 +177,42 @@ class FileMetaData extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['version'])) { + $this->version = $vals['version']; + } + + if (isset($vals['schema'])) { + $this->schema = $vals['schema']; + } + + if (isset($vals['num_rows'])) { + $this->num_rows = $vals['num_rows']; + } + + if (isset($vals['row_groups'])) { + $this->row_groups = $vals['row_groups']; + } + + if (isset($vals['key_value_metadata'])) { + $this->key_value_metadata = $vals['key_value_metadata']; + } + + if (isset($vals['created_by'])) { + $this->created_by = $vals['created_by']; + } + + if (isset($vals['column_orders'])) { + $this->column_orders = $vals['column_orders']; + } + + if (isset($vals['encryption_algorithm'])) { + $this->encryption_algorithm = $vals['encryption_algorithm']; + } + + if (isset($vals['footer_signing_key_metadata'])) { + $this->footer_signing_key_metadata = $vals['footer_signing_key_metadata']; + } } } 
@@ -189,11 +223,246 @@ public function getName() public function read($input) { - return $this->_read('FileMetaData', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->version); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::LST) { + $this->schema = []; + $_size119 = 0; + $_etype122 = 0; + $xfer += $input->readListBegin($_etype122, $_size119); + + for ($_i123 = 0; $_i123 < $_size119; $_i123++) { + $elem124 = null; + $elem124 = new SchemaElement(); + $xfer += $elem124->read($input); + $this->schema[] = $elem124; + } + $xfer += $input->readListEnd(); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 3: + if ($ftype == TType::I64) { + $xfer += $input->readI64($this->num_rows); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 4: + if ($ftype == TType::LST) { + $this->row_groups = []; + $_size125 = 0; + $_etype128 = 0; + $xfer += $input->readListBegin($_etype128, $_size125); + + for ($_i129 = 0; $_i129 < $_size125; $_i129++) { + $elem130 = null; + $elem130 = new RowGroup(); + $xfer += $elem130->read($input); + $this->row_groups[] = $elem130; + } + $xfer += $input->readListEnd(); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 5: + if ($ftype == TType::LST) { + $this->key_value_metadata = []; + $_size131 = 0; + $_etype134 = 0; + $xfer += $input->readListBegin($_etype134, $_size131); + + for ($_i135 = 0; $_i135 < $_size131; $_i135++) { + $elem136 = null; + $elem136 = new KeyValue(); + $xfer += $elem136->read($input); + $this->key_value_metadata[] = $elem136; + } + $xfer += $input->readListEnd(); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 6: + if ($ftype == TType::STRING) 
{ + $xfer += $input->readString($this->created_by); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 7: + if ($ftype == TType::LST) { + $this->column_orders = []; + $_size137 = 0; + $_etype140 = 0; + $xfer += $input->readListBegin($_etype140, $_size137); + + for ($_i141 = 0; $_i141 < $_size137; $_i141++) { + $elem142 = null; + $elem142 = new ColumnOrder(); + $xfer += $elem142->read($input); + $this->column_orders[] = $elem142; + } + $xfer += $input->readListEnd(); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 8: + if ($ftype == TType::STRUCT) { + $this->encryption_algorithm = new EncryptionAlgorithm(); + $xfer += $this->encryption_algorithm->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 9: + if ($ftype == TType::STRING) { + $xfer += $input->readString($this->footer_signing_key_metadata); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('FileMetaData', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('FileMetaData'); + + if ($this->version !== null) { + $xfer += $output->writeFieldBegin('version', TType::I32, 1); + $xfer += $output->writeI32($this->version); + $xfer += $output->writeFieldEnd(); + } + + if ($this->schema !== null) { + if (!is_array($this->schema)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('schema', TType::LST, 2); + $output->writeListBegin(TType::STRUCT, count($this->schema)); + + foreach ($this->schema as $iter143) { + $xfer += $iter143->write($output); + } + $output->writeListEnd(); + $xfer += $output->writeFieldEnd(); + } + + if ($this->num_rows !== null) { + $xfer += $output->writeFieldBegin('num_rows', TType::I64, 3); + $xfer += 
$output->writeI64($this->num_rows); + $xfer += $output->writeFieldEnd(); + } + + if ($this->row_groups !== null) { + if (!is_array($this->row_groups)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('row_groups', TType::LST, 4); + $output->writeListBegin(TType::STRUCT, count($this->row_groups)); + + foreach ($this->row_groups as $iter144) { + $xfer += $iter144->write($output); + } + $output->writeListEnd(); + $xfer += $output->writeFieldEnd(); + } + + if ($this->key_value_metadata !== null) { + if (!is_array($this->key_value_metadata)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('key_value_metadata', TType::LST, 5); + $output->writeListBegin(TType::STRUCT, count($this->key_value_metadata)); + + foreach ($this->key_value_metadata as $iter145) { + $xfer += $iter145->write($output); + } + $output->writeListEnd(); + $xfer += $output->writeFieldEnd(); + } + + if ($this->created_by !== null) { + $xfer += $output->writeFieldBegin('created_by', TType::STRING, 6); + $xfer += $output->writeString($this->created_by); + $xfer += $output->writeFieldEnd(); + } + + if ($this->column_orders !== null) { + if (!is_array($this->column_orders)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('column_orders', TType::LST, 7); + $output->writeListBegin(TType::STRUCT, count($this->column_orders)); + + foreach ($this->column_orders as $iter146) { + $xfer += $iter146->write($output); + } + $output->writeListEnd(); + $xfer += $output->writeFieldEnd(); + } + + if ($this->encryption_algorithm !== null) { + if (!is_object($this->encryption_algorithm)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('encryption_algorithm', TType::STRUCT, 8); + $xfer += 
$this->encryption_algorithm->write($output); + $xfer += $output->writeFieldEnd(); + } + + if ($this->footer_signing_key_metadata !== null) { + $xfer += $output->writeFieldBegin('footer_signing_key_metadata', TType::STRING, 9); + $xfer += $output->writeString($this->footer_signing_key_metadata); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/Float16Type.php b/src/lib/parquet/src/Flow/Parquet/Thrift/Float16Type.php new file mode 100644 index 000000000..3c13572f3 --- /dev/null +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/Float16Type.php @@ -0,0 +1,68 @@ +readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; + } + + public function write($output) + { + $xfer = 0; + $xfer += $output->writeStructBegin('Float16Type'); + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; + } +} diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/IndexPageHeader.php b/src/lib/parquet/src/Flow/Parquet/Thrift/IndexPageHeader.php index 48d289a94..a1092902a 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/IndexPageHeader.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/IndexPageHeader.php @@ -4,15 +4,15 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). 
* * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; +use Thrift\Type\{TType}; -class IndexPageHeader extends TBase +class IndexPageHeader { public static $_TSPEC = [ ]; @@ -30,11 +30,39 @@ public function getName() public function read($input) { - return $this->_read('IndexPageHeader', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('IndexPageHeader', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('IndexPageHeader'); + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/IntType.php b/src/lib/parquet/src/Flow/Parquet/Thrift/IntType.php index 56b70fdb3..02eee1a91 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/IntType.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/IntType.php @@ -4,14 +4,13 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Type\{TType}; /** * Integer logical type annotation. 
@@ -20,7 +19,7 @@ * * Allowed for physical types: INT32, INT64 */ -class IntType extends TBase +class IntType { public static $_TSPEC = [ 1 => [ @@ -49,8 +48,14 @@ class IntType extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['bitWidth'])) { + $this->bitWidth = $vals['bitWidth']; + } + + if (isset($vals['isSigned'])) { + $this->isSigned = $vals['isSigned']; + } } } @@ -61,11 +66,68 @@ public function getName() public function read($input) { - return $this->_read('IntType', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::BYTE) { + $xfer += $input->readByte($this->bitWidth); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::BOOL) { + $xfer += $input->readBool($this->isSigned); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('IntType', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('IntType'); + + if ($this->bitWidth !== null) { + $xfer += $output->writeFieldBegin('bitWidth', TType::BYTE, 1); + $xfer += $output->writeByte($this->bitWidth); + $xfer += $output->writeFieldEnd(); + } + + if ($this->isSigned !== null) { + $xfer += $output->writeFieldBegin('isSigned', TType::BOOL, 2); + $xfer += $output->writeBool($this->isSigned); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git 
a/src/lib/parquet/src/Flow/Parquet/Thrift/JsonType.php b/src/lib/parquet/src/Flow/Parquet/Thrift/JsonType.php index cbb5bf429..093af78d7 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/JsonType.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/JsonType.php @@ -4,20 +4,20 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; +use Thrift\Type\{TType}; /** * Embedded JSON logical type annotation. * - * Allowed for physical types: BINARY + * Allowed for physical types: BYTE_ARRAY */ -class JsonType extends TBase +class JsonType { public static $_TSPEC = [ ]; @@ -35,11 +35,39 @@ public function getName() public function read($input) { - return $this->_read('JsonType', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('JsonType', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('JsonType'); + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/KeyValue.php b/src/lib/parquet/src/Flow/Parquet/Thrift/KeyValue.php index a247969aa..0a4070b14 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/KeyValue.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/KeyValue.php @@ -4,19 +4,18 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). 
* * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Type\{TType}; /** * Wrapper struct to store key values. */ -class KeyValue extends TBase +class KeyValue { public static $_TSPEC = [ 1 => [ @@ -45,8 +44,14 @@ class KeyValue extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['key'])) { + $this->key = $vals['key']; + } + + if (isset($vals['value'])) { + $this->value = $vals['value']; + } } } @@ -57,11 +62,68 @@ public function getName() public function read($input) { - return $this->_read('KeyValue', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::STRING) { + $xfer += $input->readString($this->key); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::STRING) { + $xfer += $input->readString($this->value); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('KeyValue', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('KeyValue'); + + if ($this->key !== null) { + $xfer += $output->writeFieldBegin('key', TType::STRING, 1); + $xfer += $output->writeString($this->key); + $xfer += $output->writeFieldEnd(); + } + + if ($this->value !== null) { + $xfer += $output->writeFieldBegin('value', TType::STRING, 2); + $xfer += $output->writeString($this->value); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + 
$xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/ListType.php b/src/lib/parquet/src/Flow/Parquet/Thrift/ListType.php index 4f1bbfc0b..f2d04797a 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/ListType.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/ListType.php @@ -4,15 +4,15 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; +use Thrift\Type\{TType}; -class ListType extends TBase +class ListType { public static $_TSPEC = [ ]; @@ -30,11 +30,39 @@ public function getName() public function read($input) { - return $this->_read('ListType', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('ListType', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('ListType'); + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/LogicalType.php b/src/lib/parquet/src/Flow/Parquet/Thrift/LogicalType.php index b427d72b0..241278aba 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/LogicalType.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/LogicalType.php @@ -4,14 +4,14 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). 
* * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Exception\{TProtocolException}; +use Thrift\Type\{TType}; /** * LogicalType annotations to replace ConvertedType. @@ -20,7 +20,7 @@ * SchemaElement must also set the corresponding ConvertedType (if any) * from the following table. */ -class LogicalType extends TBase +class LogicalType { public static $_TSPEC = [ 1 => [ @@ -101,6 +101,12 @@ class LogicalType extends TBase 'type' => TType::STRUCT, 'class' => '\Flow\Parquet\Thrift\UUIDType', ], + 15 => [ + 'var' => 'FLOAT16', + 'isRequired' => false, + 'type' => TType::STRUCT, + 'class' => '\Flow\Parquet\Thrift\Float16Type', + ], ]; public static $isValidate = false; @@ -125,6 +131,11 @@ class LogicalType extends TBase */ public $ENUM; + /** + * @var Float16Type + */ + public $FLOAT16; + /** * @var IntType */ @@ -172,8 +183,62 @@ class LogicalType extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['STRING'])) { + $this->STRING = $vals['STRING']; + } + + if (isset($vals['MAP'])) { + $this->MAP = $vals['MAP']; + } + + if (isset($vals['LIST'])) { + $this->LIST = $vals['LIST']; + } + + if (isset($vals['ENUM'])) { + $this->ENUM = $vals['ENUM']; + } + + if (isset($vals['DECIMAL'])) { + $this->DECIMAL = $vals['DECIMAL']; + } + + if (isset($vals['DATE'])) { + $this->DATE = $vals['DATE']; + } + + if (isset($vals['TIME'])) { + $this->TIME = $vals['TIME']; + } + + if (isset($vals['TIMESTAMP'])) { + $this->TIMESTAMP = $vals['TIMESTAMP']; + } + + if (isset($vals['INTEGER'])) { + $this->INTEGER = $vals['INTEGER']; + } + + if (isset($vals['UNKNOWN'])) { + $this->UNKNOWN = $vals['UNKNOWN']; + } + + if (isset($vals['JSON'])) { + $this->JSON = $vals['JSON']; + } + + if (isset($vals['BSON'])) { + $this->BSON = $vals['BSON']; + } + + if (isset($vals['UUID'])) { + $this->UUID 
= $vals['UUID']; + } + + if (isset($vals['FLOAT16'])) { + $this->FLOAT16 = $vals['FLOAT16']; + } } } @@ -184,11 +249,292 @@ public function getName() public function read($input) { - return $this->_read('LogicalType', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::STRUCT) { + $this->STRING = new StringType(); + $xfer += $this->STRING->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::STRUCT) { + $this->MAP = new MapType(); + $xfer += $this->MAP->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 3: + if ($ftype == TType::STRUCT) { + $this->LIST = new ListType(); + $xfer += $this->LIST->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 4: + if ($ftype == TType::STRUCT) { + $this->ENUM = new EnumType(); + $xfer += $this->ENUM->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 5: + if ($ftype == TType::STRUCT) { + $this->DECIMAL = new DecimalType(); + $xfer += $this->DECIMAL->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 6: + if ($ftype == TType::STRUCT) { + $this->DATE = new DateType(); + $xfer += $this->DATE->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 7: + if ($ftype == TType::STRUCT) { + $this->TIME = new TimeType(); + $xfer += $this->TIME->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 8: + if ($ftype == TType::STRUCT) { + $this->TIMESTAMP = new TimestampType(); + $xfer += $this->TIMESTAMP->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 10: + if ($ftype == TType::STRUCT) { + $this->INTEGER = new IntType(); + $xfer += 
$this->INTEGER->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 11: + if ($ftype == TType::STRUCT) { + $this->UNKNOWN = new NullType(); + $xfer += $this->UNKNOWN->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 12: + if ($ftype == TType::STRUCT) { + $this->JSON = new JsonType(); + $xfer += $this->JSON->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 13: + if ($ftype == TType::STRUCT) { + $this->BSON = new BsonType(); + $xfer += $this->BSON->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 14: + if ($ftype == TType::STRUCT) { + $this->UUID = new UUIDType(); + $xfer += $this->UUID->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 15: + if ($ftype == TType::STRUCT) { + $this->FLOAT16 = new Float16Type(); + $xfer += $this->FLOAT16->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('LogicalType', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('LogicalType'); + + if ($this->STRING !== null) { + if (!is_object($this->STRING)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('STRING', TType::STRUCT, 1); + $xfer += $this->STRING->write($output); + $xfer += $output->writeFieldEnd(); + } + + if ($this->MAP !== null) { + if (!is_object($this->MAP)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('MAP', TType::STRUCT, 2); + $xfer += $this->MAP->write($output); + $xfer += $output->writeFieldEnd(); + } + + if ($this->LIST !== null) { + if (!is_object($this->LIST)) { + throw new 
TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('LIST', TType::STRUCT, 3); + $xfer += $this->LIST->write($output); + $xfer += $output->writeFieldEnd(); + } + + if ($this->ENUM !== null) { + if (!is_object($this->ENUM)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('ENUM', TType::STRUCT, 4); + $xfer += $this->ENUM->write($output); + $xfer += $output->writeFieldEnd(); + } + + if ($this->DECIMAL !== null) { + if (!is_object($this->DECIMAL)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('DECIMAL', TType::STRUCT, 5); + $xfer += $this->DECIMAL->write($output); + $xfer += $output->writeFieldEnd(); + } + + if ($this->DATE !== null) { + if (!is_object($this->DATE)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('DATE', TType::STRUCT, 6); + $xfer += $this->DATE->write($output); + $xfer += $output->writeFieldEnd(); + } + + if ($this->TIME !== null) { + if (!is_object($this->TIME)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('TIME', TType::STRUCT, 7); + $xfer += $this->TIME->write($output); + $xfer += $output->writeFieldEnd(); + } + + if ($this->TIMESTAMP !== null) { + if (!is_object($this->TIMESTAMP)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('TIMESTAMP', TType::STRUCT, 8); + $xfer += $this->TIMESTAMP->write($output); + $xfer += $output->writeFieldEnd(); + } + + if ($this->INTEGER !== null) { + if (!is_object($this->INTEGER)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('INTEGER', 
TType::STRUCT, 10); + $xfer += $this->INTEGER->write($output); + $xfer += $output->writeFieldEnd(); + } + + if ($this->UNKNOWN !== null) { + if (!is_object($this->UNKNOWN)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('UNKNOWN', TType::STRUCT, 11); + $xfer += $this->UNKNOWN->write($output); + $xfer += $output->writeFieldEnd(); + } + + if ($this->JSON !== null) { + if (!is_object($this->JSON)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('JSON', TType::STRUCT, 12); + $xfer += $this->JSON->write($output); + $xfer += $output->writeFieldEnd(); + } + + if ($this->BSON !== null) { + if (!is_object($this->BSON)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('BSON', TType::STRUCT, 13); + $xfer += $this->BSON->write($output); + $xfer += $output->writeFieldEnd(); + } + + if ($this->UUID !== null) { + if (!is_object($this->UUID)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('UUID', TType::STRUCT, 14); + $xfer += $this->UUID->write($output); + $xfer += $output->writeFieldEnd(); + } + + if ($this->FLOAT16 !== null) { + if (!is_object($this->FLOAT16)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('FLOAT16', TType::STRUCT, 15); + $xfer += $this->FLOAT16->write($output); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/MapType.php b/src/lib/parquet/src/Flow/Parquet/Thrift/MapType.php index 117d1f447..f37506156 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/MapType.php +++ 
b/src/lib/parquet/src/Flow/Parquet/Thrift/MapType.php @@ -4,15 +4,15 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; +use Thrift\Type\{TType}; -class MapType extends TBase +class MapType { public static $_TSPEC = [ ]; @@ -30,11 +30,39 @@ public function getName() public function read($input) { - return $this->_read('MapType', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('MapType', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('MapType'); + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/MicroSeconds.php b/src/lib/parquet/src/Flow/Parquet/Thrift/MicroSeconds.php index d6863715a..b3216ce64 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/MicroSeconds.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/MicroSeconds.php @@ -4,15 +4,15 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). 
* * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; +use Thrift\Type\{TType}; -class MicroSeconds extends TBase +class MicroSeconds { public static $_TSPEC = [ ]; @@ -30,11 +30,39 @@ public function getName() public function read($input) { - return $this->_read('MicroSeconds', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('MicroSeconds', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('MicroSeconds'); + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/MilliSeconds.php b/src/lib/parquet/src/Flow/Parquet/Thrift/MilliSeconds.php index d57b90732..2d025e317 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/MilliSeconds.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/MilliSeconds.php @@ -4,18 +4,18 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; +use Thrift\Type\{TType}; /** * Time units for logical types. 
*/ -class MilliSeconds extends TBase +class MilliSeconds { public static $_TSPEC = [ ]; @@ -33,11 +33,39 @@ public function getName() public function read($input) { - return $this->_read('MilliSeconds', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('MilliSeconds', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('MilliSeconds'); + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/NanoSeconds.php b/src/lib/parquet/src/Flow/Parquet/Thrift/NanoSeconds.php index 53b682b39..d9bde3dad 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/NanoSeconds.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/NanoSeconds.php @@ -4,15 +4,15 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). 
* * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; +use Thrift\Type\{TType}; -class NanoSeconds extends TBase +class NanoSeconds { public static $_TSPEC = [ ]; @@ -30,11 +30,39 @@ public function getName() public function read($input) { - return $this->_read('NanoSeconds', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('NanoSeconds', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('NanoSeconds'); + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/NullType.php b/src/lib/parquet/src/Flow/Parquet/Thrift/NullType.php index 680965860..26787020d 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/NullType.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/NullType.php @@ -4,13 +4,13 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; +use Thrift\Type\{TType}; /** * Logical type to annotate a column that is always null. @@ -19,7 +19,7 @@ * null and the physical type can't be determined. This annotation signals * the case where the physical type was guessed from all null values. 
*/ -class NullType extends TBase +class NullType { public static $_TSPEC = [ ]; @@ -37,11 +37,39 @@ public function getName() public function read($input) { - return $this->_read('NullType', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('NullType', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('NullType'); + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/OffsetIndex.php b/src/lib/parquet/src/Flow/Parquet/Thrift/OffsetIndex.php index 4b1827469..f806df27c 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/OffsetIndex.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/OffsetIndex.php @@ -4,16 +4,23 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Exception\{TProtocolException}; +use Thrift\Type\{TType}; -class OffsetIndex extends TBase +/** + * Optional offsets for each data page in a ColumnChunk. + * + * Forms part of the page index, along with ColumnIndex. + * + * OffsetIndex may be present even if ColumnIndex is not. 
+ */ +class OffsetIndex { public static $_TSPEC = [ 1 => [ @@ -26,6 +33,15 @@ class OffsetIndex extends TBase 'class' => '\Flow\Parquet\Thrift\PageLocation', ], ], + 2 => [ + 'var' => 'unencoded_byte_array_data_bytes', + 'isRequired' => false, + 'type' => TType::LST, + 'etype' => TType::I64, + 'elem' => [ + 'type' => TType::I64, + ], + ], ]; public static $isValidate = false; @@ -34,14 +50,30 @@ class OffsetIndex extends TBase * PageLocations, ordered by increasing PageLocation.offset. It is required * that page_locations[i].first_row_index < page_locations[i+1].first_row_index. * - * @var PageLocation[] + * @var \Flow\Parquet\Thrift\PageLocation[] */ public $page_locations; + /** + * Unencoded/uncompressed size for BYTE_ARRAY types. + * + * See documention for unencoded_byte_array_data_bytes in SizeStatistics for + * more details on this field. + * + * @var int[] + */ + public $unencoded_byte_array_data_bytes; + public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['page_locations'])) { + $this->page_locations = $vals['page_locations']; + } + + if (isset($vals['unencoded_byte_array_data_bytes'])) { + $this->unencoded_byte_array_data_bytes = $vals['unencoded_byte_array_data_bytes']; + } } } @@ -52,11 +84,105 @@ public function getName() public function read($input) { - return $this->_read('OffsetIndex', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::LST) { + $this->page_locations = []; + $_size63 = 0; + $_etype66 = 0; + $xfer += $input->readListBegin($_etype66, $_size63); + + for ($_i67 = 0; $_i67 < $_size63; $_i67++) { + $elem68 = null; + $elem68 = new PageLocation(); + $xfer += $elem68->read($input); + 
$this->page_locations[] = $elem68; + } + $xfer += $input->readListEnd(); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::LST) { + $this->unencoded_byte_array_data_bytes = []; + $_size69 = 0; + $_etype72 = 0; + $xfer += $input->readListBegin($_etype72, $_size69); + + for ($_i73 = 0; $_i73 < $_size69; $_i73++) { + $elem74 = null; + $xfer += $input->readI64($elem74); + $this->unencoded_byte_array_data_bytes[] = $elem74; + } + $xfer += $input->readListEnd(); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('OffsetIndex', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('OffsetIndex'); + + if ($this->page_locations !== null) { + if (!is_array($this->page_locations)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('page_locations', TType::LST, 1); + $output->writeListBegin(TType::STRUCT, count($this->page_locations)); + + foreach ($this->page_locations as $iter75) { + $xfer += $iter75->write($output); + } + $output->writeListEnd(); + $xfer += $output->writeFieldEnd(); + } + + if ($this->unencoded_byte_array_data_bytes !== null) { + if (!is_array($this->unencoded_byte_array_data_bytes)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('unencoded_byte_array_data_bytes', TType::LST, 2); + $output->writeListBegin(TType::I64, count($this->unencoded_byte_array_data_bytes)); + + foreach ($this->unencoded_byte_array_data_bytes as $iter76) { + $xfer += $output->writeI64($iter76); + } + $output->writeListEnd(); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); 
+ + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/PageEncodingStats.php b/src/lib/parquet/src/Flow/Parquet/Thrift/PageEncodingStats.php index 9477952cd..5770d21b9 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/PageEncodingStats.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/PageEncodingStats.php @@ -4,19 +4,18 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Type\{TType}; /** * statistics of a given page type and encoding. */ -class PageEncodingStats extends TBase +class PageEncodingStats { public static $_TSPEC = [ 1 => [ @@ -63,8 +62,18 @@ class PageEncodingStats extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['page_type'])) { + $this->page_type = $vals['page_type']; + } + + if (isset($vals['encoding'])) { + $this->encoding = $vals['encoding']; + } + + if (isset($vals['count'])) { + $this->count = $vals['count']; + } } } @@ -75,11 +84,82 @@ public function getName() public function read($input) { - return $this->_read('PageEncodingStats', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->page_type); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->encoding); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 3: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->count); + } else { + $xfer 
+= $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('PageEncodingStats', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('PageEncodingStats'); + + if ($this->page_type !== null) { + $xfer += $output->writeFieldBegin('page_type', TType::I32, 1); + $xfer += $output->writeI32($this->page_type); + $xfer += $output->writeFieldEnd(); + } + + if ($this->encoding !== null) { + $xfer += $output->writeFieldBegin('encoding', TType::I32, 2); + $xfer += $output->writeI32($this->encoding); + $xfer += $output->writeFieldEnd(); + } + + if ($this->count !== null) { + $xfer += $output->writeFieldBegin('count', TType::I32, 3); + $xfer += $output->writeI32($this->count); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/PageHeader.php b/src/lib/parquet/src/Flow/Parquet/Thrift/PageHeader.php index 8e4ad7d3e..2bcd6df54 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/PageHeader.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/PageHeader.php @@ -4,16 +4,16 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Exception\{TProtocolException}; +use Thrift\Type\{TType}; -class PageHeader extends TBase +class PageHeader { public static $_TSPEC = [ 1 => [ @@ -73,7 +73,7 @@ class PageHeader extends TBase public $compressed_page_size; /** - * The 32-bit CRC checksum for the page, to be be calculated as follows:. 
+ * The 32-bit CRC checksum for the page, to be be calculated as follows: * * - The standard CRC32 algorithm is used (with polynomial 0x04C11DB7, * the same as in e.g. GZip). @@ -130,8 +130,38 @@ class PageHeader extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['type'])) { + $this->type = $vals['type']; + } + + if (isset($vals['uncompressed_page_size'])) { + $this->uncompressed_page_size = $vals['uncompressed_page_size']; + } + + if (isset($vals['compressed_page_size'])) { + $this->compressed_page_size = $vals['compressed_page_size']; + } + + if (isset($vals['crc'])) { + $this->crc = $vals['crc']; + } + + if (isset($vals['data_page_header'])) { + $this->data_page_header = $vals['data_page_header']; + } + + if (isset($vals['index_page_header'])) { + $this->index_page_header = $vals['index_page_header']; + } + + if (isset($vals['dictionary_page_header'])) { + $this->dictionary_page_header = $vals['dictionary_page_header']; + } + + if (isset($vals['data_page_header_v2'])) { + $this->data_page_header_v2 = $vals['data_page_header_v2']; + } } } @@ -142,11 +172,168 @@ public function getName() public function read($input) { - return $this->_read('PageHeader', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->type); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->uncompressed_page_size); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 3: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->compressed_page_size); + } else { + $xfer += $input->skip($ftype); 
+ } + + break; + case 4: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->crc); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 5: + if ($ftype == TType::STRUCT) { + $this->data_page_header = new DataPageHeader(); + $xfer += $this->data_page_header->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 6: + if ($ftype == TType::STRUCT) { + $this->index_page_header = new IndexPageHeader(); + $xfer += $this->index_page_header->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 7: + if ($ftype == TType::STRUCT) { + $this->dictionary_page_header = new DictionaryPageHeader(); + $xfer += $this->dictionary_page_header->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 8: + if ($ftype == TType::STRUCT) { + $this->data_page_header_v2 = new DataPageHeaderV2(); + $xfer += $this->data_page_header_v2->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('PageHeader', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('PageHeader'); + + if ($this->type !== null) { + $xfer += $output->writeFieldBegin('type', TType::I32, 1); + $xfer += $output->writeI32($this->type); + $xfer += $output->writeFieldEnd(); + } + + if ($this->uncompressed_page_size !== null) { + $xfer += $output->writeFieldBegin('uncompressed_page_size', TType::I32, 2); + $xfer += $output->writeI32($this->uncompressed_page_size); + $xfer += $output->writeFieldEnd(); + } + + if ($this->compressed_page_size !== null) { + $xfer += $output->writeFieldBegin('compressed_page_size', TType::I32, 3); + $xfer += $output->writeI32($this->compressed_page_size); + $xfer += $output->writeFieldEnd(); + } + + if ($this->crc !== null) { + $xfer += 
$output->writeFieldBegin('crc', TType::I32, 4); + $xfer += $output->writeI32($this->crc); + $xfer += $output->writeFieldEnd(); + } + + if ($this->data_page_header !== null) { + if (!is_object($this->data_page_header)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('data_page_header', TType::STRUCT, 5); + $xfer += $this->data_page_header->write($output); + $xfer += $output->writeFieldEnd(); + } + + if ($this->index_page_header !== null) { + if (!is_object($this->index_page_header)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('index_page_header', TType::STRUCT, 6); + $xfer += $this->index_page_header->write($output); + $xfer += $output->writeFieldEnd(); + } + + if ($this->dictionary_page_header !== null) { + if (!is_object($this->dictionary_page_header)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('dictionary_page_header', TType::STRUCT, 7); + $xfer += $this->dictionary_page_header->write($output); + $xfer += $output->writeFieldEnd(); + } + + if ($this->data_page_header_v2 !== null) { + if (!is_object($this->data_page_header_v2)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('data_page_header_v2', TType::STRUCT, 8); + $xfer += $this->data_page_header_v2->write($output); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/PageLocation.php b/src/lib/parquet/src/Flow/Parquet/Thrift/PageLocation.php index 1f0ab9d96..a3ce38241 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/PageLocation.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/PageLocation.php @@ -4,16 +4,15 @@ namespace 
Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Type\{TType}; -class PageLocation extends TBase +class PageLocation { public static $_TSPEC = [ 1 => [ @@ -44,8 +43,9 @@ class PageLocation extends TBase public $compressed_page_size; /** - * Index within the RowGroup of the first row of the page; this means pages - * change on record boundaries (r = 0). + * Index within the RowGroup of the first row of the page. When an + * OffsetIndex is present, pages must begin on row boundaries + * (repetition_level = 0). * * @var int */ @@ -60,8 +60,18 @@ class PageLocation extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['offset'])) { + $this->offset = $vals['offset']; + } + + if (isset($vals['compressed_page_size'])) { + $this->compressed_page_size = $vals['compressed_page_size']; + } + + if (isset($vals['first_row_index'])) { + $this->first_row_index = $vals['first_row_index']; + } } } @@ -72,11 +82,82 @@ public function getName() public function read($input) { - return $this->_read('PageLocation', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::I64) { + $xfer += $input->readI64($this->offset); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->compressed_page_size); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 3: + if ($ftype == TType::I64) { + $xfer += $input->readI64($this->first_row_index); 
+ } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('PageLocation', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('PageLocation'); + + if ($this->offset !== null) { + $xfer += $output->writeFieldBegin('offset', TType::I64, 1); + $xfer += $output->writeI64($this->offset); + $xfer += $output->writeFieldEnd(); + } + + if ($this->compressed_page_size !== null) { + $xfer += $output->writeFieldBegin('compressed_page_size', TType::I32, 2); + $xfer += $output->writeI32($this->compressed_page_size); + $xfer += $output->writeFieldEnd(); + } + + if ($this->first_row_index !== null) { + $xfer += $output->writeFieldBegin('first_row_index', TType::I64, 3); + $xfer += $output->writeI64($this->first_row_index); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/PageType.php b/src/lib/parquet/src/Flow/Parquet/Thrift/PageType.php index 9a8b41fd8..4d7a4e5ee 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/PageType.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/PageType.php @@ -4,7 +4,7 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/RowGroup.php b/src/lib/parquet/src/Flow/Parquet/Thrift/RowGroup.php index 36baccdbd..25f29a4a6 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/RowGroup.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/RowGroup.php @@ -4,16 +4,16 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). 
+ * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Exception\{TProtocolException}; +use Thrift\Type\{TType}; -class RowGroup extends TBase +class RowGroup { public static $_TSPEC = [ 1 => [ @@ -69,7 +69,7 @@ class RowGroup extends TBase * Metadata for each column chunk in this row group. * This list must have the same order as the SchemaElement list in FileMetaData. * - * @var ColumnChunk[] + * @var \Flow\Parquet\Thrift\ColumnChunk[] */ public $columns; @@ -99,7 +99,7 @@ class RowGroup extends TBase * If set, specifies a sort ordering of the rows in this RowGroup. * The sorting columns can be a subset of all the columns. * - * @var SortingColumn[] + * @var \Flow\Parquet\Thrift\SortingColumn[] */ public $sorting_columns; @@ -120,8 +120,34 @@ class RowGroup extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['columns'])) { + $this->columns = $vals['columns']; + } + + if (isset($vals['total_byte_size'])) { + $this->total_byte_size = $vals['total_byte_size']; + } + + if (isset($vals['num_rows'])) { + $this->num_rows = $vals['num_rows']; + } + + if (isset($vals['sorting_columns'])) { + $this->sorting_columns = $vals['sorting_columns']; + } + + if (isset($vals['file_offset'])) { + $this->file_offset = $vals['file_offset']; + } + + if (isset($vals['total_compressed_size'])) { + $this->total_compressed_size = $vals['total_compressed_size']; + } + + if (isset($vals['ordinal'])) { + $this->ordinal = $vals['ordinal']; + } } } @@ -132,11 +158,176 @@ public function getName() public function read($input) { - return $this->_read('RowGroup', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += 
$input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::LST) { + $this->columns = []; + $_size49 = 0; + $_etype52 = 0; + $xfer += $input->readListBegin($_etype52, $_size49); + + for ($_i53 = 0; $_i53 < $_size49; $_i53++) { + $elem54 = null; + $elem54 = new ColumnChunk(); + $xfer += $elem54->read($input); + $this->columns[] = $elem54; + } + $xfer += $input->readListEnd(); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::I64) { + $xfer += $input->readI64($this->total_byte_size); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 3: + if ($ftype == TType::I64) { + $xfer += $input->readI64($this->num_rows); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 4: + if ($ftype == TType::LST) { + $this->sorting_columns = []; + $_size55 = 0; + $_etype58 = 0; + $xfer += $input->readListBegin($_etype58, $_size55); + + for ($_i59 = 0; $_i59 < $_size55; $_i59++) { + $elem60 = null; + $elem60 = new SortingColumn(); + $xfer += $elem60->read($input); + $this->sorting_columns[] = $elem60; + } + $xfer += $input->readListEnd(); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 5: + if ($ftype == TType::I64) { + $xfer += $input->readI64($this->file_offset); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 6: + if ($ftype == TType::I64) { + $xfer += $input->readI64($this->total_compressed_size); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 7: + if ($ftype == TType::I16) { + $xfer += $input->readI16($this->ordinal); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('RowGroup', self::$_TSPEC, $output); + $xfer = 0; + $xfer += 
$output->writeStructBegin('RowGroup'); + + if ($this->columns !== null) { + if (!is_array($this->columns)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('columns', TType::LST, 1); + $output->writeListBegin(TType::STRUCT, count($this->columns)); + + foreach ($this->columns as $iter61) { + $xfer += $iter61->write($output); + } + $output->writeListEnd(); + $xfer += $output->writeFieldEnd(); + } + + if ($this->total_byte_size !== null) { + $xfer += $output->writeFieldBegin('total_byte_size', TType::I64, 2); + $xfer += $output->writeI64($this->total_byte_size); + $xfer += $output->writeFieldEnd(); + } + + if ($this->num_rows !== null) { + $xfer += $output->writeFieldBegin('num_rows', TType::I64, 3); + $xfer += $output->writeI64($this->num_rows); + $xfer += $output->writeFieldEnd(); + } + + if ($this->sorting_columns !== null) { + if (!is_array($this->sorting_columns)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('sorting_columns', TType::LST, 4); + $output->writeListBegin(TType::STRUCT, count($this->sorting_columns)); + + foreach ($this->sorting_columns as $iter62) { + $xfer += $iter62->write($output); + } + $output->writeListEnd(); + $xfer += $output->writeFieldEnd(); + } + + if ($this->file_offset !== null) { + $xfer += $output->writeFieldBegin('file_offset', TType::I64, 5); + $xfer += $output->writeI64($this->file_offset); + $xfer += $output->writeFieldEnd(); + } + + if ($this->total_compressed_size !== null) { + $xfer += $output->writeFieldBegin('total_compressed_size', TType::I64, 6); + $xfer += $output->writeI64($this->total_compressed_size); + $xfer += $output->writeFieldEnd(); + } + + if ($this->ordinal !== null) { + $xfer += $output->writeFieldBegin('ordinal', TType::I16, 7); + $xfer += $output->writeI16($this->ordinal); + $xfer += $output->writeFieldEnd(); + } + $xfer += 
$output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/SchemaElement.php b/src/lib/parquet/src/Flow/Parquet/Thrift/SchemaElement.php index 957bce912..6d509ce3a 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/SchemaElement.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/SchemaElement.php @@ -4,14 +4,14 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Exception\{TProtocolException}; +use Thrift\Type\{TType}; /** * Represents a element inside a schema definition. @@ -19,7 +19,7 @@ * - if it is a primitive type (leaf) then type is defined and num_children is undefined * the nodes are listed in depth first traversal order. */ -class SchemaElement extends TBase +class SchemaElement { public static $_TSPEC = [ 1 => [ @@ -167,8 +167,46 @@ class SchemaElement extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['type'])) { + $this->type = $vals['type']; + } + + if (isset($vals['type_length'])) { + $this->type_length = $vals['type_length']; + } + + if (isset($vals['repetition_type'])) { + $this->repetition_type = $vals['repetition_type']; + } + + if (isset($vals['name'])) { + $this->name = $vals['name']; + } + + if (isset($vals['num_children'])) { + $this->num_children = $vals['num_children']; + } + + if (isset($vals['converted_type'])) { + $this->converted_type = $vals['converted_type']; + } + + if (isset($vals['scale'])) { + $this->scale = $vals['scale']; + } + + if (isset($vals['precision'])) { + $this->precision = $vals['precision']; + } + + if (isset($vals['field_id'])) { + $this->field_id = $vals['field_id']; + } + + if 
(isset($vals['logicalType'])) { + $this->logicalType = $vals['logicalType']; + } } } @@ -179,11 +217,184 @@ public function getName() public function read($input) { - return $this->_read('SchemaElement', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->type); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->type_length); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 3: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->repetition_type); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 4: + if ($ftype == TType::STRING) { + $xfer += $input->readString($this->name); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 5: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->num_children); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 6: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->converted_type); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 7: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->scale); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 8: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->precision); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 9: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->field_id); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 10: + if ($ftype == TType::STRUCT) { + $this->logicalType = new LogicalType(); + $xfer += $this->logicalType->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += 
$input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('SchemaElement', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('SchemaElement'); + + if ($this->type !== null) { + $xfer += $output->writeFieldBegin('type', TType::I32, 1); + $xfer += $output->writeI32($this->type); + $xfer += $output->writeFieldEnd(); + } + + if ($this->type_length !== null) { + $xfer += $output->writeFieldBegin('type_length', TType::I32, 2); + $xfer += $output->writeI32($this->type_length); + $xfer += $output->writeFieldEnd(); + } + + if ($this->repetition_type !== null) { + $xfer += $output->writeFieldBegin('repetition_type', TType::I32, 3); + $xfer += $output->writeI32($this->repetition_type); + $xfer += $output->writeFieldEnd(); + } + + if ($this->name !== null) { + $xfer += $output->writeFieldBegin('name', TType::STRING, 4); + $xfer += $output->writeString($this->name); + $xfer += $output->writeFieldEnd(); + } + + if ($this->num_children !== null) { + $xfer += $output->writeFieldBegin('num_children', TType::I32, 5); + $xfer += $output->writeI32($this->num_children); + $xfer += $output->writeFieldEnd(); + } + + if ($this->converted_type !== null) { + $xfer += $output->writeFieldBegin('converted_type', TType::I32, 6); + $xfer += $output->writeI32($this->converted_type); + $xfer += $output->writeFieldEnd(); + } + + if ($this->scale !== null) { + $xfer += $output->writeFieldBegin('scale', TType::I32, 7); + $xfer += $output->writeI32($this->scale); + $xfer += $output->writeFieldEnd(); + } + + if ($this->precision !== null) { + $xfer += $output->writeFieldBegin('precision', TType::I32, 8); + $xfer += $output->writeI32($this->precision); + $xfer += $output->writeFieldEnd(); + } + + if ($this->field_id !== null) { + $xfer += $output->writeFieldBegin('field_id', TType::I32, 9); + $xfer += $output->writeI32($this->field_id); + $xfer += 
$output->writeFieldEnd(); + } + + if ($this->logicalType !== null) { + if (!is_object($this->logicalType)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('logicalType', TType::STRUCT, 10); + $xfer += $this->logicalType->write($output); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/SizeStatistics.php b/src/lib/parquet/src/Flow/Parquet/Thrift/SizeStatistics.php new file mode 100644 index 000000000..25848aaf0 --- /dev/null +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/SizeStatistics.php @@ -0,0 +1,236 @@ + [ + 'var' => 'unencoded_byte_array_data_bytes', + 'isRequired' => false, + 'type' => TType::I64, + ], + 2 => [ + 'var' => 'repetition_level_histogram', + 'isRequired' => false, + 'type' => TType::LST, + 'etype' => TType::I64, + 'elem' => [ + 'type' => TType::I64, + ], + ], + 3 => [ + 'var' => 'definition_level_histogram', + 'isRequired' => false, + 'type' => TType::LST, + 'etype' => TType::I64, + 'elem' => [ + 'type' => TType::I64, + ], + ], + ]; + + public static $isValidate = false; + + /** + * Same as repetition_level_histogram except for definition levels. + * + * This field may be omitted if max_definition_level is 0 or 1 without + * loss of information. + * + * @var int[] + */ + public $definition_level_histogram; + + /** + * When present, there is expected to be one element corresponding to each + * repetition (i.e. size=max repetition_level+1) where each element + * represents the number of times the repetition level was observed in the + * data. + * + * This field may be omitted if max_repetition_level is 0 without loss + * of information. + * + * @var int[] + */ + public $repetition_level_histogram; + + /** + * The number of physical bytes stored for BYTE_ARRAY data values assuming + * no encoding. 
This is exclusive of the bytes needed to store the length of + * each byte array. In other words, this field is equivalent to the `(size + * of PLAIN-ENCODING the byte array values) - (4 bytes * number of values + * written)`. To determine unencoded sizes of other types readers can use + * schema information multiplied by the number of non-null and null values. + * The number of null/non-null values can be inferred from the histograms + * below. + * + * For example, if a column chunk is dictionary-encoded with dictionary + * ["a", "bc", "cde"], and a data page contains the indices [0, 0, 1, 2], + * then this value for that data page should be 7 (1 + 1 + 2 + 3). + * + * This field should only be set for types that use BYTE_ARRAY as their + * physical type. + * + * @var int + */ + public $unencoded_byte_array_data_bytes; + + public function __construct($vals = null) + { + if (is_array($vals)) { + if (isset($vals['unencoded_byte_array_data_bytes'])) { + $this->unencoded_byte_array_data_bytes = $vals['unencoded_byte_array_data_bytes']; + } + + if (isset($vals['repetition_level_histogram'])) { + $this->repetition_level_histogram = $vals['repetition_level_histogram']; + } + + if (isset($vals['definition_level_histogram'])) { + $this->definition_level_histogram = $vals['definition_level_histogram']; + } + } + } + + public function getName() + { + return 'SizeStatistics'; + } + + public function read($input) + { + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::I64) { + $xfer += $input->readI64($this->unencoded_byte_array_data_bytes); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::LST) { + $this->repetition_level_histogram = []; + $_size0 = 0; + $_etype3 = 0; + $xfer += $input->readListBegin($_etype3, 
$_size0); + + for ($_i4 = 0; $_i4 < $_size0; $_i4++) { + $elem5 = null; + $xfer += $input->readI64($elem5); + $this->repetition_level_histogram[] = $elem5; + } + $xfer += $input->readListEnd(); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 3: + if ($ftype == TType::LST) { + $this->definition_level_histogram = []; + $_size6 = 0; + $_etype9 = 0; + $xfer += $input->readListBegin($_etype9, $_size6); + + for ($_i10 = 0; $_i10 < $_size6; $_i10++) { + $elem11 = null; + $xfer += $input->readI64($elem11); + $this->definition_level_histogram[] = $elem11; + } + $xfer += $input->readListEnd(); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; + } + + public function write($output) + { + $xfer = 0; + $xfer += $output->writeStructBegin('SizeStatistics'); + + if ($this->unencoded_byte_array_data_bytes !== null) { + $xfer += $output->writeFieldBegin('unencoded_byte_array_data_bytes', TType::I64, 1); + $xfer += $output->writeI64($this->unencoded_byte_array_data_bytes); + $xfer += $output->writeFieldEnd(); + } + + if ($this->repetition_level_histogram !== null) { + if (!is_array($this->repetition_level_histogram)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('repetition_level_histogram', TType::LST, 2); + $output->writeListBegin(TType::I64, count($this->repetition_level_histogram)); + + foreach ($this->repetition_level_histogram as $iter12) { + $xfer += $output->writeI64($iter12); + } + $output->writeListEnd(); + $xfer += $output->writeFieldEnd(); + } + + if ($this->definition_level_histogram !== null) { + if (!is_array($this->definition_level_histogram)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('definition_level_histogram', 
TType::LST, 3); + $output->writeListBegin(TType::I64, count($this->definition_level_histogram)); + + foreach ($this->definition_level_histogram as $iter13) { + $xfer += $output->writeI64($iter13); + } + $output->writeListEnd(); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; + } +} diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/SortingColumn.php b/src/lib/parquet/src/Flow/Parquet/Thrift/SortingColumn.php index ec7e378fa..880f16e36 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/SortingColumn.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/SortingColumn.php @@ -4,19 +4,18 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Type\{TType}; /** - * Wrapper struct to specify sort order. + * Sort order within a RowGroup of a leaf column. */ -class SortingColumn extends TBase +class SortingColumn { public static $_TSPEC = [ 1 => [ @@ -39,7 +38,7 @@ class SortingColumn extends TBase public static $isValidate = false; /** - * The column index (in this row group) *. + * The ordinal position of the column (in this row group) *. 
* * @var int */ @@ -62,8 +61,18 @@ class SortingColumn extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['column_idx'])) { + $this->column_idx = $vals['column_idx']; + } + + if (isset($vals['descending'])) { + $this->descending = $vals['descending']; + } + + if (isset($vals['nulls_first'])) { + $this->nulls_first = $vals['nulls_first']; + } } } @@ -74,11 +83,82 @@ public function getName() public function read($input) { - return $this->_read('SortingColumn', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::I32) { + $xfer += $input->readI32($this->column_idx); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::BOOL) { + $xfer += $input->readBool($this->descending); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 3: + if ($ftype == TType::BOOL) { + $xfer += $input->readBool($this->nulls_first); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('SortingColumn', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('SortingColumn'); + + if ($this->column_idx !== null) { + $xfer += $output->writeFieldBegin('column_idx', TType::I32, 1); + $xfer += $output->writeI32($this->column_idx); + $xfer += $output->writeFieldEnd(); + } + + if ($this->descending !== null) { + $xfer += $output->writeFieldBegin('descending', TType::BOOL, 2); + $xfer += $output->writeBool($this->descending); + $xfer += 
$output->writeFieldEnd(); + } + + if ($this->nulls_first !== null) { + $xfer += $output->writeFieldBegin('nulls_first', TType::BOOL, 3); + $xfer += $output->writeBool($this->nulls_first); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/SplitBlockAlgorithm.php b/src/lib/parquet/src/Flow/Parquet/Thrift/SplitBlockAlgorithm.php index dfce38cab..aaa692ea4 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/SplitBlockAlgorithm.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/SplitBlockAlgorithm.php @@ -4,18 +4,18 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; +use Thrift\Type\{TType}; /** * Block-based algorithm type annotation. *. */ -class SplitBlockAlgorithm extends TBase +class SplitBlockAlgorithm { public static $_TSPEC = [ ]; @@ -33,11 +33,39 @@ public function getName() public function read($input) { - return $this->_read('SplitBlockAlgorithm', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('SplitBlockAlgorithm', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('SplitBlockAlgorithm'); + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/Statistics.php 
b/src/lib/parquet/src/Flow/Parquet/Thrift/Statistics.php index 3a216748e..c6b252e90 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/Statistics.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/Statistics.php @@ -4,20 +4,19 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Type\{TType}; /** * Statistics per row group and per page * All fields are optional. */ -class Statistics extends TBase +class Statistics { public static $_TSPEC = [ 1 => [ @@ -50,6 +49,16 @@ class Statistics extends TBase 'isRequired' => false, 'type' => TType::STRING, ], + 7 => [ + 'var' => 'is_max_value_exact', + 'isRequired' => false, + 'type' => TType::BOOL, + ], + 8 => [ + 'var' => 'is_min_value_exact', + 'isRequired' => false, + 'type' => TType::BOOL, + ], ]; public static $isValidate = false; @@ -61,6 +70,20 @@ class Statistics extends TBase */ public $distinct_count; + /** + * If true, max_value is the actual maximum value for a column. + * + * @var bool + */ + public $is_max_value_exact; + + /** + * If true, min_value is the actual minimum value for a column. + * + * @var bool + */ + public $is_min_value_exact; + /** * DEPRECATED: min and max value of the column. Use min_value and max_value. * @@ -79,7 +102,13 @@ class Statistics extends TBase public $max; /** - * Min and max values for the column, determined by its ColumnOrder. + * Lower and upper bound values for the column, determined by its ColumnOrder. + * + * These may be the actual minimum and maximum values found on a page or column + * chunk, but can also be (more compact) values that do not exist on a page or + * column chunk. For example, instead of storing "Blart Versenwald III", a writer + * may set min_value="B", max_value="C". 
Such more compact values must still be + * valid values within the column's logical type. * * Values are encoded using PLAIN encoding, except that variable-length byte * arrays do not include a length prefix. @@ -99,7 +128,12 @@ class Statistics extends TBase public $min_value; /** - * count of null value in the column. + * Count of null values in the column. + * + * Writers SHOULD always write this field even if it is zero (i.e. no null value) + * or the column is not nullable. + * Readers MUST distinguish between null_count not being present and null_count == 0. + * If null_count is not present, readers MUST NOT assume null_count == 0. * * @var int */ @@ -107,8 +141,38 @@ class Statistics extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['max'])) { + $this->max = $vals['max']; + } + + if (isset($vals['min'])) { + $this->min = $vals['min']; + } + + if (isset($vals['null_count'])) { + $this->null_count = $vals['null_count']; + } + + if (isset($vals['distinct_count'])) { + $this->distinct_count = $vals['distinct_count']; + } + + if (isset($vals['max_value'])) { + $this->max_value = $vals['max_value']; + } + + if (isset($vals['min_value'])) { + $this->min_value = $vals['min_value']; + } + + if (isset($vals['is_max_value_exact'])) { + $this->is_max_value_exact = $vals['is_max_value_exact']; + } + + if (isset($vals['is_min_value_exact'])) { + $this->is_min_value_exact = $vals['is_min_value_exact']; + } } } @@ -119,11 +183,152 @@ public function getName() public function read($input) { - return $this->_read('Statistics', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::STRING) { + $xfer += 
$input->readString($this->max); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::STRING) { + $xfer += $input->readString($this->min); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 3: + if ($ftype == TType::I64) { + $xfer += $input->readI64($this->null_count); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 4: + if ($ftype == TType::I64) { + $xfer += $input->readI64($this->distinct_count); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 5: + if ($ftype == TType::STRING) { + $xfer += $input->readString($this->max_value); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 6: + if ($ftype == TType::STRING) { + $xfer += $input->readString($this->min_value); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 7: + if ($ftype == TType::BOOL) { + $xfer += $input->readBool($this->is_max_value_exact); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 8: + if ($ftype == TType::BOOL) { + $xfer += $input->readBool($this->is_min_value_exact); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('Statistics', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('Statistics'); + + if ($this->max !== null) { + $xfer += $output->writeFieldBegin('max', TType::STRING, 1); + $xfer += $output->writeString($this->max); + $xfer += $output->writeFieldEnd(); + } + + if ($this->min !== null) { + $xfer += $output->writeFieldBegin('min', TType::STRING, 2); + $xfer += $output->writeString($this->min); + $xfer += $output->writeFieldEnd(); + } + + if ($this->null_count !== null) { + $xfer += $output->writeFieldBegin('null_count', TType::I64, 3); + $xfer += $output->writeI64($this->null_count); + $xfer += 
$output->writeFieldEnd(); + } + + if ($this->distinct_count !== null) { + $xfer += $output->writeFieldBegin('distinct_count', TType::I64, 4); + $xfer += $output->writeI64($this->distinct_count); + $xfer += $output->writeFieldEnd(); + } + + if ($this->max_value !== null) { + $xfer += $output->writeFieldBegin('max_value', TType::STRING, 5); + $xfer += $output->writeString($this->max_value); + $xfer += $output->writeFieldEnd(); + } + + if ($this->min_value !== null) { + $xfer += $output->writeFieldBegin('min_value', TType::STRING, 6); + $xfer += $output->writeString($this->min_value); + $xfer += $output->writeFieldEnd(); + } + + if ($this->is_max_value_exact !== null) { + $xfer += $output->writeFieldBegin('is_max_value_exact', TType::BOOL, 7); + $xfer += $output->writeBool($this->is_max_value_exact); + $xfer += $output->writeFieldEnd(); + } + + if ($this->is_min_value_exact !== null) { + $xfer += $output->writeFieldBegin('is_min_value_exact', TType::BOOL, 8); + $xfer += $output->writeBool($this->is_min_value_exact); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/StringType.php b/src/lib/parquet/src/Flow/Parquet/Thrift/StringType.php index f39b6cc8a..90878262f 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/StringType.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/StringType.php @@ -4,18 +4,18 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; +use Thrift\Type\{TType}; /** * Empty structs to use as logical type annotations. 
*/ -class StringType extends TBase +class StringType { public static $_TSPEC = [ ]; @@ -33,11 +33,39 @@ public function getName() public function read($input) { - return $this->_read('StringType', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('StringType', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('StringType'); + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/TimeType.php b/src/lib/parquet/src/Flow/Parquet/Thrift/TimeType.php index 198b0ceae..69c956240 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/TimeType.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/TimeType.php @@ -4,21 +4,21 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Exception\{TProtocolException}; +use Thrift\Type\{TType}; /** * Time logical type annotation. 
* * Allowed for physical types: INT32 (millis), INT64 (micros, nanos) */ -class TimeType extends TBase +class TimeType { public static $_TSPEC = [ 1 => [ @@ -48,8 +48,14 @@ class TimeType extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['isAdjustedToUTC'])) { + $this->isAdjustedToUTC = $vals['isAdjustedToUTC']; + } + + if (isset($vals['unit'])) { + $this->unit = $vals['unit']; + } } } @@ -60,11 +66,72 @@ public function getName() public function read($input) { - return $this->_read('TimeType', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::BOOL) { + $xfer += $input->readBool($this->isAdjustedToUTC); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::STRUCT) { + $this->unit = new TimeUnit(); + $xfer += $this->unit->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('TimeType', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('TimeType'); + + if ($this->isAdjustedToUTC !== null) { + $xfer += $output->writeFieldBegin('isAdjustedToUTC', TType::BOOL, 1); + $xfer += $output->writeBool($this->isAdjustedToUTC); + $xfer += $output->writeFieldEnd(); + } + + if ($this->unit !== null) { + if (!is_object($this->unit)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('unit', TType::STRUCT, 2); + $xfer += $this->unit->write($output); 
+ $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/TimeUnit.php b/src/lib/parquet/src/Flow/Parquet/Thrift/TimeUnit.php index 1edbfba4f..d1e9b7cdb 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/TimeUnit.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/TimeUnit.php @@ -4,16 +4,16 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Exception\{TProtocolException}; +use Thrift\Type\{TType}; -class TimeUnit extends TBase +class TimeUnit { public static $_TSPEC = [ 1 => [ @@ -55,8 +55,18 @@ class TimeUnit extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['MILLIS'])) { + $this->MILLIS = $vals['MILLIS']; + } + + if (isset($vals['MICROS'])) { + $this->MICROS = $vals['MICROS']; + } + + if (isset($vals['NANOS'])) { + $this->NANOS = $vals['NANOS']; + } } } @@ -67,11 +77,94 @@ public function getName() public function read($input) { - return $this->_read('TimeUnit', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::STRUCT) { + $this->MILLIS = new MilliSeconds(); + $xfer += $this->MILLIS->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::STRUCT) { + $this->MICROS = new MicroSeconds(); + $xfer += $this->MICROS->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 3: + 
if ($ftype == TType::STRUCT) { + $this->NANOS = new NanoSeconds(); + $xfer += $this->NANOS->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('TimeUnit', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('TimeUnit'); + + if ($this->MILLIS !== null) { + if (!is_object($this->MILLIS)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('MILLIS', TType::STRUCT, 1); + $xfer += $this->MILLIS->write($output); + $xfer += $output->writeFieldEnd(); + } + + if ($this->MICROS !== null) { + if (!is_object($this->MICROS)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('MICROS', TType::STRUCT, 2); + $xfer += $this->MICROS->write($output); + $xfer += $output->writeFieldEnd(); + } + + if ($this->NANOS !== null) { + if (!is_object($this->NANOS)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('NANOS', TType::STRUCT, 3); + $xfer += $this->NANOS->write($output); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/TimestampType.php b/src/lib/parquet/src/Flow/Parquet/Thrift/TimestampType.php index 8a3a19d94..fcae93ed8 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/TimestampType.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/TimestampType.php @@ -4,21 +4,21 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). 
* * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; -use Thrift\Type\TType; +use Thrift\Exception\{TProtocolException}; +use Thrift\Type\{TType}; /** * Timestamp logical type annotation. * * Allowed for physical types: INT64 */ -class TimestampType extends TBase +class TimestampType { public static $_TSPEC = [ 1 => [ @@ -48,8 +48,14 @@ class TimestampType extends TBase public function __construct($vals = null) { - if (\is_array($vals)) { - parent::__construct(self::$_TSPEC, $vals); + if (is_array($vals)) { + if (isset($vals['isAdjustedToUTC'])) { + $this->isAdjustedToUTC = $vals['isAdjustedToUTC']; + } + + if (isset($vals['unit'])) { + $this->unit = $vals['unit']; + } } } @@ -60,11 +66,72 @@ public function getName() public function read($input) { - return $this->_read('TimestampType', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + case 1: + if ($ftype == TType::BOOL) { + $xfer += $input->readBool($this->isAdjustedToUTC); + } else { + $xfer += $input->skip($ftype); + } + + break; + case 2: + if ($ftype == TType::STRUCT) { + $this->unit = new TimeUnit(); + $xfer += $this->unit->read($input); + } else { + $xfer += $input->skip($ftype); + } + + break; + + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('TimestampType', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('TimestampType'); + + if ($this->isAdjustedToUTC !== null) { + $xfer += $output->writeFieldBegin('isAdjustedToUTC', TType::BOOL, 1); + $xfer += $output->writeBool($this->isAdjustedToUTC); + $xfer += $output->writeFieldEnd(); + } + + if ($this->unit 
!== null) { + if (!is_object($this->unit)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('unit', TType::STRUCT, 2); + $xfer += $this->unit->write($output); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/Type.php b/src/lib/parquet/src/Flow/Parquet/Thrift/Type.php index f1901fc27..eca1317e7 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/Type.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/Type.php @@ -4,13 +4,12 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ - /** * Types supported by Parquet. These types are intended to be used in combination * with the encodings to control the on disk storage format. diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/TypeDefinedOrder.php b/src/lib/parquet/src/Flow/Parquet/Thrift/TypeDefinedOrder.php index 35b4a1933..477a6b24e 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/TypeDefinedOrder.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/TypeDefinedOrder.php @@ -4,18 +4,18 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; +use Thrift\Type\{TType}; /** * Empty struct to signal the order defined by the physical or logical type. 
*/ -class TypeDefinedOrder extends TBase +class TypeDefinedOrder { public static $_TSPEC = [ ]; @@ -33,11 +33,39 @@ public function getName() public function read($input) { - return $this->_read('TypeDefinedOrder', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('TypeDefinedOrder', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('TypeDefinedOrder'); + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/UUIDType.php b/src/lib/parquet/src/Flow/Parquet/Thrift/UUIDType.php index e36477028..e0f261b58 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/UUIDType.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/UUIDType.php @@ -4,15 +4,15 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). 
* * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; +use Thrift\Type\{TType}; -class UUIDType extends TBase +class UUIDType { public static $_TSPEC = [ ]; @@ -30,11 +30,39 @@ public function getName() public function read($input) { - return $this->_read('UUIDType', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('UUIDType', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('UUIDType'); + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/Uncompressed.php b/src/lib/parquet/src/Flow/Parquet/Thrift/Uncompressed.php index 48f3e16bd..c14927024 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/Uncompressed.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/Uncompressed.php @@ -4,18 +4,18 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; +use Thrift\Type\{TType}; /** * The compression used in the Bloom filter. 
*/ -class Uncompressed extends TBase +class Uncompressed { public static $_TSPEC = [ ]; @@ -33,11 +33,39 @@ public function getName() public function read($input) { - return $this->_read('Uncompressed', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('Uncompressed', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('Uncompressed'); + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/src/Flow/Parquet/Thrift/XxHash.php b/src/lib/parquet/src/Flow/Parquet/Thrift/XxHash.php index 4b1722ca4..988ac3e44 100644 --- a/src/lib/parquet/src/Flow/Parquet/Thrift/XxHash.php +++ b/src/lib/parquet/src/Flow/Parquet/Thrift/XxHash.php @@ -4,19 +4,19 @@ namespace Flow\Parquet\Thrift; /** - * Autogenerated by Thrift Compiler (0.19.0). + * Autogenerated by Thrift Compiler (0.18.1). * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * * @generated */ -use Thrift\Base\TBase; +use Thrift\Type\{TType}; /** * Hash strategy type annotation. xxHash is an extremely fast non-cryptographic hash * algorithm. It uses 64 bits version of xxHash. 
*/ -class XxHash extends TBase +class XxHash { public static $_TSPEC = [ ]; @@ -34,11 +34,39 @@ public function getName() public function read($input) { - return $this->_read('XxHash', self::$_TSPEC, $input); + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + + while (true) { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + + if ($ftype == TType::STOP) { + break; + } + + switch ($fid) { + default: + $xfer += $input->skip($ftype); + + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + + return $xfer; } public function write($output) { - return $this->_write('XxHash', self::$_TSPEC, $output); + $xfer = 0; + $xfer += $output->writeStructBegin('XxHash'); + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + + return $xfer; } } diff --git a/src/lib/parquet/tests/Flow/Parquet/Tests/Integration/IO/ListsWritingTest.php b/src/lib/parquet/tests/Flow/Parquet/Tests/Integration/IO/ListsWritingTest.php index b72c6da23..ba6e096e2 100644 --- a/src/lib/parquet/tests/Flow/Parquet/Tests/Integration/IO/ListsWritingTest.php +++ b/src/lib/parquet/tests/Flow/Parquet/Tests/Integration/IO/ListsWritingTest.php @@ -264,4 +264,26 @@ public function test_writing_nullable_lists_of_ints() : void \iterator_to_array((new Reader())->read($path)->values()) ); } + + public function test_writing_repeated_lists() : void + { + $path = __DIR__ . '/var/test-writer-parquet-test-' . \Flow\ETL\DSL\generate_random_string() . 
'.parquet'; + + $writer = new Writer(); + $schema = Schema::with(NestedColumn::list('list_of_ints', ListElement::int32(), Schema\Repetition::REPEATED)); + + $writer->write($path, $schema, $inputData = [ + [ + 'list_of_ints' => [[1, 2, 3], [1, 2, 3]], + ], + [ + 'list_of_ints' => [[4, 5, 6], [4, 5, 6]], + ], + ]); + + self::assertSame( + $inputData, + \iterator_to_array((new Reader())->read($path)->values()) + ); + } } diff --git a/src/lib/parquet/tests/Flow/Parquet/Tests/Integration/IO/SimpleTypesWritingTest.php b/src/lib/parquet/tests/Flow/Parquet/Tests/Integration/IO/SimpleTypesWritingTest.php index 1a6277017..cd804438c 100644 --- a/src/lib/parquet/tests/Flow/Parquet/Tests/Integration/IO/SimpleTypesWritingTest.php +++ b/src/lib/parquet/tests/Flow/Parquet/Tests/Integration/IO/SimpleTypesWritingTest.php @@ -20,6 +20,26 @@ protected function setUp() : void } } + public function test_writing_array_of_ints() : void + { + $path = __DIR__ . '/var/test-writer-parquet-test-' . \Flow\ETL\DSL\generate_random_string() . '.parquet'; + + $writer = new Writer(); + $schema = Schema::with(FlatColumn::int32('array_of_ints', Schema\Repetition::REPEATED)); + + $writer->write($path, $schema, $inputData = [ + ['array_of_ints' => [1, 2, 3]], + ['array_of_ints' => [4, 5, 6]], + ]); + + self::assertEquals( + $inputData, + \iterator_to_array((new Reader())->read($path)->values()) + ); + self::assertTrue(\file_exists($path)); + \unlink($path); + } + public function test_writing_bool_column() : void { $path = __DIR__ . '/var/test-writer-parquet-test-' . \Flow\ETL\DSL\generate_random_string() . '.parquet';