Skip to content

Commit

Permalink
Fix: Corrected issue with merging Parquet files without Field ID
Browse files Browse the repository at this point in the history
  • Loading branch information
wangmingjin163 committed Aug 20, 2024
1 parent aa0f91f commit 6426e78
Showing 1 changed file with 10 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import org.apache.iceberg.encryption.EncryptionManager;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.io.InputFile;
import org.apache.iceberg.mapping.NameMappingParser;
import org.apache.iceberg.parquet.Parquet;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.TypeUtil;
Expand Down Expand Up @@ -191,13 +192,15 @@ private CloseableIterable<Record> openFile(
return avro.build();

case PARQUET:
Parquet.ReadBuilder parquet =
Parquet.read(input)
.project(fileProjection)
.createReaderFunc(
fileSchema ->
GenericParquetReaders.buildReader(
fileProjection, fileSchema, idToConstant));
Parquet.ReadBuilder parquet = Parquet.read(input)
.project(fileProjection)
.createReaderFunc(fileSchema ->
GenericParquetReaders.buildReader(fileProjection, fileSchema, idToConstant));

// Apply the name mapping when the Parquet file schema has no field IDs; otherwise columns are misaligned during compaction.
if (nameMapping != null && !nameMapping.isEmpty()) {
parquet.withNameMapping(NameMappingParser.fromJson(nameMapping));
}

if (reuseContainer) {
parquet.reuseContainers();
Expand Down

0 comments on commit 6426e78

Please sign in to comment.