diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/writer/Vectorizer.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/writer/Vectorizer.java index bd2967bcc97a..f1341b0b8b50 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/writer/Vectorizer.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/writer/Vectorizer.java @@ -24,7 +24,6 @@ import java.io.IOException; import java.io.Serializable; -import java.nio.ByteBuffer; import static org.apache.paimon.utils.Preconditions.checkNotNull; @@ -67,20 +66,6 @@ public void setWriter(Writer writer) { this.writer = writer; } - /** - * Adds arbitrary user metadata to the outgoing ORC file. - * - *

Users who want to dynamically add new metadata either based on either the input or from an - * external system can do so by calling addUserMetadata(...) inside the overridden - * vectorize() method. - * - * @param key a key to label the data with. - * @param value the contents of the metadata. - */ - public void addUserMetadata(String key, ByteBuffer value) { - this.writer.addUserMetadata(key, value); - } - /** * Transforms the provided element to ColumnVectors and sets them in the exposed * VectorizedRowBatch. diff --git a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcBulkWriterTestUtil.java b/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcBulkWriterTestUtil.java deleted file mode 100644 index b24f3d1ba952..000000000000 --- a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcBulkWriterTestUtil.java +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.paimon.format.orc; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.orc.CompressionKind; -import org.apache.orc.OrcFile; -import org.apache.orc.Reader; -import org.apache.orc.RecordReader; - -import java.io.File; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.List; - -import static org.assertj.core.api.Assertions.assertThat; - -/** Util class for the OrcBulkWriter tests. */ -public class OrcBulkWriterTestUtil { - - public static final String USER_METADATA_KEY = "userKey"; - public static final ByteBuffer USER_METADATA_VALUE = ByteBuffer.wrap("hello".getBytes()); - - public static void validate(File files, List expected) throws IOException { - final File[] buckets = files.listFiles(); - assertThat(buckets).isNotNull(); - assertThat(buckets).hasSize(1); - - final File[] partFiles = buckets[0].listFiles(); - assertThat(partFiles).isNotNull(); - - for (File partFile : partFiles) { - assertThat(partFile.length()).isGreaterThan(0); - - OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(new Configuration()); - Reader reader = - OrcFile.createReader( - new org.apache.hadoop.fs.Path(partFile.toURI()), readerOptions); - - assertThat(reader.getNumberOfRows()).isEqualTo(3); - assertThat(reader.getSchema().getFieldNames()).hasSize(2); - assertThat(reader.getCompressionKind()).isSameAs(CompressionKind.LZ4); - assertThat(reader.hasMetadataValue(USER_METADATA_KEY)).isTrue(); - assertThat(reader.getMetadataKeys()).contains(USER_METADATA_KEY); - - List results = getResults(reader); - - assertThat(results).hasSize(3).isEqualTo(expected); - } - } - - private static List getResults(Reader reader) throws IOException { - List results = new ArrayList<>(); - - RecordReader recordReader = reader.rows(); - VectorizedRowBatch batch = reader.getSchema().createRowBatch(); - - while (recordReader.nextBatch(batch)) { - BytesColumnVector stringVector = (BytesColumnVector) batch.cols[0]; - LongColumnVector intVector = (LongColumnVector) batch.cols[1]; - for (int r = 0; r < batch.size; r++) { - String name = - new String( - stringVector.vector[r], - stringVector.start[r], - stringVector.length[r]); - int age = (int) intVector.vector[r]; - - results.add(new Record(name, age)); - } - recordReader.close(); - } - - return results; - } -} diff --git a/paimon-format/src/test/java/org/apache/paimon/format/orc/RecordVectorizer.java b/paimon-format/src/test/java/org/apache/paimon/format/orc/RecordVectorizer.java deleted file mode 100644 index 624fa6cc1838..000000000000 --- a/paimon-format/src/test/java/org/apache/paimon/format/orc/RecordVectorizer.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.paimon.format.orc; - -import org.apache.paimon.format.orc.writer.Vectorizer; - -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; - -import java.io.IOException; -import java.io.Serializable; -import java.nio.charset.StandardCharsets; - -/** - * A Vectorizer implementation used for tests. - * - *

It transforms an input element which is of type {@link Record} to a VectorizedRowBatch. - */ -public class RecordVectorizer extends Vectorizer implements Serializable { - - public RecordVectorizer(String schema) { - super(schema); - } - - @Override - public void vectorize(Record element, VectorizedRowBatch batch) throws IOException { - BytesColumnVector stringVector = (BytesColumnVector) batch.cols[0]; - LongColumnVector intColVector = (LongColumnVector) batch.cols[1]; - - int row = batch.size++; - - stringVector.setVal(row, element.getName().getBytes(StandardCharsets.UTF_8)); - intColVector.vector[row] = element.getAge(); - - this.addUserMetadata( - OrcBulkWriterTestUtil.USER_METADATA_KEY, OrcBulkWriterTestUtil.USER_METADATA_VALUE); - } -}