Skip to content

Commit

Permalink
[core][spark] Fix comment consistency in the schema (apache#2600)
Browse files Browse the repository at this point in the history
  • Loading branch information
Zouxxyy authored Jan 2, 2024
1 parent 81bc13e commit 95d8006
Show file tree
Hide file tree
Showing 10 changed files with 185 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ public Builder option(String key, String value) {
}

/** Declares table comment. */
public Builder comment(String comment) {
public Builder comment(@Nullable String comment) {
this.comment = comment;
return this;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ static SchemaChange removeOption(String key) {
return new RemoveOption(key);
}

/**
 * Creates a {@link SchemaChange} that updates the table comment.
 *
 * @param comment the new table comment; {@code null} means the comment is removed
 * @return a new {@link UpdateComment} carrying {@code comment}
 */
static SchemaChange updateComment(@Nullable String comment) {
return new UpdateComment(comment);
}

/**
 * Creates an add-column change from a field name and a data type only.
 *
 * <p>Delegates to the four-argument {@code addColumn} overload, passing {@code null} for its
 * last two arguments.
 */
static SchemaChange addColumn(String fieldName, DataType dataType) {
return addColumn(fieldName, dataType, null, null);
}
Expand Down Expand Up @@ -159,6 +163,40 @@ public int hashCode() {
}
}

/** A {@link SchemaChange} that replaces the table comment. */
final class UpdateComment implements SchemaChange {

    private static final long serialVersionUID = 1L;

    /** The comment to store on the table; {@code null} means the comment is removed. */
    private final @Nullable String comment;

    private UpdateComment(@Nullable String comment) {
        this.comment = comment;
    }

    /** Returns the new table comment, or {@code null} when the comment is to be removed. */
    public @Nullable String comment() {
        return comment;
    }

    /** Two instances are equal when they are of the same class and carry equal comments. */
    @Override
    public boolean equals(Object object) {
        if (object == this) {
            return true;
        }
        boolean sameClass = object != null && object.getClass() == getClass();
        return sameClass && Objects.equals(comment, ((UpdateComment) object).comment);
    }

    @Override
    public int hashCode() {
        return Objects.hash(comment);
    }
}

/** A SchemaChange to add a field. */
final class AddColumn implements SchemaChange {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.apache.paimon.schema.SchemaChange.UpdateColumnNullability;
import org.apache.paimon.schema.SchemaChange.UpdateColumnPosition;
import org.apache.paimon.schema.SchemaChange.UpdateColumnType;
import org.apache.paimon.schema.SchemaChange.UpdateComment;
import org.apache.paimon.types.DataField;
import org.apache.paimon.types.DataType;
import org.apache.paimon.types.DataTypeCasts;
Expand Down Expand Up @@ -165,6 +166,7 @@ public TableSchema commitChanges(List<SchemaChange> changes)
Map<String, String> newOptions = new HashMap<>(schema.options());
List<DataField> newFields = new ArrayList<>(schema.fields());
AtomicInteger highestFieldId = new AtomicInteger(schema.highestFieldId());
String newComment = schema.comment();
for (SchemaChange change : changes) {
if (change instanceof SetOption) {
SetOption setOption = (SetOption) change;
Expand All @@ -174,6 +176,9 @@ public TableSchema commitChanges(List<SchemaChange> changes)
RemoveOption removeOption = (RemoveOption) change;
checkAlterTableOption(removeOption.key());
newOptions.remove(removeOption.key());
} else if (change instanceof UpdateComment) {
UpdateComment updateComment = (UpdateComment) change;
newComment = updateComment.comment();
} else if (change instanceof AddColumn) {
AddColumn addColumn = (AddColumn) change;
SchemaChange.Move move = addColumn.move();
Expand Down Expand Up @@ -341,7 +346,7 @@ public TableSchema commitChanges(List<SchemaChange> changes)
schema.partitionKeys(),
schema.primaryKeys(),
newOptions,
schema.comment());
newComment);

try {
boolean success = commit(newSchema);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import org.apache.paimon.types.DataTypeJsonParser;
import org.apache.paimon.utils.JsonDeserializer;
import org.apache.paimon.utils.JsonSerializer;
import org.apache.paimon.utils.StringUtils;

import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.core.JsonGenerator;
import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.databind.JsonNode;
Expand Down Expand Up @@ -72,7 +71,7 @@ public void serialize(TableSchema tableSchema, JsonGenerator generator) throws I
}
generator.writeEndObject();

if (!StringUtils.isNullOrWhitespaceOnly(tableSchema.comment())) {
if (tableSchema.comment() != null) {
generator.writeStringField("comment", tableSchema.comment());
}

Expand Down Expand Up @@ -114,7 +113,7 @@ public TableSchema deserialize(JsonNode node) {
}

JsonNode commentNode = node.get("comment");
String comment = "";
String comment = null;
if (commentNode != null) {
comment = commentNode.asText();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
import org.apache.paimon.utils.Preconditions;
import org.apache.paimon.utils.StringUtils;

import javax.annotation.Nullable;

import java.io.Serializable;
import java.util.Arrays;
import java.util.Collections;
Expand Down Expand Up @@ -56,7 +58,7 @@ public class TableSchema implements Serializable {

private final Map<String, String> options;

private final String comment;
private final @Nullable String comment;

private final long timeMillis;

Expand All @@ -67,7 +69,7 @@ public TableSchema(
List<String> partitionKeys,
List<String> primaryKeys,
Map<String, String> options,
String comment) {
@Nullable String comment) {
this(
id,
fields,
Expand All @@ -86,7 +88,7 @@ public TableSchema(
List<String> partitionKeys,
List<String> primaryKeys,
Map<String, String> options,
String comment,
@Nullable String comment,
long timeMillis) {
this.id = id;
this.fields = fields;
Expand Down Expand Up @@ -205,7 +207,7 @@ private boolean containsAll(List<String> all, List<String> contains) {
return new HashSet<>(all).containsAll(new HashSet<>(contains));
}

public String comment() {
public @Nullable String comment() {
return comment;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -801,4 +801,35 @@ public void testAlterTableUpdateColumnNullability() throws Exception {
UnsupportedOperationException.class,
"Cannot change nullability of primary key"));
}

/**
 * Verifies that {@code SchemaChange.updateComment} replaces the table comment and that passing
 * {@code null} removes it.
 */
@Test
public void testAlterTableUpdateComment() throws Exception {
    catalog.createDatabase("test_db", false);

    Identifier identifier = Identifier.create("test_db", "test_table");
    catalog.createTable(
            identifier,
            new Schema(
                    Lists.newArrayList(
                            new DataField(0, "col1", DataTypes.STRING(), "field1"),
                            new DataField(1, "col2", DataTypes.STRING(), "field2")),
                    Collections.emptyList(),
                    Collections.emptyList(),
                    Maps.newHashMap(),
                    "comment"),
            false);

    // Replace the comment the table was created with.
    catalog.alterTable(
            identifier, Lists.newArrayList(SchemaChange.updateComment("new comment")), false);

    // Assert on the Optional itself so a failure reports the actual comment rather than
    // just "expected true".
    Table table = catalog.getTable(identifier);
    assertThat(table.comment()).hasValue("new comment");

    // A null comment drops the table comment.
    catalog.alterTable(identifier, Lists.newArrayList(SchemaChange.updateComment(null)), false);

    table = catalog.getTable(identifier);
    assertThat(table.comment()).isNotPresent();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ public void testSerialization() throws Exception {
runTest(SchemaChange.updateColumnNullability(new String[] {"col1", "col2"}, true));
runTest(SchemaChange.updateColumnComment(new String[] {"col1", "col2"}, "comment"));
runTest(SchemaChange.updateColumnPosition(SchemaChange.Move.after("col", "ref")));
runTest(SchemaChange.updateComment("comment"));
}

private void runTest(SchemaChange schemaChange) throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ public SparkTable createTable(
throws TableAlreadyExistsException, NoSuchNamespaceException {
try {
catalog.createTable(
toIdentifier(ident), toUpdateSchema(schema, partitions, properties), false);
toIdentifier(ident), toInitialSchema(schema, partitions, properties), false);
return loadTable(ident);
} catch (Catalog.TableAlreadyExistException e) {
throw new TableAlreadyExistsException(ident);
Expand All @@ -318,11 +318,19 @@ private SchemaChange toSchemaChange(TableChange change) {
if (change instanceof TableChange.SetProperty) {
TableChange.SetProperty set = (TableChange.SetProperty) change;
validateAlterProperty(set.property());
return SchemaChange.setOption(set.property(), set.value());
if (set.property().equals(TableCatalog.PROP_COMMENT)) {
return SchemaChange.updateComment(set.value());
} else {
return SchemaChange.setOption(set.property(), set.value());
}
} else if (change instanceof TableChange.RemoveProperty) {
TableChange.RemoveProperty remove = (TableChange.RemoveProperty) change;
validateAlterProperty(remove.property());
return SchemaChange.removeOption(remove.property());
if (remove.property().equals(TableCatalog.PROP_COMMENT)) {
return SchemaChange.updateComment(null);
} else {
return SchemaChange.removeOption(remove.property());
}
} else if (change instanceof TableChange.AddColumn) {
TableChange.AddColumn add = (TableChange.AddColumn) change;
validateAlterNestedField(add.fieldNames());
Expand Down Expand Up @@ -375,7 +383,7 @@ private static SchemaChange.Move getMove(
return move;
}

private Schema toUpdateSchema(
private Schema toInitialSchema(
StructType schema, Transform[] partitions, Map<String, String> properties) {
Preconditions.checkArgument(
Arrays.stream(partitions)
Expand All @@ -387,6 +395,7 @@ private Schema toUpdateSchema(
}));
Map<String, String> normalizedProperties = new HashMap<>(properties);
normalizedProperties.remove(PRIMARY_KEY_IDENTIFIER);
normalizedProperties.remove(TableCatalog.PROP_COMMENT);
String pkAsString = properties.get(PRIMARY_KEY_IDENTIFIER);
List<String> primaryKeys =
pkAsString == null
Expand All @@ -402,7 +411,7 @@ private Schema toUpdateSchema(
Arrays.stream(partitions)
.map(partition -> partition.references()[0].describe())
.collect(Collectors.toList()))
.comment(properties.getOrDefault(TableCatalog.PROP_COMMENT, ""));
.comment(properties.getOrDefault(TableCatalog.PROP_COMMENT, null));

for (StructField field : schema.fields()) {
schemaBuilder.column(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ public Map<String, String> properties() {
CoreOptions.PRIMARY_KEY.key(), String.join(",", table.primaryKeys()));
}
properties.put(TableCatalog.PROP_PROVIDER, SparkSource.NAME());
if (table.comment().isPresent()) {
properties.put(TableCatalog.PROP_COMMENT, table.comment().get());
}
return properties;
} else {
return Collections.emptyMap();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.paimon.spark.sql

import org.apache.paimon.spark.PaimonSparkTestBase

import org.apache.spark.sql.Row
import org.junit.jupiter.api.Assertions

import java.util.Objects

/**
 * Checks that a table comment is reported consistently by `DESCRIBE TABLE EXTENDED` and by the
 * loaded table schema after it is created, altered and unset.
 */
class DescribeTableTest extends PaimonSparkTestBase {

  test("Paimon describe: describe table comment") {
    val initialComment = "test comment"
    spark.sql(s"""
                 |CREATE TABLE T (
                 | id INT COMMENT 'id comment',
                 | name STRING,
                 | dt STRING)
                 |COMMENT '$initialComment'
                 |""".stripMargin)
    checkTableCommentEqual("T", initialComment)

    // A regular, a whitespace-only and an empty comment are each set and checked in turn.
    Seq("new comment", " ", "").foreach(setCommentAndCheck)

    // After unsetting the property no comment is expected at all.
    spark.sql("ALTER TABLE T UNSET TBLPROPERTIES ('comment')")
    checkTableCommentEqual("T", null)

    // A comment can be set again after it was removed.
    setCommentAndCheck("new comment")
  }

  test("Paimon describe: describe table with no comment") {
    spark.sql("""
                |CREATE TABLE T (
                | id INT COMMENT 'id comment',
                | name STRING,
                | dt STRING)
                |""".stripMargin)
    checkTableCommentEqual("T", null)
  }

  /** Sets the comment of table T through TBLPROPERTIES and checks that it is reported back. */
  private def setCommentAndCheck(newComment: String): Unit = {
    spark.sql(s"ALTER TABLE T SET TBLPROPERTIES ('comment' = '$newComment')")
    checkTableCommentEqual("T", newComment)
  }

  /**
   * Asserts that `tableName` has the given comment, both in the `DESCRIBE TABLE EXTENDED` output
   * and in the loaded table schema. A `null` comment means no "Comment" row is expected.
   */
  def checkTableCommentEqual(tableName: String, comment: String): Unit = {
    // check describe table
    val describedComment = spark
      .sql(s"DESCRIBE TABLE EXTENDED $tableName")
      .filter("col_name = 'Comment'")
      .select("col_name", "data_type")
    val expectedRows = Option(comment).map(text => Row("Comment", text)).toList
    checkAnswer(describedComment, expectedRows)

    // check comment in schema
    val schemaComment = loadTable(tableName).schema().comment()
    Assertions.assertTrue(Objects.equals(comment, schemaComment))
  }
}

0 comments on commit 95d8006

Please sign in to comment.