From 6180ed6f7fbd7651619d18f77f763510e3284f6f Mon Sep 17 00:00:00 2001 From: yuqi Date: Fri, 13 Sep 2024 19:29:00 +0800 Subject: [PATCH 01/17] Support S3 filesystem for Paimon catalog. --- .../catalog-lakehouse-paimon/build.gradle.kts | 4 + .../paimon/PaimonCatalogOperations.java | 5 +- .../PaimonCatalogPropertiesMetadata.java | 12 ++ .../paimon/filesystem/FileSystemType.java | 47 ++++++++ .../s3/PaimonS3FileSystemConfig.java | 104 ++++++++++++++++++ .../lakehouse/paimon/utils/CatalogUtils.java | 34 ++++++ .../integration/test/CatalogPaimonBaseIT.java | 6 +- .../integration/test/CatalogPaimonS3IT.java | 102 +++++++++++++++++ gradle/libs.versions.toml | 6 + 9 files changed, 318 insertions(+), 2 deletions(-) create mode 100644 catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/FileSystemType.java create mode 100644 catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/s3/PaimonS3FileSystemConfig.java create mode 100644 catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonS3IT.java diff --git a/catalogs/catalog-lakehouse-paimon/build.gradle.kts b/catalogs/catalog-lakehouse-paimon/build.gradle.kts index 3974fba61f3..0d6c75907fa 100644 --- a/catalogs/catalog-lakehouse-paimon/build.gradle.kts +++ b/catalogs/catalog-lakehouse-paimon/build.gradle.kts @@ -82,6 +82,10 @@ dependencies { testImplementation(libs.bundles.log4j) testImplementation(libs.junit.jupiter.params) testImplementation(libs.testcontainers) + testImplementation(libs.paimon.s3) + testImplementation(libs.paimon.spark) + testImplementation(libs.hadoop.aws) + testImplementation(libs.testcontainers.localstack) testRuntimeOnly(libs.junit.jupiter.engine) } diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogOperations.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogOperations.java index fbe6f4be49d..3188cd62cbe 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogOperations.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogOperations.java @@ -39,6 +39,7 @@ import org.apache.gravitino.Namespace; import org.apache.gravitino.SchemaChange; import org.apache.gravitino.catalog.lakehouse.paimon.ops.PaimonCatalogOps; +import org.apache.gravitino.catalog.lakehouse.paimon.utils.CatalogUtils; import org.apache.gravitino.catalog.lakehouse.paimon.utils.TableOpsUtils; import org.apache.gravitino.connector.CatalogInfo; import org.apache.gravitino.connector.CatalogOperations; @@ -114,7 +115,9 @@ public void initialize( Map resultConf = Maps.newHashMap(prefixMap); resultConf.putAll(gravitinoConfig); - this.paimonCatalogOps = new PaimonCatalogOps(new PaimonConfig(resultConf)); + PaimonConfig paimonConfig = new PaimonConfig(resultConf); + CatalogUtils.checkWarehouseConfig(paimonConfig, resultConf); + this.paimonCatalogOps = new PaimonCatalogOps(paimonConfig); } /** diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogPropertiesMetadata.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogPropertiesMetadata.java index 9b7d45c77bd..5d2ba9e753f 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogPropertiesMetadata.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogPropertiesMetadata.java @@ -31,6 +31,7 @@ import java.util.Map; import org.apache.gravitino.catalog.lakehouse.paimon.authentication.AuthenticationConfig; import org.apache.gravitino.catalog.lakehouse.paimon.authentication.kerberos.KerberosConfig; +import org.apache.gravitino.catalog.lakehouse.paimon.filesystem.s3.PaimonS3FileSystemConfig; import org.apache.gravitino.connector.BaseCatalogPropertiesMetadata; import org.apache.gravitino.connector.PropertiesMetadata; import org.apache.gravitino.connector.PropertyEntry; @@ -61,6 +62,12 @@ public class PaimonCatalogPropertiesMetadata extends BaseCatalogPropertiesMetada AuthenticationConfig.AUTH_TYPE_KEY, AuthenticationConfig.AUTH_TYPE_KEY); + private static final Map S3_CONFIGURATION = + ImmutableMap.of( + PaimonS3FileSystemConfig.S3_ACCESS_KEY, PaimonS3FileSystemConfig.S3_ACCESS_KEY, + PaimonS3FileSystemConfig.S3_SECRET_KEY, PaimonS3FileSystemConfig.S3_SECRET_KEY, + PaimonS3FileSystemConfig.S3_ENDPOINT, PaimonS3FileSystemConfig.S3_ENDPOINT); + static { List> propertyEntries = ImmutableList.of( @@ -88,6 +95,7 @@ public class PaimonCatalogPropertiesMetadata extends BaseCatalogPropertiesMetada result.putAll(Maps.uniqueIndex(propertyEntries, PropertyEntry::getName)); result.putAll(KerberosConfig.KERBEROS_PROPERTY_ENTRIES); result.putAll(AuthenticationConfig.AUTHENTICATION_PROPERTY_ENTRIES); + result.putAll(PaimonS3FileSystemConfig.S3_FILESYSTEM_PROPERTY_ENTRIES); PROPERTIES_METADATA = ImmutableMap.copyOf(result); } @@ -107,6 +115,10 @@ protected Map transformProperties(Map properties if (KERBEROS_CONFIGURATION.containsKey(key)) { gravitinoConfig.put(KERBEROS_CONFIGURATION.get(key), value); } + + if (S3_CONFIGURATION.containsKey(key)) { + gravitinoConfig.put(S3_CONFIGURATION.get(key), value); + } }); return gravitinoConfig; } diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/FileSystemType.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/FileSystemType.java new file mode 100644 index 00000000000..eb531a31dd0 --- /dev/null +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/FileSystemType.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.lakehouse.paimon.filesystem; + +public enum FileSystemType { + HDFS, + S3, + OSS; + + public static FileSystemType fromString(String type) { + for (FileSystemType fileSystemType : FileSystemType.values()) { + if (fileSystemType.name().equalsIgnoreCase(type)) { + return fileSystemType; + } + } + + throw new IllegalArgumentException("Unsupported file system type: " + type); + } + + public static FileSystemType fromStoragePath(String storagePath) { + if (storagePath.startsWith("s3://")) { + return S3; + } else if (storagePath.startsWith("oss://")) { + return OSS; + } else if (storagePath.startsWith("hdfs://")) { + return HDFS; + } + + throw new IllegalArgumentException("Unsupported storage path: " + storagePath); + } +} diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/s3/PaimonS3FileSystemConfig.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/s3/PaimonS3FileSystemConfig.java new file mode 100644 index 00000000000..0b47a703908 --- /dev/null +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/s3/PaimonS3FileSystemConfig.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.lakehouse.paimon.filesystem.s3; + +import com.google.common.collect.ImmutableMap; +import java.util.Map; +import org.apache.commons.lang3.StringUtils; +import org.apache.gravitino.Config; +import org.apache.gravitino.config.ConfigBuilder; +import org.apache.gravitino.config.ConfigConstants; +import org.apache.gravitino.config.ConfigEntry; +import org.apache.gravitino.connector.PropertyEntry; + +public class PaimonS3FileSystemConfig extends Config { + // S3 related properties + public static final String S3_ENDPOINT = "s3.endpoint"; + public static final String S3_ACCESS_KEY = "s3.access-key"; + public static final String S3_SECRET_KEY = "s3.secret-key"; + + public PaimonS3FileSystemConfig(Map properties) { + super(false); + loadFromMap(properties, k -> true); + } + + public static final ConfigEntry PAIMON_S3_ENDPOINT_ENTRY = + new ConfigBuilder(S3_ENDPOINT) + .doc("The endpoint of the AWS s3") + .version(ConfigConstants.VERSION_0_7_0) + .stringConf() + .checkValue(StringUtils::isNotBlank, ConfigConstants.NOT_BLANK_ERROR_MSG) + .create(); + + public static final ConfigEntry PAIMON_S3_ACCESS_KEY_ENTRY = + new ConfigBuilder(S3_ACCESS_KEY) + .doc("The access key of the AWS s3") + .version(ConfigConstants.VERSION_0_7_0) + .stringConf() + .checkValue(StringUtils::isNotBlank, ConfigConstants.NOT_BLANK_ERROR_MSG) + .create(); + + public static final ConfigEntry PAIMON_S3_SECRET_KEY_ENTRY = + new ConfigBuilder(S3_SECRET_KEY) + .doc("The secret key of the AWS s3") + .version(ConfigConstants.VERSION_0_7_0) + .stringConf() + .checkValue(StringUtils::isNotBlank, ConfigConstants.NOT_BLANK_ERROR_MSG) + .create(); + + public String getS3Endpoint() { + return get(PAIMON_S3_ENDPOINT_ENTRY); + } + + public String getS3AccessKey() { + return get(PAIMON_S3_ACCESS_KEY_ENTRY); + } + + public String getS3SecretKey() { + return get(PAIMON_S3_SECRET_KEY_ENTRY); + } + + public static final Map> S3_FILESYSTEM_PROPERTY_ENTRIES = + new ImmutableMap.Builder>() + .put( + S3_ENDPOINT, + PropertyEntry.stringOptionalPropertyEntry( + S3_ENDPOINT, + "The endpoint of the AWS s3", + true /* immutable */, + null /* defaultValue */, + false /* hidden */)) + .put( + S3_ACCESS_KEY, + PropertyEntry.stringOptionalPropertyEntry( + S3_ACCESS_KEY, + "The access key of the AWS s3", + true /* immutable */, + null /* defaultValue */, + false /* hidden */)) + .put( + S3_SECRET_KEY, + PropertyEntry.stringOptionalPropertyEntry( + S3_SECRET_KEY, + "The secret key of the AWS s3", + true /* immutable */, + null /* defaultValue */, + false /* hidden */)) + .build(); +} diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/CatalogUtils.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/CatalogUtils.java index 848063904f4..87f28a4f6af 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/CatalogUtils.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/CatalogUtils.java @@ -33,6 +33,8 @@ import org.apache.gravitino.catalog.lakehouse.paimon.PaimonConfig; import org.apache.gravitino.catalog.lakehouse.paimon.authentication.AuthenticationConfig; import org.apache.gravitino.catalog.lakehouse.paimon.authentication.kerberos.KerberosClient; +import org.apache.gravitino.catalog.lakehouse.paimon.filesystem.FileSystemType; +import org.apache.gravitino.catalog.lakehouse.paimon.filesystem.s3.PaimonS3FileSystemConfig; import org.apache.gravitino.catalog.lakehouse.paimon.ops.PaimonBackendCatalogWrapper; import org.apache.hadoop.conf.Configuration; import org.apache.paimon.catalog.Catalog; @@ -120,4 +122,36 @@ private static void checkPaimonConfig(PaimonConfig paimonConfig) { StringUtils.isNotBlank(uri), "Paimon Catalog uri can not be null or empty."); } } + + public static void checkWarehouseConfig( + PaimonConfig paimonConfig, Map resultConf) { + String warehouse = paimonConfig.get(CATALOG_WAREHOUSE); + Preconditions.checkArgument( + StringUtils.isNotBlank(warehouse), "Paimon Catalog warehouse can not be null or empty."); + + FileSystemType fileSystemType = FileSystemType.fromStoragePath(warehouse); + switch (fileSystemType) { + case S3: + checkS3FileSystemConfig(resultConf); + break; + case HDFS: + case OSS: + break; + default: + throw new IllegalArgumentException("Unsupported file system type: " + fileSystemType); + } + } + + private static void checkS3FileSystemConfig(Map resultConf) { + PaimonS3FileSystemConfig s3FileSystemConfig = new PaimonS3FileSystemConfig(resultConf); + Preconditions.checkArgument( + StringUtils.isNotBlank(s3FileSystemConfig.getS3AccessKey()), + "S3 access key can not be null or empty."); + Preconditions.checkArgument( + StringUtils.isNotBlank(s3FileSystemConfig.getS3SecretKey()), + "S3 secret key can not be null or empty."); + Preconditions.checkArgument( + StringUtils.isNotBlank(s3FileSystemConfig.getS3Endpoint()), + "S3 endpoint can not be null or empty."); + } } diff --git a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java index e2cb6d6e168..07a52735ee0 100644 --- a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java +++ b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java @@ -114,7 +114,7 @@ public abstract class CatalogPaimonBaseIT extends AbstractIT { @BeforeAll public void startup() { - containerSuite.startHiveContainer(); + startNecessaryContainers(); catalogProperties = initPaimonCatalogProperties(); createMetalake(); createCatalog(); @@ -122,6 +122,10 @@ public void startup() { initSparkEnv(); } + protected void startNecessaryContainers() { + containerSuite.startHiveContainer(); + } + @AfterAll public void stop() { clearTableAndSchema(); diff --git a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonS3IT.java b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonS3IT.java new file mode 100644 index 00000000000..933ca241734 --- /dev/null +++ b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonS3IT.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.catalog.lakehouse.paimon.integration.test; + +import static org.testcontainers.containers.localstack.LocalStackContainer.Service.S3; + +import com.google.common.collect.Maps; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import org.apache.gravitino.catalog.lakehouse.paimon.PaimonCatalogPropertiesMetadata; +import org.apache.gravitino.catalog.lakehouse.paimon.filesystem.s3.PaimonS3FileSystemConfig; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.testcontainers.containers.Container; +import org.testcontainers.containers.localstack.LocalStackContainer; +import org.testcontainers.shaded.org.awaitility.Awaitility; +import org.testcontainers.utility.DockerImageName; + +@Tag("gravitino-docker-test") +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +public class CatalogPaimonS3IT extends CatalogPaimonBaseIT { + + // private static final String S3_BUCKET_NAME = + // GravitinoITUtils.genRandomName("paimon-s3-bucket-"); + private static final String S3_BUCKET_NAME = "my-test-bucket"; + private static LocalStackContainer localStackContainer; + + @Override + protected Map initPaimonCatalogProperties() { + + Map catalogProperties = Maps.newHashMap(); + catalogProperties.put("key1", "val1"); + catalogProperties.put("key2", "val2"); + + TYPE = "filesystem"; + WAREHOUSE = "s3://" + S3_BUCKET_NAME + "/"; + + String accessKey = localStackContainer.getAccessKey(); + String secretKey = localStackContainer.getSecretKey(); + String endpoint = localStackContainer.getEndpointOverride(S3).toString(); + + catalogProperties.put(PaimonCatalogPropertiesMetadata.GRAVITINO_CATALOG_BACKEND, TYPE); + catalogProperties.put(PaimonCatalogPropertiesMetadata.WAREHOUSE, WAREHOUSE); + catalogProperties.put(PaimonS3FileSystemConfig.S3_ACCESS_KEY, accessKey); + catalogProperties.put(PaimonS3FileSystemConfig.S3_SECRET_KEY, secretKey); + catalogProperties.put(PaimonS3FileSystemConfig.S3_ENDPOINT, endpoint); + + return catalogProperties; + } + + @Override + protected void startNecessaryContainers() { + localStackContainer = + new LocalStackContainer(DockerImageName.parse("localstack/localstack")).withServices(S3); + localStackContainer.start(); + + Awaitility.await() + .atMost(60, TimeUnit.SECONDS) + .pollInterval(1, TimeUnit.SECONDS) + .until( + () -> { + try { + Container.ExecResult result = + localStackContainer.execInContainer( + "awslocal", "s3", "mb", "s3://" + S3_BUCKET_NAME); + return result.getExitCode() == 0; + } catch (Exception e) { + return false; + } + }); + } + + @AfterAll + public void stop() { + super.stop(); + localStackContainer.stop(); + } + + @Test + void testOperationDataOfPaimonTable() { + // Something wrong to use spark to read data from paimon with s3 + } +} diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 4efb10eb220..1be30a02e50 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -97,6 +97,7 @@ mail = "1.4.1" rome = "1.0" jettison = "1.1" thrift = "0.12.0" +hadoop-s3 = "3.3.6" [libraries] protobuf-java = { group = "com.google.protobuf", name = "protobuf-java", version.ref = "protoc" } @@ -144,6 +145,7 @@ hadoop3-hdfs = { group = "org.apache.hadoop", name = "hadoop-hdfs", version.ref hadoop3-common = { group = "org.apache.hadoop", name = "hadoop-common", version.ref = "hadoop3"} hadoop3-client = { group = "org.apache.hadoop", name = "hadoop-client", version.ref = "hadoop3"} hadoop3-minicluster = { group = "org.apache.hadoop", name = "hadoop-minicluster", version.ref = "hadoop-minikdc"} +hadoop-aws = { group = "org.apache.hadoop", name = "hadoop-aws", version.ref = "hadoop-s3"} htrace-core4 = { group = "org.apache.htrace", name = "htrace-core4", version.ref = "htrace-core4" } airlift-json = { group = "io.airlift", name = "json", version.ref = "airlift-json"} airlift-resolver = { group = "io.airlift.resolver", name = "resolver", version.ref = "airlift-resolver"} @@ -166,6 +168,9 @@ iceberg-gcp = { group = "org.apache.iceberg", name = "iceberg-gcp", version.ref paimon-core = { group = "org.apache.paimon", name = "paimon-core", version.ref = "paimon" } paimon-format = { group = "org.apache.paimon", name = "paimon-format", version.ref = "paimon" } paimon-hive-catalog = { group = "org.apache.paimon", name = "paimon-hive-catalog", version.ref = "paimon" } +paimon-s3 = { group = "org.apache.paimon", name = "paimon-s3", version.ref = "paimon" } +paimon-spark = { group = "org.apache.paimon", name = "paimon-spark", version.ref = "paimon" } + trino-spi= { group = "io.trino", name = "trino-spi", version.ref = "trino" } trino-testing= { group = "io.trino", name = "trino-testing", version.ref = "trino" } trino-memory= { group = "io.trino", name = "trino-memory", version.ref = "trino" } @@ -177,6 +182,7 @@ testcontainers = { group = "org.testcontainers", name = "testcontainers", versio testcontainers-mysql = { group = "org.testcontainers", name = "mysql", version.ref = "testcontainers" } testcontainers-postgresql = { group = "org.testcontainers", name = "postgresql", version.ref = "testcontainers" } testcontainers-junit-jupiter = { group = "org.testcontainers", name = "junit-jupiter", version.ref = "testcontainers" } +testcontainers-localstack = { group = "org.testcontainers", name = "localstack", version.ref = "testcontainers" } trino-jdbc = { group = "io.trino", name = "trino-jdbc", version.ref = "trino" } jwt-api = { group = "io.jsonwebtoken", name = "jjwt-api", version.ref = "jwt"} jwt-impl = { group = "io.jsonwebtoken", name = "jjwt-impl", version.ref = "jwt"} From 69e8b2840600f633ce6af09f59ee88888b2c4d68 Mon Sep 17 00:00:00 2001 From: yuqi Date: Fri, 13 Sep 2024 19:31:45 +0800 Subject: [PATCH 02/17] Fix --- .../paimon/filesystem/s3/PaimonS3FileSystemConfig.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/s3/PaimonS3FileSystemConfig.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/s3/PaimonS3FileSystemConfig.java index 0b47a703908..0190d6730be 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/s3/PaimonS3FileSystemConfig.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/s3/PaimonS3FileSystemConfig.java @@ -81,7 +81,7 @@ public String getS3SecretKey() { PropertyEntry.stringOptionalPropertyEntry( S3_ENDPOINT, "The endpoint of the AWS s3", - true /* immutable */, + false/* immutable */, null /* defaultValue */, false /* hidden */)) .put( @@ -89,7 +89,7 @@ public String getS3SecretKey() { PropertyEntry.stringOptionalPropertyEntry( S3_ACCESS_KEY, "The access key of the AWS s3", - true /* immutable */, + false /* immutable */, null /* defaultValue */, false /* hidden */)) .put( @@ -97,7 +97,7 @@ public String getS3SecretKey() { PropertyEntry.stringOptionalPropertyEntry( S3_SECRET_KEY, "The secret key of the AWS s3", - true /* immutable */, + false /* immutable */, null /* defaultValue */, false /* hidden */)) .build(); From 1ff547af433408a14649ac4f1085c41476bb2260 Mon Sep 17 00:00:00 2001 From: yuqi Date: Fri, 13 Sep 2024 19:55:53 +0800 Subject: [PATCH 03/17] Fix --- .../catalog-lakehouse-paimon/build.gradle.kts | 4 +-- .../s3/PaimonS3FileSystemConfig.java | 2 +- .../integration/test/CatalogPaimonBaseIT.java | 4 +-- .../integration/test/CatalogPaimonS3IT.java | 30 ++++++++++++++----- gradle/libs.versions.toml | 2 -- 5 files changed, 28 insertions(+), 14 deletions(-) diff --git a/catalogs/catalog-lakehouse-paimon/build.gradle.kts b/catalogs/catalog-lakehouse-paimon/build.gradle.kts index 0d6c75907fa..d1846361db7 100644 --- a/catalogs/catalog-lakehouse-paimon/build.gradle.kts +++ b/catalogs/catalog-lakehouse-paimon/build.gradle.kts @@ -81,11 +81,11 @@ dependencies { testImplementation(libs.postgresql.driver) testImplementation(libs.bundles.log4j) testImplementation(libs.junit.jupiter.params) - testImplementation(libs.testcontainers) testImplementation(libs.paimon.s3) testImplementation(libs.paimon.spark) - testImplementation(libs.hadoop.aws) + testImplementation(libs.testcontainers) testImplementation(libs.testcontainers.localstack) +// testImplementation(libs.hadoop.aws) testRuntimeOnly(libs.junit.jupiter.engine) } diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/s3/PaimonS3FileSystemConfig.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/s3/PaimonS3FileSystemConfig.java index 0190d6730be..1f6c61f69f1 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/s3/PaimonS3FileSystemConfig.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/s3/PaimonS3FileSystemConfig.java @@ -81,7 +81,7 @@ public String getS3SecretKey() { PropertyEntry.stringOptionalPropertyEntry( S3_ENDPOINT, "The endpoint of the AWS s3", - false/* immutable */, + false /* immutable */, null /* defaultValue */, false /* hidden */)) .put( diff --git a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java index 07a52735ee0..cb0a2223d8d 100644 --- a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java +++ b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java @@ -109,7 +109,7 @@ public abstract class CatalogPaimonBaseIT extends AbstractIT { private GravitinoMetalake metalake; private Catalog catalog; private org.apache.paimon.catalog.Catalog paimonCatalog; - private SparkSession spark; + protected SparkSession spark; private Map catalogProperties; @BeforeAll @@ -930,7 +930,7 @@ private Map createProperties() { return properties; } - private void initSparkEnv() { + protected void initSparkEnv() { spark = SparkSession.builder() .master("local[1]") diff --git a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonS3IT.java b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonS3IT.java index 933ca241734..d898edd510c 100644 --- a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonS3IT.java +++ b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonS3IT.java @@ -26,9 +26,9 @@ import java.util.concurrent.TimeUnit; import org.apache.gravitino.catalog.lakehouse.paimon.PaimonCatalogPropertiesMetadata; import org.apache.gravitino.catalog.lakehouse.paimon.filesystem.s3.PaimonS3FileSystemConfig; +import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Tag; -import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; import org.testcontainers.containers.Container; import org.testcontainers.containers.localstack.LocalStackContainer; @@ -43,6 +43,9 @@ public class CatalogPaimonS3IT extends CatalogPaimonBaseIT { // GravitinoITUtils.genRandomName("paimon-s3-bucket-"); private static final String S3_BUCKET_NAME = "my-test-bucket"; private static LocalStackContainer localStackContainer; + private String accessKey; + private String secretKey; + private String endpoint; @Override protected Map initPaimonCatalogProperties() { @@ -54,9 +57,9 @@ protected Map initPaimonCatalogProperties() { TYPE = "filesystem"; WAREHOUSE = "s3://" + S3_BUCKET_NAME + "/"; - String accessKey = localStackContainer.getAccessKey(); - String secretKey = localStackContainer.getSecretKey(); - String endpoint = localStackContainer.getEndpointOverride(S3).toString(); + accessKey = localStackContainer.getAccessKey(); + secretKey = localStackContainer.getSecretKey(); + endpoint = localStackContainer.getEndpointOverride(S3).toString(); catalogProperties.put(PaimonCatalogPropertiesMetadata.GRAVITINO_CATALOG_BACKEND, TYPE); catalogProperties.put(PaimonCatalogPropertiesMetadata.WAREHOUSE, WAREHOUSE); @@ -95,8 +98,21 @@ public void stop() { localStackContainer.stop(); } - @Test - void testOperationDataOfPaimonTable() { - // Something wrong to use spark to read data from paimon with s3 + protected void initSparkEnv() { + spark = + SparkSession.builder() + .master("local[1]") + .appName("Paimon Catalog integration test") + .config("spark.sql.warehouse.dir", WAREHOUSE) + .config("spark.sql.catalog.paimon", "org.apache.paimon.spark.SparkCatalog") + .config("spark.sql.catalog.paimon.warehouse", WAREHOUSE) + .config( + "spark.sql.extensions", + "org.apache.paimon.spark.extensions.PaimonSparkSessionExtensions") + .config("spark.sql.catalog.paimon.s3.access-key", accessKey) + .config("spark.sql.catalog.paimon.s3.secret-key", secretKey) + .config("spark.sql.catalog.paimon.s3.endpoint", endpoint) + .enableHiveSupport() + .getOrCreate(); } } diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 1be30a02e50..0fbe92e8328 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -97,7 +97,6 @@ mail = "1.4.1" rome = "1.0" jettison = "1.1" thrift = "0.12.0" -hadoop-s3 = "3.3.6" [libraries] protobuf-java = { group = "com.google.protobuf", name = "protobuf-java", version.ref = "protoc" } @@ -145,7 +144,6 @@ hadoop3-hdfs = { group = "org.apache.hadoop", name = "hadoop-hdfs", version.ref hadoop3-common = { group = "org.apache.hadoop", name = "hadoop-common", version.ref = "hadoop3"} hadoop3-client = { group = "org.apache.hadoop", name = "hadoop-client", version.ref = "hadoop3"} hadoop3-minicluster = { group = "org.apache.hadoop", name = "hadoop-minicluster", version.ref = "hadoop-minikdc"} -hadoop-aws = { group = "org.apache.hadoop", name = "hadoop-aws", version.ref = "hadoop-s3"} htrace-core4 = { group = "org.apache.htrace", name = "htrace-core4", version.ref = "htrace-core4" } airlift-json = { group = "io.airlift", name = "json", version.ref = "airlift-json"} airlift-resolver = { group = "io.airlift.resolver", name = "resolver", version.ref = "airlift-resolver"} From f9bf3c1189f4e8c18c77b2139cc1064d337cd695 Mon Sep 17 00:00:00 2001 From: yuqi Date: Fri, 13 Sep 2024 20:05:22 +0800 Subject: [PATCH 04/17] Add some document. --- docs/lakehouse-paimon-catalog.md | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/docs/lakehouse-paimon-catalog.md b/docs/lakehouse-paimon-catalog.md index 6eabd3e8fcd..0cf3942684c 100644 --- a/docs/lakehouse-paimon-catalog.md +++ b/docs/lakehouse-paimon-catalog.md @@ -30,16 +30,19 @@ Builds with Apache Paimon `0.8.0`. ### Catalog properties -| Property name | Description | Default value | Required | Since Version | -|----------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------|-----------------------------------------------------------------|---------------| -| `catalog-backend` | Catalog backend of Gravitino Paimon catalog. Only supports `filesystem` now. | (none) | Yes | 0.6.0 | -| `uri` | The URI configuration of the Paimon catalog. `thrift://127.0.0.1:9083` or `jdbc:postgresql://127.0.0.1:5432/db_name` or `jdbc:mysql://127.0.0.1:3306/metastore_db`. It is optional for `FilesystemCatalog`. | (none) | required if the value of `catalog-backend` is not `filesystem`. | 0.6.0 | -| `warehouse` | Warehouse directory of catalog. `file:///user/hive/warehouse-paimon/` for local fs or `hdfs://namespace/hdfs/path` for HDFS. | (none) | Yes | 0.6.0 | -| `authentication.type` | The type of authentication for Paimon catalog backend, currently Gravitino only supports `Kerberos` and `simple`. | `simple` | No | 0.6.0 | -| `authentication.kerberos.principal` | The principal of the Kerberos authentication. | (none) | required if the value of `authentication.type` is Kerberos. | 0.6.0 | -| `authentication.kerberos.keytab-uri` | The URI of The keytab for the Kerberos authentication. | (none) | required if the value of `authentication.type` is Kerberos. | 0.6.0 | -| `authentication.kerberos.check-interval-sec` | The check interval of Kerberos credential for Paimon catalog. | 60 | No | 0.6.0 | -| `authentication.kerberos.keytab-fetch-timeout-sec` | The fetch timeout of retrieving Kerberos keytab from `authentication.kerberos.keytab-uri`. | 60 | No | 0.6.0 | +| Property name | Description | Default value | Required | Since Version | +|-----------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------|-----------------------------------------------------------------|---------------| +| `catalog-backend` | Catalog backend of Gravitino Paimon catalog. Only supports `filesystem` now. | (none) | Yes | 0.6.0 | +| `uri` | The URI configuration of the Paimon catalog. `thrift://127.0.0.1:9083` or `jdbc:postgresql://127.0.0.1:5432/db_name` or `jdbc:mysql://127.0.0.1:3306/metastore_db`. It is optional for `FilesystemCatalog`. | (none) | required if the value of `catalog-backend` is not `filesystem`. | 0.6.0 | +| `warehouse` | Warehouse directory of catalog. `file:///user/hive/warehouse-paimon/` for local fs, `hdfs://namespace/hdfs/path` for HDFS or `s3://{bucket-name}/path/` for S3 | (none) | Yes | 0.6.0 | +| `authentication.type` | The type of authentication for Paimon catalog backend, currently Gravitino only supports `Kerberos` and `simple`. | `simple` | No | 0.6.0 | +| `authentication.kerberos.principal` | The principal of the Kerberos authentication. | (none) | required if the value of `authentication.type` is Kerberos. | 0.6.0 | +| `authentication.kerberos.keytab-uri` | The URI of The keytab for the Kerberos authentication. | (none) | required if the value of `authentication.type` is Kerberos. | 0.6.0 | +| `authentication.kerberos.check-interval-sec` | The check interval of Kerberos credential for Paimon catalog. | 60 | No | 0.6.0 | +| `authentication.kerberos.keytab-fetch-timeout-sec` | The fetch timeout of retrieving Kerberos keytab from `authentication.kerberos.keytab-uri`. | 60 | No | 0.6.0 | +| `s3.endpoint` | The endpoint of the AWS s3. | (none) | required if the value of `warehouse` is a S3 path | 0.7.0 | +| `s3.access-key` | The access key of the AWS s3. | (none) | required if the value of `warehouse` is a S3 path | 0.7.0 | +| `s3.secret-key` | The secret key of the AWS s3. | (none) | required if the value of `warehouse` is a S3 path | 0.7.0 | Any properties not defined by Gravitino with `gravitino.bypass.` prefix will pass to Paimon catalog properties and HDFS configuration. For example, if specify `gravitino.bypass.table.type`, `table.type` will pass to Paimon catalog properties. From fe2cc8fc730bb8bd29bcfd27c38c4ea683a01278 Mon Sep 17 00:00:00 2001 From: yuqi Date: Fri, 13 Sep 2024 20:10:38 +0800 Subject: [PATCH 05/17] fix --- catalogs/catalog-lakehouse-paimon/build.gradle.kts | 1 - 1 file changed, 1 deletion(-) diff --git a/catalogs/catalog-lakehouse-paimon/build.gradle.kts b/catalogs/catalog-lakehouse-paimon/build.gradle.kts index d1846361db7..c0fe7bb663a 100644 --- a/catalogs/catalog-lakehouse-paimon/build.gradle.kts +++ b/catalogs/catalog-lakehouse-paimon/build.gradle.kts @@ -85,7 +85,6 @@ dependencies { testImplementation(libs.paimon.spark) testImplementation(libs.testcontainers) testImplementation(libs.testcontainers.localstack) -// testImplementation(libs.hadoop.aws) testRuntimeOnly(libs.junit.jupiter.engine) } From 48149dd55f46f41a766427a0940c6e48923041f3 Mon Sep 17 00:00:00 2001 From: yuqi Date: Fri, 13 Sep 2024 20:13:47 +0800 Subject: [PATCH 06/17] fix test error. --- .../catalog/lakehouse/paimon/filesystem/FileSystemType.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/FileSystemType.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/FileSystemType.java index eb531a31dd0..2f53fe729a5 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/FileSystemType.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/FileSystemType.java @@ -19,6 +19,7 @@ package org.apache.gravitino.catalog.lakehouse.paimon.filesystem; public enum FileSystemType { + LOCAL_FILE, HDFS, S3, OSS; @@ -40,6 +41,8 @@ public static FileSystemType fromStoragePath(String storagePath) { return OSS; } else if (storagePath.startsWith("hdfs://")) { return HDFS; + } else if (storagePath.startsWith("/")) { + return LOCAL_FILE; } throw new IllegalArgumentException("Unsupported storage path: " + storagePath); From 6b040e7127f6a235aeda354f055ced576b9652da Mon Sep 17 00:00:00 2001 From: yuqi Date: Fri, 13 Sep 2024 20:31:47 +0800 Subject: [PATCH 07/17] FIX --- .../gravitino/catalog/lakehouse/paimon/utils/CatalogUtils.java | 1 + 1 file changed, 1 insertion(+) diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/CatalogUtils.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/CatalogUtils.java index 87f28a4f6af..a7ddf6803ad 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/CatalogUtils.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/CatalogUtils.java @@ -136,6 +136,7 @@ public static void checkWarehouseConfig( break; case HDFS: case OSS: + case LOCAL_FILE: break; default: throw new IllegalArgumentException("Unsupported file system type: " + fileSystemType); From 1836a326fc55a1c1fc2130f1e32caa496698d4ef Mon Sep 17 00:00:00 2001 From: yuqi Date: Sat, 14 Sep 2024 09:54:37 +0800 Subject: [PATCH 08/17] Fix test error in deploy mode. --- .../integration/test/CatalogPaimonS3IT.java | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonS3IT.java b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonS3IT.java index d898edd510c..e2f7fc99d5e 100644 --- a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonS3IT.java +++ b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonS3IT.java @@ -26,6 +26,8 @@ import java.util.concurrent.TimeUnit; import org.apache.gravitino.catalog.lakehouse.paimon.PaimonCatalogPropertiesMetadata; import org.apache.gravitino.catalog.lakehouse.paimon.filesystem.s3.PaimonS3FileSystemConfig; +import org.apache.gravitino.integration.test.util.ITUtils; +import org.apache.gravitino.integration.test.util.JdbcDriverDownloader; import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Tag; @@ -47,6 +49,9 @@ public class CatalogPaimonS3IT extends CatalogPaimonBaseIT { private String secretKey; private String endpoint; + private static final String PAIMON_S3_JAR_URL = + "https://repo1.maven.org/maven2/org/apache/paimon/paimon-s3/0.8.0/paimon-s3-0.8.0.jar"; + @Override protected Map initPaimonCatalogProperties() { @@ -67,9 +72,26 @@ protected Map initPaimonCatalogProperties() { catalogProperties.put(PaimonS3FileSystemConfig.S3_SECRET_KEY, secretKey); catalogProperties.put(PaimonS3FileSystemConfig.S3_ENDPOINT, endpoint); + // Need to download the S3 dependency in the deploy mode. + downloadS3Dependency(); + return catalogProperties; } + private void downloadS3Dependency() { + String gravitinoHome = System.getenv("GRAVITINO_HOME"); + try { + if (!ITUtils.EMBEDDED_TEST_MODE.equals(testMode)) { + String serverPath = ITUtils.joinPath(gravitinoHome, "libs"); + String paimonCatalogPath = + ITUtils.joinPath(gravitinoHome, "catalogs", "lakehouse-paimon", "libs"); + JdbcDriverDownloader.downloadJdbcDriver(PAIMON_S3_JAR_URL, serverPath, paimonCatalogPath); + } + } catch (Exception e) { + throw new RuntimeException("Failed to download the S3 dependency", e); + } + } + @Override protected void startNecessaryContainers() { localStackContainer = From faf5e59563aeefe841ebcc1d210cdc1db17f6099 Mon Sep 17 00:00:00 2001 From: yuqi Date: Wed, 18 Sep 2024 19:53:49 +0800 Subject: [PATCH 09/17] Fix --- .../paimon/integration/test/CatalogPaimonS3IT.java | 4 ++-- .../test/util/IcebergRESTServerManagerForDeploy.java | 4 ++-- .../gravitino/integration/test/util/AbstractIT.java | 11 ++++------- ...dbcDriverDownloader.java => DownloaderUtils.java} | 12 ++++++------ 4 files changed, 14 insertions(+), 17 deletions(-) rename integration-test-common/src/test/java/org/apache/gravitino/integration/test/util/{JdbcDriverDownloader.java => DownloaderUtils.java} (84%) diff --git a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonS3IT.java b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonS3IT.java index e2f7fc99d5e..2674291bd3b 100644 --- a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonS3IT.java +++ b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonS3IT.java @@ -26,8 +26,8 @@ import java.util.concurrent.TimeUnit; import org.apache.gravitino.catalog.lakehouse.paimon.PaimonCatalogPropertiesMetadata; import org.apache.gravitino.catalog.lakehouse.paimon.filesystem.s3.PaimonS3FileSystemConfig; +import org.apache.gravitino.integration.test.util.DownloaderUtils; import org.apache.gravitino.integration.test.util.ITUtils; -import org.apache.gravitino.integration.test.util.JdbcDriverDownloader; import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Tag; @@ -85,7 +85,7 @@ private void downloadS3Dependency() { String serverPath = ITUtils.joinPath(gravitinoHome, "libs"); String paimonCatalogPath = ITUtils.joinPath(gravitinoHome, "catalogs", "lakehouse-paimon", "libs"); - JdbcDriverDownloader.downloadJdbcDriver(PAIMON_S3_JAR_URL, serverPath, paimonCatalogPath); + DownloaderUtils.downloadFile(PAIMON_S3_JAR_URL, serverPath, paimonCatalogPath); } } catch (Exception e) { throw new RuntimeException("Failed to download the S3 dependency", e); diff --git a/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/integration/test/util/IcebergRESTServerManagerForDeploy.java b/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/integration/test/util/IcebergRESTServerManagerForDeploy.java index 135713223ab..ca4ba0cc41f 100644 --- a/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/integration/test/util/IcebergRESTServerManagerForDeploy.java +++ b/iceberg/iceberg-rest-server/src/test/java/org/apache/gravitino/iceberg/integration/test/util/IcebergRESTServerManagerForDeploy.java @@ -27,7 +27,7 @@ import java.util.concurrent.Future; import org.apache.commons.io.FileUtils; import org.apache.gravitino.integration.test.util.CommandExecutor; -import org.apache.gravitino.integration.test.util.JdbcDriverDownloader; +import org.apache.gravitino.integration.test.util.DownloaderUtils; import org.apache.gravitino.integration.test.util.ProcessData; import org.apache.gravitino.integration.test.util.ProcessData.TypesOfData; import org.testcontainers.shaded.com.google.common.collect.ImmutableMap; @@ -51,7 +51,7 @@ public Path getConfigDir() { @Override public Optional> doStartIcebergRESTServer() throws Exception { - JdbcDriverDownloader.downloadJdbcDriver( + DownloaderUtils.downloadFile( SQLITE_DRIVER_DOWNLOAD_URL, Paths.get(icebergRESTServerHome.toString(), "iceberg-rest-server", "libs").toString()); diff --git a/integration-test-common/src/test/java/org/apache/gravitino/integration/test/util/AbstractIT.java b/integration-test-common/src/test/java/org/apache/gravitino/integration/test/util/AbstractIT.java index 6644e1f646c..b2a0b390520 100644 --- a/integration-test-common/src/test/java/org/apache/gravitino/integration/test/util/AbstractIT.java +++ b/integration-test-common/src/test/java/org/apache/gravitino/integration/test/util/AbstractIT.java @@ -145,18 +145,15 @@ protected static void downLoadJDBCDriver() throws IOException { String serverPath = ITUtils.joinPath(gravitinoHome, "libs"); String icebergCatalogPath = ITUtils.joinPath(gravitinoHome, "catalogs", "lakehouse-iceberg", "libs"); - JdbcDriverDownloader.downloadJdbcDriver( - DOWNLOAD_MYSQL_JDBC_DRIVER_URL, serverPath, icebergCatalogPath); - JdbcDriverDownloader.downloadJdbcDriver( + DownloaderUtils.downloadFile(DOWNLOAD_MYSQL_JDBC_DRIVER_URL, serverPath, icebergCatalogPath); + DownloaderUtils.downloadFile( DOWNLOAD_POSTGRESQL_JDBC_DRIVER_URL, serverPath, icebergCatalogPath); } else { Path icebergLibsPath = Paths.get(gravitinoHome, "catalogs", "catalog-lakehouse-iceberg", "build", "libs"); - JdbcDriverDownloader.downloadJdbcDriver( - DOWNLOAD_MYSQL_JDBC_DRIVER_URL, icebergLibsPath.toString()); + DownloaderUtils.downloadFile(DOWNLOAD_MYSQL_JDBC_DRIVER_URL, icebergLibsPath.toString()); - JdbcDriverDownloader.downloadJdbcDriver( - DOWNLOAD_POSTGRESQL_JDBC_DRIVER_URL, icebergLibsPath.toString()); + DownloaderUtils.downloadFile(DOWNLOAD_POSTGRESQL_JDBC_DRIVER_URL, icebergLibsPath.toString()); } } diff --git a/integration-test-common/src/test/java/org/apache/gravitino/integration/test/util/JdbcDriverDownloader.java b/integration-test-common/src/test/java/org/apache/gravitino/integration/test/util/DownloaderUtils.java similarity index 84% rename from integration-test-common/src/test/java/org/apache/gravitino/integration/test/util/JdbcDriverDownloader.java rename to integration-test-common/src/test/java/org/apache/gravitino/integration/test/util/DownloaderUtils.java index f534172486e..0d4e12ee0ce 100644 --- a/integration-test-common/src/test/java/org/apache/gravitino/integration/test/util/JdbcDriverDownloader.java +++ b/integration-test-common/src/test/java/org/apache/gravitino/integration/test/util/DownloaderUtils.java @@ -30,20 +30,20 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class JdbcDriverDownloader { +public class DownloaderUtils { - public static final Logger LOG = LoggerFactory.getLogger(JdbcDriverDownloader.class); + public static final Logger LOG = LoggerFactory.getLogger(DownloaderUtils.class); - public static void downloadJdbcDriver(String jdbcDriverUrl, String... destinationDirectories) + public static void downloadFile(String fileUrl, String... destinationDirectories) throws IOException { - URL url = new URL(jdbcDriverUrl); + URL url = new URL(fileUrl); URLConnection connection = url.openConnection(); String fileName = getFileName(url); String destinationDirectory = destinationDirectories[0]; Path destinationPath = Paths.get(destinationDirectory, fileName); File file = new File(destinationPath.toString()); if (!file.exists()) { - LOG.info("Start download jdbc-driver from:{}", jdbcDriverUrl); + LOG.info("Start download file from:{}", fileUrl); try (InputStream in = connection.getInputStream()) { if (!Files.exists(Paths.get(destinationDirectory))) { @@ -52,7 +52,7 @@ public static void downloadJdbcDriver(String jdbcDriverUrl, String... destinatio Files.copy(in, destinationPath, java.nio.file.StandardCopyOption.REPLACE_EXISTING); Assertions.assertTrue(new File(destinationPath.toString()).exists()); - LOG.info("Download jdbc-driver:{} success. path:{}", fileName, destinationPath); + LOG.info("Download file:{} success. path:{}", fileName, destinationPath); } } for (int i = 1; i < destinationDirectories.length; i++) { From 56358cb8faef17eaf0c94dfceb75b821692216b8 Mon Sep 17 00:00:00 2001 From: yuqi Date: Mon, 23 Sep 2024 11:02:18 +0800 Subject: [PATCH 10/17] Resolve comments --- catalogs/catalog-common/build.gradle.kts | 4 +++- .../catalog-lakehouse-paimon/build.gradle.kts | 1 + .../PaimonCatalogPropertiesMetadata.java | 19 ++++++++++----- .../paimon/filesystem/FileSystemType.java | 7 ++---- .../lakehouse/paimon/utils/CatalogUtils.java | 23 +++++++++---------- .../integration/test/CatalogPaimonBaseIT.java | 17 +++++++++++++- .../integration/test/CatalogPaimonS3IT.java | 8 +++---- 7 files changed, 50 insertions(+), 29 deletions(-) diff --git a/catalogs/catalog-common/build.gradle.kts b/catalogs/catalog-common/build.gradle.kts index ef3785f7ca9..769761a16c7 100644 --- a/catalogs/catalog-common/build.gradle.kts +++ b/catalogs/catalog-common/build.gradle.kts @@ -23,6 +23,8 @@ plugins { // try to avoid adding extra dependencies because it is used by catalogs and connectors. dependencies { - implementation(libs.slf4j.api) + implementation(project(":core")) + implementation(libs.commons.lang3) implementation(libs.guava) + implementation(libs.slf4j.api) } diff --git a/catalogs/catalog-lakehouse-paimon/build.gradle.kts b/catalogs/catalog-lakehouse-paimon/build.gradle.kts index c0fe7bb663a..182949ceb7d 100644 --- a/catalogs/catalog-lakehouse-paimon/build.gradle.kts +++ b/catalogs/catalog-lakehouse-paimon/build.gradle.kts @@ -33,6 +33,7 @@ dependencies { implementation(project(":api")) implementation(project(":common")) implementation(project(":core")) + implementation(project(":catalogs:catalog-common")) implementation(libs.bundles.paimon) { exclude("com.sun.jersey") exclude("javax.servlet") diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogPropertiesMetadata.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogPropertiesMetadata.java index 5d2ba9e753f..08c5830455d 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogPropertiesMetadata.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogPropertiesMetadata.java @@ -29,12 +29,13 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import org.apache.gravitino.catalog.config.S3StorageConfig; import org.apache.gravitino.catalog.lakehouse.paimon.authentication.AuthenticationConfig; import org.apache.gravitino.catalog.lakehouse.paimon.authentication.kerberos.KerberosConfig; -import org.apache.gravitino.catalog.lakehouse.paimon.filesystem.s3.PaimonS3FileSystemConfig; import org.apache.gravitino.connector.BaseCatalogPropertiesMetadata; import org.apache.gravitino.connector.PropertiesMetadata; import org.apache.gravitino.connector.PropertyEntry; +import org.apache.gravitino.storage.S3Properties; /** * Implementation of {@link PropertiesMetadata} that represents Paimon catalog properties metadata. @@ -46,6 +47,11 @@ public class PaimonCatalogPropertiesMetadata extends BaseCatalogPropertiesMetada public static final String WAREHOUSE = "warehouse"; public static final String URI = "uri"; + // S3 properties needed by Paimon + public static final String S3_ENDPOINT = "s3.endpoint"; + public static final String S3_ACCESS_KEY = "s3.access-key"; + public static final String S3_SECRET_KEY = "s3.secret-key"; + public static final Map GRAVITINO_CONFIG_TO_PAIMON = ImmutableMap.of(GRAVITINO_CATALOG_BACKEND, PAIMON_METASTORE, WAREHOUSE, WAREHOUSE, URI, URI); private static final Map> PROPERTIES_METADATA; @@ -62,11 +68,11 @@ public class PaimonCatalogPropertiesMetadata extends BaseCatalogPropertiesMetada AuthenticationConfig.AUTH_TYPE_KEY, AuthenticationConfig.AUTH_TYPE_KEY); - private static final Map S3_CONFIGURATION = + public static final Map S3_CONFIGURATION = ImmutableMap.of( - PaimonS3FileSystemConfig.S3_ACCESS_KEY, PaimonS3FileSystemConfig.S3_ACCESS_KEY, - PaimonS3FileSystemConfig.S3_SECRET_KEY, PaimonS3FileSystemConfig.S3_SECRET_KEY, - PaimonS3FileSystemConfig.S3_ENDPOINT, PaimonS3FileSystemConfig.S3_ENDPOINT); + S3Properties.GRAVITINO_S3_ACCESS_KEY_ID, S3_ACCESS_KEY, + S3Properties.GRAVITINO_S3_SECRET_ACCESS_KEY, S3_SECRET_KEY, + S3Properties.GRAVITINO_S3_ENDPOINT, S3_ENDPOINT); static { List> propertyEntries = @@ -95,7 +101,8 @@ public class PaimonCatalogPropertiesMetadata extends BaseCatalogPropertiesMetada result.putAll(Maps.uniqueIndex(propertyEntries, PropertyEntry::getName)); result.putAll(KerberosConfig.KERBEROS_PROPERTY_ENTRIES); result.putAll(AuthenticationConfig.AUTHENTICATION_PROPERTY_ENTRIES); - result.putAll(PaimonS3FileSystemConfig.S3_FILESYSTEM_PROPERTY_ENTRIES); + result.putAll(S3StorageConfig.S3_FILESYSTEM_PROPERTY_ENTRIES); + PROPERTIES_METADATA = ImmutableMap.copyOf(result); } diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/FileSystemType.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/FileSystemType.java index 2f53fe729a5..912bde5eb52 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/FileSystemType.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/FileSystemType.java @@ -21,8 +21,7 @@ public enum FileSystemType { LOCAL_FILE, HDFS, - S3, - OSS; + S3; public static FileSystemType fromString(String type) { for (FileSystemType fileSystemType : FileSystemType.values()) { @@ -37,11 +36,9 @@ public static FileSystemType fromString(String type) { public static FileSystemType fromStoragePath(String storagePath) { if (storagePath.startsWith("s3://")) { return S3; - } else if (storagePath.startsWith("oss://")) { - return OSS; } else if (storagePath.startsWith("hdfs://")) { return HDFS; - } else if (storagePath.startsWith("/")) { + } else if (storagePath.startsWith("/") || storagePath.startsWith("file://")) { return LOCAL_FILE; } diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/CatalogUtils.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/CatalogUtils.java index a7ddf6803ad..15f9a902f6e 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/CatalogUtils.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/CatalogUtils.java @@ -34,7 +34,6 @@ import org.apache.gravitino.catalog.lakehouse.paimon.authentication.AuthenticationConfig; import org.apache.gravitino.catalog.lakehouse.paimon.authentication.kerberos.KerberosClient; import org.apache.gravitino.catalog.lakehouse.paimon.filesystem.FileSystemType; -import org.apache.gravitino.catalog.lakehouse.paimon.filesystem.s3.PaimonS3FileSystemConfig; import org.apache.gravitino.catalog.lakehouse.paimon.ops.PaimonBackendCatalogWrapper; import org.apache.hadoop.conf.Configuration; import org.apache.paimon.catalog.Catalog; @@ -135,7 +134,6 @@ public static void checkWarehouseConfig( checkS3FileSystemConfig(resultConf); break; case HDFS: - case OSS: case LOCAL_FILE: break; default: @@ -143,16 +141,17 @@ public static void checkWarehouseConfig( } } + @SuppressWarnings("unused") private static void checkS3FileSystemConfig(Map resultConf) { - PaimonS3FileSystemConfig s3FileSystemConfig = new PaimonS3FileSystemConfig(resultConf); - Preconditions.checkArgument( - StringUtils.isNotBlank(s3FileSystemConfig.getS3AccessKey()), - "S3 access key can not be null or empty."); - Preconditions.checkArgument( - StringUtils.isNotBlank(s3FileSystemConfig.getS3SecretKey()), - "S3 secret key can not be null or empty."); - Preconditions.checkArgument( - StringUtils.isNotBlank(s3FileSystemConfig.getS3Endpoint()), - "S3 endpoint can not be null or empty."); + // S3StorageConfig s3FileSystemConfig = new S3StorageConfig(resultConf); + // Preconditions.checkArgument( + // StringUtils.isNotBlank(s3FileSystemConfig.getS3AccessKey()), + // "S3 access key can not be null or empty."); + // Preconditions.checkArgument( + // StringUtils.isNotBlank(s3FileSystemConfig.getS3SecretKey()), + // "S3 secret key can not be null or empty."); + // Preconditions.checkArgument( + // StringUtils.isNotBlank(s3FileSystemConfig.getS3Endpoint()), + // "S3 endpoint can not be null or empty."); } } diff --git a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java index cb0a2223d8d..ee26398f7ed 100644 --- a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java +++ b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java @@ -67,6 +67,7 @@ import org.apache.gravitino.rel.expressions.transforms.Transforms; import org.apache.gravitino.rel.indexes.Index; import org.apache.gravitino.rel.types.Types; +import org.apache.gravitino.storage.S3Properties; import org.apache.paimon.catalog.Catalog.DatabaseNotExistException; import org.apache.paimon.catalog.Identifier; import org.apache.paimon.schema.TableSchema; @@ -886,8 +887,22 @@ private void createCatalog() { Preconditions.checkArgument( StringUtils.isNotBlank(type), "Paimon Catalog backend type can not be null or empty."); catalogProperties.put(PaimonCatalogPropertiesMetadata.PAIMON_METASTORE, type); + + // Why needs this conversion? Because PaimonCatalogOperations#initialize will try to convert + // Gravitino general S3 properties to Paimon specific S3 properties. + Map copy = Maps.newHashMap(catalogProperties); + copy.put( + PaimonCatalogPropertiesMetadata.S3_ENDPOINT, + catalogProperties.get(S3Properties.GRAVITINO_S3_ENDPOINT)); + copy.put( + PaimonCatalogPropertiesMetadata.S3_ACCESS_KEY, + catalogProperties.get(S3Properties.GRAVITINO_S3_ACCESS_KEY_ID)); + copy.put( + PaimonCatalogPropertiesMetadata.S3_SECRET_KEY, + catalogProperties.get(S3Properties.GRAVITINO_S3_SECRET_ACCESS_KEY)); + PaimonBackendCatalogWrapper paimonBackendCatalogWrapper = - CatalogUtils.loadCatalogBackend(new PaimonConfig(catalogProperties)); + CatalogUtils.loadCatalogBackend(new PaimonConfig(copy)); paimonCatalog = paimonBackendCatalogWrapper.getCatalog(); } diff --git a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonS3IT.java b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonS3IT.java index e2f7fc99d5e..46147f87c90 100644 --- a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonS3IT.java +++ b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonS3IT.java @@ -25,9 +25,9 @@ import java.util.Map; import java.util.concurrent.TimeUnit; import org.apache.gravitino.catalog.lakehouse.paimon.PaimonCatalogPropertiesMetadata; -import org.apache.gravitino.catalog.lakehouse.paimon.filesystem.s3.PaimonS3FileSystemConfig; import org.apache.gravitino.integration.test.util.ITUtils; import org.apache.gravitino.integration.test.util.JdbcDriverDownloader; +import org.apache.gravitino.storage.S3Properties; import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Tag; @@ -68,9 +68,9 @@ protected Map initPaimonCatalogProperties() { catalogProperties.put(PaimonCatalogPropertiesMetadata.GRAVITINO_CATALOG_BACKEND, TYPE); catalogProperties.put(PaimonCatalogPropertiesMetadata.WAREHOUSE, WAREHOUSE); - catalogProperties.put(PaimonS3FileSystemConfig.S3_ACCESS_KEY, accessKey); - catalogProperties.put(PaimonS3FileSystemConfig.S3_SECRET_KEY, secretKey); - catalogProperties.put(PaimonS3FileSystemConfig.S3_ENDPOINT, endpoint); + catalogProperties.put(S3Properties.GRAVITINO_S3_ACCESS_KEY_ID, accessKey); + catalogProperties.put(S3Properties.GRAVITINO_S3_SECRET_ACCESS_KEY, secretKey); + catalogProperties.put(S3Properties.GRAVITINO_S3_ENDPOINT, endpoint); // Need to download the S3 dependency in the deploy mode. downloadS3Dependency(); From 4229cdb9690564bd7ea274713bace50554de140a Mon Sep 17 00:00:00 2001 From: yuqi Date: Mon, 23 Sep 2024 11:02:59 +0800 Subject: [PATCH 11/17] forget to push file S3StorageConfig --- .../catalog/config/S3StorageConfig.java} | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) rename catalogs/{catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/s3/PaimonS3FileSystemConfig.java => catalog-common/src/main/java/org/apache/gravitino/catalog/config/S3StorageConfig.java} (79%) diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/s3/PaimonS3FileSystemConfig.java b/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/config/S3StorageConfig.java similarity index 79% rename from catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/s3/PaimonS3FileSystemConfig.java rename to catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/config/S3StorageConfig.java index 1f6c61f69f1..ebdcb1539b7 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/s3/PaimonS3FileSystemConfig.java +++ b/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/config/S3StorageConfig.java @@ -16,7 +16,11 @@ * specific language governing permissions and limitations * under the License. */ -package org.apache.gravitino.catalog.lakehouse.paimon.filesystem.s3; +package org.apache.gravitino.catalog.config; + +import static org.apache.gravitino.storage.S3Properties.GRAVITINO_S3_ACCESS_KEY_ID; +import static org.apache.gravitino.storage.S3Properties.GRAVITINO_S3_ENDPOINT; +import static org.apache.gravitino.storage.S3Properties.GRAVITINO_S3_SECRET_ACCESS_KEY; import com.google.common.collect.ImmutableMap; import java.util.Map; @@ -27,19 +31,16 @@ import org.apache.gravitino.config.ConfigEntry; import org.apache.gravitino.connector.PropertyEntry; -public class PaimonS3FileSystemConfig extends Config { - // S3 related properties - public static final String S3_ENDPOINT = "s3.endpoint"; - public static final String S3_ACCESS_KEY = "s3.access-key"; - public static final String S3_SECRET_KEY = "s3.secret-key"; +public class S3StorageConfig extends Config { - public PaimonS3FileSystemConfig(Map properties) { + public S3StorageConfig(Map properties) { super(false); loadFromMap(properties, k -> true); } + // Unified S3 public static final ConfigEntry PAIMON_S3_ENDPOINT_ENTRY = - new ConfigBuilder(S3_ENDPOINT) + new ConfigBuilder(GRAVITINO_S3_ENDPOINT) .doc("The endpoint of the AWS s3") .version(ConfigConstants.VERSION_0_7_0) .stringConf() @@ -47,7 +48,7 @@ public PaimonS3FileSystemConfig(Map properties) { .create(); public static final ConfigEntry PAIMON_S3_ACCESS_KEY_ENTRY = - new ConfigBuilder(S3_ACCESS_KEY) + new ConfigBuilder(GRAVITINO_S3_ACCESS_KEY_ID) .doc("The access key of the AWS s3") .version(ConfigConstants.VERSION_0_7_0) .stringConf() @@ -55,7 +56,7 @@ public PaimonS3FileSystemConfig(Map properties) { .create(); public static final ConfigEntry PAIMON_S3_SECRET_KEY_ENTRY = - new ConfigBuilder(S3_SECRET_KEY) + new ConfigBuilder(GRAVITINO_S3_SECRET_ACCESS_KEY) .doc("The secret key of the AWS s3") .version(ConfigConstants.VERSION_0_7_0) .stringConf() @@ -77,25 +78,25 @@ public String getS3SecretKey() { public static final Map> S3_FILESYSTEM_PROPERTY_ENTRIES = new ImmutableMap.Builder>() .put( - S3_ENDPOINT, + GRAVITINO_S3_ENDPOINT, PropertyEntry.stringOptionalPropertyEntry( - S3_ENDPOINT, + GRAVITINO_S3_ENDPOINT, "The endpoint of the AWS s3", false /* immutable */, null /* defaultValue */, false /* hidden */)) .put( - S3_ACCESS_KEY, + GRAVITINO_S3_ACCESS_KEY_ID, PropertyEntry.stringOptionalPropertyEntry( - S3_ACCESS_KEY, + GRAVITINO_S3_ACCESS_KEY_ID, "The access key of the AWS s3", false /* immutable */, null /* defaultValue */, false /* hidden */)) .put( - S3_SECRET_KEY, + GRAVITINO_S3_SECRET_ACCESS_KEY, PropertyEntry.stringOptionalPropertyEntry( - S3_SECRET_KEY, + GRAVITINO_S3_SECRET_ACCESS_KEY, "The secret key of the AWS s3", false /* immutable */, null /* defaultValue */, From 3c216863df6ee197a303148d2c1c32d459b6c74b Mon Sep 17 00:00:00 2001 From: yuqi Date: Tue, 24 Sep 2024 21:42:03 +0800 Subject: [PATCH 12/17] Fix --- .../integration/test/CatalogPaimonS3IT.java | 29 +++++-------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonS3IT.java b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonS3IT.java index 4b3c9bf90e0..2030bbd19df 100644 --- a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonS3IT.java +++ b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonS3IT.java @@ -19,32 +19,26 @@ package org.apache.gravitino.catalog.lakehouse.paimon.integration.test; -import static org.testcontainers.containers.localstack.LocalStackContainer.Service.S3; - import com.google.common.collect.Maps; import java.util.Map; import java.util.concurrent.TimeUnit; import org.apache.gravitino.catalog.lakehouse.paimon.PaimonCatalogPropertiesMetadata; +import org.apache.gravitino.integration.test.container.GravitinoLocalStackContainer; import org.apache.gravitino.integration.test.util.DownloaderUtils; import org.apache.gravitino.integration.test.util.ITUtils; import org.apache.gravitino.storage.S3Properties; import org.apache.spark.sql.SparkSession; -import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.TestInstance; import org.testcontainers.containers.Container; -import org.testcontainers.containers.localstack.LocalStackContainer; import org.testcontainers.shaded.org.awaitility.Awaitility; -import org.testcontainers.utility.DockerImageName; @Tag("gravitino-docker-test") @TestInstance(TestInstance.Lifecycle.PER_CLASS) public class CatalogPaimonS3IT extends CatalogPaimonBaseIT { - // private static final String S3_BUCKET_NAME = - // GravitinoITUtils.genRandomName("paimon-s3-bucket-"); private static final String S3_BUCKET_NAME = "my-test-bucket"; - private static LocalStackContainer localStackContainer; + private static GravitinoLocalStackContainer localStackContainer; private String accessKey; private String secretKey; private String endpoint; @@ -62,9 +56,9 @@ protected Map initPaimonCatalogProperties() { TYPE = "filesystem"; WAREHOUSE = "s3://" + S3_BUCKET_NAME + "/"; - accessKey = localStackContainer.getAccessKey(); - secretKey = localStackContainer.getSecretKey(); - endpoint = localStackContainer.getEndpointOverride(S3).toString(); + accessKey = "accessKey"; + secretKey = "secretKey"; + endpoint = String.format("http://%s:%d", localStackContainer.getContainerIpAddress(), 4566); catalogProperties.put(PaimonCatalogPropertiesMetadata.GRAVITINO_CATALOG_BACKEND, TYPE); catalogProperties.put(PaimonCatalogPropertiesMetadata.WAREHOUSE, WAREHOUSE); @@ -94,9 +88,8 @@ private void downloadS3Dependency() { @Override protected void startNecessaryContainers() { - localStackContainer = - new LocalStackContainer(DockerImageName.parse("localstack/localstack")).withServices(S3); - localStackContainer.start(); + containerSuite.startLocalStackContainer(); + localStackContainer = containerSuite.getLocalStackContainer(); Awaitility.await() .atMost(60, TimeUnit.SECONDS) @@ -105,7 +98,7 @@ protected void startNecessaryContainers() { () -> { try { Container.ExecResult result = - localStackContainer.execInContainer( + localStackContainer.executeInContainer( "awslocal", "s3", "mb", "s3://" + S3_BUCKET_NAME); return result.getExitCode() == 0; } catch (Exception e) { @@ -114,12 +107,6 @@ protected void startNecessaryContainers() { }); } - @AfterAll - public void stop() { - super.stop(); - localStackContainer.stop(); - } - protected void initSparkEnv() { spark = SparkSession.builder() From 49f9ac36a1703d129323026b648a927e9bfd61ea Mon Sep 17 00:00:00 2001 From: yuqi Date: Tue, 24 Sep 2024 23:01:24 +0800 Subject: [PATCH 13/17] Fix --- .../integration/test/CatalogPaimonBaseIT.java | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java index ee26398f7ed..df10908e7ce 100644 --- a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java +++ b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java @@ -891,15 +891,23 @@ private void createCatalog() { // Why needs this conversion? Because PaimonCatalogOperations#initialize will try to convert // Gravitino general S3 properties to Paimon specific S3 properties. Map copy = Maps.newHashMap(catalogProperties); - copy.put( - PaimonCatalogPropertiesMetadata.S3_ENDPOINT, - catalogProperties.get(S3Properties.GRAVITINO_S3_ENDPOINT)); - copy.put( - PaimonCatalogPropertiesMetadata.S3_ACCESS_KEY, - catalogProperties.get(S3Properties.GRAVITINO_S3_ACCESS_KEY_ID)); - copy.put( - PaimonCatalogPropertiesMetadata.S3_SECRET_KEY, - catalogProperties.get(S3Properties.GRAVITINO_S3_SECRET_ACCESS_KEY)); + if (catalogProperties.containsKey(S3Properties.GRAVITINO_S3_ENDPOINT)) { + copy.put( + PaimonCatalogPropertiesMetadata.S3_ENDPOINT, + catalogProperties.get(S3Properties.GRAVITINO_S3_ENDPOINT)); + } + + if (catalogProperties.containsKey(S3Properties.GRAVITINO_S3_ACCESS_KEY_ID)) { + copy.put( + PaimonCatalogPropertiesMetadata.S3_ACCESS_KEY, + catalogProperties.get(S3Properties.GRAVITINO_S3_ACCESS_KEY_ID)); + } + + if (catalogProperties.containsKey(S3Properties.GRAVITINO_S3_SECRET_ACCESS_KEY)) { + copy.put( + PaimonCatalogPropertiesMetadata.S3_SECRET_KEY, + catalogProperties.get(S3Properties.GRAVITINO_S3_SECRET_ACCESS_KEY)); + } PaimonBackendCatalogWrapper paimonBackendCatalogWrapper = CatalogUtils.loadCatalogBackend(new PaimonConfig(copy)); From 6135e7ec69b49f42583e67571d5df4eed5589e44 Mon Sep 17 00:00:00 2001 From: yuqi Date: Tue, 24 Sep 2024 23:36:26 +0800 Subject: [PATCH 14/17] Fix --- catalogs/catalog-common/build.gradle.kts | 1 - .../lakehouse/paimon/PaimonCatalogPropertiesMetadata.java | 2 +- .../catalog/lakehouse/paimon/filesystem}/S3StorageConfig.java | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) rename catalogs/{catalog-common/src/main/java/org/apache/gravitino/catalog/config => catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem}/S3StorageConfig.java (98%) diff --git a/catalogs/catalog-common/build.gradle.kts b/catalogs/catalog-common/build.gradle.kts index 2a40a89307e..4cae8c530c8 100644 --- a/catalogs/catalog-common/build.gradle.kts +++ b/catalogs/catalog-common/build.gradle.kts @@ -23,7 +23,6 @@ plugins { // try to avoid adding extra dependencies because it is used by catalogs and connectors. dependencies { - implementation(project(":core")) implementation(libs.commons.lang3) implementation(libs.guava) implementation(libs.slf4j.api) diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogPropertiesMetadata.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogPropertiesMetadata.java index 08c5830455d..e6298608468 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogPropertiesMetadata.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogPropertiesMetadata.java @@ -29,9 +29,9 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import org.apache.gravitino.catalog.config.S3StorageConfig; import org.apache.gravitino.catalog.lakehouse.paimon.authentication.AuthenticationConfig; import org.apache.gravitino.catalog.lakehouse.paimon.authentication.kerberos.KerberosConfig; +import org.apache.gravitino.catalog.lakehouse.paimon.filesystem.S3StorageConfig; import org.apache.gravitino.connector.BaseCatalogPropertiesMetadata; import org.apache.gravitino.connector.PropertiesMetadata; import org.apache.gravitino.connector.PropertyEntry; diff --git a/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/config/S3StorageConfig.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/S3StorageConfig.java similarity index 98% rename from catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/config/S3StorageConfig.java rename to catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/S3StorageConfig.java index ebdcb1539b7..c25786169bc 100644 --- a/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/config/S3StorageConfig.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/S3StorageConfig.java @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.apache.gravitino.catalog.config; +package org.apache.gravitino.catalog.lakehouse.paimon.filesystem; import static org.apache.gravitino.storage.S3Properties.GRAVITINO_S3_ACCESS_KEY_ID; import static org.apache.gravitino.storage.S3Properties.GRAVITINO_S3_ENDPOINT; From 15e6ca98b4fa8419c676914e168e74d47a7a35c1 Mon Sep 17 00:00:00 2001 From: yuqi Date: Sun, 29 Sep 2024 16:04:54 +0800 Subject: [PATCH 15/17] fix --- catalogs/catalog-lakehouse-paimon/build.gradle.kts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/catalogs/catalog-lakehouse-paimon/build.gradle.kts b/catalogs/catalog-lakehouse-paimon/build.gradle.kts index 9dbe18ce5f6..c81959ee7e8 100644 --- a/catalogs/catalog-lakehouse-paimon/build.gradle.kts +++ b/catalogs/catalog-lakehouse-paimon/build.gradle.kts @@ -30,10 +30,12 @@ val sparkMajorVersion: String = sparkVersion.substringBeforeLast(".") val paimonVersion: String = libs.versions.paimon.get() dependencies { - implementation(project(":catalogs:catalog-common")) implementation(project(":api")) { exclude("*") } + implementation(project(":catalogs:catalog-common")) { + exclude("*") + } implementation(project(":common")) { exclude("*") } From df823d9e331eb2caebdcaf858404b059235fe055 Mon Sep 17 00:00:00 2001 From: yuqi Date: Wed, 9 Oct 2024 14:34:48 +0800 Subject: [PATCH 16/17] Fix --- .../paimon/PaimonCatalogOperations.java | 5 +-- .../lakehouse/paimon/utils/CatalogUtils.java | 40 ++++--------------- .../integration/test/CatalogPaimonBaseIT.java | 20 +--------- docs/lakehouse-paimon-catalog.md | 26 ++++++------ 4 files changed, 23 insertions(+), 68 deletions(-) diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogOperations.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogOperations.java index 3188cd62cbe..fbe6f4be49d 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogOperations.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogOperations.java @@ -39,7 +39,6 @@ import org.apache.gravitino.Namespace; import org.apache.gravitino.SchemaChange; import org.apache.gravitino.catalog.lakehouse.paimon.ops.PaimonCatalogOps; -import org.apache.gravitino.catalog.lakehouse.paimon.utils.CatalogUtils; import org.apache.gravitino.catalog.lakehouse.paimon.utils.TableOpsUtils; import org.apache.gravitino.connector.CatalogInfo; import org.apache.gravitino.connector.CatalogOperations; @@ -115,9 +114,7 @@ public void initialize( Map resultConf = Maps.newHashMap(prefixMap); resultConf.putAll(gravitinoConfig); - PaimonConfig paimonConfig = new PaimonConfig(resultConf); - CatalogUtils.checkWarehouseConfig(paimonConfig, resultConf); - this.paimonCatalogOps = new PaimonCatalogOps(paimonConfig); + this.paimonCatalogOps = new PaimonCatalogOps(new PaimonConfig(resultConf)); } /** diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/CatalogUtils.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/CatalogUtils.java index 15f9a902f6e..ffce417a045 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/CatalogUtils.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/utils/CatalogUtils.java @@ -18,6 +18,7 @@ */ package org.apache.gravitino.catalog.lakehouse.paimon.utils; +import static org.apache.gravitino.catalog.lakehouse.paimon.PaimonCatalogPropertiesMetadata.S3_CONFIGURATION; import static org.apache.gravitino.catalog.lakehouse.paimon.PaimonConfig.CATALOG_BACKEND; import static org.apache.gravitino.catalog.lakehouse.paimon.PaimonConfig.CATALOG_URI; import static org.apache.gravitino.catalog.lakehouse.paimon.PaimonConfig.CATALOG_WAREHOUSE; @@ -26,6 +27,7 @@ import com.google.common.base.Preconditions; import java.io.File; +import java.util.HashMap; import java.util.Map; import java.util.UUID; import org.apache.commons.lang3.StringUtils; @@ -33,7 +35,6 @@ import org.apache.gravitino.catalog.lakehouse.paimon.PaimonConfig; import org.apache.gravitino.catalog.lakehouse.paimon.authentication.AuthenticationConfig; import org.apache.gravitino.catalog.lakehouse.paimon.authentication.kerberos.KerberosClient; -import org.apache.gravitino.catalog.lakehouse.paimon.filesystem.FileSystemType; import org.apache.gravitino.catalog.lakehouse.paimon.ops.PaimonBackendCatalogWrapper; import org.apache.hadoop.conf.Configuration; import org.apache.paimon.catalog.Catalog; @@ -122,36 +123,11 @@ private static void checkPaimonConfig(PaimonConfig paimonConfig) { } } - public static void checkWarehouseConfig( - PaimonConfig paimonConfig, Map resultConf) { - String warehouse = paimonConfig.get(CATALOG_WAREHOUSE); - Preconditions.checkArgument( - StringUtils.isNotBlank(warehouse), "Paimon Catalog warehouse can not be null or empty."); - - FileSystemType fileSystemType = FileSystemType.fromStoragePath(warehouse); - switch (fileSystemType) { - case S3: - checkS3FileSystemConfig(resultConf); - break; - case HDFS: - case LOCAL_FILE: - break; - default: - throw new IllegalArgumentException("Unsupported file system type: " + fileSystemType); - } - } - - @SuppressWarnings("unused") - private static void checkS3FileSystemConfig(Map resultConf) { - // S3StorageConfig s3FileSystemConfig = new S3StorageConfig(resultConf); - // Preconditions.checkArgument( - // StringUtils.isNotBlank(s3FileSystemConfig.getS3AccessKey()), - // "S3 access key can not be null or empty."); - // Preconditions.checkArgument( - // StringUtils.isNotBlank(s3FileSystemConfig.getS3SecretKey()), - // "S3 secret key can not be null or empty."); - // Preconditions.checkArgument( - // StringUtils.isNotBlank(s3FileSystemConfig.getS3Endpoint()), - // "S3 endpoint can not be null or empty."); + public static Map toPaimonCatalogProperties( + Map gravitinoProperties) { + Map paimonProperties = new HashMap<>(); + gravitinoProperties.forEach( + (key, value) -> paimonProperties.put(S3_CONFIGURATION.getOrDefault(key, key), value)); + return paimonProperties; } } diff --git a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java index df10908e7ce..bd907d06d92 100644 --- a/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java +++ b/catalogs/catalog-lakehouse-paimon/src/test/java/org/apache/gravitino/catalog/lakehouse/paimon/integration/test/CatalogPaimonBaseIT.java @@ -67,7 +67,6 @@ import org.apache.gravitino.rel.expressions.transforms.Transforms; import org.apache.gravitino.rel.indexes.Index; import org.apache.gravitino.rel.types.Types; -import org.apache.gravitino.storage.S3Properties; import org.apache.paimon.catalog.Catalog.DatabaseNotExistException; import org.apache.paimon.catalog.Identifier; import org.apache.paimon.schema.TableSchema; @@ -890,24 +889,7 @@ private void createCatalog() { // Why needs this conversion? Because PaimonCatalogOperations#initialize will try to convert // Gravitino general S3 properties to Paimon specific S3 properties. - Map copy = Maps.newHashMap(catalogProperties); - if (catalogProperties.containsKey(S3Properties.GRAVITINO_S3_ENDPOINT)) { - copy.put( - PaimonCatalogPropertiesMetadata.S3_ENDPOINT, - catalogProperties.get(S3Properties.GRAVITINO_S3_ENDPOINT)); - } - - if (catalogProperties.containsKey(S3Properties.GRAVITINO_S3_ACCESS_KEY_ID)) { - copy.put( - PaimonCatalogPropertiesMetadata.S3_ACCESS_KEY, - catalogProperties.get(S3Properties.GRAVITINO_S3_ACCESS_KEY_ID)); - } - - if (catalogProperties.containsKey(S3Properties.GRAVITINO_S3_SECRET_ACCESS_KEY)) { - copy.put( - PaimonCatalogPropertiesMetadata.S3_SECRET_KEY, - catalogProperties.get(S3Properties.GRAVITINO_S3_SECRET_ACCESS_KEY)); - } + Map copy = CatalogUtils.toPaimonCatalogProperties(catalogProperties); PaimonBackendCatalogWrapper paimonBackendCatalogWrapper = CatalogUtils.loadCatalogBackend(new PaimonConfig(copy)); diff --git a/docs/lakehouse-paimon-catalog.md b/docs/lakehouse-paimon-catalog.md index 0b117522f38..03a36408ee3 100644 --- a/docs/lakehouse-paimon-catalog.md +++ b/docs/lakehouse-paimon-catalog.md @@ -30,19 +30,19 @@ Builds with Apache Paimon `0.8.0`. ### Catalog properties -| Property name | Description | Default value | Required | Since Version | -|-----------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------|-----------------------------------------------------------------|---------------| -| `catalog-backend` | Catalog backend of Gravitino Paimon catalog. Only supports `filesystem` now. | (none) | Yes | 0.6.0 | -| `uri` | The URI configuration of the Paimon catalog. `thrift://127.0.0.1:9083` or `jdbc:postgresql://127.0.0.1:5432/db_name` or `jdbc:mysql://127.0.0.1:3306/metastore_db`. It is optional for `FilesystemCatalog`. | (none) | required if the value of `catalog-backend` is not `filesystem`. | 0.6.0 | -| `warehouse` | Warehouse directory of catalog. `file:///user/hive/warehouse-paimon/` for local fs, `hdfs://namespace/hdfs/path` for HDFS or `s3://{bucket-name}/path/` for S3 | (none) | Yes | 0.6.0 | -| `authentication.type` | The type of authentication for Paimon catalog backend, currently Gravitino only supports `Kerberos` and `simple`. | `simple` | No | 0.6.0 | -| `authentication.kerberos.principal` | The principal of the Kerberos authentication. | (none) | required if the value of `authentication.type` is Kerberos. | 0.6.0 | -| `authentication.kerberos.keytab-uri` | The URI of The keytab for the Kerberos authentication. | (none) | required if the value of `authentication.type` is Kerberos. | 0.6.0 | -| `authentication.kerberos.check-interval-sec` | The check interval of Kerberos credential for Paimon catalog. | 60 | No | 0.6.0 | -| `authentication.kerberos.keytab-fetch-timeout-sec` | The fetch timeout of retrieving Kerberos keytab from `authentication.kerberos.keytab-uri`. | 60 | No | 0.6.0 | -| `s3.endpoint` | The endpoint of the AWS s3. | (none) | required if the value of `warehouse` is a S3 path | 0.7.0 | -| `s3.access-key` | The access key of the AWS s3. | (none) | required if the value of `warehouse` is a S3 path | 0.7.0 | -| `s3.secret-key` | The secret key of the AWS s3. | (none) | required if the value of `warehouse` is a S3 path | 0.7.0 | +| Property name | Description | Default value | Required | Since Version | +|----------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------|-----------------------------------------------------------------|---------------| +| `catalog-backend` | Catalog backend of Gravitino Paimon catalog. Only supports `filesystem` now. | (none) | Yes | 0.6.0 | +| `uri` | The URI configuration of the Paimon catalog. `thrift://127.0.0.1:9083` or `jdbc:postgresql://127.0.0.1:5432/db_name` or `jdbc:mysql://127.0.0.1:3306/metastore_db`. It is optional for `FilesystemCatalog`. | (none) | required if the value of `catalog-backend` is not `filesystem`. | 0.6.0 | +| `warehouse` | Warehouse directory of catalog. `file:///user/hive/warehouse-paimon/` for local fs, `hdfs://namespace/hdfs/path` for HDFS or `s3://{bucket-name}/path/` for S3 | (none) | Yes | 0.6.0 | +| `authentication.type` | The type of authentication for Paimon catalog backend, currently Gravitino only supports `Kerberos` and `simple`. | `simple` | No | 0.6.0 | +| `authentication.kerberos.principal` | The principal of the Kerberos authentication. | (none) | required if the value of `authentication.type` is Kerberos. | 0.6.0 | +| `authentication.kerberos.keytab-uri` | The URI of The keytab for the Kerberos authentication. | (none) | required if the value of `authentication.type` is Kerberos. | 0.6.0 | +| `authentication.kerberos.check-interval-sec` | The check interval of Kerberos credential for Paimon catalog. | 60 | No | 0.6.0 | +| `authentication.kerberos.keytab-fetch-timeout-sec` | The fetch timeout of retrieving Kerberos keytab from `authentication.kerberos.keytab-uri`. | 60 | No | 0.6.0 | +| `s3-endpoint` | The endpoint of the AWS s3. | (none) | required if the value of `warehouse` is a S3 path | 0.7.0 | +| `s3-access-key-id` | The access key of the AWS s3. | (none) | required if the value of `warehouse` is a S3 path | 0.7.0 | +| `s3-secret-access-key` | The secret key of the AWS s3. | (none) | required if the value of `warehouse` is a S3 path | 0.7.0 | Any properties not defined by Gravitino with `gravitino.bypass.` prefix will pass to Paimon catalog properties and HDFS configuration. For example, if specify `gravitino.bypass.table.type`, `table.type` will pass to Paimon catalog properties. From c705667ae567288f63a5ebc39f75fd8ebdc1bbaa Mon Sep 17 00:00:00 2001 From: yuqi Date: Thu, 10 Oct 2024 09:41:57 +0800 Subject: [PATCH 17/17] Removed unused class. --- .../PaimonCatalogPropertiesMetadata.java | 2 +- .../paimon/filesystem/FileSystemType.java | 47 ------------------- .../S3StorageConfig.java | 2 +- 3 files changed, 2 insertions(+), 49 deletions(-) delete mode 100644 catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/FileSystemType.java rename catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/{filesystem => storage}/S3StorageConfig.java (98%) diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogPropertiesMetadata.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogPropertiesMetadata.java index e6298608468..589d79672d3 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogPropertiesMetadata.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogPropertiesMetadata.java @@ -31,7 +31,7 @@ import java.util.Map; import org.apache.gravitino.catalog.lakehouse.paimon.authentication.AuthenticationConfig; import org.apache.gravitino.catalog.lakehouse.paimon.authentication.kerberos.KerberosConfig; -import org.apache.gravitino.catalog.lakehouse.paimon.filesystem.S3StorageConfig; +import org.apache.gravitino.catalog.lakehouse.paimon.storage.S3StorageConfig; import org.apache.gravitino.connector.BaseCatalogPropertiesMetadata; import org.apache.gravitino.connector.PropertiesMetadata; import org.apache.gravitino.connector.PropertyEntry; diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/FileSystemType.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/FileSystemType.java deleted file mode 100644 index 912bde5eb52..00000000000 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/FileSystemType.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.gravitino.catalog.lakehouse.paimon.filesystem; - -public enum FileSystemType { - LOCAL_FILE, - HDFS, - S3; - - public static FileSystemType fromString(String type) { - for (FileSystemType fileSystemType : FileSystemType.values()) { - if (fileSystemType.name().equalsIgnoreCase(type)) { - return fileSystemType; - } - } - - throw new IllegalArgumentException("Unsupported file system type: " + type); - } - - public static FileSystemType fromStoragePath(String storagePath) { - if (storagePath.startsWith("s3://")) { - return S3; - } else if (storagePath.startsWith("hdfs://")) { - return HDFS; - } else if (storagePath.startsWith("/") || storagePath.startsWith("file://")) { - return LOCAL_FILE; - } - - throw new IllegalArgumentException("Unsupported storage path: " + storagePath); - } -} diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/S3StorageConfig.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/storage/S3StorageConfig.java similarity index 98% rename from catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/S3StorageConfig.java rename to catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/storage/S3StorageConfig.java index c25786169bc..213589d788d 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/filesystem/S3StorageConfig.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/storage/S3StorageConfig.java @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.apache.gravitino.catalog.lakehouse.paimon.filesystem; +package org.apache.gravitino.catalog.lakehouse.paimon.storage; import static org.apache.gravitino.storage.S3Properties.GRAVITINO_S3_ACCESS_KEY_ID; import static org.apache.gravitino.storage.S3Properties.GRAVITINO_S3_ENDPOINT;