Reduce iceberg #11

Closed · wants to merge 6 commits
1 change: 1 addition & 0 deletions LICENSE.bin
@@ -286,6 +286,7 @@
Apache Hadoop Auth
Apache Hadoop Client Aggregator
Apache Hadoop Common
Apache Hadoop HDFS
Apache Hadoop HDFS Client
Apache Hadoop MapReduce Common
Apache Hadoop MapReduce Core
37 changes: 18 additions & 19 deletions catalogs/catalog-lakehouse-iceberg/build.gradle.kts
@@ -31,22 +31,25 @@ val icebergVersion: String = libs.versions.iceberg.get()
val scalaCollectionCompatVersion: String = libs.versions.scala.collection.compat.get()

dependencies {
implementation(project(":api"))
implementation(project(":api")) {
exclude("*")
}
implementation(project(":catalogs:catalog-common"))
implementation(project(":common"))
implementation(project(":core"))
implementation(project(":common")) {
exclude("*")
}
implementation(project(":core")) {
exclude("*")
}
implementation(project(":iceberg:iceberg-common"))
implementation(project(":server-common"))
implementation(libs.bundles.iceberg)
implementation(libs.bundles.jersey)
implementation(libs.bundles.jetty)

implementation(libs.bundles.log4j)
implementation(libs.cglib)
implementation(libs.commons.collections4)
implementation(libs.commons.io)
implementation(libs.commons.lang3)
implementation(libs.guava)
implementation(libs.sqlite.jdbc)

annotationProcessor(libs.lombok)

@@ -60,7 +63,9 @@ dependencies {

testImplementation("org.scala-lang.modules:scala-collection-compat_$scalaVersion:$scalaCollectionCompatVersion")
testImplementation("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion")
testImplementation("org.apache.spark:spark-hive_$scalaVersion:$sparkVersion")
testImplementation("org.apache.spark:spark-hive_$scalaVersion:$sparkVersion") {
exclude("org.apache.hive")
}
testImplementation("org.apache.spark:spark-sql_$scalaVersion:$sparkVersion") {
exclude("org.apache.avro")
exclude("org.apache.hadoop")
@@ -69,22 +74,12 @@
exclude("org.rocksdb")
}

testImplementation(libs.hadoop2.common) {
exclude("com.github.spotbugs")
}
testImplementation(libs.jersey.test.framework.core) {
exclude(group = "org.junit.jupiter")
}
testImplementation(libs.jersey.test.framework.provider.jetty) {
exclude(group = "org.junit.jupiter")
}
testImplementation(libs.junit.jupiter.api)
testImplementation(libs.junit.jupiter.params)
testImplementation(libs.mockito.core)
// The test TestMultipleJDBCLoad depends on testcontainers.mysql and testcontainers.postgresql
testImplementation(libs.mysql.driver)
testImplementation(libs.postgresql.driver)

testImplementation(libs.slf4j.api)
testImplementation(libs.testcontainers)
testImplementation(libs.testcontainers.mysql)
@@ -105,7 +100,11 @@ tasks {

val copyCatalogLibs by registering(Copy::class) {
dependsOn("jar", "runtimeJars")
from("build/libs")
from("build/libs") {
exclude("guava-*.jar")
exclude("log4j-*.jar")
exclude("slf4j-*.jar")
}
into("$rootDir/distribution/package/catalogs/lakehouse-iceberg/libs")
}

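The retained mysql/postgresql driver dependencies relate to the TestMultipleJDBCLoad comment above. As a hedged sketch (image tag and class name are illustrative, not from this PR), this is the pattern such tests rely on:

```java
import java.sql.Connection;
import java.sql.DriverManager;
import org.testcontainers.containers.MySQLContainer;

// Hedged sketch, not part of this PR: Testcontainers starts the database in
// Docker, but DriverManager still resolves the connection through the MySQL
// JDBC driver, which is why libs.mysql.driver stays on the test classpath.
class MySqlDriverSmokeTest {
  public static void main(String[] args) throws Exception {
    try (MySQLContainer<?> mysql = new MySQLContainer<>("mysql:8.0")) {
      mysql.start();
      try (Connection conn = DriverManager.getConnection(
          mysql.getJdbcUrl(), mysql.getUsername(), mysql.getPassword())) {
        System.out.println("connected: " + conn.getMetaData().getDatabaseProductName());
      }
    }
  }
}
```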
@@ -26,7 +26,7 @@
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.lang.math.RandomUtils;
import java.util.Random;
import org.apache.gravitino.catalog.lakehouse.iceberg.IcebergColumn;
import org.apache.gravitino.rel.Column;
import org.apache.gravitino.rel.expressions.Expression;
@@ -54,6 +54,8 @@ public class TestBaseConvert {
protected static final Map<String, Type> GRAVITINO_TYPE = new HashMap<>();
protected static final Map<String, org.apache.iceberg.types.Type> ICEBERG_TYPE = new HashMap<>();

private static Random random = new Random(System.currentTimeMillis());

static {
GRAVITINO_TYPE.put("BOOLEAN", org.apache.gravitino.rel.types.Types.BooleanType.get());
// Types not supported by iceberg
@@ -113,24 +115,24 @@ protected static SortOrder[] createSortOrder(String... colNames) {
results.add(
SortOrders.of(
field(colName),
RandomUtils.nextBoolean() ? SortDirection.DESCENDING : SortDirection.ASCENDING,
RandomUtils.nextBoolean() ? NullOrdering.NULLS_FIRST : NullOrdering.NULLS_LAST));
random.nextBoolean() ? SortDirection.DESCENDING : SortDirection.ASCENDING,
random.nextBoolean() ? NullOrdering.NULLS_FIRST : NullOrdering.NULLS_LAST));
}
return results.toArray(new SortOrder[0]);
}

protected static SortOrder createSortOrder(String name, String colName) {
return SortOrders.of(
FunctionExpression.of(name, field(colName)),
RandomUtils.nextBoolean() ? SortDirection.DESCENDING : SortDirection.ASCENDING,
RandomUtils.nextBoolean() ? NullOrdering.NULLS_FIRST : NullOrdering.NULLS_LAST);
random.nextBoolean() ? SortDirection.DESCENDING : SortDirection.ASCENDING,
random.nextBoolean() ? NullOrdering.NULLS_FIRST : NullOrdering.NULLS_LAST);
}

protected static SortOrder createSortOrder(String name, int width, String colName) {
return SortOrders.of(
FunctionExpression.of(name, Literals.integerLiteral(width), field(colName)),
RandomUtils.nextBoolean() ? SortDirection.DESCENDING : SortDirection.ASCENDING,
RandomUtils.nextBoolean() ? NullOrdering.NULLS_FIRST : NullOrdering.NULLS_LAST);
random.nextBoolean() ? SortDirection.DESCENDING : SortDirection.ASCENDING,
random.nextBoolean() ? NullOrdering.NULLS_FIRST : NullOrdering.NULLS_LAST);
}

protected static Types.NestedField createNestedField(
Expand All @@ -143,7 +145,7 @@ protected static Types.NestedField[] createNestedField(String... colNames) {
for (int i = 0; i < colNames.length; i++) {
results.add(
Types.NestedField.of(
i + 1, RandomUtils.nextBoolean(), colNames[i], getRandomIcebergType(), TEST_COMMENT));
i + 1, random.nextBoolean(), colNames[i], getRandomIcebergType(), TEST_COMMENT));
}
return results.toArray(new Types.NestedField[0]);
}
@@ -211,15 +213,15 @@ protected static String getGravitinoSortOrderExpressionString(Expression sortOrd
private static Type getRandomGravitinoType() {
Collection<Type> values = GRAVITINO_TYPE.values();
return values.stream()
.skip(RandomUtils.nextInt(values.size()))
.skip(random.nextInt(values.size()))
.findFirst()
.orElseThrow(() -> new RuntimeException("No type found"));
}

private static org.apache.iceberg.types.Type getRandomIcebergType() {
Collection<org.apache.iceberg.types.Type> values = ICEBERG_TYPE.values();
return values.stream()
.skip(RandomUtils.nextInt(values.size()))
.skip(random.nextInt(values.size()))
.findFirst()
.orElseThrow(() -> new RuntimeException("No type found"));
}
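The file above migrates test randomness from commons-lang's RandomUtils to a shared java.util.Random, dropping another transitive dependency. A minimal standalone sketch of the equivalent calls:

```java
import java.util.Random;

// Sketch of the replacement applied above: one shared, time-seeded generator
// covers everything RandomUtils supplied to these test helpers.
class RandomMigration {
  private static final Random random = new Random(System.currentTimeMillis());

  public static void main(String[] args) {
    boolean flip = random.nextBoolean(); // was RandomUtils.nextBoolean()
    int pick = random.nextInt(10);       // was RandomUtils.nextInt(10); bound is exclusive
    System.out.println(flip + " " + pick);
  }
}
```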
@@ -17,7 +17,7 @@
* under the License.
*/

package org.apache.gravitino.server.web;
package org.apache.gravitino;

import java.util.Map;

@@ -24,6 +24,11 @@ public final class ConfigConstants {

private ConfigConstants() {}

/** HTTP Server port, reused by Gravitino server and Iceberg REST server */
public static final String WEBSERVER_HTTP_PORT = "httpPort";
/** HTTPS Server port, reused by Gravitino server and Iceberg REST server */
public static final String WEBSERVER_HTTPS_PORT = "httpsPort";

/** Error message used when a required configuration value is blank. */
public static final String NOT_BLANK_ERROR_MSG = "The value can't be blank";

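A hedged usage sketch of the shared port keys (the qualifying helper class is an assumption; only the keys and the gravitino.iceberg-rest. prefix appear in this PR):

```java
// Hedged sketch, not from this PR: both servers can now derive their full
// property names from the shared relative keys instead of duplicating literals.
class PortKeySketch {
  // Prefix taken from IcebergConfig.ICEBERG_CONFIG_PREFIX later in this PR.
  static final String ICEBERG_REST_PREFIX = "gravitino.iceberg-rest.";

  public static void main(String[] args) {
    String httpKey = ICEBERG_REST_PREFIX + "httpPort";   // ConfigConstants.WEBSERVER_HTTP_PORT
    String httpsKey = ICEBERG_REST_PREFIX + "httpsPort"; // ConfigConstants.WEBSERVER_HTTPS_PORT
    System.out.println(httpKey + ", " + httpsKey);
  }
}
```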
1 change: 1 addition & 0 deletions gradle/libs.versions.toml
@@ -138,6 +138,7 @@ hive2-common = { group = "org.apache.hive", name = "hive-common", version.ref =
hive2-jdbc = { group = "org.apache.hive", name = "hive-jdbc", version.ref = "hive2"}
hadoop2-auth = { group = "org.apache.hadoop", name = "hadoop-auth", version.ref = "hadoop2" }
hadoop2-hdfs = { group = "org.apache.hadoop", name = "hadoop-hdfs", version.ref = "hadoop2" }
hadoop2-hdfs-client = { group = "org.apache.hadoop", name = "hadoop-hdfs-client", version.ref = "hadoop2" }
hadoop2-common = { group = "org.apache.hadoop", name = "hadoop-common", version.ref = "hadoop2"}
hadoop2-mapreduce-client-core = { group = "org.apache.hadoop", name = "hadoop-mapreduce-client-core", version.ref = "hadoop2"}
hadoop3-hdfs = { group = "org.apache.hadoop", name = "hadoop-hdfs", version.ref = "hadoop3" }
35 changes: 29 additions & 6 deletions iceberg/iceberg-common/build.gradle.kts
@@ -26,12 +26,19 @@ plugins {

dependencies {
implementation(project(":catalogs:catalog-common"))
implementation(project(":core"))
implementation(project(":common"))
implementation(project(":server-common"))
implementation(project(":core")) {
exclude("*")
}
implementation(project(":common")) {
exclude("*")
}
implementation(libs.bundles.iceberg)
implementation(libs.bundles.log4j)
implementation(libs.bundles.kerby) {
exclude("org.jline")
}
implementation(libs.caffeine)
implementation(libs.cglib)
implementation(libs.commons.lang3)
implementation(libs.guava)
implementation(libs.iceberg.aliyun)
@@ -42,17 +49,22 @@
exclude("com.github.spotbugs")
exclude("com.sun.jersey")
exclude("javax.servlet")
exclude("org.apache.curator")
exclude("org.apache.zookeeper")
exclude("org.mortbay.jetty")
}
// use hdfs-default.xml
implementation(libs.hadoop2.hdfs) {
exclude("*")
}
implementation(libs.hadoop2.hdfs.client) {
exclude("com.sun.jersey")
exclude("javax.servlet")
exclude("org.fusesource.leveldbjni")
exclude("org.mortbay.jetty")
}
implementation(libs.hadoop2.mapreduce.client.core) {
exclude("com.sun.jersey")
exclude("javax.servlet")
exclude("org.mortbay.jetty")
exclude("*")
}
implementation(libs.hive2.metastore) {
exclude("co.cask.tephra")
@@ -61,18 +73,28 @@
exclude("com.sun.jersey")
exclude("com.tdunning", "json")
exclude("com.zaxxer", "HikariCP")
exclude("com.github.joshelser")
exclude("io.dropwizard.metrics")
exclude("javax.servlet")
exclude("javax.transaction", "transaction-api")
exclude("jline")
exclude("org.apache.ant")
exclude("org.apache.avro", "avro")
exclude("org.apache.curator")
exclude("org.apache.derby")
exclude("org.apache.hbase")
exclude("org.apache.hive", "hive-service-rpc")
exclude("org.apache.hadoop")
exclude("org.apache.hadoop", "hadoop-yarn-api")
exclude("org.apache.hadoop", "hadoop-yarn-server-applicationhistoryservice")
exclude("org.apache.hadoop", "hadoop-yarn-server-common")
exclude("org.apache.hadoop", "hadoop-yarn-server-resourcemanager")
exclude("org.apache.hadoop", "hadoop-yarn-server-web-proxy")
exclude("org.apache.logging.log4j")
exclude("org.apache.parquet", "parquet-hadoop-bundle")
exclude("org.apache.orc")
exclude("org.apache.zookeeper")
exclude("org.datanucleus")
exclude("org.eclipse.jetty.aggregate", "jetty-all")
exclude("org.eclipse.jetty.orbit", "javax.servlet")
exclude("org.mortbay.jetty")
Expand All @@ -83,6 +105,7 @@ dependencies {
annotationProcessor(libs.lombok)
compileOnly(libs.lombok)

testImplementation(project(":server-common"))
testImplementation(libs.junit.jupiter.api)
testImplementation(libs.junit.jupiter.params)
testImplementation(libs.sqlite.jdbc)
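The `// use hdfs-default.xml` comment above explains the blanket exclude on hadoop2.hdfs: only the jar's bundled default resource files are wanted. A hedged sketch of the standard Hadoop mechanism this relies on (not code from this PR):

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.HdfsConfiguration;

// Hedged sketch, not from this PR: HdfsConfiguration registers hdfs-default.xml
// as a default resource, so its values resolve whenever the hadoop-hdfs jar
// (which bundles that file) is on the classpath, even with every transitive
// dependency excluded via exclude("*").
class HdfsDefaultsProbe {
  public static void main(String[] args) {
    Configuration conf = new HdfsConfiguration();
    System.out.println(conf.get("dfs.replication")); // "3", from hdfs-default.xml
  }
}
```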
@@ -24,18 +24,20 @@
import java.util.Optional;
import org.apache.commons.lang3.StringUtils;
import org.apache.gravitino.Config;
import org.apache.gravitino.OverwriteDefaultConfig;
import org.apache.gravitino.catalog.lakehouse.iceberg.IcebergConstants;
import org.apache.gravitino.catalog.lakehouse.iceberg.IcebergPropertiesUtils;
import org.apache.gravitino.config.ConfigBuilder;
import org.apache.gravitino.config.ConfigConstants;
import org.apache.gravitino.config.ConfigEntry;
import org.apache.gravitino.server.web.JettyServerConfig;
import org.apache.gravitino.server.web.OverwriteDefaultConfig;

public class IcebergConfig extends Config implements OverwriteDefaultConfig {

public static final String ICEBERG_CONFIG_PREFIX = "gravitino.iceberg-rest.";

public static final int DEFAULT_ICEBERG_REST_SERVICE_HTTP_PORT = 9001;
public static final int DEFAULT_ICEBERG_REST_SERVICE_HTTPS_PORT = 9433;

public static final ConfigEntry<String> CATALOG_BACKEND =
new ConfigBuilder(IcebergConstants.CATALOG_BACKEND)
.doc("Catalog backend of Gravitino Iceberg catalog")
@@ -246,9 +248,9 @@ public Map<String, String> getIcebergCatalogProperties() {
@Override
public Map<String, String> getOverwriteDefaultConfig() {
return ImmutableMap.of(
JettyServerConfig.WEBSERVER_HTTP_PORT.getKey(),
String.valueOf(JettyServerConfig.DEFAULT_ICEBERG_REST_SERVICE_HTTP_PORT),
JettyServerConfig.WEBSERVER_HTTPS_PORT.getKey(),
String.valueOf(JettyServerConfig.DEFAULT_ICEBERG_REST_SERVICE_HTTPS_PORT));
ConfigConstants.WEBSERVER_HTTP_PORT,
String.valueOf(DEFAULT_ICEBERG_REST_SERVICE_HTTP_PORT),
ConfigConstants.WEBSERVER_HTTPS_PORT,
String.valueOf(DEFAULT_ICEBERG_REST_SERVICE_HTTPS_PORT));
}
}
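With OverwriteDefaultConfig relocated to org.apache.gravitino, IcebergConfig above now owns its REST-server port defaults. A hedged sketch of the consumer side (the merge helper is an assumption; this PR shows only the producer):

```java
import java.util.HashMap;
import java.util.Map;

// Hedged sketch, not from this PR: apply a config's overwritten defaults
// (httpPort -> 9001, httpsPort -> 9433 per IcebergConfig above) while letting
// explicitly supplied properties win.
class OverwriteDefaultsSketch {
  static Map<String, String> merge(
      Map<String, String> userProps, Map<String, String> overwriteDefaults) {
    Map<String, String> merged = new HashMap<>(overwriteDefaults);
    merged.putAll(userProps); // user-provided values override any default
    return merged;
  }
}
```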
@@ -47,10 +47,9 @@ public void testIcebergHttpPort() {
IcebergConfig icebergConfig = new IcebergConfig(properties);
JettyServerConfig jettyServerConfig = JettyServerConfig.fromConfig(icebergConfig);
Assertions.assertEquals(
JettyServerConfig.DEFAULT_ICEBERG_REST_SERVICE_HTTP_PORT, jettyServerConfig.getHttpPort());
IcebergConfig.DEFAULT_ICEBERG_REST_SERVICE_HTTP_PORT, jettyServerConfig.getHttpPort());
Assertions.assertEquals(
JettyServerConfig.DEFAULT_ICEBERG_REST_SERVICE_HTTPS_PORT,
jettyServerConfig.getHttpsPort());
IcebergConfig.DEFAULT_ICEBERG_REST_SERVICE_HTTPS_PORT, jettyServerConfig.getHttpsPort());

properties =
ImmutableMap.of(
14 changes: 11 additions & 3 deletions iceberg/iceberg-rest-server/build.gradle.kts
@@ -34,14 +34,22 @@ dependencies {
implementation(project(":api"))
implementation(project(":catalogs:catalog-common"))
implementation(project(":clients:client-java"))
implementation(project(":core"))
implementation(project(":common"))
implementation(project(":core")) {
exclude("*")
}
implementation(project(":common")) {
exclude("*")
}
implementation(project(":iceberg:iceberg-common"))
implementation(project(":server-common"))
implementation(project(":server-common")) {
exclude("*")
}
implementation(libs.bundles.iceberg)
implementation(libs.bundles.jetty)
implementation(libs.bundles.jersey)
implementation(libs.bundles.log4j)
implementation(libs.bundles.metrics)
implementation(libs.bundles.prometheus)
implementation(libs.caffeine)
implementation(libs.commons.lang3)
implementation(libs.guava)
@@ -20,7 +20,6 @@

import com.google.common.collect.Maps;
import java.util.Map;
import java.util.UUID;
import org.apache.gravitino.catalog.lakehouse.iceberg.IcebergConstants;
import org.apache.gravitino.iceberg.common.IcebergConfig;
import org.apache.gravitino.iceberg.common.ops.IcebergTableOps;
Expand All @@ -33,9 +32,6 @@
import org.junit.jupiter.params.provider.ValueSource;

public class TestConfigBasedIcebergTableOpsProvider {
private static final String STORE_PATH =
"/tmp/gravitino_test_iceberg_jdbc_backend_" + UUID.randomUUID().toString().replace("-", "");

@Test
public void testValidIcebergTableOps() {
String hiveCatalogName = "hive_backend";
@@ -51,13 +47,11 @@ public void testValidIcebergTableOps() {
// jdbc backend catalog
config.put("catalog.jdbc_backend.catalog-backend-name", jdbcCatalogName);
config.put("catalog.jdbc_backend.catalog-backend", "jdbc");
config.put(
"catalog.jdbc_backend.uri",
String.format("jdbc:h2:%s;DB_CLOSE_DELAY=-1;MODE=MYSQL", STORE_PATH));
config.put("catalog.jdbc_backend.uri", "jdbc:sqlite::memory:");
config.put("catalog.jdbc_backend.warehouse", "/tmp/usr/jdbc/warehouse");
config.put("catalog.jdbc_backend.jdbc.password", "gravitino");
config.put("catalog.jdbc_backend.jdbc.user", "gravitino");
config.put("catalog.jdbc_backend.jdbc-driver", "org.h2.Driver");
config.put("catalog.jdbc_backend.jdbc-driver", "org.sqlite.JDBC");
config.put("catalog.jdbc_backend.jdbc-initialize", "true");
// default catalog
config.put("catalog-backend-name", defaultCatalogName);
@@ -87,7 +81,7 @@
Assertions.assertEquals("jdbc", jdbcIcebergConfig.get(IcebergConfig.CATALOG_BACKEND));
Assertions.assertEquals(
"/tmp/usr/jdbc/warehouse", jdbcIcebergConfig.get(IcebergConfig.CATALOG_WAREHOUSE));
Assertions.assertEquals("org.h2.Driver", jdbcIcebergConfig.get(IcebergConfig.JDBC_DRIVER));
Assertions.assertEquals("org.sqlite.JDBC", jdbcIcebergConfig.get(IcebergConfig.JDBC_DRIVER));
Assertions.assertEquals(true, jdbcIcebergConfig.get(IcebergConfig.JDBC_INIT_TABLES));

Assertions.assertEquals(
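The switch above from a file-backed H2 store to jdbc:sqlite::memory: removes the generated STORE_PATH entirely. A hedged standalone sketch (table and class names are illustrative) of what the in-memory URI provides:

```java
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

// Hedged sketch, not from this PR: an in-memory SQLite database lives only as
// long as its connection, so the test needs no store path under /tmp, no
// DB_CLOSE_DELAY tuning, and no cleanup.
class SqliteInMemoryDemo {
  public static void main(String[] args) throws Exception {
    try (Connection conn = DriverManager.getConnection("jdbc:sqlite::memory:");
        Statement stmt = conn.createStatement()) {
      stmt.executeUpdate("CREATE TABLE demo (id INTEGER PRIMARY KEY)");
      stmt.executeUpdate("INSERT INTO demo (id) VALUES (1)");
    }
  }
}
```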