From d5c7c79d875125999a239392170399a90d04a5c6 Mon Sep 17 00:00:00 2001
From: Shiyan Xu <2701446+xushiyan@users.noreply.github.com>
Date: Fri, 22 Jul 2022 19:18:45 -0500
Subject: [PATCH] Revert "[HUDI-4324] Remove use_jdbc config from hudi sync (#6072)" (#6160)

This reverts commit 046044c83d6382d455cfb0ff5c1130ddf926fcf3.
---
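Note: this revert restores the deprecated hoodie.datasource.hive_sync.use_jdbc flag and
drops the default of hoodie.datasource.hive_sync.mode, so writers that never set an
explicit sync mode fall back to the old JDBC behavior. A minimal sketch of the restored
keys using Hudi's TypedProperties; the class name RestoredHiveSyncProps is illustrative
only:

    import org.apache.hudi.common.config.TypedProperties;

    public class RestoredHiveSyncProps {
      public static void main(String[] args) {
        TypedProperties props = new TypedProperties();
        // Restored by this revert; treated as "true" when absent.
        props.setProperty("hoodie.datasource.hive_sync.use_jdbc", "true");
        // An explicit mode, when present, takes precedence over use_jdbc
        // (see the HoodieHiveSyncClient hunk below).
        // props.setProperty("hoodie.datasource.hive_sync.mode", "hms");
        System.out.println(props);
      }
    }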
 conf/hudi-defaults.conf.template              |  2 +-
 docker/demo/config/hoodie-incr.properties     |  3 +-
 docker/demo/sparksql-incremental.commands     |  2 --
 .../util/TestDFSPropertiesConfiguration.java  |  2 +-
 .../external-config/hudi-defaults.conf        |  2 +-
 .../hudi/configuration/FlinkOptions.java      |  6 ++++
 .../hudi/sink/utils/HiveSyncContext.java      |  2 ++
 .../hudi/streamer/FlinkStreamerConfig.java    |  4 +++
 hudi-kafka-connect/demo/config-sink-hive.json |  1 +
 .../connect/writers/KafkaConnectConfigs.java  |  1 +
 .../org/apache/hudi/DataSourceOptions.scala   | 10 ++++++
 .../org/apache/hudi/HoodieWriterUtils.scala   |  1 +
 .../org/apache/hudi/hive/HiveSyncConfig.java  |  5 +++
 .../hudi/hive/HiveSyncConfigHolder.java       | 11 ++++++-
 .../hudi/hive/HoodieHiveSyncClient.java       | 32 +++++++++++--------
 .../hudi/hive/ddl/HiveQueryDDLExecutor.java   |  2 +-
 .../apache/hudi/hive/ddl/JDBCExecutor.java    |  2 +-
 .../HiveSyncGlobalCommitParams.java           |  2 --
 .../apache/hudi/hive/TestHiveSyncTool.java    |  2 +-
 .../utilities/HoodieDropPartitionsTool.java   |  9 ++++--
 20 files changed, 72 insertions(+), 29 deletions(-)

diff --git a/conf/hudi-defaults.conf.template b/conf/hudi-defaults.conf.template
index fbcedb3f18b9c..175dbaf23d739 100644
--- a/conf/hudi-defaults.conf.template
+++ b/conf/hudi-defaults.conf.template
@@ -20,7 +20,7 @@
 # Example:
 # hoodie.datasource.hive_sync.jdbcurl            jdbc:hive2://localhost:10000
-# hoodie.datasource.hive_sync.mode               jdbc
+# hoodie.datasource.hive_sync.use_jdbc           true
 # hoodie.datasource.hive_sync.support_timestamp  false
 # hoodie.index.type                              BLOOM
 # hoodie.metadata.enable                         false
diff --git a/docker/demo/config/hoodie-incr.properties b/docker/demo/config/hoodie-incr.properties
index 47bfc95b53cbe..80f474b1e7716 100644
--- a/docker/demo/config/hoodie-incr.properties
+++ b/docker/demo/config/hoodie-incr.properties
@@ -28,6 +28,5 @@ hoodie.deltastreamer.source.hoodieincr.path=/docker_hoodie_sync_valid_test
 hoodie.deltastreamer.source.hoodieincr.read_latest_on_missing_ckpt=true
 # hive sync
 hoodie.datasource.hive_sync.table=docker_hoodie_sync_valid_test_2
-hoodie.datasource.hive_sync.mode=jdbc
 hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000
-hoodie.datasource.hive_sync.partition_fields=partition
+hoodie.datasource.hive_sync.partition_fields=partition
\ No newline at end of file
diff --git a/docker/demo/sparksql-incremental.commands b/docker/demo/sparksql-incremental.commands
index 3d7da63703c2c..9ec586e49d854 100644
--- a/docker/demo/sparksql-incremental.commands
+++ b/docker/demo/sparksql-incremental.commands
@@ -47,7 +47,6 @@ spark.sql("select key, `_hoodie_partition_path` as datestr, symbol, ts, open, cl
   option(HoodieWriteConfig.TBL_NAME.key(), "stock_ticks_derived_mor").
   option(HoodieSyncConfig.META_SYNC_TABLE_NAME.key(), "stock_ticks_derived_mor").
   option(HoodieSyncConfig.META_SYNC_DATABASE_NAME.key(), "default").
-  option(HiveSyncConfigHolder.HIVE_SYNC_MODE.key(), "jdbc").
   option(HiveSyncConfigHolder.HIVE_URL.key(), "jdbc:hive2://hiveserver:10000").
   option(HiveSyncConfigHolder.HIVE_USER.key(), "hive").
   option(HiveSyncConfigHolder.HIVE_PASS.key(), "hive").
@@ -80,7 +79,6 @@ spark.sql("select key, `_hoodie_partition_path` as datestr, symbol, ts, open, cl
   option(HoodieWriteConfig.TBL_NAME.key(), "stock_ticks_derived_mor_bs").
   option(HoodieSyncConfig.META_SYNC_TABLE_NAME.key(), "stock_ticks_derived_mor_bs").
   option(HoodieSyncConfig.META_SYNC_DATABASE_NAME.key(), "default").
-  option(HiveSyncConfigHolder.HIVE_SYNC_MODE.key(), "jdbc").
   option(HiveSyncConfigHolder.HIVE_URL.key(), "jdbc:hive2://hiveserver:10000").
   option(HiveSyncConfigHolder.HIVE_USER.key(), "hive").
   option(HiveSyncConfigHolder.HIVE_PASS.key(), "hive").
diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestDFSPropertiesConfiguration.java b/hudi-common/src/test/java/org/apache/hudi/common/util/TestDFSPropertiesConfiguration.java
index a122f414f93d9..465739340dc86 100644
--- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestDFSPropertiesConfiguration.java
+++ b/hudi-common/src/test/java/org/apache/hudi/common/util/TestDFSPropertiesConfiguration.java
@@ -185,7 +185,7 @@ public void testLoadGlobalConfFile() {
     DFSPropertiesConfiguration.refreshGlobalProps();
     assertEquals(5, DFSPropertiesConfiguration.getGlobalProps().size());
     assertEquals("jdbc:hive2://localhost:10000", DFSPropertiesConfiguration.getGlobalProps().get("hoodie.datasource.hive_sync.jdbcurl"));
-    assertEquals("jdbc", DFSPropertiesConfiguration.getGlobalProps().get("hoodie.datasource.hive_sync.mode"));
+    assertEquals("true", DFSPropertiesConfiguration.getGlobalProps().get("hoodie.datasource.hive_sync.use_jdbc"));
     assertEquals("false", DFSPropertiesConfiguration.getGlobalProps().get("hoodie.datasource.hive_sync.support_timestamp"));
     assertEquals("BLOOM", DFSPropertiesConfiguration.getGlobalProps().get("hoodie.index.type"));
     assertEquals("true", DFSPropertiesConfiguration.getGlobalProps().get("hoodie.metadata.enable"));
diff --git a/hudi-common/src/test/resources/external-config/hudi-defaults.conf b/hudi-common/src/test/resources/external-config/hudi-defaults.conf
index 2e4c3a5d75429..1133adb4d7735 100644
--- a/hudi-common/src/test/resources/external-config/hudi-defaults.conf
+++ b/hudi-common/src/test/resources/external-config/hudi-defaults.conf
@@ -20,7 +20,7 @@
 # Example:
 hoodie.datasource.hive_sync.jdbcurl            jdbc:hive2://localhost:10000
-hoodie.datasource.hive_sync.mode               jdbc
+hoodie.datasource.hive_sync.use_jdbc           true
 hoodie.datasource.hive_sync.support_timestamp  false
 hoodie.index.type                              BLOOM
 hoodie.metadata.enable                         true
diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java
index cf613d62aa85e..0984296ee54ef 100644
--- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java
@@ -756,6 +756,12 @@ private FlinkOptions() {
       .defaultValue(false)
       .withDescription("Assume partitioning is yyyy/mm/dd, default false");
 
+  public static final ConfigOption<Boolean> HIVE_SYNC_USE_JDBC = ConfigOptions
+      .key("hive_sync.use_jdbc")
+      .booleanType()
+      .defaultValue(true)
+      .withDescription("Use JDBC when hive synchronization is enabled, default true");
+
   public static final ConfigOption<Boolean> HIVE_SYNC_AUTO_CREATE_DB = ConfigOptions
       .key("hive_sync.auto_create_db")
       .booleanType()
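The Flink option above feeds the same table-level key. A minimal sketch of setting it on
a Flink Configuration; the class name is illustrative and the surrounding pipeline wiring
is omitted:

    import org.apache.flink.configuration.Configuration;
    import org.apache.hudi.configuration.FlinkOptions;

    public class HiveSyncUseJdbcExample {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Mirrors the call FlinkStreamerConfig.toFlinkConfig() makes in a later hunk.
        conf.setBoolean(FlinkOptions.HIVE_SYNC_USE_JDBC, true); // default is true
        System.out.println(conf.getBoolean(FlinkOptions.HIVE_SYNC_USE_JDBC));
      }
    }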
diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/utils/HiveSyncContext.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/utils/HiveSyncContext.java
index cceab5a6157c9..e34adac580f70 100644
--- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/utils/HiveSyncContext.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/utils/HiveSyncContext.java
@@ -43,6 +43,7 @@
 import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_TABLE_SERDE_PROPERTIES;
 import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_URL;
 import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USER;
+import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USE_JDBC;
 import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USE_PRE_APACHE_INPUT_FORMAT;
 import static org.apache.hudi.hive.HiveSyncConfigHolder.METASTORE_URIS;
 import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_ASSUME_DATE_PARTITION;
@@ -104,6 +105,7 @@ public static Properties buildSyncConfig(Configuration conf) {
     props.setPropertyIfNonNull(HIVE_TABLE_SERDE_PROPERTIES.key(), conf.getString(FlinkOptions.HIVE_SYNC_TABLE_SERDE_PROPERTIES));
     props.setPropertyIfNonNull(META_SYNC_PARTITION_FIELDS.key(), String.join(",", FilePathUtils.extractHivePartitionFields(conf)));
     props.setPropertyIfNonNull(META_SYNC_PARTITION_EXTRACTOR_CLASS.key(), conf.getString(FlinkOptions.HIVE_SYNC_PARTITION_EXTRACTOR_CLASS_NAME));
+    props.setPropertyIfNonNull(HIVE_USE_JDBC.key(), String.valueOf(conf.getBoolean(FlinkOptions.HIVE_SYNC_USE_JDBC)));
     props.setPropertyIfNonNull(META_SYNC_USE_FILE_LISTING_FROM_METADATA.key(), String.valueOf(conf.getBoolean(FlinkOptions.METADATA_ENABLED)));
     props.setPropertyIfNonNull(HIVE_IGNORE_EXCEPTIONS.key(), String.valueOf(conf.getBoolean(FlinkOptions.HIVE_SYNC_IGNORE_EXCEPTIONS)));
     props.setPropertyIfNonNull(HIVE_SUPPORT_TIMESTAMP_TYPE.key(), String.valueOf(conf.getBoolean(FlinkOptions.HIVE_SYNC_SUPPORT_TIMESTAMP)));
diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/FlinkStreamerConfig.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/FlinkStreamerConfig.java
index c99cd13c7612e..3a69e5d080903 100644
--- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/FlinkStreamerConfig.java
+++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/streamer/FlinkStreamerConfig.java
@@ -321,6 +321,9 @@ public class FlinkStreamerConfig extends Configuration {
   @Parameter(names = {"--hive-sync-assume-date-partitioning"}, description = "Assume partitioning is yyyy/mm/dd, default false")
   public Boolean hiveSyncAssumeDatePartition = false;
 
+  @Parameter(names = {"--hive-sync-use-jdbc"}, description = "Use JDBC when hive synchronization is enabled, default true")
+  public Boolean hiveSyncUseJdbc = true;
+
   @Parameter(names = {"--hive-sync-auto-create-db"}, description = "Auto create hive database if it does not exists, default true")
   public Boolean hiveSyncAutoCreateDb = true;
 
@@ -417,6 +420,7 @@ public static org.apache.flink.configuration.Configuration toFlinkConfig(FlinkSt
     conf.setString(FlinkOptions.HIVE_SYNC_PARTITION_FIELDS, config.hiveSyncPartitionFields);
     conf.setString(FlinkOptions.HIVE_SYNC_PARTITION_EXTRACTOR_CLASS_NAME, config.hiveSyncPartitionExtractorClass);
     conf.setBoolean(FlinkOptions.HIVE_SYNC_ASSUME_DATE_PARTITION, config.hiveSyncAssumeDatePartition);
+    conf.setBoolean(FlinkOptions.HIVE_SYNC_USE_JDBC, config.hiveSyncUseJdbc);
     conf.setBoolean(FlinkOptions.HIVE_SYNC_AUTO_CREATE_DB, config.hiveSyncAutoCreateDb);
     conf.setBoolean(FlinkOptions.HIVE_SYNC_IGNORE_EXCEPTIONS, config.hiveSyncIgnoreExceptions);
     conf.setBoolean(FlinkOptions.HIVE_SYNC_SKIP_RO_SUFFIX, config.hiveSyncSkipRoSuffix);
diff --git a/hudi-kafka-connect/demo/config-sink-hive.json b/hudi-kafka-connect/demo/config-sink-hive.json
index 7c49784cbf6a3..214fd1891906f 100644
--- a/hudi-kafka-connect/demo/config-sink-hive.json
+++ b/hudi-kafka-connect/demo/config-sink-hive.json
@@ -21,6 +21,7 @@
     "hoodie.datasource.hive_sync.table": "huditesttopic",
     "hoodie.datasource.hive_sync.partition_fields": "date",
     "hoodie.datasource.hive_sync.partition_extractor_class": "org.apache.hudi.hive.MultiPartKeysValueExtractor",
+    "hoodie.datasource.hive_sync.use_jdbc": "false",
     "hoodie.datasource.hive_sync.mode": "hms",
     "dfs.client.use.datanode.hostname": "true",
     "hive.metastore.uris": "thrift://hivemetastore:9083",
diff --git a/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/KafkaConnectConfigs.java b/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/KafkaConnectConfigs.java
index 3b51fddfa8db6..e4543c692db86 100644
--- a/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/KafkaConnectConfigs.java
+++ b/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/KafkaConnectConfigs.java
@@ -172,6 +172,7 @@ public String getHadoopConfHome() {
   public static final String HIVE_URL = "hoodie.datasource.hive_sync.jdbcurl";
   public static final String HIVE_PARTITION_FIELDS = "hoodie.datasource.hive_sync.partition_fields";
   public static final String HIVE_PARTITION_EXTRACTOR_CLASS = "hoodie.datasource.hive_sync.partition_extractor_class";
+  public static final String HIVE_USE_JDBC = "hoodie.datasource.hive_sync.use_jdbc";
   public static final String HIVE_SYNC_MODE = "hoodie.datasource.hive_sync.mode";
   public static final String HIVE_AUTO_CREATE_DATABASE = "hoodie.datasource.hive_sync.auto_create_database";
   public static final String HIVE_IGNORE_EXCEPTIONS = "hoodie.datasource.hive_sync.ignore_exceptions";
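The demo sink config above pins use_jdbc to false because it syncs through the Hive
metastore (mode hms). A minimal sketch of building the same pair of settings with the
restored constant; the map here is illustrative, not the connector's actual plumbing:

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.hudi.connect.writers.KafkaConnectConfigs;

    public class SinkHiveSyncSettings {
      public static void main(String[] args) {
        Map<String, String> settings = new HashMap<>();
        settings.put(KafkaConnectConfigs.HIVE_USE_JDBC, "false"); // restored constant
        settings.put(KafkaConnectConfigs.HIVE_SYNC_MODE, "hms");  // explicit mode still wins
        settings.forEach((k, v) -> System.out.println(k + "=" + v));
      }
    }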
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala
index 844e171b083d1..cd16e17abb4db 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala
@@ -437,6 +437,10 @@ object DataSourceWriteOptions {
   val HIVE_ASSUME_DATE_PARTITION: ConfigProperty[String] = HoodieSyncConfig.META_SYNC_ASSUME_DATE_PARTITION
   @Deprecated
   val HIVE_USE_PRE_APACHE_INPUT_FORMAT: ConfigProperty[String] = HiveSyncConfigHolder.HIVE_USE_PRE_APACHE_INPUT_FORMAT
+
+  /** @deprecated Use {@link HIVE_SYNC_MODE} instead of this config from 0.9.0 */
+  @Deprecated
+  val HIVE_USE_JDBC: ConfigProperty[String] = HiveSyncConfigHolder.HIVE_USE_JDBC
   @Deprecated
   val HIVE_AUTO_CREATE_DATABASE: ConfigProperty[String] = HiveSyncConfigHolder.HIVE_AUTO_CREATE_DATABASE
   @Deprecated
@@ -496,6 +500,9 @@ object DataSourceWriteOptions {
   /** @deprecated Use {@link HIVE_USE_PRE_APACHE_INPUT_FORMAT} and its methods instead */
   @Deprecated
   val HIVE_USE_PRE_APACHE_INPUT_FORMAT_OPT_KEY = HiveSyncConfigHolder.HIVE_USE_PRE_APACHE_INPUT_FORMAT.key()
+  /** @deprecated Use {@link HIVE_USE_JDBC} and its methods instead */
+  @Deprecated
+  val HIVE_USE_JDBC_OPT_KEY = HiveSyncConfigHolder.HIVE_USE_JDBC.key()
   /** @deprecated Use {@link HIVE_AUTO_CREATE_DATABASE} and its methods instead */
   @Deprecated
   val HIVE_AUTO_CREATE_DATABASE_OPT_KEY = HiveSyncConfigHolder.HIVE_AUTO_CREATE_DATABASE.key()
@@ -686,6 +693,9 @@ object DataSourceWriteOptions {
   val DEFAULT_HIVE_ASSUME_DATE_PARTITION_OPT_VAL = HoodieSyncConfig.META_SYNC_ASSUME_DATE_PARTITION.defaultValue()
   @Deprecated
   val DEFAULT_USE_PRE_APACHE_INPUT_FORMAT_OPT_VAL = "false"
+  /** @deprecated Use {@link HIVE_USE_JDBC} and its methods instead */
+  @Deprecated
+  val DEFAULT_HIVE_USE_JDBC_OPT_VAL = HiveSyncConfigHolder.HIVE_USE_JDBC.defaultValue()
   /** @deprecated Use {@link HIVE_AUTO_CREATE_DATABASE} and its methods instead */
   @Deprecated
   val DEFAULT_HIVE_AUTO_CREATE_DATABASE_OPT_KEY = HiveSyncConfigHolder.HIVE_AUTO_CREATE_DATABASE.defaultValue()
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala
index f6473c2b89731..f9d8a60004a8b 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala
@@ -78,6 +78,7 @@ object HoodieWriterUtils {
     hoodieConfig.setDefaultValue(HoodieSyncConfig.META_SYNC_PARTITION_FIELDS)
     hoodieConfig.setDefaultValue(HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS)
     hoodieConfig.setDefaultValue(HIVE_STYLE_PARTITIONING)
+    hoodieConfig.setDefaultValue(HiveSyncConfigHolder.HIVE_USE_JDBC)
    hoodieConfig.setDefaultValue(HiveSyncConfigHolder.HIVE_CREATE_MANAGED_TABLE)
     hoodieConfig.setDefaultValue(HiveSyncConfigHolder.HIVE_SYNC_AS_DATA_SOURCE_TABLE)
     hoodieConfig.setDefaultValue(ASYNC_COMPACT_ENABLE)
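With the Scala aliases restored, Spark writers can pass the deprecated flag again, in the
same style the demo commands earlier in this patch use for other HiveSyncConfigHolder
options. A sketch of the Java side, assuming an existing Dataset<Row> named df; writer
setup and the remaining required options are omitted:

    import org.apache.hudi.DataSourceWriteOptions;

    // Fragment only: df is an existing org.apache.spark.sql.Dataset<Row>.
    df.write().format("hudi")
        .option(DataSourceWriteOptions.HIVE_USE_JDBC().key(), "true") // deprecated but honored again
        .option("hoodie.datasource.hive_sync.jdbcurl", "jdbc:hive2://hiveserver:10000")
        .save("/tmp/hudi_table");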
diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java
index cdb192f9fedd5..6f2cc50a0af69 100644
--- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java
+++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java
@@ -45,6 +45,7 @@
 import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_TABLE_SERDE_PROPERTIES;
 import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_URL;
 import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USER;
+import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USE_JDBC;
 import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USE_PRE_APACHE_INPUT_FORMAT;
 import static org.apache.hudi.hive.HiveSyncConfigHolder.METASTORE_URIS;
@@ -94,6 +95,9 @@ public static class HiveSyncConfigParams {
         + "com.uber.hoodie to org.apache.hudi. Stop using this after you migrated the table definition to "
         + "org.apache.hudi input format.")
     public Boolean usePreApacheInputFormat;
+    @Deprecated
+    @Parameter(names = {"--use-jdbc"}, description = "Hive jdbc connect url")
+    public Boolean useJdbc;
     @Parameter(names = {"--metastore-uris"}, description = "Hive metastore uris")
     public String metastoreUris;
     @Parameter(names = {"--sync-mode"}, description = "Mode to choose for Hive ops. Valid values are hms,glue,jdbc and hiveql")
@@ -138,6 +142,7 @@ public TypedProperties toProps() {
     props.setPropertyIfNonNull(HIVE_PASS.key(), hivePass);
     props.setPropertyIfNonNull(HIVE_URL.key(), jdbcUrl);
     props.setPropertyIfNonNull(HIVE_USE_PRE_APACHE_INPUT_FORMAT.key(), usePreApacheInputFormat);
+    props.setPropertyIfNonNull(HIVE_USE_JDBC.key(), useJdbc);
     props.setPropertyIfNonNull(HIVE_SYNC_MODE.key(), syncMode);
     props.setPropertyIfNonNull(METASTORE_URIS.key(), metastoreUris);
     props.setPropertyIfNonNull(HIVE_AUTO_CREATE_DATABASE.key(), autoCreateDatabase);
diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfigHolder.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfigHolder.java
index 8c14cdfe29beb..3877782c92026 100644
--- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfigHolder.java
+++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfigHolder.java
@@ -52,6 +52,15 @@ public class HiveSyncConfigHolder {
       .withDocumentation("Flag to choose InputFormat under com.uber.hoodie package instead of org.apache.hudi package. "
           + "Use this when you are in the process of migrating from "
          + "com.uber.hoodie to org.apache.hudi. Stop using this after you migrated the table definition to org.apache.hudi input format");
+  /**
+   * @deprecated Use {@link #HIVE_SYNC_MODE} instead of this config from 0.9.0
+   */
+  @Deprecated
+  public static final ConfigProperty<String> HIVE_USE_JDBC = ConfigProperty
+      .key("hoodie.datasource.hive_sync.use_jdbc")
+      .defaultValue("true")
+      .deprecatedAfter("0.9.0")
+      .withDocumentation("Use JDBC when hive synchronization is enabled");
   public static final ConfigProperty<String> METASTORE_URIS = ConfigProperty
       .key("hoodie.datasource.hive_sync.metastore.uris")
       .defaultValue("thrift://localhost:9083")
@@ -100,7 +109,7 @@ public class HiveSyncConfigHolder {
       .withDocumentation("The number of partitions one batch when synchronous partitions to hive.");
   public static final ConfigProperty<String> HIVE_SYNC_MODE = ConfigProperty
       .key("hoodie.datasource.hive_sync.mode")
-      .defaultValue("jdbc")
+      .noDefaultValue()
       .withDocumentation("Mode to choose for Hive ops. Valid values are hms, jdbc and hiveql.");
   public static final ConfigProperty<Boolean> HIVE_SYNC_BUCKET_SYNC = ConfigProperty
       .key("hoodie.datasource.hive_sync.bucket_sync")
diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java
index 26ba4ae8e1b50..d5a85adcbacc2 100644
--- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java
+++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java
@@ -21,6 +21,7 @@
 import org.apache.hudi.common.table.TableSchemaResolver;
 import org.apache.hudi.common.table.timeline.HoodieInstant;
 import org.apache.hudi.common.util.Option;
+import org.apache.hudi.common.util.StringUtils;
 import org.apache.hudi.common.util.collection.Pair;
 import org.apache.hudi.hive.ddl.DDLExecutor;
 import org.apache.hudi.hive.ddl.HMSDDLExecutor;
@@ -48,6 +49,7 @@
 import static org.apache.hudi.hadoop.utils.HoodieHiveUtils.GLOBALLY_CONSISTENT_READ_TIMESTAMP;
 import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_SYNC_MODE;
+import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USE_JDBC;
 import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_DATABASE_NAME;
 import static org.apache.hudi.sync.common.util.TableUtils.tableId;
@@ -70,19 +72,23 @@ public HoodieHiveSyncClient(HiveSyncConfig config) {
     // Support JDBC, HiveQL and metastore based implementations for backwards compatibility. Future users should
     // disable jdbc and depend on metastore client for all hive registrations
     try {
-      HiveSyncMode syncMode = HiveSyncMode.of(config.getStringOrDefault(HIVE_SYNC_MODE));
-      switch (syncMode) {
-        case HMS:
-          ddlExecutor = new HMSDDLExecutor(config);
-          break;
-        case HIVEQL:
-          ddlExecutor = new HiveQueryDDLExecutor(config);
-          break;
-        case JDBC:
-          ddlExecutor = new JDBCExecutor(config);
-          break;
-        default:
-          throw new HoodieHiveSyncException("Invalid sync mode given " + config.getString(HIVE_SYNC_MODE));
+      if (!StringUtils.isNullOrEmpty(config.getString(HIVE_SYNC_MODE))) {
+        HiveSyncMode syncMode = HiveSyncMode.of(config.getString(HIVE_SYNC_MODE));
+        switch (syncMode) {
+          case HMS:
+            ddlExecutor = new HMSDDLExecutor(config);
+            break;
+          case HIVEQL:
+            ddlExecutor = new HiveQueryDDLExecutor(config);
+            break;
+          case JDBC:
+            ddlExecutor = new JDBCExecutor(config);
+            break;
+          default:
+            throw new HoodieHiveSyncException("Invalid sync mode given " + config.getString(HIVE_SYNC_MODE));
+        }
+      } else {
+        ddlExecutor = config.getBoolean(HIVE_USE_JDBC) ? new JDBCExecutor(config) : new HiveQueryDDLExecutor(config);
       }
       this.client = Hive.get(config.getHiveConf()).getMSC();
     } catch (Exception e) {
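The constructor change above is the heart of the revert: an explicit sync mode wins, and
only when it is absent does the deprecated flag pick the executor. A self-contained
sketch of that precedence; the class and method names are illustrative, not Hudi API:

    public class SyncModeResolution {
      static String resolveExecutor(String syncMode, boolean useJdbc) {
        if (syncMode != null && !syncMode.isEmpty()) {
          switch (syncMode.toLowerCase()) {
            case "hms":    return "HMSDDLExecutor";
            case "hiveql": return "HiveQueryDDLExecutor";
            case "jdbc":   return "JDBCExecutor";
            default: throw new IllegalArgumentException("Invalid sync mode given " + syncMode);
          }
        }
        // Legacy path restored by this revert: use_jdbc decides, defaulting to true.
        return useJdbc ? "JDBCExecutor" : "HiveQueryDDLExecutor";
      }

      public static void main(String[] args) {
        System.out.println(resolveExecutor(null, true));  // JDBCExecutor (old default)
        System.out.println(resolveExecutor("hms", true)); // HMSDDLExecutor (mode wins)
      }
    }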
diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HiveQueryDDLExecutor.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HiveQueryDDLExecutor.java
index 619a417c331f3..90efd2701c793 100644
--- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HiveQueryDDLExecutor.java
+++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/HiveQueryDDLExecutor.java
@@ -47,7 +47,7 @@
 import static org.apache.hudi.sync.common.util.TableUtils.tableId;
 
 /**
- * This class offers DDL executor backed by the HiveQL Driver.
+ * This class offers DDL executor backed by the hive.ql Driver This class preserves the old useJDBC = false way of doing things.
  */
 public class HiveQueryDDLExecutor extends QueryBasedDDLExecutor {
diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/JDBCExecutor.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/JDBCExecutor.java
index 1fc8f082d8c0f..2673e46a9f745 100644
--- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/JDBCExecutor.java
+++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/ddl/JDBCExecutor.java
@@ -43,7 +43,7 @@
 import static org.apache.hudi.hive.util.HiveSchemaUtil.HIVE_ESCAPE_CHARACTER;
 
 /**
- * This class offers DDL executor backed by the jdbc.
+ * This class offers DDL executor backed by the jdbc This class preserves the old useJDBC = true way of doing things.
  */
 public class JDBCExecutor extends QueryBasedDDLExecutor {
diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/replication/HiveSyncGlobalCommitParams.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/replication/HiveSyncGlobalCommitParams.java
index b8b2de73e7062..58188f578e1bf 100644
--- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/replication/HiveSyncGlobalCommitParams.java
+++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/replication/HiveSyncGlobalCommitParams.java
@@ -32,7 +32,6 @@
 import java.io.InputStream;
 import java.util.Properties;
 
-import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_SYNC_MODE;
 import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_URL;
 import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_BASE_PATH;
@@ -93,7 +92,6 @@ Properties mkGlobalHiveSyncProps(boolean forRemote) {
     String jdbcUrl = forRemote ? loadedProps.getProperty(REMOTE_HIVE_SERVER_JDBC_URLS)
         : loadedProps.getProperty(LOCAL_HIVE_SERVER_JDBC_URLS, loadedProps.getProperty(HIVE_URL.key()));
     props.setPropertyIfNonNull(HIVE_URL.key(), jdbcUrl);
-    props.setProperty(HIVE_SYNC_MODE.key(), "jdbc");
     LOG.info("building hivesync config forRemote: " + forRemote + " " + jdbcUrl + " " + basePath);
     return props;
diff --git a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java
index 7abeed480c639..072feeb663160 100644
--- a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java
+++ b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java
@@ -120,7 +120,7 @@ private static Iterable<Object[]> syncModeAndSchemaFromCommitMetadataAndManagedT
     return opts;
   }
 
-  // (useSchemaFromCommitMetadata, syncAsDataSource, syncMode)
+  // (useJdbc, useSchemaFromCommitMetadata, syncAsDataSource)
   private static Iterable<Object[]> syncDataSourceTableParams() {
     List<Object[]> opts = new ArrayList<>();
     for (Object mode : SYNC_MODES) {
@Parameter(names = {"--hive-sync-use-jdbc"}, description = "Use JDBC when hive synchronization.", required = false) + public boolean hiveUseJdbc = true; @Parameter(names = {"--hive-metastore-uris"}, description = "hive meta store uris to use.", required = false) public String hiveHMSUris = null; @Parameter(names = {"--hive-sync-mode"}, description = "Mode to choose for Hive ops. Valid values are hms, jdbc and hiveql.", required = false) @@ -213,7 +215,7 @@ public String toString() { + " --hive-pass-word " + "Masked" + ", \n" + " --hive-jdbc-url " + hiveURL + ", \n" + " --hive-partition-field " + hivePartitionsField + ", \n" - + " --hive-sync-mode " + hiveSyncMode + ", \n" + + " --hive-sync-use-jdbc " + hiveUseJdbc + ", \n" + " --hive-metastore-uris " + hiveHMSUris + ", \n" + " --hive-sync-ignore-exception " + hiveSyncIgnoreException + ", \n" + " --hive-partition-value-extractor-class " + partitionValueExtractorClass + ", \n" @@ -245,7 +247,7 @@ public boolean equals(Object o) { && Objects.equals(hivePassWord, config.hivePassWord) && Objects.equals(hiveURL, config.hiveURL) && Objects.equals(hivePartitionsField, config.hivePartitionsField) - && Objects.equals(hiveSyncMode, config.hiveSyncMode) + && Objects.equals(hiveUseJdbc, config.hiveUseJdbc) && Objects.equals(hiveHMSUris, config.hiveHMSUris) && Objects.equals(partitionValueExtractorClass, config.partitionValueExtractorClass) && Objects.equals(sparkMaster, config.sparkMaster) @@ -259,7 +261,7 @@ public boolean equals(Object o) { public int hashCode() { return Objects.hash(basePath, runningMode, tableName, partitions, instantTime, syncToHive, hiveDataBase, hiveTableName, hiveUserName, hivePassWord, hiveURL, - hivePartitionsField, hiveSyncMode, hiveHMSUris, partitionValueExtractorClass, + hivePartitionsField, hiveUseJdbc, hiveHMSUris, partitionValueExtractorClass, sparkMaster, sparkMemory, propsFilePath, configs, hiveSyncIgnoreException, help); } } @@ -348,6 +350,7 @@ private HiveSyncConfig buildHiveSyncProps() { props.put(DataSourceWriteOptions.HIVE_PASS().key(), cfg.hivePassWord); props.put(DataSourceWriteOptions.HIVE_URL().key(), cfg.hiveURL); props.put(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), cfg.hivePartitionsField); + props.put(DataSourceWriteOptions.HIVE_USE_JDBC().key(), cfg.hiveUseJdbc); props.put(DataSourceWriteOptions.HIVE_SYNC_MODE().key(), cfg.hiveSyncMode); props.put(DataSourceWriteOptions.HIVE_IGNORE_EXCEPTIONS().key(), cfg.hiveSyncIgnoreException); props.put(DataSourceWriteOptions.HIVE_PASS().key(), cfg.hivePassWord);