Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HIVE-28095: Hive Query History #5613

Merged
merged 17 commits into from
Feb 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
Original file line number Diff line number Diff line change
Expand Up @@ -5740,6 +5740,33 @@ public static enum ConfVars {
+ "a background update happens periodically to report the actual state of the query"),
HIVE_SCHEDULED_QUERIES_CREATE_AS_ENABLED("hive.scheduled.queries.create.as.enabled", true,
"This option sets the default behaviour of newly created scheduled queries."),
HIVE_QUERY_HISTORY_ENABLED("hive.query.history.enabled", false,
"Whether to start QueryHistoryService in HS2."),
HIVE_QUERY_HISTORY_EXPLAIN_PLAN_ENABLED("hive.query.history.explain.plan.enabled", true,
"Whether to collect and store explain plan in the query history. Default is true."),
HIVE_QUERY_HISTORY_EXEC_SUMMARY_ENABLED("hive.query.history.exec.summary.enabled", true,
"Whether to collect and store execution summary in the query history. Default is true."),
HIVE_QUERY_HISTORY_BATCH_SIZE("hive.query.history.batch.size", 200,
"The maximum amount of records held in memory " +
"before query history service persists them to the target table. " +
"A small value (like 1-5) will lead to more real-time behavior with the price of small files. " +
"Set this to 0 to wait for the records to be persisted synchronously (not recommended in production)."),
deniskuzZ marked this conversation as resolved.
Show resolved Hide resolved
HIVE_QUERY_HISTORY_MAX_MEMORY_BYTES("hive.query.history.max.memory.bytes",
20 * 1024 * 1024, // 20MB
"The maximum size in bytes the query history queue can grow in the memory before query history service " +
"persists them to the target table." +
"Set this to 0 to disable this check (not recommended in production in order to keep HS2's heap under " +
"control)"),
HIVE_QUERY_HISTORY_FLUSH_INTERVAL_SECONDS("hive.query.history.flush.interval.seconds",
60 * 60, // 1h
"The query history service attempts to flush records from memory to the Iceberg table regardless of the " +
"current batch size. This ensures that history records are not kept inaccessible to users for extended " +
"periods. This property defines the interval for this operation. The default value of 1 hour is " +
"presumably a reasonable tradeoff between generating smaller files and allowing sufficient time for " +
"records to arrive. Set this to 0 to disable (so to 'force' batch size-based strategies)"),
HIVE_QUERY_HISTORY_REPOSITORY_CLASS("hive.query.history.repository.class",
"org.apache.hadoop.hive.ql.queryhistory.repository.IcebergRepository",
"The class implementing QueryHistoryRepository to be used for persisting Record instances"),
HIVE_SECURITY_AUTHORIZATION_SCHEDULED_QUERIES_SUPPORTED("hive.security.authorization.scheduled.queries.supported",
false,
"Enable this if the configured authorizer is able to handle scheduled query related calls."),
Expand Down
27 changes: 27 additions & 0 deletions common/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,33 @@ public Map<String, Long> getEndTimes() {
return ImmutableMap.copyOf(endTimes);
}

/**
 * Returns the time spent on total DAG preparation for this query, i.e. the wall-clock
 * span between the end of compilation and the start of DAG submission, minus the time
 * spent acquiring a Tez session.
 *
 * @return the preparation duration in milliseconds, or 0 if no DAG was submitted
 *         (presumably the timer values are milliseconds — consistent with the other
 *         PerfLogger durations; confirm against getStartTime/getEndTime)
 */
public long getPreparePlanDuration() {
  final long submitDagStart = getStartTime(PerfLogger.TEZ_SUBMIT_DAG);

  // A zero start time means no DAG was ever submitted for this query,
  // so no plan preparation happened at all.
  if (submitDagStart == 0) {
    return 0;
  }

  final long compileEnd = getEndTime(PerfLogger.COMPILE);
  final long sessionAcquisition = getDuration(PerfLogger.TEZ_GET_SESSION);

  return submitDagStart - compileEnd - sessionAcquisition;
}

/**
 * Returns the time spent actually running the DAG.
 *
 * <p>If a SUBMIT_TO_RUNNING phase was recorded, only the span after the DAG
 * transitioned to RUNNING counts; otherwise the whole RUN_DAG span is used.
 *
 * @return the run duration in milliseconds (same time unit as the other
 *         PerfLogger durations)
 */
public long getRunDagDuration() {
  final long submitToRunning = getDuration(PerfLogger.TEZ_SUBMIT_TO_RUNNING);

  if (submitToRunning == 0) {
    // No submit-to-running phase was logged: the full RUN_DAG span is the run time.
    return getDuration(PerfLogger.TEZ_RUN_DAG);
  }

  // Exclude the submit-to-running wait: count only from the moment the DAG was RUNNING.
  return getEndTime(PerfLogger.TEZ_RUN_DAG) - getEndTime(PerfLogger.TEZ_SUBMIT_TO_RUNNING);
}

//Methods for metrics integration. Each thread-local PerfLogger will open/close scope during each perf-log method.
private final transient Map<String, MetricsScope> openScopes = new HashMap<>();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ private void init(Class<?> cls) {
setValue("tez.local.cache.root.folder", System.getProperty("build.dir"));
// prevent RecoveryService from starting, which is not needed in unit tests
setValue("tez.dag.recovery.enabled", "false");
// as Query History Service is enabled by default, it might add overhead to tests, it's not worth using it
// dedicated unit tests are supposed to check that service as much as possible
setValue(HiveConf.ConfVars.HIVE_QUERY_HISTORY_ENABLED.varname, "false");
}

public void setValue(String name, String value) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -929,7 +929,7 @@ public boolean commitInMoveTask() {

@Override
public void storageHandlerCommit(Properties commitProperties, Operation operation)
throws HiveException {
throws HiveException {
String tableName = commitProperties.getProperty(Catalogs.NAME);
String location = commitProperties.getProperty(Catalogs.LOCATION);
String snapshotRef = commitProperties.getProperty(Catalogs.SNAPSHOT_REF);
Expand Down Expand Up @@ -962,6 +962,8 @@ public void storageHandlerCommit(Properties commitProperties, Operation operatio
}
}


@Override
public HiveIcebergOutputCommitter getOutputCommitter() {
return new HiveIcebergOutputCommitter();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,8 @@ private HiveConf initializeConf() {
hiveConf.setVar(HiveConf.ConfVars.HIVE_QUERY_LIFETIME_HOOKS, HiveIcebergQueryLifeTimeHook.class.getName());

MetastoreConf.setBoolVar(hiveConf, MetastoreConf.ConfVars.TRY_DIRECT_SQL, true);

hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_QUERY_HISTORY_ENABLED, false);
return hiveConf;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ public static void beforeTestBase(String transportMode) throws Exception {
hiveConf.setBoolVar(ConfVars.HIVE_SUPPORT_CONCURRENCY, false);
hiveConf.setBoolVar(ConfVars.HIVE_SERVER2_ENABLE_DOAS, false);
hiveConf.setBoolVar(ConfVars.HIVE_FETCH_TASK_CACHING, false);

// query history adds no value to this test, it would just bring in an iceberg handler dependency, which isn't worth it
hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_QUERY_HISTORY_ENABLED, false);
miniHS2 = MiniHiveKdc.getMiniHS2WithKerb(miniHiveKdc, hiveConf);
miniHS2.start(new HashMap<String, String>());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,9 @@ public void setUpBefore() throws Exception {
hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false);
hiveConf.setVar(HiveConf.ConfVars.HIVE_SERVER2_PLAIN_LDAP_USERDNPATTERN,
"uid=%s,ou=People,dc=example,dc=com");

// query history adds no value to this test, it would just bring in an iceberg handler dependency, which isn't worth it
// this should be handled with HiveConfForTests when it's used here too
hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_QUERY_HISTORY_ENABLED, false);
AuthenticationProviderFactory.AuthMethods.LDAP.getConf().setVar(HiveConf.ConfVars.HIVE_SERVER2_PLAIN_LDAP_URL,
"ldap://localhost:" + ldapServer.getPort());
AuthenticationProviderFactory.AuthMethods.LDAP.getConf().setVar(HiveConf.ConfVars.HIVE_SERVER2_PLAIN_LDAP_USERDNPATTERN,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,9 @@ public static void setUpBeforeClass() throws Exception {
hiveConf.setVar(HiveConf.ConfVars.HIVE_SERVER2_TRANSPORT_MODE, HiveServer2TransportMode.http.name());
hiveConf.setVar(HiveConf.ConfVars.HIVE_SERVER2_AUTHENTICATION_JWT_JWKS_URL, "http://localhost:" +
MOCK_JWKS_SERVER_PORT + "/jwks");

// query history adds no value to this test, it would just bring in an iceberg handler dependency, which isn't worth it
// this should be handled with HiveConfForTests when it's used here too
hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_QUERY_HISTORY_ENABLED, false);
miniHS2 = MiniHiveKdc.getMiniHS2WithKerb(miniHiveKdc, hiveConf,
HiveAuthConstants.AuthTypes.KERBEROS.getAuthName() + "," + HiveAuthConstants.AuthTypes.JWT.getAuthName());
miniHS2.start(new HashMap<>());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@ public static void setUpBeforeClass() throws Exception {
SemanticAnalysisHook.class.getName());
confOverlay.put(ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "" + Boolean.FALSE);
confOverlay.put(ConfVars.HIVE_FETCH_TASK_CACHING.varname, "" + false);

// query history adds no value to this test, it would just bring in an iceberg handler dependency, which isn't worth it
// this should be handled with HiveConfForTests when it's used here too
confOverlay.put(ConfVars.HIVE_QUERY_HISTORY_ENABLED.varname, "false");
miniHiveKdc = new MiniHiveKdc();
HiveConf hiveConf = new HiveConf();
miniHS2 = MiniHiveKdc.getMiniHS2WithKerb(miniHiveKdc, hiveConf);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ public static void beforeTest() throws Exception {
confOverlay.put(ConfVars.HIVE_SERVER2_CUSTOM_AUTHENTICATION_CLASS.varname,
CustomAuthenticator.class.getName());
confOverlay.put(ConfVars.HIVE_SCHEDULED_QUERIES_EXECUTOR_ENABLED.varname, "false");
// query history adds no value to this test, it would just bring in an iceberg handler dependency, which isn't worth it
// this should be handled with HiveConfForTests when it's used here too
confOverlay.put(ConfVars.HIVE_QUERY_HISTORY_ENABLED.varname, "false");
miniHiveKdc = new MiniHiveKdc();
HiveConf hiveConf = new HiveConf();
miniHS2 = MiniHiveKdc.getMiniHS2WithKerbWithRemoteHMS(miniHiveKdc, hiveConf, "CUSTOM");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ public static void beforeTest() throws Exception {
confOverlay.put(ConfVars.HIVE_SERVER2_SESSION_HOOK.varname,
SessionHookTest.class.getName());
confOverlay.put(ConfVars.HIVE_SCHEDULED_QUERIES_EXECUTOR_ENABLED.varname, "false");

// query history adds no value to this test, it would just bring in an iceberg handler dependency, which isn't worth it
// this should be handled with HiveConfForTests when it's used here too
confOverlay.put(ConfVars.HIVE_QUERY_HISTORY_ENABLED.varname, "false");
miniHiveKdc = new MiniHiveKdc();
HiveConf hiveConf = new HiveConf();
//using old config value tests backwards compatibility
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ public static void beforeTest() throws Exception {
confOverlay.put(ConfVars.HIVE_SERVER2_SESSION_HOOK.varname,
SessionHookTest.class.getName());
confOverlay.put(ConfVars.HIVE_SCHEDULED_QUERIES_EXECUTOR_ENABLED.varname, "false");

// query history adds no value to this test, it would just bring in an iceberg handler dependency, which isn't worth it
// this should be handled with HiveConfForTests when it's used here too
confOverlay.put(ConfVars.HIVE_QUERY_HISTORY_ENABLED.varname, "false");
miniHiveKdc = new MiniHiveKdc();
HiveConf hiveConf = new HiveConf();
hiveConf.setVar(ConfVars.METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_CLS, "org.apache.hadoop.hive.thrift.DBTokenStore");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,9 @@ public static void beforeTest() throws Exception {
confOverlay.put(ConfVars.HIVE_SERVER2_SESSION_HOOK.varname,
SessionHookTest.class.getName());
confOverlay.put(ConfVars.HIVE_SCHEDULED_QUERIES_EXECUTOR_ENABLED.varname, "false");

// query history adds no value to this test, it would just bring in an iceberg handler dependency, which isn't worth it
// this should be handled with HiveConfForTests when it's used here too
confOverlay.put(ConfVars.HIVE_QUERY_HISTORY_ENABLED.varname, "false");
miniHiveKdc = new MiniHiveKdc();
HiveConf hiveConf = new HiveConf();
miniHS2 = MiniHiveKdc.getMiniHS2WithKerb(miniHiveKdc, hiveConf);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,9 @@ public void setUp() throws Exception {
1, TimeUnit.SECONDS);
hiveConf.setBoolVar(ConfVars.HIVE_SUPPORT_CONCURRENCY, false);
hiveConf.setBoolVar(ConfVars.HIVE_FETCH_TASK_CACHING, false);

// query history adds no value to this test, it would just bring in an iceberg handler dependency, which isn't worth it
// this should be handled with HiveConfForTests when it's used here too
hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_QUERY_HISTORY_ENABLED, false);
miniHS2 = MiniHiveKdc.getMiniHS2WithKerb(miniHiveKdc, hiveConf);
miniHS2.start(new HashMap<String, String>());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ public static void beforeTest() throws Exception {

Map<String, String> confOverlay = new HashMap<>();
confOverlay.put(ConfVars.HIVE_SCHEDULED_QUERIES_EXECUTOR_ENABLED.varname, "false");
// query history adds no value to this test, it would just bring in an iceberg handler dependency, which isn't worth it
// this should be handled with HiveConfForTests when it's used here too
confOverlay.put(ConfVars.HIVE_QUERY_HISTORY_ENABLED.varname, "false");
SSLTestUtils.setHttpConfOverlay(confOverlay);
SSLTestUtils.setSslConfOverlay(confOverlay);

Expand Down
6 changes: 6 additions & 0 deletions itests/hive-unit/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,12 @@
<classifier>tests</classifier>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-iceberg-handler</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
<!-- test inter-project -->
<dependency>
<groupId>com.github.tomakehurst</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ public static void beforeTest() throws Exception {

conf.setVar(ConfVars.HIVE_METASTORE_WAREHOUSE_EXTERNAL, whRootExternal);

// query history adds no value to this test, it would just bring in an iceberg handler dependency, which isn't worth it
// this should be handled with HiveConfForTests when it's used here too
conf.setBoolVar(HiveConf.ConfVars.HIVE_QUERY_HISTORY_ENABLED, false);

miniHS2 = new MiniHS2.Builder().withMiniMR().withRemoteMetastore().withConf(conf).build();
miniHS2.start(new HashMap<String, String>());

Expand Down
Loading
Loading