Skip to content

Commit

Permalink
DCL, DQL, operation
Browse files Browse the repository at this point in the history
  • Loading branch information
abstractdog committed Feb 11, 2025
1 parent 35a3be9 commit cc2a495
Show file tree
Hide file tree
Showing 24 changed files with 336 additions and 89 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import org.apache.hadoop.hive.ql.exec.tez.TezTask;
import org.apache.hadoop.hive.ql.exec.tez.monitoring.TezJobMonitor;
import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import org.apache.hadoop.hive.ql.queryhistory.repository.IcebergRepository;
import org.apache.hadoop.hive.ql.queryhistory.repository.QueryHistoryRepository;
import org.apache.hadoop.hive.ql.queryhistory.schema.DummyRecord;
Expand Down Expand Up @@ -98,6 +99,7 @@ public void testSimpleFlush() throws Exception {
HiveConf.setQueryString(conf, DummyRecord.SQL);

QueryState queryState = DriverFactory.getNewQueryState(conf);
queryState.setCommandType(HiveOperation.valueOf(DummyRecord.OPERATION));

// prepare SessionState
SessionState ss = SessionState.start(conf);
Expand Down Expand Up @@ -168,10 +170,9 @@ private static void preparePerfLogger(QueryInfo queryInfo, PerfLogger perfLogger
}

private static void prepareDriverContext(DriverContext driverContext) {
// mock the queryType and ddlType, because the calculation if those are quite complicated,
// mock the queryType and operation, because the calculation of those is quite complicated,
// which should be unit tested separately
when(driverContext.getQueryType()).thenReturn(DummyRecord.QUERY_TYPE);
when(driverContext.getDdlType()).thenReturn(DummyRecord.DDL_TYPE);
driverContext.setQueryErrorMessage(DummyRecord.FAILURE_REASON);
driverContext.setExplainPlan(DummyRecord.PLAN);
FetchTask fetchTask = mock(FetchTask.class);
Expand Down Expand Up @@ -304,7 +305,7 @@ private void checkValues(SessionState ss, QueryState queryState, DriverContext d
compareValue(Schema.Field.QUERY_STATE, DummyRecord.QUERY_STATE, record, fieldsValidated);
compareValue(Schema.Field.QUERY_TYPE, DummyRecord.QUERY_TYPE.getName(), record,
fieldsValidated);
compareValue(Schema.Field.DDL_TYPE, DummyRecord.DDL_TYPE, record, fieldsValidated);
compareValue(Schema.Field.OPERATION, DummyRecord.OPERATION, record, fieldsValidated);
compareValue(Schema.Field.SERVER_ADDRESS, serviceContext.getHost(), record, fieldsValidated);
compareValue(Schema.Field.SERVER_PORT, serviceContext.getPort(), record, fieldsValidated);
compareValue(Schema.Field.CLIENT_ADDRESS, DummyRecord.CLIENT_ADDRESS, record, fieldsValidated);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ public class DummyRecord extends Record {
public static final String SQL = "SELECT 1";
public static final int HIVERSERVER2_PROTOCOL_VERSION = 1;
public static final String CURRENT_DATABASE = "default";
public static final QueryType QUERY_TYPE = QueryType.QUERY;
public static final String DDL_TYPE = "ALTERDATABASE";
public static final QueryType QUERY_TYPE = QueryType.DQL;
public static final String OPERATION = "ALTERDATABASE";
public static final String CLIENT_ADDRESS = "client_host.domain";

// example durations with distinguishable values (even in case of addition), like: 5100 means planning + prepare plan
Expand Down Expand Up @@ -107,7 +107,7 @@ public DummyRecord(long queryStartMillis) {
setTezAmAddress(TEZ_AM_ADDRESS);
setQueryState(QUERY_STATE);
setQueryType(QUERY_TYPE);
setDdlType(DDL_TYPE);
setOperation(OPERATION);
setServerAddress(SERVER_HOST);
setServerPort(SERVER_PORT);
setClientAddress(CLIENT_ADDRESS);
Expand Down
4 changes: 0 additions & 4 deletions ql/src/java/org/apache/hadoop/hive/ql/DriverContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -149,10 +149,6 @@ public QueryProperties.QueryType getQueryType() {
return queryProperties == null ? null : queryProperties.getQueryType();
}

public String getDdlType() {
return queryProperties == null ? null : queryProperties.getDdlType();
}

public TezRuntimeContext getRuntimeContext() {
return runtimeContext;
}
Expand Down
12 changes: 2 additions & 10 deletions ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,10 @@
*/
public class QueryProperties {
public enum QueryType {
QUERY("QUERY"),
DQL("DQL"),
DML("DML"),
DDL("DDL"),
DCL("DCL"),
// strictly speaking, "ANALYZE TABLE" is DDL because it collects and stores metadata or statistical information,
// but in Hive it's a special statement which is worth a separate query type
STATS("STATS"),
Expand Down Expand Up @@ -97,7 +98,6 @@ public String getName() {
private boolean isView;

private QueryType queryType = null;
private String ddlType = null;

// set of used tables, aliases are resolved to real table names
private Set<String> usedTables = new HashSet<>();
Expand Down Expand Up @@ -368,14 +368,6 @@ public void setQueryType(QueryType queryType) {
this.queryType = queryType;
}

/**
 * Returns the DDL type name of the query, or an empty string when it has not been set.
 * (Note: this accessor is removed by the surrounding change in favor of the operation/SqlKind fields.)
 */
public String getDdlType() {
return ddlType == null ? "" : ddlType;
}

/**
 * Sets the DDL type name of the query.
 * @param ddlType the DDL operation name; may be null, in which case getDdlType() yields ""
 */
public void setDdlType(String ddlType) {
this.ddlType = ddlType;
}

public Set<String> getUsedTables() {
return usedTables;
}
Expand Down
15 changes: 15 additions & 0 deletions ql/src/java/org/apache/hadoop/hive/ql/QueryState.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.util.concurrent.locks.ReentrantLock;
import java.util.function.Supplier;

import org.apache.calcite.sql.SqlKind;
import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager;
Expand Down Expand Up @@ -52,6 +53,12 @@ public class QueryState {
*/
private HiveOperation commandType;

/**
* The SqlKind of the command. This typically carries information beyond HiveOperation.QUERY:
* it can be INSERT, DELETE, UPDATE, MERGE or EXPLAIN.
*/
private SqlKind sqlKind;

/**
* Per-query Lineage state to track what happens in the query
*/
Expand Down Expand Up @@ -151,6 +158,14 @@ public void setCommandType(HiveOperation commandType) {
this.commandType = commandType;
}

/**
 * Returns the Calcite SqlKind of the command, or null if it has not been set by an analyzer.
 */
public SqlKind getSqlKind() {
return sqlKind;
}

/**
 * Sets the Calcite SqlKind of the command. Unguarded setter: callers that need
 * first-writer-wins semantics go through the analyzer-side helper instead.
 * @param sqlKind the kind to record (e.g. INSERT, DELETE, MERGE, EXPLAIN)
 */
public void setSqlKind(SqlKind sqlKind) {
this.sqlKind = sqlKind;
}

/**
 * Returns the per-query HiveConf backing this QueryState.
 */
public HiveConf getConf() {
return queryConf;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@
import java.lang.reflect.Constructor;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.QueryProperties;
import org.apache.hadoop.hive.ql.QueryState;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.parse.authorization.HiveAuthorizationTaskFactory;
Expand Down Expand Up @@ -61,4 +63,9 @@ private HiveAuthorizationTaskFactory createAuthorizationTaskFactory(HiveConf con
"Unable to create instance of " + authProviderClass.getName() + ": " + e.getMessage(), e);
}
}

/**
 * Classifies statements handled by this analyzer as DCL (Data Control Language).
 * The enclosing class wires up a HiveAuthorizationTaskFactory, so these are
 * presumably grant/revoke/role style commands -- TODO confirm enclosing analyzer.
 * @param tree root AST node of the statement (not inspected here)
 */
@Override
public void setQueryType(ASTNode tree) {
queryProperties.setQueryType(QueryProperties.QueryType.DCL);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.apache.calcite.rel.metadata.RelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rex.RexExecutor;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.tools.Frameworks;
import org.apache.hadoop.hive.common.TableName;
import org.apache.hadoop.hive.conf.HiveConf;
Expand Down Expand Up @@ -788,4 +789,10 @@ protected boolean allowOutputMultipleTimes() {
/**
 * Classifies this statement as DDL; per the sibling setSqlKind override, this analyzer
 * handles MV REBUILD (HiveOperation.ALTER_MATERIALIZED_VIEW_REBUILD), which is an ALTER
 * and therefore DDL rather than the INSERT it is rewritten into.
 * @param tree root AST node of the statement (not inspected here)
 */
public void setQueryType(ASTNode tree) {
queryProperties.setQueryType(QueryProperties.QueryType.DDL);
}

/**
 * Intentional no-op: prevents the base Semantic Analyzer from classifying this query
 * as a simple SqlKind=INSERT. An MV REBUILD is classified as
 * HiveOperation.ALTER_MATERIALIZED_VIEW_REBUILD instead.
 */
@Override
protected void setSqlKind(SqlKind sqlKind) {
// NO-OP by design -- see javadoc above
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import java.util.UUID;

import org.antlr.runtime.tree.Tree;
import org.apache.calcite.sql.SqlKind;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.TableName;
import org.apache.hadoop.hive.metastore.Warehouse;
Expand Down Expand Up @@ -305,6 +306,12 @@ private List<Task<?>> findStatsTasks(

/**
 * Classifies this statement as DDL; per the sibling setSqlKind override, this analyzer
 * handles acid EXPORT (HiveOperation.EXPORT).
 * @param tree root AST node of the statement (not inspected here)
 */
@Override
public void setQueryType(ASTNode tree) {
  // The original rendered both the old OTHER assignment and the new DDL assignment
  // (diff residue); the OTHER call was a dead store, immediately overwritten.
  queryProperties.setQueryType(QueryProperties.QueryType.DDL);
}

/**
 * Intentional no-op: prevents the base Semantic Analyzer from classifying this query
 * as a simple SqlKind=INSERT. An acid export is classified as HiveOperation.EXPORT instead.
 */
@Override
protected void setSqlKind(SqlKind sqlKind) {
// NO-OP by design -- see javadoc above
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import org.antlr.runtime.TokenRewriteStream;
import org.antlr.runtime.tree.CommonTree;
import org.antlr.runtime.tree.Tree;
import org.apache.calcite.sql.SqlKind;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
Expand Down Expand Up @@ -94,10 +95,10 @@
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.FetchWork;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.api.StageType;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.hive.ql.util.DirectionUtils;
Expand Down Expand Up @@ -2113,8 +2114,20 @@ protected void setQueryType(ASTNode tree) {
// instead of ""
QueryType queryType = QueryType.OTHER;
if ("TOK_QUERY".equalsIgnoreCase(tree.getText())) {
queryType = QueryType.QUERY;
queryType = QueryType.DQL;
}
queryProperties.setQueryType(queryType);
}

/**
 * Sets the sqlKind of the query if any. Subclasses can override this to prevent using any sqlKind (e.g. MV REBUILD).
 * First-writer-wins: once a kind is recorded on the QueryState it is never overwritten.
 * @param sqlKind the kind that belongs to the semantic analyzer
 */
protected void setSqlKind(SqlKind sqlKind) {
// when e.g. a MERGE query is rewritten to INSERTs, the analyzer codepaths
// will keep calling setSqlKind(SqlKind.INSERT), so this null-check prevents overwrite
if (queryState.getSqlKind() == null) {
queryState.setSqlKind(sqlKind);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hive.ql.parse;

import org.apache.calcite.sql.SqlKind;
import org.apache.hadoop.hive.common.TableName;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Context;
Expand All @@ -33,6 +34,7 @@
import org.apache.hadoop.hive.ql.parse.rewrite.DeleteStatement;
import org.apache.hadoop.hive.ql.parse.rewrite.RewriterFactory;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.HiveOperation;

import java.util.Collections;
import java.util.List;
Expand All @@ -44,6 +46,7 @@ public class DeleteSemanticAnalyzer extends RewriteSemanticAnalyzer<DeleteStatem
/**
 * Builds the analyzer and eagerly records SqlKind.DELETE on the query state.
 * Sets the kind directly on queryState (rather than via the overridable
 * setSqlKind helper) -- safe in a constructor, and first-writer-wins still holds
 * because nothing has set the kind yet.
 * @param queryState per-query state that receives the SqlKind
 * @param rewriterFactory factory producing the DELETE-to-INSERT rewriter
 * @throws SemanticException propagated from the superclass constructor
 */
public DeleteSemanticAnalyzer(QueryState queryState, RewriterFactory<DeleteStatement> rewriterFactory)
throws SemanticException {
super(queryState, rewriterFactory);
queryState.setSqlKind(SqlKind.DELETE);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import java.util.List;
import java.util.Map;

import org.apache.calcite.sql.SqlKind;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
Expand All @@ -36,6 +37,7 @@
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.QueryProperties;
import org.apache.hadoop.hive.ql.QueryState;
import org.apache.hadoop.hive.ql.exec.ExplainTask;
import org.apache.hadoop.hive.ql.exec.FetchTask;
Expand All @@ -61,6 +63,7 @@ public class ExplainSemanticAnalyzer extends BaseSemanticAnalyzer {
/**
 * Builds the analyzer and records SqlKind.EXPLAIN on the query state.
 * NOTE(review): this calls the overridable setSqlKind from a constructor, unlike
 * the Delete/Merge analyzers which set queryState directly -- harmless while no
 * subclass overrides it, but worth confirming/aligning.
 * @param queryState per-query state that receives the SqlKind
 * @throws SemanticException propagated from the superclass constructor
 */
public ExplainSemanticAnalyzer(QueryState queryState) throws SemanticException {
super(queryState);
config = new ExplainConfiguration();
setSqlKind(SqlKind.EXPLAIN);
}

@Override
Expand Down Expand Up @@ -289,4 +292,8 @@ public void startAnalysis() {
queryState.createHMSCache();
}
}

/**
 * Classifies EXPLAIN statements as DQL.
 * @param tree root AST node of the statement (not inspected here)
 */
@Override // added for consistency with sibling overrides of the base analyzer hook
public void setQueryType(ASTNode tree) {
  queryProperties.setQueryType(QueryProperties.QueryType.DQL);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.antlr.runtime.tree.Tree;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.QueryProperties;
import org.apache.hadoop.hive.ql.QueryState;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
Expand Down Expand Up @@ -132,4 +133,8 @@ static Task<ExportWork> analyzeExport(ASTNode ast, @Nullable String acidTableNam
/**
 * Reports whether this export touches transactional data: true only for MM (micro-managed)
 * table exports handled here. Full ACID export goes through the UpdateDelete analyzer.
 */
public boolean hasTransactionalInQuery() {
return isMmExport; // Full ACID export goes through UpdateDelete analyzer.
}

/**
 * Classifies EXPORT statements handled by this analyzer as DDL.
 * @param tree root AST node of the statement (not inspected here)
 */
@Override // added for consistency with sibling overrides of the base analyzer hook
public void setQueryType(ASTNode tree) {
  queryProperties.setQueryType(QueryProperties.QueryType.DDL);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.QueryProperties;
import org.apache.hadoop.hive.ql.QueryState;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
Expand Down Expand Up @@ -597,4 +598,9 @@ private void reparseAndSuperAnalyze(Table table, URI fromURI) throws SemanticExc
/**
 * Returns all write entities produced by this analysis.
 * NOTE(review): returns the internal mutable set directly -- callers can mutate it;
 * confirm whether an unmodifiable view is appropriate here.
 */
public Set<WriteEntity> getAllOutputs() {
return outputs;
}

/**
 * Classifies statements handled by this analyzer as DML -- presumably LOAD, given the
 * surrounding reparse-and-super-analyze logic; TODO confirm enclosing analyzer.
 * @param tree root AST node of the statement (not inspected here)
 */
@Override
public void setQueryType(ASTNode tree) {
queryProperties.setQueryType(QueryProperties.QueryType.DML);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.hadoop.hive.ql.parse;

import org.antlr.runtime.TokenRewriteStream;
import org.apache.calcite.sql.SqlKind;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
Expand All @@ -28,6 +29,7 @@
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.rewrite.MergeStatement;
import org.apache.hadoop.hive.ql.parse.rewrite.RewriterFactory;
import org.apache.hadoop.hive.ql.plan.HiveOperation;

import java.util.ArrayList;
import java.util.HashMap;
Expand All @@ -53,6 +55,7 @@ public class MergeSemanticAnalyzer extends RewriteSemanticAnalyzer<MergeStatemen
/**
 * Builds the analyzer and eagerly records SqlKind.MERGE on the query state.
 * Sets the kind directly on queryState so the later INSERT rewrites of the MERGE
 * cannot reclassify it (the guarded setSqlKind helper is first-writer-wins).
 * @param queryState per-query state that receives the SqlKind
 * @param rewriterFactory factory producing the MERGE rewriter
 * @throws SemanticException propagated from the superclass constructor
 */
MergeSemanticAnalyzer(QueryState queryState, RewriterFactory<MergeStatement> rewriterFactory)
throws SemanticException {
super(queryState, rewriterFactory);
queryState.setSqlKind(SqlKind.MERGE);
}

@Override
Expand Down
Loading

0 comments on commit cc2a495

Please sign in to comment.