From e21b949719ed39413b5dd172cb937b517fcb5d40 Mon Sep 17 00:00:00 2001 From: HappenLee Date: Sat, 12 Oct 2024 13:17:54 +0800 Subject: [PATCH 1/3] [Fix](bug) Is null predicate get error query result (#41702) cherry-pick from #41668 --- be/src/exec/olap_common.h | 28 +++++++++++-------- .../query_p0/scan_range/test_scan_range.out | 5 ++++ .../scan_range/test_scan_range.groovy | 5 ++++ 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h index d66cde2fad3689..8915c890f1e6d3 100644 --- a/be/src/exec/olap_common.h +++ b/be/src/exec/olap_common.h @@ -722,6 +722,18 @@ bool ColumnValueRange::convert_to_avg_range_value( std::vector& begin_scan_keys, std::vector& end_scan_keys, bool& begin_include, bool& end_include, int32_t max_scan_key_num) { if constexpr (!_is_reject_split_type) { + CppType min_value = get_range_min_value(); + CppType max_value = get_range_max_value(); + if constexpr (primitive_type == PrimitiveType::TYPE_DATE) { + min_value.set_type(TimeType::TIME_DATE); + max_value.set_type(TimeType::TIME_DATE); + } + auto empty_range_only_null = min_value > max_value; + if (empty_range_only_null) { + // Not contain null will be disposed in `convert_to_close_range`, return eos. + DCHECK(contain_null()); + } + auto no_split = [&]() -> bool { begin_scan_keys.emplace_back(); begin_scan_keys.back().add_value( @@ -729,18 +741,11 @@ bool ColumnValueRange::convert_to_avg_range_value( contain_null()); end_scan_keys.emplace_back(); end_scan_keys.back().add_value( - cast_to_string(get_range_max_value(), scale())); + cast_to_string(get_range_max_value(), scale()), + empty_range_only_null ? true : false); return true; }; - - CppType min_value = get_range_min_value(); - CppType max_value = get_range_max_value(); - if constexpr (primitive_type == PrimitiveType::TYPE_DATE) { - min_value.set_type(TimeType::TIME_DATE); - max_value.set_type(TimeType::TIME_DATE); - } - - if (min_value > max_value || max_scan_key_num == 1) { + if (empty_range_only_null || max_scan_key_num == 1) { return no_split(); } @@ -1125,7 +1130,8 @@ Status OlapScanKeys::extend_scan_key(ColumnValueRange& range, *eos |= range.convert_to_close_range(_begin_scan_keys, _end_scan_keys, _begin_include, _end_include); - if (range.convert_to_avg_range_value(_begin_scan_keys, _end_scan_keys, _begin_include, + if (!(*eos) && + range.convert_to_avg_range_value(_begin_scan_keys, _end_scan_keys, _begin_include, _end_include, max_scan_key_num)) { _has_range_value = true; } diff --git a/regression-test/data/query_p0/scan_range/test_scan_range.out b/regression-test/data/query_p0/scan_range/test_scan_range.out index e4df16ef06ca8e..9d42dd67dc901a 100644 --- a/regression-test/data/query_p0/scan_range/test_scan_range.out +++ b/regression-test/data/query_p0/scan_range/test_scan_range.out @@ -4,7 +4,12 @@ -- !sql_2 -- 1 +-2147483648 -- !sql_3 -- -- !sql_4 -- + +-- !sql_5 -- +\N + diff --git a/regression-test/suites/query_p0/scan_range/test_scan_range.groovy b/regression-test/suites/query_p0/scan_range/test_scan_range.groovy index e011a5095a59ec..c0ec6daeef0a3f 100644 --- a/regression-test/suites/query_p0/scan_range/test_scan_range.groovy +++ b/regression-test/suites/query_p0/scan_range/test_scan_range.groovy @@ -34,6 +34,8 @@ suite("test_scan_range", "query,p0") { """ sql "insert into ${tableName} values(1,1)" + sql "insert into ${tableName} values(-2147483648, -2147483648)" + sql "insert into ${tableName} values(null, null)" qt_sql_1 "select k1 from ${tableName} where k1 > -2147483648" @@ -42,4 +44,7 @@ suite("test_scan_range", "query,p0") { qt_sql_3 "select k1 from ${tableName} where k1 < -2147483648" qt_sql_4 "select k1 from ${tableName} where k1 > 2147483647" + + qt_sql_5 "select k1 from ${tableName} where k1 is null" + } From fe93764abacc7cc1d92debb1d92423712e82bfd9 Mon Sep 17 00:00:00 2001 From: morrySnow <101034200+morrySnow@users.noreply.github.com> Date: Sat, 12 Oct 2024 14:32:19 +0800 Subject: [PATCH 2/3] [fix](parser) should not use selectHint in any place (#41260) (#41728) pick from master #41260 --- .../src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 index 62c5661589ceee..54fe6d9cd5c18a 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 +++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 @@ -135,7 +135,7 @@ columnAliases ; selectClause - : SELECT selectHint? (DISTINCT|ALL)? selectColumnClause + : SELECT (DISTINCT|ALL)? selectColumnClause ; selectColumnClause From 9a9eaa77497367f005c9aff9682ae989ec1bcb71 Mon Sep 17 00:00:00 2001 From: zhannngchen <48427519+zhannngchen@users.noreply.github.com> Date: Sat, 12 Oct 2024 15:29:17 +0800 Subject: [PATCH 3/3] [enhancement](sequence col) add session variable to skip sequence column check while INSERT INTO (#41655) (#41732) cherry-pick #41655 --- .../doris/analysis/NativeInsertStmt.java | 3 +- .../nereids/rules/analysis/BindSink.java | 3 +- .../org/apache/doris/qe/SessionVariable.java | 17 ++++++++++ .../unique/test_unique_table_sequence.out | 8 ++++- .../unique/test_unique_table_sequence.groovy | 33 +++++++++++++++++++ 5 files changed, 61 insertions(+), 3 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/NativeInsertStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/NativeInsertStmt.java index 8b5f6f9aa3b656..b7aa7b73968e8f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/NativeInsertStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/NativeInsertStmt.java @@ -450,7 +450,8 @@ private void analyzeTargetTable(Analyzer analyzer) throws AnalysisException { } if (!haveInputSeqCol && !isPartialUpdate && !isFromDeleteOrUpdateStmt - && !analyzer.getContext().getSessionVariable().isEnableUniqueKeyPartialUpdate()) { + && !analyzer.getContext().getSessionVariable().isEnableUniqueKeyPartialUpdate() + && analyzer.getContext().getSessionVariable().isRequireSequenceInInsert()) { if (!seqColInTable.isPresent() || seqColInTable.get().getDefaultValue() == null || !seqColInTable.get().getDefaultValue().equals(DefaultValue.CURRENT_TIMESTAMP)) { throw new AnalysisException("Table " + olapTable.getName() diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindSink.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindSink.java index d5c4a50ab7527d..995b0a11969d3e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindSink.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindSink.java @@ -146,7 +146,8 @@ public List buildRules() { } } - if (!haveInputSeqCol && !isPartialUpdate) { + if (!haveInputSeqCol && !isPartialUpdate && (!sink.isFromNativeInsertStmt() + || ConnectContext.get().getSessionVariable().isRequireSequenceInInsert())) { if (!seqColInTable.isPresent() || seqColInTable.get().getDefaultValue() == null || !seqColInTable.get().getDefaultValue() .equals(DefaultValue.CURRENT_TIMESTAMP)) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 8a5c3a85055b88..129bf172e3e7c8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -481,6 +481,8 @@ public class SessionVariable implements Serializable, Writable { public static final String ENABLE_MATCH_WITHOUT_INVERTED_INDEX = "enable_match_without_inverted_index"; public static final String ENABLE_FALLBACK_ON_MISSING_INVERTED_INDEX = "enable_fallback_on_missing_inverted_index"; + public static final String REQUIRE_SEQUENCE_IN_INSERT = "require_sequence_in_insert"; + /** * If set false, user couldn't submit analyze SQL and FE won't allocate any related resources. */ @@ -1501,6 +1503,13 @@ public void setEnableEsShardScroll(boolean enableESShardScroll) { this.enableESShardScroll = enableESShardScroll; } + @VariableMgr.VarAttr(name = REQUIRE_SEQUENCE_IN_INSERT, needForward = true, description = { + "该变量用于控制,使用了sequence列的unique key表,insert into操作是否要求必须提供每一行的sequence列的值", + "This variable controls whether the INSERT INTO operation on unique key tables with a sequence" + + " column requires a sequence column to be provided for each row" + }) + public boolean requireSequenceInInsert = true; + public boolean isEnableESShardScroll() { return enableESShardScroll; } @@ -2593,6 +2602,14 @@ public void setEnableUniqueKeyPartialUpdate(boolean enableUniqueKeyPartialUpdate this.enableUniqueKeyPartialUpdate = enableUniqueKeyPartialUpdate; } + public void setRequireSequenceInInsert(boolean value) { + this.requireSequenceInInsert = value; + } + + public boolean isRequireSequenceInInsert() { + return this.requireSequenceInInsert; + } + /** * Serialize to thrift object. * Used for rest api. diff --git a/regression-test/data/data_model_p0/unique/test_unique_table_sequence.out b/regression-test/data/data_model_p0/unique/test_unique_table_sequence.out index 4f1d633250d85b..d6a6879b117abf 100644 --- a/regression-test/data/data_model_p0/unique/test_unique_table_sequence.out +++ b/regression-test/data/data_model_p0/unique/test_unique_table_sequence.out @@ -41,10 +41,16 @@ -- !all -- 1 10 15 16 17 0 4 15 -15 8 19 20 21 0 7 3 +15 8 19 20 21 0 9 3 2 5 14 13 14 0 5 12 3 6 11 14 15 0 6 13 +-- !all_clone_table -- +1 10 15 16 17 0 2 \N +15 8 19 20 21 0 2 \N +2 5 14 13 14 0 2 \N +3 6 11 14 15 0 2 \N + -- !1 -- 1 1 1 1 1 0 2 1 2 2 2 2 2 0 2 2 diff --git a/regression-test/suites/data_model_p0/unique/test_unique_table_sequence.groovy b/regression-test/suites/data_model_p0/unique/test_unique_table_sequence.groovy index c5898480f0dff4..c04b50bb55c585 100644 --- a/regression-test/suites/data_model_p0/unique/test_unique_table_sequence.groovy +++ b/regression-test/suites/data_model_p0/unique/test_unique_table_sequence.groovy @@ -119,6 +119,15 @@ suite("test_unique_table_sequence") { exception "Table ${tableName} has sequence column, need to specify the sequence column" } + // with `require_sequence_in_insert=false`, previous insert operation should success + sql "SET require_sequence_in_insert=false" + + sql "INSERT INTO ${tableName} values(15, 8, 19, 20, 21)" + + sql "INSERT INTO ${tableName} (k1, v1, v2, v3, v4) values(15, 8, 19, 20, 21)" + + sql "SET require_sequence_in_insert=true" + // correct way of insert into with seq col sql "INSERT INTO ${tableName} (k1, v1, v2, v3, v4, __DORIS_SEQUENCE_COL__) values(15, 8, 19, 20, 21, 3)" @@ -134,7 +143,31 @@ suite("test_unique_table_sequence") { order_qt_all "SELECT * from ${tableName}" + sql "SET show_hidden_columns=false" + + def tableNameClone = tableName + "_clone" + sql "DROP TABLE IF EXISTS ${tableNameClone}" + sql "create table ${tableNameClone} like ${tableName}" + + // test insert into select * + test { + sql "INSERT INTO ${tableNameClone} select * from ${tableName}" + exception "Table ${tableNameClone} has sequence column, need to specify the sequence column" + } + + // with `require_sequence_in_insert=true`, previous insert operation should success + sql "SET require_sequence_in_insert=false" + + sql "INSERT INTO ${tableNameClone} select * from ${tableName}" + + sql "SET require_sequence_in_insert=true" + + sql "SET show_hidden_columns=true" + + order_qt_all_clone_table "SELECT * from ${tableNameClone}" + sql "DROP TABLE ${tableName}" + sql "DROP TABLE ${tableNameClone}" sql "DROP TABLE IF EXISTS ${tableName}" sql """