From 1b73bda32dfa83d58dc3e5cf3bb3cb5dbada05f0 Mon Sep 17 00:00:00 2001 From: Xinyi Zou Date: Thu, 28 Dec 2023 15:00:18 +0800 Subject: [PATCH] [fix](expr) Fix BE core dump while common expr filter delete condition column (#29107) (#29208) pred column also needs to be filtered by expr, exclude delete condition column, delete condition column not need to be filtered, query engine does not need it, after _output_column_by_sel_idx, delete condition materialize column will be erase at the end of the block. Eg: delete from table where a = 10; select b from table; a column only effective in segment iterator, the block from query engine only contain the b column, so no need to filter a column by expr. --- .../rowset/segment_v2/segment_iterator.cpp | 34 ++++++++++- .../correctness/test_pushdown_common_expr.out | 18 ++++++ .../test_pushdown_common_expr.groovy | 58 +++++++++++++++++++ 3 files changed, 108 insertions(+), 2 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 44ff03c9671e88..d41b18609a4a11 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -1454,6 +1454,8 @@ Status SegmentIterator::_seek_columns(const std::vector& column_ids, r // todo(wb) need a UT here Status SegmentIterator::_vec_init_lazy_materialization() { _is_pred_column.resize(_schema->columns().size(), false); + std::vector is_pred_column_no_del_condition; + is_pred_column_no_del_condition.resize(_schema->columns().size(), false); // including short/vec/delete pred std::set pred_column_ids; @@ -1495,6 +1497,7 @@ Status SegmentIterator::_vec_init_lazy_materialization() { for (auto predicate : _col_predicates) { auto cid = predicate->column_id(); _is_pred_column[cid] = true; + is_pred_column_no_del_condition[cid] = true; pred_column_ids.insert(cid); // check pred using short eval or vec eval @@ -1548,8 +1551,16 @@ Status SegmentIterator::_vec_init_lazy_materialization() { if (!_common_expr_columns.empty()) { _is_need_expr_eval = true; for (auto cid : _schema->column_ids()) { - // pred column also needs to be filtered by expr - if (_is_common_expr_column[cid] || _is_pred_column[cid]) { + // pred column also needs to be filtered by expr, exclude delete condition column, + // Delete condition column not need to be filtered, query engine does not need it, + // after _output_column_by_sel_idx, delete condition materialize column will be erase + // at the end of the block. + // Eg: + // `delete from table where a = 10;` + // `select b from table;` + // a column only effective in segment iterator, the block from query engine only contain the b column, + // so no need to filter a column by expr. + if (_is_common_expr_column[cid] || is_pred_column_no_del_condition[cid]) { auto loc = _schema_block_id_map[cid]; _columns_to_filter.push_back(loc); } @@ -2040,6 +2051,25 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { _replace_version_col(_current_batch_rows_read); } + // If col >= block->columns(), it means col should not be filtered, there is a BUG. + // such as delete condition column was incorrectly put into columns_to_filter, + // which is usually at the end of the block. only check during the first next_batch. + if (_opts.stats->blocks_load == 0) { + for (const auto& col : _columns_to_filter) { + if (col >= block->columns()) { + std::ostringstream ss; + for (const auto& i : _columns_to_filter) { + ss << i << "-"; + } + throw Exception( + ErrorCode::INTERNAL_ERROR, + "filter block column id(index) greater than block->columns(), " + "column id={}, all columns that need filter={}, block columns num={}", + col, ss.str().substr(0, ss.str().length() - 1), block->columns()); + } + } + } + _opts.stats->blocks_load += 1; _opts.stats->raw_rows_read += _current_batch_rows_read; diff --git a/regression-test/data/correctness/test_pushdown_common_expr.out b/regression-test/data/correctness/test_pushdown_common_expr.out index 8da380e841b23a..0055c93b7d83de 100644 --- a/regression-test/data/correctness/test_pushdown_common_expr.out +++ b/regression-test/data/correctness/test_pushdown_common_expr.out @@ -71,3 +71,21 @@ -- !9 -- +-- !1 -- +0 + +-- !2 -- +0 + +-- !3 -- +0 + +-- !4 -- +0 + +-- !5 -- +0 + +-- !6 -- +a + diff --git a/regression-test/suites/correctness/test_pushdown_common_expr.groovy b/regression-test/suites/correctness/test_pushdown_common_expr.groovy index 2395f057b1df4d..aa0e68e3e4c5f5 100644 --- a/regression-test/suites/correctness/test_pushdown_common_expr.groovy +++ b/regression-test/suites/correctness/test_pushdown_common_expr.groovy @@ -150,4 +150,62 @@ suite("test_pushdown_common_expr") { WHERE random() = 0 ) AS t2 ON t1.c1 = t2.c1 """ + + sql """ DROP TABLE IF EXISTS t_pushdown_common_expr_for_del """ + sql """ + CREATE TABLE `t_pushdown_common_expr_for_del` ( + `c1` int(11) NULL, + `c2` varchar(100) NULL COMMENT "", + `c3` varchar(100) NULL COMMENT "", + `c4` varchar(100) NULL COMMENT "", + `c5` varchar(100) NULL COMMENT "", + `c6` varchar(100) NULL COMMENT "", + `c7` varchar(100) NULL COMMENT "" + ) ENGINE=OLAP + DUPLICATE KEY(`c1`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`c1`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "disable_auto_compaction" = "true" + ); + """ + + sql """ + INSERT INTO t_pushdown_common_expr_for_del VALUES + (1,'a','aa','aaa','aaaa','aaaaa','aaaaaa'), + (2,'b','bb','bbb','bbbb','bbbbb','bbbbbb'), + (3,'c','cc','ccc','cccc','ccccc','cccccc'), + (4,'d','dd','ddd','dddd','ddddd','dddddd'), + (5,'e','ee','eee','eeee','eeeee','eeeeee'), + (6,'f','ff','fff','ffff','fffff','ffffff'), + (7,'g','gg','ggg','gggg','ggggg','gggggg'); + """ + + sql """set enable_common_expr_pushdown=true""" + + // delete condition columns num > block columns num + order_qt_1 """ + delete from t_pushdown_common_expr_for_del where c3 = "bb"; + """ + + order_qt_2 """ + delete from t_pushdown_common_expr_for_del where c4 = "ccc"; + """ + + order_qt_3 """ + delete from t_pushdown_common_expr_for_del where c5 = "dddd"; + """ + + order_qt_4 """ + delete from t_pushdown_common_expr_for_del where c6 = "eeeee"; + """ + + order_qt_5 """ + delete from t_pushdown_common_expr_for_del where c7 = "ffffff"; + """ + + order_qt_6 """ + select c2 from t_pushdown_common_expr_for_del where upper(c2) = "A"; + """ } \ No newline at end of file