Skip to content

Commit

Permalink
[fix](expr) Fix BE core dump while common expr filter delete conditio…
Browse files Browse the repository at this point in the history
…n column (#29107) (#29208)

pred column also needs to be filtered by expr, exclude delete condition column, delete condition column not need to be filtered, query engine does not need it, after _output_column_by_sel_idx, delete condition materialize column will be erase at the end of the block.
Eg:
delete from table where a = 10;
select b from table;
a column only effective in segment iterator, the block from query engine only contain the b column, so no need to filter a column by expr.
  • Loading branch information
xinyiZzz authored Dec 28, 2023
1 parent 9f5fd3c commit 1b73bda
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 2 deletions.
34 changes: 32 additions & 2 deletions be/src/olap/rowset/segment_v2/segment_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1454,6 +1454,8 @@ Status SegmentIterator::_seek_columns(const std::vector<ColumnId>& column_ids, r
// todo(wb) need a UT here
Status SegmentIterator::_vec_init_lazy_materialization() {
_is_pred_column.resize(_schema->columns().size(), false);
std::vector<bool> is_pred_column_no_del_condition;
is_pred_column_no_del_condition.resize(_schema->columns().size(), false);

// including short/vec/delete pred
std::set<ColumnId> pred_column_ids;
Expand Down Expand Up @@ -1495,6 +1497,7 @@ Status SegmentIterator::_vec_init_lazy_materialization() {
for (auto predicate : _col_predicates) {
auto cid = predicate->column_id();
_is_pred_column[cid] = true;
is_pred_column_no_del_condition[cid] = true;
pred_column_ids.insert(cid);

// check pred using short eval or vec eval
Expand Down Expand Up @@ -1548,8 +1551,16 @@ Status SegmentIterator::_vec_init_lazy_materialization() {
if (!_common_expr_columns.empty()) {
_is_need_expr_eval = true;
for (auto cid : _schema->column_ids()) {
// pred column also needs to be filtered by expr
if (_is_common_expr_column[cid] || _is_pred_column[cid]) {
// pred column also needs to be filtered by expr, exclude delete condition column,
// Delete condition column not need to be filtered, query engine does not need it,
// after _output_column_by_sel_idx, delete condition materialize column will be erase
// at the end of the block.
// Eg:
// `delete from table where a = 10;`
// `select b from table;`
// a column only effective in segment iterator, the block from query engine only contain the b column,
// so no need to filter a column by expr.
if (_is_common_expr_column[cid] || is_pred_column_no_del_condition[cid]) {
auto loc = _schema_block_id_map[cid];
_columns_to_filter.push_back(loc);
}
Expand Down Expand Up @@ -2040,6 +2051,25 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
_replace_version_col(_current_batch_rows_read);
}

// If col >= block->columns(), it means col should not be filtered, there is a BUG.
// such as delete condition column was incorrectly put into columns_to_filter,
// which is usually at the end of the block. only check during the first next_batch.
if (_opts.stats->blocks_load == 0) {
for (const auto& col : _columns_to_filter) {
if (col >= block->columns()) {
std::ostringstream ss;
for (const auto& i : _columns_to_filter) {
ss << i << "-";
}
throw Exception(
ErrorCode::INTERNAL_ERROR,
"filter block column id(index) greater than block->columns(), "
"column id={}, all columns that need filter={}, block columns num={}",
col, ss.str().substr(0, ss.str().length() - 1), block->columns());
}
}
}

_opts.stats->blocks_load += 1;
_opts.stats->raw_rows_read += _current_batch_rows_read;

Expand Down
18 changes: 18 additions & 0 deletions regression-test/data/correctness/test_pushdown_common_expr.out
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,21 @@

-- !9 --

-- !1 --
0

-- !2 --
0

-- !3 --
0

-- !4 --
0

-- !5 --
0

-- !6 --
a

Original file line number Diff line number Diff line change
Expand Up @@ -150,4 +150,62 @@ suite("test_pushdown_common_expr") {
WHERE random() = 0
) AS t2 ON t1.c1 = t2.c1
"""

sql """ DROP TABLE IF EXISTS t_pushdown_common_expr_for_del """
sql """
CREATE TABLE `t_pushdown_common_expr_for_del` (
`c1` int(11) NULL,
`c2` varchar(100) NULL COMMENT "",
`c3` varchar(100) NULL COMMENT "",
`c4` varchar(100) NULL COMMENT "",
`c5` varchar(100) NULL COMMENT "",
`c6` varchar(100) NULL COMMENT "",
`c7` varchar(100) NULL COMMENT ""
) ENGINE=OLAP
DUPLICATE KEY(`c1`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`c1`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"disable_auto_compaction" = "true"
);
"""

sql """
INSERT INTO t_pushdown_common_expr_for_del VALUES
(1,'a','aa','aaa','aaaa','aaaaa','aaaaaa'),
(2,'b','bb','bbb','bbbb','bbbbb','bbbbbb'),
(3,'c','cc','ccc','cccc','ccccc','cccccc'),
(4,'d','dd','ddd','dddd','ddddd','dddddd'),
(5,'e','ee','eee','eeee','eeeee','eeeeee'),
(6,'f','ff','fff','ffff','fffff','ffffff'),
(7,'g','gg','ggg','gggg','ggggg','gggggg');
"""

sql """set enable_common_expr_pushdown=true"""

// delete condition columns num > block columns num
order_qt_1 """
delete from t_pushdown_common_expr_for_del where c3 = "bb";
"""

order_qt_2 """
delete from t_pushdown_common_expr_for_del where c4 = "ccc";
"""

order_qt_3 """
delete from t_pushdown_common_expr_for_del where c5 = "dddd";
"""

order_qt_4 """
delete from t_pushdown_common_expr_for_del where c6 = "eeeee";
"""

order_qt_5 """
delete from t_pushdown_common_expr_for_del where c7 = "ffffff";
"""

order_qt_6 """
select c2 from t_pushdown_common_expr_for_del where upper(c2) = "A";
"""
}

0 comments on commit 1b73bda

Please sign in to comment.