diff --git a/be/src/vec/aggregate_functions/aggregate_function_null.h b/be/src/vec/aggregate_functions/aggregate_function_null.h index becb06f7cfca64d..939396073825c49 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_null.h +++ b/be/src/vec/aggregate_functions/aggregate_function_null.h @@ -210,17 +210,29 @@ class AggregateFunctionNullUnaryInline final } } - void add_batch(size_t batch_size, AggregateDataPtr* places, size_t place_offset, + void add_batch(size_t batch_size, AggregateDataPtr* __restrict places, size_t place_offset, const IColumn** columns, Arena* arena, bool agg_many) const override { - const ColumnNullable* column = assert_cast(columns[0]); - // The overhead introduced is negligible here, just an extra memory read from NullMap - const auto* __restrict null_map_data = column->get_null_map_data().data(); + const auto* column = assert_cast(columns[0]); const IColumn* nested_column = &column->get_nested_column(); - for (int i = 0; i < batch_size; ++i) { - if (!null_map_data[i]) { - AggregateDataPtr __restrict place = places[i] + place_offset; - this->set_flag(place); - this->nested_function->add(this->nested_place(place), &nested_column, i, arena); + if (column->has_null()) { + const auto* __restrict null_map_data = column->get_null_map_data().data(); + for (int i = 0; i < batch_size; ++i) { + if (!null_map_data[i]) { + AggregateDataPtr __restrict place = places[i] + place_offset; + this->set_flag(place); + this->nested_function->add(this->nested_place(place), &nested_column, i, arena); + } + } + } else { + if constexpr (result_is_nullable) { + for (int i = 0; i < batch_size; ++i) { + AggregateDataPtr __restrict place = places[i] + place_offset; + place[0] |= 1; + this->nested_function->add(this->nested_place(place), &nested_column, i, arena); + } + } else { + this->nested_function->add_batch(batch_size, places, place_offset, &nested_column, + arena, agg_many); } } } diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp index d3d7523c0f315a6..950c7ad7d47a3d9 100644 --- a/be/src/vec/core/block.cpp +++ b/be/src/vec/core/block.cpp @@ -183,25 +183,17 @@ void Block::clear_names() { } void Block::insert(const ColumnWithTypeAndName& elem) { - index_by_name.emplace(elem.name, data.size()); + if (!elem.name.empty()) { + index_by_name.emplace(elem.name, data.size()); + } data.emplace_back(elem); } void Block::insert(ColumnWithTypeAndName&& elem) { - index_by_name.emplace(elem.name, data.size()); - data.emplace_back(std::move(elem)); -} - -void Block::insert_unique(const ColumnWithTypeAndName& elem) { - if (index_by_name.end() == index_by_name.find(elem.name)) { - insert(elem); - } -} - -void Block::insert_unique(ColumnWithTypeAndName&& elem) { - if (index_by_name.end() == index_by_name.find(elem.name)) { - insert(std::move(elem)); + if (!elem.name.empty()) { + index_by_name.emplace(elem.name, data.size()); } + data.emplace_back(std::move(elem)); } void Block::erase(const std::set& positions) { diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h index 8433ebf074cbb7d..8a2b09c3280180f 100644 --- a/be/src/vec/core/block.h +++ b/be/src/vec/core/block.h @@ -105,9 +105,6 @@ class Block { /// insert the column to the end void insert(const ColumnWithTypeAndName& elem); void insert(ColumnWithTypeAndName&& elem); - /// insert the column to the end, if there is no column with that name yet - void insert_unique(const ColumnWithTypeAndName& elem); - void insert_unique(ColumnWithTypeAndName&& elem); /// remove the column at the specified position void erase(size_t position); /// remove the column at the [start, end) diff --git a/be/src/vec/core/column_with_type_and_name.cpp b/be/src/vec/core/column_with_type_and_name.cpp index 9ac2bbe6e4476be..1dea758bcef0500 100644 --- a/be/src/vec/core/column_with_type_and_name.cpp +++ b/be/src/vec/core/column_with_type_and_name.cpp @@ -30,6 +30,7 @@ #include "vec/columns/column.h" #include "vec/core/types.h" #include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_nullable.h" namespace doris::vectorized { @@ -87,4 +88,25 @@ void ColumnWithTypeAndName::to_pb_column_meta(PColumnMeta* col_meta) const { type->to_pb_column_meta(col_meta); } +ColumnWithTypeAndName ColumnWithTypeAndName::get_nested(bool replace_null_data_to_default) const { + if (!type->is_nullable()) { + return *this; + } + + const auto* source_column = assert_cast(column.get()); + auto nested_column = source_column->get_nested_column_ptr(); + auto nested_type = assert_cast(type.get())->get_nested_type(); + + if (replace_null_data_to_default) { + const auto& null_map = source_column->get_null_map_data(); + // only need to mutate nested column, avoid to copy nullmap + auto mutable_nested_col = (*std::move(nested_column)).mutate(); + mutable_nested_col->replace_column_null_data(null_map.data()); + + return {std::move(mutable_nested_col), nested_type, ""}; + } + + return {nested_column, nested_type, ""}; +} + } // namespace doris::vectorized diff --git a/be/src/vec/core/column_with_type_and_name.h b/be/src/vec/core/column_with_type_and_name.h index caf68f46260db1a..53ca6f20b2dd6db 100644 --- a/be/src/vec/core/column_with_type_and_name.h +++ b/be/src/vec/core/column_with_type_and_name.h @@ -25,6 +25,7 @@ #include #include #include +#include #include "vec/core/types.h" #include "vec/data_types/data_type.h" @@ -47,13 +48,13 @@ struct ColumnWithTypeAndName { DataTypePtr type; String name; - ColumnWithTypeAndName() {} - ColumnWithTypeAndName(const ColumnPtr& column_, const DataTypePtr& type_, const String& name_) - : column(column_), type(type_), name(name_) {} + ColumnWithTypeAndName() = default; + ColumnWithTypeAndName(ColumnPtr column_, DataTypePtr type_, String name_) + : column(std::move(column_)), type(std::move(type_)), name(std::move(name_)) {} /// Uses type->create_column() to create column - ColumnWithTypeAndName(const DataTypePtr& type_, const String& name_) - : column(type_->create_column()), type(type_), name(name_) {} + ColumnWithTypeAndName(const DataTypePtr& type_, String name_) + : column(type_->create_column()), type(type_), name(std::move(name_)) {} ColumnWithTypeAndName clone_empty() const; bool operator==(const ColumnWithTypeAndName& other) const; @@ -63,6 +64,8 @@ struct ColumnWithTypeAndName { std::string to_string(size_t row_num) const; void to_pb_column_meta(PColumnMeta* col_meta) const; + + ColumnWithTypeAndName get_nested(bool replace_null_data_to_default = false) const; }; } // namespace doris::vectorized diff --git a/be/src/vec/functions/function.cpp b/be/src/vec/functions/function.cpp index 6e7f6572ab86672..7fec5614c0cc4de 100644 --- a/be/src/vec/functions/function.cpp +++ b/be/src/vec/functions/function.cpp @@ -48,7 +48,7 @@ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const Colum ColumnPtr src_not_nullable = src; MutableColumnPtr mutable_result_null_map_column; - if (auto* nullable = check_and_get_column(*src)) { + if (const auto* nullable = check_and_get_column(*src)) { src_not_nullable = nullable->get_nested_column_ptr(); result_null_map_column = nullable->get_null_map_column_ptr(); } @@ -69,14 +69,14 @@ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const Colum continue; } - if (auto* nullable = assert_cast(elem.column.get())) { + if (const auto* nullable = assert_cast(elem.column.get())) { const ColumnPtr& null_map_column = nullable->get_null_map_column_ptr(); if (!result_null_map_column) { - result_null_map_column = null_map_column->clone_resized(input_rows_count); - } else { + result_null_map_column = null_map_column; + } else if (nullable->has_null()) { if (!mutable_result_null_map_column) { mutable_result_null_map_column = - std::move(result_null_map_column)->assume_mutable(); + null_map_column->clone_resized(input_rows_count); } NullMap& result_null_map = @@ -234,19 +234,20 @@ Status PreparedFunctionImpl::default_implementation_for_nulls( } if (null_presence.has_nullable) { - bool check_overflow_for_decimal = false; + bool need_to_default = need_replace_null_data_to_default(); if (context) { - check_overflow_for_decimal = context->check_overflow_for_decimal(); + need_to_default &= context->check_overflow_for_decimal(); + } + ColumnNumbers new_args; + for (auto arg : args) { + new_args.push_back(block.columns()); + block.insert(block.get_by_position(arg).get_nested(need_to_default)); } - auto [temporary_block, new_args, new_result] = create_block_with_nested_columns( - block, args, result, - check_overflow_for_decimal && need_replace_null_data_to_default()); - RETURN_IF_ERROR(execute_without_low_cardinality_columns( - context, temporary_block, new_args, new_result, temporary_block.rows(), dry_run)); - block.get_by_position(result).column = - wrap_in_nullable(temporary_block.get_by_position(new_result).column, block, args, - result, input_rows_count); + RETURN_IF_ERROR(execute_without_low_cardinality_columns(context, block, new_args, result, + block.rows(), dry_run)); + block.get_by_position(result).column = wrap_in_nullable( + block.get_by_position(result).column, block, args, result, input_rows_count); *executed = true; return Status::OK(); } diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h index 2e1e48db14d3d34..c467aadb4dc3ff9 100644 --- a/be/src/vec/functions/function_cast.h +++ b/be/src/vec/functions/function_cast.h @@ -94,6 +94,7 @@ #include "vec/functions/function_helpers.h" #include "vec/io/reader_buffer.h" #include "vec/runtime/vdatetime_value.h" +#include "vec/utils/util.hpp" class DateLUTImpl; @@ -2290,62 +2291,39 @@ class FunctionCast final : public IFunctionBase { const auto& nullable_type = static_cast(*ret_type); const auto& nested_type = nullable_type.get_nested_type(); - Block tmp_block; - size_t tmp_res_index = 0; + block.insert({nullptr, nested_type, ""}); if (source_is_nullable) { - auto [t_block, tmp_args] = - create_block_with_nested_columns(block, arguments, true); - tmp_block = std::move(t_block); - tmp_res_index = tmp_block.columns(); - tmp_block.insert({nullptr, nested_type, ""}); - + auto source = block.get_by_position(arguments[0]); + const auto* source_column = + assert_cast(source.column.get()); + block.insert(source.get_nested()); /// Perform the requested conversion. - RETURN_IF_ERROR( - wrapper(context, tmp_block, {0}, tmp_res_index, input_rows_count)); - } else { - tmp_block = block; - - tmp_res_index = block.columns(); - tmp_block.insert({nullptr, nested_type, ""}); - - /// Perform the requested conversion. - RETURN_IF_ERROR(wrapper(context, tmp_block, arguments, tmp_res_index, + RETURN_IF_ERROR(wrapper(context, block, {block.columns() - 1}, result, input_rows_count)); - } - - // Note: here we should return the nullable result column - const auto& tmp_res = tmp_block.get_by_position(tmp_res_index); - res.column = wrap_in_nullable(tmp_res.column, - Block({block.get_by_position(arguments[0]), tmp_res}), - {0}, 1, input_rows_count); - - return Status::OK(); - }; - } else if (source_is_nullable) { - /// Conversion from Nullable to non-Nullable. - - return [wrapper, skip_not_null_check](FunctionContext* context, Block& block, - const ColumnNumbers& arguments, - const size_t result, size_t input_rows_count) { - auto [tmp_block, tmp_args, tmp_res] = - create_block_with_nested_columns(block, arguments, result); - - /// Check that all values are not-NULL. - /// Check can be skipped in case if LowCardinality dictionary is transformed. - /// In that case, correctness will be checked beforehand. - if (!skip_not_null_check) { - const auto& col = block.get_by_position(arguments[0]).column; - const auto& nullable_col = assert_cast(*col); - const auto& null_map = nullable_col.get_null_map_data(); - - if (!memory_is_zero(null_map.data(), null_map.size())) { - return Status::RuntimeError( - "Cannot convert NULL value to non-Nullable type"); + auto result_column = block.get_by_position(result).column; + if (result_column->is_nullable() && source_column->has_null()) { + NullMap& result_null_map = + assert_cast( + assert_cast(result_column.get()) + ->get_null_map_column_ptr() + ->assume_mutable() + .get()) + ->get_data(); + + const NullMap& source_null_map = + assert_cast( + source_column->get_null_map_column_ptr().get()) + ->get_data(); + + VectorizedUtils::update_null_map(result_null_map, source_null_map); + } else { + block.get_by_position(result).column = ColumnNullable::create( + result_column, source_column->get_nested_column_ptr()); } + } else { + /// Perform the requested conversion. + RETURN_IF_ERROR(wrapper(context, block, arguments, result, input_rows_count)); } - - RETURN_IF_ERROR(wrapper(context, tmp_block, tmp_args, tmp_res, input_rows_count)); - block.get_by_position(result).column = tmp_block.get_by_position(tmp_res).column; return Status::OK(); }; } else {