Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
BiteTheDDDDt committed Dec 22, 2023
1 parent 073ca8e commit 48bdcdf
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 97 deletions.
30 changes: 21 additions & 9 deletions be/src/vec/aggregate_functions/aggregate_function_null.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,17 +210,29 @@ class AggregateFunctionNullUnaryInline final
}
}

void add_batch(size_t batch_size, AggregateDataPtr* places, size_t place_offset,
void add_batch(size_t batch_size, AggregateDataPtr* __restrict places, size_t place_offset,
const IColumn** columns, Arena* arena, bool agg_many) const override {
const ColumnNullable* column = assert_cast<const ColumnNullable*>(columns[0]);
// The overhead introduced is negligible here, just an extra memory read from NullMap
const auto* __restrict null_map_data = column->get_null_map_data().data();
const auto* column = assert_cast<const ColumnNullable*>(columns[0]);
const IColumn* nested_column = &column->get_nested_column();
for (int i = 0; i < batch_size; ++i) {
if (!null_map_data[i]) {
AggregateDataPtr __restrict place = places[i] + place_offset;
this->set_flag(place);
this->nested_function->add(this->nested_place(place), &nested_column, i, arena);
if (column->has_null()) {
const auto* __restrict null_map_data = column->get_null_map_data().data();
for (int i = 0; i < batch_size; ++i) {
if (!null_map_data[i]) {
AggregateDataPtr __restrict place = places[i] + place_offset;
this->set_flag(place);
this->nested_function->add(this->nested_place(place), &nested_column, i, arena);
}
}
} else {
if constexpr (result_is_nullable) {
for (int i = 0; i < batch_size; ++i) {
AggregateDataPtr __restrict place = places[i] + place_offset;
place[0] |= 1;
this->nested_function->add(this->nested_place(place), &nested_column, i, arena);
}
} else {
this->nested_function->add_batch(batch_size, places, place_offset, &nested_column,
arena, agg_many);
}
}
}
Expand Down
20 changes: 6 additions & 14 deletions be/src/vec/core/block.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,25 +183,17 @@ void Block::clear_names() {
}

void Block::insert(const ColumnWithTypeAndName& elem) {
index_by_name.emplace(elem.name, data.size());
if (!elem.name.empty()) {
index_by_name.emplace(elem.name, data.size());
}
data.emplace_back(elem);
}

void Block::insert(ColumnWithTypeAndName&& elem) {
index_by_name.emplace(elem.name, data.size());
data.emplace_back(std::move(elem));
}

void Block::insert_unique(const ColumnWithTypeAndName& elem) {
if (index_by_name.end() == index_by_name.find(elem.name)) {
insert(elem);
}
}

void Block::insert_unique(ColumnWithTypeAndName&& elem) {
if (index_by_name.end() == index_by_name.find(elem.name)) {
insert(std::move(elem));
if (!elem.name.empty()) {
index_by_name.emplace(elem.name, data.size());
}
data.emplace_back(std::move(elem));
}

void Block::erase(const std::set<size_t>& positions) {
Expand Down
3 changes: 0 additions & 3 deletions be/src/vec/core/block.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,6 @@ class Block {
/// insert the column to the end
void insert(const ColumnWithTypeAndName& elem);
void insert(ColumnWithTypeAndName&& elem);
/// insert the column to the end, if there is no column with that name yet
void insert_unique(const ColumnWithTypeAndName& elem);
void insert_unique(ColumnWithTypeAndName&& elem);
/// remove the column at the specified position
void erase(size_t position);
/// remove the column at the [start, end)
Expand Down
22 changes: 22 additions & 0 deletions be/src/vec/core/column_with_type_and_name.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "vec/columns/column.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type.h"
#include "vec/data_types/data_type_nullable.h"

namespace doris::vectorized {

Expand Down Expand Up @@ -87,4 +88,25 @@ void ColumnWithTypeAndName::to_pb_column_meta(PColumnMeta* col_meta) const {
type->to_pb_column_meta(col_meta);
}

/// Builds the entry obtained by stripping the Nullable wrapper from this column/type pair.
///
/// @param replace_null_data_to_default when true, the nested column is turned into a mutable
///        column and replace_column_null_data() is invoked on it with this column's null map
///        before it is returned (presumably overwriting the values stored at NULL rows with
///        defaults -- confirm against IColumn::replace_column_null_data).
/// @return *this unchanged (name included) when the type is not nullable; otherwise an
///         unnamed entry holding the nested column and the nested type. When no column is
///         attached to this entry, the returned entry carries no column either.
ColumnWithTypeAndName ColumnWithTypeAndName::get_nested(bool replace_null_data_to_default) const {
    // Nothing to unwrap for a non-nullable type.
    if (!type->is_nullable()) {
        return *this;
    }

    auto nested_type = assert_cast<const DataTypeNullable*>(type.get())->get_nested_type();

    // An entry may carry a type without a column. Unwrap only the type in that case instead
    // of dereferencing the missing column below.
    if (column.get() == nullptr) {
        return {column, nested_type, ""};
    }

    const auto* source_column = assert_cast<const ColumnNullable*>(column.get());
    auto nested_column = source_column->get_nested_column_ptr();

    if (replace_null_data_to_default) {
        // The null map is only read here; the nested column is the only thing made mutable.
        const auto& null_map = source_column->get_null_map_data();
        auto mutable_nested_col = (*std::move(nested_column)).mutate();
        mutable_nested_col->replace_column_null_data(null_map.data());

        return {std::move(mutable_nested_col), nested_type, ""};
    }

    return {nested_column, nested_type, ""};
}

} // namespace doris::vectorized
13 changes: 8 additions & 5 deletions be/src/vec/core/column_with_type_and_name.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <iosfwd>
#include <memory>
#include <string>
#include <utility>

#include "vec/core/types.h"
#include "vec/data_types/data_type.h"
Expand All @@ -47,13 +48,13 @@ struct ColumnWithTypeAndName {
DataTypePtr type;
String name;

ColumnWithTypeAndName() {}
ColumnWithTypeAndName(const ColumnPtr& column_, const DataTypePtr& type_, const String& name_)
: column(column_), type(type_), name(name_) {}
ColumnWithTypeAndName() = default;
ColumnWithTypeAndName(ColumnPtr column_, DataTypePtr type_, String name_)
: column(std::move(column_)), type(std::move(type_)), name(std::move(name_)) {}

/// Uses type->create_column() to create column
ColumnWithTypeAndName(const DataTypePtr& type_, const String& name_)
: column(type_->create_column()), type(type_), name(name_) {}
ColumnWithTypeAndName(const DataTypePtr& type_, String name_)
: column(type_->create_column()), type(type_), name(std::move(name_)) {}

ColumnWithTypeAndName clone_empty() const;
bool operator==(const ColumnWithTypeAndName& other) const;
Expand All @@ -63,6 +64,8 @@ struct ColumnWithTypeAndName {
std::string to_string(size_t row_num) const;

void to_pb_column_meta(PColumnMeta* col_meta) const;

ColumnWithTypeAndName get_nested(bool replace_null_data_to_default = false) const;
};

} // namespace doris::vectorized
31 changes: 16 additions & 15 deletions be/src/vec/functions/function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const Colum
ColumnPtr src_not_nullable = src;
MutableColumnPtr mutable_result_null_map_column;

if (auto* nullable = check_and_get_column<ColumnNullable>(*src)) {
if (const auto* nullable = check_and_get_column<ColumnNullable>(*src)) {
src_not_nullable = nullable->get_nested_column_ptr();
result_null_map_column = nullable->get_null_map_column_ptr();
}
Expand All @@ -69,14 +69,14 @@ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const Colum
continue;
}

if (auto* nullable = assert_cast<const ColumnNullable*>(elem.column.get())) {
if (const auto* nullable = assert_cast<const ColumnNullable*>(elem.column.get())) {
const ColumnPtr& null_map_column = nullable->get_null_map_column_ptr();
if (!result_null_map_column) {
result_null_map_column = null_map_column->clone_resized(input_rows_count);
} else {
result_null_map_column = null_map_column;
} else if (nullable->has_null()) {
if (!mutable_result_null_map_column) {
mutable_result_null_map_column =
std::move(result_null_map_column)->assume_mutable();
null_map_column->clone_resized(input_rows_count);
}

NullMap& result_null_map =
Expand Down Expand Up @@ -234,19 +234,20 @@ Status PreparedFunctionImpl::default_implementation_for_nulls(
}

if (null_presence.has_nullable) {
bool check_overflow_for_decimal = false;
bool need_to_default = need_replace_null_data_to_default();
if (context) {
check_overflow_for_decimal = context->check_overflow_for_decimal();
need_to_default &= context->check_overflow_for_decimal();
}
ColumnNumbers new_args;
for (auto arg : args) {
new_args.push_back(block.columns());
block.insert(block.get_by_position(arg).get_nested(need_to_default));
}
auto [temporary_block, new_args, new_result] = create_block_with_nested_columns(
block, args, result,
check_overflow_for_decimal && need_replace_null_data_to_default());

RETURN_IF_ERROR(execute_without_low_cardinality_columns(
context, temporary_block, new_args, new_result, temporary_block.rows(), dry_run));
block.get_by_position(result).column =
wrap_in_nullable(temporary_block.get_by_position(new_result).column, block, args,
result, input_rows_count);
RETURN_IF_ERROR(execute_without_low_cardinality_columns(context, block, new_args, result,
block.rows(), dry_run));
block.get_by_position(result).column = wrap_in_nullable(
block.get_by_position(result).column, block, args, result, input_rows_count);
*executed = true;
return Status::OK();
}
Expand Down
80 changes: 29 additions & 51 deletions be/src/vec/functions/function_cast.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@
#include "vec/functions/function_helpers.h"
#include "vec/io/reader_buffer.h"
#include "vec/runtime/vdatetime_value.h"
#include "vec/utils/util.hpp"

class DateLUTImpl;

Expand Down Expand Up @@ -2290,62 +2291,39 @@ class FunctionCast final : public IFunctionBase {
const auto& nullable_type = static_cast<const DataTypeNullable&>(*ret_type);
const auto& nested_type = nullable_type.get_nested_type();

Block tmp_block;
size_t tmp_res_index = 0;
block.insert({nullptr, nested_type, ""});
if (source_is_nullable) {
auto [t_block, tmp_args] =
create_block_with_nested_columns(block, arguments, true);
tmp_block = std::move(t_block);
tmp_res_index = tmp_block.columns();
tmp_block.insert({nullptr, nested_type, ""});

auto source = block.get_by_position(arguments[0]);
const auto* source_column =
assert_cast<const ColumnNullable*>(source.column.get());
block.insert(source.get_nested());
/// Perform the requested conversion.
RETURN_IF_ERROR(
wrapper(context, tmp_block, {0}, tmp_res_index, input_rows_count));
} else {
tmp_block = block;

tmp_res_index = block.columns();
tmp_block.insert({nullptr, nested_type, ""});

/// Perform the requested conversion.
RETURN_IF_ERROR(wrapper(context, tmp_block, arguments, tmp_res_index,
RETURN_IF_ERROR(wrapper(context, block, {block.columns() - 1}, result,
input_rows_count));
}

// Note: here we should return the nullable result column
const auto& tmp_res = tmp_block.get_by_position(tmp_res_index);
res.column = wrap_in_nullable(tmp_res.column,
Block({block.get_by_position(arguments[0]), tmp_res}),
{0}, 1, input_rows_count);

return Status::OK();
};
} else if (source_is_nullable) {
/// Conversion from Nullable to non-Nullable.

return [wrapper, skip_not_null_check](FunctionContext* context, Block& block,
const ColumnNumbers& arguments,
const size_t result, size_t input_rows_count) {
auto [tmp_block, tmp_args, tmp_res] =
create_block_with_nested_columns(block, arguments, result);

/// Check that all values are not-NULL.
/// Check can be skipped in case if LowCardinality dictionary is transformed.
/// In that case, correctness will be checked beforehand.
if (!skip_not_null_check) {
const auto& col = block.get_by_position(arguments[0]).column;
const auto& nullable_col = assert_cast<const ColumnNullable&>(*col);
const auto& null_map = nullable_col.get_null_map_data();

if (!memory_is_zero(null_map.data(), null_map.size())) {
return Status::RuntimeError(
"Cannot convert NULL value to non-Nullable type");
auto result_column = block.get_by_position(result).column;
if (result_column->is_nullable() && source_column->has_null()) {
NullMap& result_null_map =
assert_cast<ColumnUInt8*>(
assert_cast<const ColumnNullable*>(result_column.get())
->get_null_map_column_ptr()
->assume_mutable()
.get())
->get_data();

const NullMap& source_null_map =
assert_cast<const ColumnUInt8*>(
source_column->get_null_map_column_ptr().get())
->get_data();

VectorizedUtils::update_null_map(result_null_map, source_null_map);
} else {
block.get_by_position(result).column = ColumnNullable::create(
result_column, source_column->get_nested_column_ptr());
}
} else {
/// Perform the requested conversion.
RETURN_IF_ERROR(wrapper(context, block, arguments, result, input_rows_count));
}

RETURN_IF_ERROR(wrapper(context, tmp_block, tmp_args, tmp_res, input_rows_count));
block.get_by_position(result).column = tmp_block.get_by_position(tmp_res).column;
return Status::OK();
};
} else {
Expand Down

0 comments on commit 48bdcdf

Please sign in to comment.