From 55dbedffc93b1076961b5eed13cad6b557a69f69 Mon Sep 17 00:00:00 2001 From: Jerry Hu Date: Tue, 12 Nov 2024 14:49:41 +0800 Subject: [PATCH] [fix](column_complex) wrong type of Field returned by ColumnComplex (#43515) ### What problem does this PR solve? `ColumnComplex::operator[](size_t n)` always returns a Field of type String instead of a Field of the column's element type (e.g. Bitmap). Inserting such a Field into another column crashes the BE, as in the following stack trace: ``` *** Query id: b73dc1a149a469b-ac1b822f8fe0a8a2 *** *** is nereids: 1 *** *** tablet id: 0 *** *** Aborted at 1731047590 (unix time) try "date -d @1731047590" if you are using GNU date *** *** Current BE git commitID: 55e92da7e7 *** *** SIGSEGV address not mapped to object (@0x58) received by PID 2528792 (TID 2533139 OR 0x7f6add64b700) from PID 88; stack trace: *** 0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at /mnt/disk1/doris/be/src/common/signal_handler.h:421 1# 0x00007F6FEE12BB50 in /lib64/libc.so.6 2# doris::BitmapValue::BitmapValue(doris::BitmapValue const&) at /mnt/disk1/doris/be/src/util/bitmap_value.h:850 3# void std::vector >::_M_realloc_insert(__gnu_cxx::__normal_iterator > >, doris::BitmapValue const&) in /mnt/disk1/doris/be/output/lib/doris_be 4# doris::vectorized::ColumnNullable::insert(doris::vectorized::Field const&) at /mnt/disk1/doris/be/src/vec/columns/column_nullable.cpp:334 5# doris::vectorized::AggregateFunctionMapAggData, std::allocator > >::add(doris::vectorized::Field const&, doris::vectorized::Field const&) in /mnt/disk1/doris/be/output/lib/doris_be 6# doris::vectorized::AggregateFunctionMapAgg, std::allocator > >, std::__cxx11::basic_string, std::allocator > >::deserialize_and_merge_from_column(char*, doris::vectorized::IColumn const&, doris::vectorized::Arena*) const at /mnt/disk1/doris/be/src/vec/aggregate_functions/aggregate_function_map.h:287 7# doris::pipeline::AggSinkLocalState::_merge_without_key(doris::vectorized::Block*) at /mnt/disk1/doris/be/src/pipeline/exec/aggregation_sink_operator.cpp:389 8# 
doris::pipeline::AggSinkLocalState::Executor::execute(doris::pipeline::AggSinkLocalState*, doris::vectorized::Block*) at /mnt/disk1/doris/be/src/pipeline/exec/aggregation_sink_operator.h:73 9# doris::pipeline::AggSinkOperatorX::sink(doris::RuntimeState*, doris::vectorized::Block*, bool) at /mnt/disk1/doris/be/src/pipeline/exec/aggregation_sink_operator.cpp:744 10# doris::pipeline::PipelineXTask::execute(bool*) at /mnt/disk1/doris/be/src/pipeline/pipeline_x/pipeline_x_task.cpp:332 11# doris::pipeline::TaskScheduler::_do_work(unsigned long) at /mnt/disk1/doris/be/src/pipeline/task_scheduler.cpp:347 12# doris::ThreadPool::dispatch_thread() in /mnt/disk1/doris/be/output/lib/doris_be 13# doris::Thread::supervise_thread(void*) at /mnt/disk1/doris/be/src/util/thread.cpp:499 14# start_thread in /lib64/libpthread.so.0 15# __clone in /lib64/libc.so.6 ``` --- be/src/exec/es/es_scroll_parser.cpp | 2 +- be/src/vec/columns/column_complex.h | 5 +- .../vec/columns/column_fixed_length_object.h | 6 +- be/src/vec/columns/column_string.h | 4 +- be/src/vec/common/schema_util.cpp | 2 +- be/src/vec/core/field.cpp | 2 +- be/src/vec/core/field.h | 56 ++---------- .../data_type_fixed_length_object.h | 2 +- be/src/vec/data_types/data_type_jsonb.h | 2 +- be/src/vec/data_types/data_type_object.h | 4 +- be/src/vec/data_types/data_type_string.cpp | 2 +- be/src/vec/data_types/data_type_string.h | 2 +- .../serde/data_type_object_serde.cpp | 5 +- be/src/vec/json/parse2column.cpp | 2 +- .../compaction/index_compaction_test.cpp | 4 +- .../index_compaction_with_deleted_term.cpp | 4 +- .../agg_min_max_by_test.cpp | 2 +- be/test/vec/columns/column_hash_func_test.cpp | 2 +- be/test/vec/columns/column_nullable_test.h | 2 +- be/test/vec/core/column_complex_test.cpp | 87 +++++++++++++++++-- be/test/vec/core/field_test.cpp | 2 +- .../serde/data_type_serde_pb_test.cpp | 6 +- .../serde/data_type_to_string_test.cpp | 10 +-- .../function/function_array_element_test.cpp | 2 +- 
.../function/function_array_index_test.cpp | 4 +- .../vec/function/function_array_size_test.cpp | 12 +-- .../function/function_arrays_overlap_test.cpp | 6 +- ...nction_compressed_materialization_test.cpp | 4 +- be/test/vec/function/function_string_test.cpp | 10 +-- be/test/vec/function/table_function_test.cpp | 4 +- be/test/vec/jsonb/serialize_test.cpp | 2 +- .../datatype_p0/complex_types/test_map.out | 3 + .../datatype_p0/complex_types/test_map.groovy | 35 ++++++++ 33 files changed, 188 insertions(+), 109 deletions(-) diff --git a/be/src/exec/es/es_scroll_parser.cpp b/be/src/exec/es/es_scroll_parser.cpp index f8dfbd0d85e4f3..f745ac34e65a0c 100644 --- a/be/src/exec/es/es_scroll_parser.cpp +++ b/be/src/exec/es/es_scroll_parser.cpp @@ -488,7 +488,7 @@ Status process_single_column(const rapidjson::Value& col, PrimitiveType sub_type bool pure_doc_value, vectorized::Array& array) { T val; RETURN_IF_ERROR(handle_value(col, sub_type, pure_doc_value, val)); - array.push_back(val); + array.push_back(vectorized::Field(val)); return Status::OK(); } diff --git a/be/src/vec/columns/column_complex.h b/be/src/vec/columns/column_complex.h index 9febe28488f155..c8c1b78d580166 100644 --- a/be/src/vec/columns/column_complex.h +++ b/be/src/vec/columns/column_complex.h @@ -20,6 +20,8 @@ #pragma once +#include + #include #include "olap/hll.h" @@ -129,13 +131,14 @@ class ColumnComplexType final : public COWHelper> MutableColumnPtr clone_resized(size_t size) const override; void insert(const Field& x) override { + DCHECK_EQ(x.get_type(), Field::TypeToEnum::value); const T& s = doris::vectorized::get(x); data.push_back(s); } Field operator[](size_t n) const override { assert(n < size()); - return {reinterpret_cast(&data[n]), sizeof(data[n])}; + return Field(data[n]); } void get(size_t n, Field& res) const override { diff --git a/be/src/vec/columns/column_fixed_length_object.h b/be/src/vec/columns/column_fixed_length_object.h index b83f11ff98aac3..1f92816ba044e0 100644 --- 
a/be/src/vec/columns/column_fixed_length_object.h +++ b/be/src/vec/columns/column_fixed_length_object.h @@ -105,11 +105,13 @@ class ColumnFixedLengthObject final : public COWHelper(_data.data() + n * _item_size), _item_size)); } void get(size_t n, Field& res) const override { - res.assign_string(_data.data() + n * _item_size, _item_size); + res = Field( + String(reinterpret_cast(_data.data() + n * _item_size), _item_size)); } StringRef get_data_at(size_t n) const override { diff --git a/be/src/vec/columns/column_string.h b/be/src/vec/columns/column_string.h index 4fc4ee65abc415..906f62b52aaca2 100644 --- a/be/src/vec/columns/column_string.h +++ b/be/src/vec/columns/column_string.h @@ -122,7 +122,7 @@ class ColumnStr final : public COWHelper> { Field operator[](size_t n) const override { assert(n < size()); - return Field(&chars[offset_at(n)], size_at(n)); + return Field(String(reinterpret_cast(&chars[offset_at(n)]), size_at(n))); } void get(size_t n, Field& res) const override { @@ -132,7 +132,7 @@ class ColumnStr final : public COWHelper> { res = JsonbField(reinterpret_cast(&chars[offset_at(n)]), size_at(n)); return; } - res.assign_string(&chars[offset_at(n)], size_at(n)); + res = Field(String(reinterpret_cast(&chars[offset_at(n)]), size_at(n))); } StringRef get_data_at(size_t n) const override { diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp index 4545a3839100ca..fd50af3e1fcd88 100644 --- a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -578,7 +578,7 @@ Status extract(ColumnPtr source, const PathInData& path, MutableColumnPtr& dst) : std::make_shared(); ColumnsWithTypeAndName arguments { {source, json_type, ""}, - {type_string->create_column_const(1, Field(jsonpath.data(), jsonpath.size())), + {type_string->create_column_const(1, Field(String(jsonpath.data(), jsonpath.size()))), type_string, ""}}; auto function = SimpleFunctionFactory::instance().get_function("jsonb_extract", arguments, json_type); 
diff --git a/be/src/vec/core/field.cpp b/be/src/vec/core/field.cpp index 8cb07f27c7c416..e652fc2dc9e1be 100644 --- a/be/src/vec/core/field.cpp +++ b/be/src/vec/core/field.cpp @@ -74,7 +74,7 @@ void read_binary(Array& x, BufferReadable& buf) { case Field::Types::String: { std::string value; doris::vectorized::read_string_binary(value, buf); - x.push_back(value); + x.push_back(Field(value)); break; } case Field::Types::JSONB: { diff --git a/be/src/vec/core/field.h b/be/src/vec/core/field.h index 87459f19ce6b72..8113dc602fbd4e 100644 --- a/be/src/vec/core/field.h +++ b/be/src/vec/core/field.h @@ -452,43 +452,20 @@ class Field { Field(Field&& rhs) { create(std::move(rhs)); } + // Make the constructor with a String parameter explicit to prevent accidentally creating a Field with the wrong string type. + // Other types don't require explicit construction to avoid extensive modifications. template requires(!std::is_same_v, Field>) - Field(T&& rhs); - - /// Create a string inplace. - Field(const char* data, size_t size) { create(data, size); } - - Field(const unsigned char* data, size_t size) { create(data, size); } - - /// NOTE In case when field already has string type, more direct assign is possible. - void assign_string(const char* data, size_t size) { - destroy(); - create(data, size); - } - - void assign_string(const unsigned char* data, size_t size) { - destroy(); - create(data, size); - } - - void assign_jsonb(const char* data, size_t size) { - destroy(); - create_jsonb(data, size); - } - - void assign_jsonb(const unsigned char* data, size_t size) { - destroy(); - create_jsonb(data, size); - } + explicit(std::is_same_v, String>) Field(T&& rhs); Field& operator=(const Field& rhs) { if (this != &rhs) { if (which != rhs.which) { destroy(); create(rhs); - } else + } else { assign(rhs); /// This assigns string or vector without deallocation of existing buffer. 
+ } } return *this; } @@ -503,8 +480,9 @@ class Field { if (which != rhs.which) { destroy(); create(std::move(rhs)); - } else + } else { assign(std::move(rhs)); + } } return *this; } @@ -731,7 +709,6 @@ class Field { *ptr = std::forward(x); } -private: void create(const Field& x) { dispatch([this](auto& value) { create_concrete(value); }, x); } @@ -748,25 +725,6 @@ class Field { dispatch([this](auto& value) { assign_concrete(std::move(value)); }, x); } - void create(const char* data, size_t size) { - new (&storage) String(data, size); - which = Types::String; - } - - void create(const unsigned char* data, size_t size) { - create(reinterpret_cast(data), size); - } - - void create_jsonb(const char* data, size_t size) { - new (&storage) JsonbField(data, size); - which = Types::JSONB; - } - - void create_jsonb(const unsigned char* data, size_t size) { - new (&storage) JsonbField(reinterpret_cast(data), size); - which = Types::JSONB; - } - ALWAYS_INLINE void destroy() { if (which < Types::MIN_NON_POD) { return; diff --git a/be/src/vec/data_types/data_type_fixed_length_object.h b/be/src/vec/data_types/data_type_fixed_length_object.h index cc3a74429d792f..af923ddce18cf0 100644 --- a/be/src/vec/data_types/data_type_fixed_length_object.h +++ b/be/src/vec/data_types/data_type_fixed_length_object.h @@ -60,7 +60,7 @@ class DataTypeFixedLengthObject final : public IDataType { return doris::FieldType::OLAP_FIELD_TYPE_NONE; } - Field get_default() const override { return String(); } + Field get_default() const override { return Field(String()); } [[noreturn]] Field get_field(const TExprNode& node) const override { throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, diff --git a/be/src/vec/data_types/data_type_jsonb.h b/be/src/vec/data_types/data_type_jsonb.h index 0577e5ed449e55..3d681e3ce79754 100644 --- a/be/src/vec/data_types/data_type_jsonb.h +++ b/be/src/vec/data_types/data_type_jsonb.h @@ -78,7 +78,7 @@ class DataTypeJsonb final : public IDataType { 
DCHECK_EQ(node.node_type, TExprNodeType::JSON_LITERAL); DCHECK(node.__isset.json_literal); JsonBinaryValue value(node.json_literal.value); - return String(value.value(), value.size()); + return Field(String(value.value(), value.size())); } bool equals(const IDataType& rhs) const override; diff --git a/be/src/vec/data_types/data_type_object.h b/be/src/vec/data_types/data_type_object.h index 2959b3dc074ec5..ec60cde9f92fca 100644 --- a/be/src/vec/data_types/data_type_object.h +++ b/be/src/vec/data_types/data_type_object.h @@ -81,10 +81,10 @@ class DataTypeObject : public IDataType { Field get_field(const TExprNode& node) const override { if (node.__isset.string_literal) { - return node.string_literal.value; + return Field(node.string_literal.value); } if (node.node_type == TExprNodeType::NULL_LITERAL) { - return Field(); + return {}; } std::stringstream error_string; node.printTo(error_string); diff --git a/be/src/vec/data_types/data_type_string.cpp b/be/src/vec/data_types/data_type_string.cpp index 878e6c319a103b..424cd43bd3ab57 100644 --- a/be/src/vec/data_types/data_type_string.cpp +++ b/be/src/vec/data_types/data_type_string.cpp @@ -66,7 +66,7 @@ Status DataTypeString::from_string(ReadBuffer& rb, IColumn* column) const { } Field DataTypeString::get_default() const { - return String(); + return Field(String()); } MutableColumnPtr DataTypeString::create_column() const { diff --git a/be/src/vec/data_types/data_type_string.h b/be/src/vec/data_types/data_type_string.h index abac6bc4b04828..dd937168611471 100644 --- a/be/src/vec/data_types/data_type_string.h +++ b/be/src/vec/data_types/data_type_string.h @@ -75,7 +75,7 @@ class DataTypeString : public IDataType { Field get_field(const TExprNode& node) const override { DCHECK_EQ(node.node_type, TExprNodeType::STRING_LITERAL); DCHECK(node.__isset.string_literal); - return node.string_literal.value; + return Field(node.string_literal.value); } bool equals(const IDataType& rhs) const override; diff --git 
a/be/src/vec/data_types/serde/data_type_object_serde.cpp b/be/src/vec/data_types/serde/data_type_object_serde.cpp index 49efa8c829c370..f356c454b3f8f1 100644 --- a/be/src/vec/data_types/serde/data_type_object_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_object_serde.cpp @@ -26,6 +26,7 @@ #include "vec/common/assert_cast.h" #include "vec/common/schema_util.h" #include "vec/core/field.h" +#include "vec/core/types.h" #ifdef __AVX2__ #include "util/jsonb_parser_simd.h" @@ -117,11 +118,11 @@ void DataTypeObjectSerDe::read_one_cell_from_jsonb(IColumn& column, const JsonbV Field field; if (arg->isBinary()) { const auto* blob = static_cast(arg); - field.assign_jsonb(blob->getBlob(), blob->getBlobLen()); + field = JsonbField(blob->getBlob(), blob->getBlobLen()); } else if (arg->isString()) { // not a valid jsonb type, insert as string const auto* str = static_cast(arg); - field.assign_string(str->getBlob(), str->getBlobLen()); + field = Field(String(str->getBlob(), str->getBlobLen())); } else { throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Invalid jsonb type"); } diff --git a/be/src/vec/json/parse2column.cpp b/be/src/vec/json/parse2column.cpp index aa5fc5eb8ed215..ba18083a95c5f6 100644 --- a/be/src/vec/json/parse2column.cpp +++ b/be/src/vec/json/parse2column.cpp @@ -149,7 +149,7 @@ void parse_json_to_variant(IColumn& column, const char* src, size_t length, } // Treat as string PathInData root_path; - Field field(src, length); + Field field(String(src, length)); result = ParseResult {{root_path}, {field}}; } auto& [paths, values] = *result; diff --git a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp index 5e3370847e94b9..aed83201a6349b 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp @@ -289,8 +289,8 @@ 
TEST_F(IndexCompactionTest, write_index_test) { auto columns = block.mutate_columns(); for (const auto& row : data[i]) { vectorized::Field key = Int32(row.key); - vectorized::Field v1 = row.word; - vectorized::Field v2 = row.url; + vectorized::Field v1(row.word); + vectorized::Field v2(row.url); vectorized::Field v3 = Int32(row.num); columns[0]->insert(key); columns[1]->insert(v1); diff --git a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp index 321d43fa87206c..8b5d403fca4ba5 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp @@ -582,8 +582,8 @@ TEST_F(IndexCompactionDeleteTest, delete_index_test) { auto columns = block.mutate_columns(); for (const auto& row : data[i]) { vectorized::Field key = Int32(row.key); - vectorized::Field v1 = row.word; - vectorized::Field v2 = row.url; + vectorized::Field v1(row.word); + vectorized::Field v2(row.url); vectorized::Field v3 = Int32(row.num); columns[0]->insert(key); columns[1]->insert(v1); diff --git a/be/test/vec/aggregate_functions/agg_min_max_by_test.cpp b/be/test/vec/aggregate_functions/agg_min_max_by_test.cpp index 137f4fc70b169b..b1a3e9ed483143 100644 --- a/be/test/vec/aggregate_functions/agg_min_max_by_test.cpp +++ b/be/test/vec/aggregate_functions/agg_min_max_by_test.cpp @@ -71,7 +71,7 @@ TEST_P(AggMinMaxByTest, min_max_by_test) { min_pair.first = str_val; min_pair.second = i; } - column_vector_key_str->insert(cast_to_nearest_field_type(str_val)); + column_vector_key_str->insert(Field(cast_to_nearest_field_type(str_val))); } // Prepare test function and parameters. 
diff --git a/be/test/vec/columns/column_hash_func_test.cpp b/be/test/vec/columns/column_hash_func_test.cpp index 7b2d5f2dddd81e..c49f1e0a578578 100644 --- a/be/test/vec/columns/column_hash_func_test.cpp +++ b/be/test/vec/columns/column_hash_func_test.cpp @@ -242,7 +242,7 @@ TEST(HashFuncTest, StructTypeTestWithSepcificValueCrcHash) { Tuple t; t.push_back(Int64(1)); - t.push_back(String("hello")); + t.push_back(Field(String("hello"))); DataTypePtr a = std::make_shared(dataTypes); std::cout << a->get_name() << std::endl; diff --git a/be/test/vec/columns/column_nullable_test.h b/be/test/vec/columns/column_nullable_test.h index 0f90a25c9b56b3..f371ff13fb20fd 100644 --- a/be/test/vec/columns/column_nullable_test.h +++ b/be/test/vec/columns/column_nullable_test.h @@ -83,7 +83,7 @@ inline MutableColumnPtr create_nested_column(size_t input_rows_count) { if constexpr (std::is_integral_v) { column->insert(rand() % std::numeric_limits::max()); } else if constexpr (std::is_same_v) { - column->insert(generate_random_string(rand() % 512)); + column->insert(Field(generate_random_string(rand() % 512))); } else if constexpr (std::is_same_v) { column->insert(Int64(rand() % std::numeric_limits::max())); } else { diff --git a/be/test/vec/core/column_complex_test.cpp b/be/test/vec/core/column_complex_test.cpp index 589a705e072a0a..a0fbcccdd150fd 100644 --- a/be/test/vec/core/column_complex_test.cpp +++ b/be/test/vec/core/column_complex_test.cpp @@ -17,8 +17,10 @@ #include "vec/columns/column_complex.h" +#include #include #include +#include #include #include @@ -26,6 +28,8 @@ #include "agent/be_exec_version_manager.h" #include "gtest/gtest_pred_impl.h" +#include "util/bitmap_value.h" +#include "vec/core/field.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_bitmap.h" #include "vec/data_types/data_type_quantilestate.h" @@ -72,12 +76,12 @@ class ColumnBitmapTest : public testing::Test { } void check_serialize_and_deserialize(MutableColumnPtr& col) { - auto 
column = assert_cast(col.get()); + auto* column = assert_cast(col.get()); auto size = _bitmap_type.get_uncompressed_serialized_bytes( *column, BeExecVersionManager::get_newest_version()); std::unique_ptr buf = std::make_unique(size); - auto result = _bitmap_type.serialize(*column, buf.get(), - BeExecVersionManager::get_newest_version()); + auto* result = _bitmap_type.serialize(*column, buf.get(), + BeExecVersionManager::get_newest_version()); ASSERT_EQ(result, buf.get() + size); auto column2 = _bitmap_type.create_column(); @@ -85,6 +89,19 @@ class ColumnBitmapTest : public testing::Test { check_bitmap_column(*column, *column2.get()); } + void check_field_type(MutableColumnPtr& col) { + auto& column = assert_cast(*col.get()); + auto dst_column = ColumnBitmap::create(); + const auto rows = column.size(); + for (size_t i = 0; i != rows; ++i) { + auto field = column[i]; + ASSERT_EQ(field.get_type(), Field::Types::Bitmap); + dst_column->insert(field); + } + + check_bitmap_column(column, *dst_column); + } + private: DataTypeBitMap _bitmap_type; }; @@ -94,7 +111,7 @@ class ColumnQuantileStateTest : public testing::Test { virtual void SetUp() override {} virtual void TearDown() override {} - void check_bitmap_column(const IColumn& l, const IColumn& r) { + void check_quantile_state_column(const IColumn& l, const IColumn& r) { ASSERT_EQ(l.size(), r.size()); const auto& l_col = assert_cast(l); const auto& r_col = assert_cast(r); @@ -117,7 +134,20 @@ class ColumnQuantileStateTest : public testing::Test { auto column2 = _quantile_state_type.create_column(); _quantile_state_type.deserialize(buf.get(), &column2, BeExecVersionManager::get_newest_version()); - check_bitmap_column(*column, *column2.get()); + check_quantile_state_column(*column, *column2.get()); + } + + void check_field_type(MutableColumnPtr& col) { + auto& column = assert_cast(*col.get()); + auto dst_column = ColumnQuantileState::create(); + const auto rows = column.size(); + for (size_t i = 0; i != rows; ++i) { + 
auto field = column[i]; + ASSERT_EQ(field.get_type(), Field::Types::QuantileState); + dst_column->insert(field); + } + + check_quantile_state_column(column, *dst_column); } private: @@ -153,6 +183,38 @@ TEST_F(ColumnBitmapTest, ColumnBitmapReadWrite) { EXPECT_TRUE(bitmap.contains(1000000)); } +TEST_F(ColumnBitmapTest, OperatorValidate) { + auto column = _bitmap_type.create_column(); + + // empty column + check_serialize_and_deserialize(column); + + // bitmap with lots of rows + const size_t row_size = 128; + auto& data = assert_cast(*column.get()).get_data(); + data.reserve(row_size); + + for (size_t i = 0; i != row_size; ++i) { + BitmapValue bitmap_value; + for (size_t j = 0; j <= i; ++j) { + bitmap_value.add(j); + } + data.emplace_back(std::move(bitmap_value)); + } + + auto& bitmap_column = assert_cast(*column.get()); + for (size_t i = 0; i != row_size; ++i) { + auto field = bitmap_column[i]; + ASSERT_EQ(field.get_type(), Field::Types::Bitmap); + const auto& bitmap = vectorized::get(field); + + ASSERT_EQ(bitmap.cardinality(), i + 1); + for (size_t j = 0; j <= i; ++j) { + ASSERT_TRUE(bitmap.contains(j)); + } + } +} + TEST_F(ColumnQuantileStateTest, ColumnQuantileStateReadWrite) { auto column = _quantile_state_type.create_column(); // empty column @@ -180,4 +242,19 @@ TEST_F(ColumnQuantileStateTest, ColumnQuantileStateReadWrite) { check_serialize_and_deserialize(column); } +TEST_F(ColumnQuantileStateTest, OperatorValidate) { + auto column = _quantile_state_type.create_column(); + + // empty column + check_serialize_and_deserialize(column); + + // bitmap with lots of rows + const size_t row_size = 20000; + auto& data = assert_cast(*column.get()).get_data(); + data.resize(row_size); + check_serialize_and_deserialize(column); + + check_field_type(column); +} + } // namespace doris::vectorized diff --git a/be/test/vec/core/field_test.cpp b/be/test/vec/core/field_test.cpp index a3542735c50a0f..71d26ea4979bde 100644 --- a/be/test/vec/core/field_test.cpp +++ 
b/be/test/vec/core/field_test.cpp @@ -39,7 +39,7 @@ TEST(VFieldTest, field_string) { ASSERT_EQ(f.get(), "Hello, world (4)"); f = Array {Field {String {"Hello, world (5)"}}}; ASSERT_EQ(f.get()[0].get(), "Hello, world (5)"); - f = Array {String {"Hello, world (6)"}}; + f = Array {Field(String {"Hello, world (6)"})}; ASSERT_EQ(f.get()[0].get(), "Hello, world (6)"); } diff --git a/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp b/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp index b64ddee1d2cb32..852614e84c533b 100644 --- a/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp +++ b/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp @@ -583,10 +583,10 @@ TEST(DataTypeSerDePbTest, DataTypeScalaSerDeTestStruct) { DataTypePtr m = std::make_shared(std::make_shared()); DataTypePtr st = std::make_shared(std::vector {s, d, m}); Tuple t1, t2; - t1.push_back(String("amory cute")); + t1.push_back(Field(String("amory cute"))); t1.push_back(__int128_t(37)); t1.push_back(true); - t2.push_back("null"); + t2.push_back(Field("null")); t2.push_back(__int128_t(26)); t2.push_back(false); MutableColumnPtr struct_column = st->create_column(); @@ -614,7 +614,7 @@ TEST(DataTypeSerDePbTest, DataTypeScalaSerDeTestStruct2) { DataTypePtr m = std::make_shared(std::make_shared()); DataTypePtr st = std::make_shared(std::vector {s, d, m}); Tuple t1, t2; - t1.push_back(String("amory cute")); + t1.push_back(Field(String("amory cute"))); t1.push_back(37); t1.push_back(true); t2.push_back("null"); diff --git a/be/test/vec/data_types/serde/data_type_to_string_test.cpp b/be/test/vec/data_types/serde/data_type_to_string_test.cpp index fe2e05d10a1d90..d605e73ced3e2d 100644 --- a/be/test/vec/data_types/serde/data_type_to_string_test.cpp +++ b/be/test/vec/data_types/serde/data_type_to_string_test.cpp @@ -45,9 +45,9 @@ TEST(ToStringMethodTest, DataTypeToStringTest) { a1.push_back(Null()); a1.push_back(UInt64(12345678)); a1.push_back(UInt64(0)); - a2.push_back(String("hello 
amory")); - a2.push_back("NULL"); - a2.push_back(String("cute amory")); + a2.push_back(Field(String("hello amory"))); + a2.push_back(Field("NULL")); + a2.push_back(Field(String("cute amory"))); a2.push_back(Null()); Map m; m.push_back(a1); @@ -55,11 +55,11 @@ TEST(ToStringMethodTest, DataTypeToStringTest) { Tuple t; t.push_back(Int128(12345454342)); - t.push_back(String("amory cute")); + t.push_back(Field(String("amory cute"))); t.push_back(UInt64(0)); cases.field_values = {UInt64(12), - String(" hello amory , cute amory "), + Field(String(" hello amory , cute amory ")), DecimalField(-12345678, 0), a1, a2, diff --git a/be/test/vec/function/function_array_element_test.cpp b/be/test/vec/function/function_array_element_test.cpp index 16ce28f52599e1..bf25ea4386c37c 100644 --- a/be/test/vec/function/function_array_element_test.cpp +++ b/be/test/vec/function/function_array_element_test.cpp @@ -148,7 +148,7 @@ TEST(function_array_element_test, element_at) { { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String, TypeIndex::Int32}; - Array vec = {Field("abc", 3), Field("", 0), Field("def", 3)}; + Array vec = {Field(String("abc", 3)), Field(String("", 0)), Field(String("def", 3))}; DataSet data_set = {{{vec, 1}, std::string("abc")}, {{vec, 2}, std::string("")}, {{vec, 10}, Null()}, diff --git a/be/test/vec/function/function_array_index_test.cpp b/be/test/vec/function/function_array_index_test.cpp index 24bd5797869a11..1a037818b10f54 100644 --- a/be/test/vec/function/function_array_index_test.cpp +++ b/be/test/vec/function/function_array_index_test.cpp @@ -152,7 +152,7 @@ TEST(function_array_index_test, array_contains) { { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String, TypeIndex::String}; - Array vec = {Field("abc", 3), Field("", 0), Field("def", 3)}; + Array vec = {Field(String("abc", 3)), Field(String("", 0)), Field(String("def", 3))}; DataSet data_set = {{{vec, std::string("abc")}, UInt8(1)}, {{vec, std::string("aaa")}, UInt8(0)}, {{vec, 
std::string("")}, UInt8(1)}, @@ -252,7 +252,7 @@ TEST(function_array_index_test, array_position) { { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String, TypeIndex::String}; - Array vec = {Field("abc", 3), Field("", 0), Field("def", 3)}; + Array vec = {Field(String("abc", 3)), Field(String("", 0)), Field(String("def", 3))}; DataSet data_set = {{{vec, std::string("abc")}, Int64(1)}, {{vec, std::string("aaa")}, Int64(0)}, {{vec, std::string("")}, Int64(2)}, diff --git a/be/test/vec/function/function_array_size_test.cpp b/be/test/vec/function/function_array_size_test.cpp index 3fa710f6844f93..c853a56930de12 100644 --- a/be/test/vec/function/function_array_size_test.cpp +++ b/be/test/vec/function/function_array_size_test.cpp @@ -47,8 +47,8 @@ TEST(function_array_size_test, size) { { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String}; - Array vec1 = {Field("abc", 3), Field("", 0), Field("def", 3)}; - Array vec2 = {Field("abc", 3), Field("123", 0), Field("def", 3)}; + Array vec1 = {Field(String("abc", 3)), Field(String("", 0)), Field(String("def", 3))}; + Array vec2 = {Field(String("abc", 3)), Field(String("123", 0)), Field(String("def", 3))}; DataSet data_set = {{{vec1}, Int64(3)}, {{vec2}, Int64(3)}, {{Null()}, Null()}, @@ -76,8 +76,8 @@ TEST(function_array_size_test, cardinality) { { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String}; - Array vec1 = {Field("abc", 3), Field("", 0), Field("def", 3)}; - Array vec2 = {Field("abc", 3), Field("123", 0), Field("def", 3)}; + Array vec1 = {Field(String("abc", 3)), Field(String("", 0)), Field(String("def", 3))}; + Array vec2 = {Field(String("abc", 3)), Field(String("123", 0)), Field(String("def", 3))}; DataSet data_set = {{{vec1}, Int64(3)}, {{vec2}, Int64(3)}, {{Null()}, Null()}, @@ -105,8 +105,8 @@ TEST(function_array_size_test, array_size) { { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String}; - Array vec1 = {Field("abc", 3), Field("", 0), Field("def", 3)}; - Array vec2 
= {Field("abc", 3), Field("123", 0), Field("def", 3)}; + Array vec1 = {Field(String("abc", 3)), Field(String("", 0)), Field(String("def", 3))}; + Array vec2 = {Field(String("abc", 3)), Field(String("123", 0)), Field(String("def", 3))}; DataSet data_set = {{{vec1}, Int64(3)}, {{vec2}, Int64(3)}, {{Null()}, Null()}, diff --git a/be/test/vec/function/function_arrays_overlap_test.cpp b/be/test/vec/function/function_arrays_overlap_test.cpp index 4a13d41b0a3121..3297f5fc281c86 100644 --- a/be/test/vec/function/function_arrays_overlap_test.cpp +++ b/be/test/vec/function/function_arrays_overlap_test.cpp @@ -124,9 +124,9 @@ TEST(function_arrays_overlap_test, arrays_overlap) { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String, TypeIndex::Array, TypeIndex::String}; - Array vec1 = {Field("abc", 3), Field("", 0), Field("def", 3)}; - Array vec2 = {Field("abc", 3)}; - Array vec3 = {Field("", 0)}; + Array vec1 = {Field(String("abc", 3)), Field(String("", 0)), Field(String("def", 3))}; + Array vec2 = {Field(String("abc", 3))}; + Array vec3 = {Field(String("", 0))}; DataSet data_set = {{{vec1, vec2}, UInt8(1)}, {{vec1, vec3}, UInt8(1)}, {{Null(), vec1}, Null()}, diff --git a/be/test/vec/function/function_compressed_materialization_test.cpp b/be/test/vec/function/function_compressed_materialization_test.cpp index 2553fc82fc7e54..432fbf78529350 100644 --- a/be/test/vec/function/function_compressed_materialization_test.cpp +++ b/be/test/vec/function/function_compressed_materialization_test.cpp @@ -111,7 +111,7 @@ void encode_and_decode(size_t len_of_varchar, std::string function_name) { continue; } else { std::string random_bytes = generate_random_len_and_random_bytes(m); - col_source_str_mutate->insert(Field(random_bytes.c_str(), random_bytes.size())); + col_source_str_mutate->insert(Field(random_bytes)); } } @@ -185,7 +185,7 @@ TEST(CompressedMaterializationTest, abnormal_test) { for (size_t i = 0; i < input_rows_count; ++i) { std::string random_bytes = 
generate_random_bytes(16); - col_source_str_mutate->insert(Field(random_bytes.c_str(), random_bytes.size())); + col_source_str_mutate->insert(Field(random_bytes)); } auto col_source_str = std::move(col_source_str_mutate); diff --git a/be/test/vec/function/function_string_test.cpp b/be/test/vec/function/function_string_test.cpp index 5d1d6fb9d8b217..f4381505276758 100644 --- a/be/test/vec/function/function_string_test.cpp +++ b/be/test/vec/function/function_string_test.cpp @@ -1417,11 +1417,11 @@ TEST(function_string_test, function_concat_ws_test) { { BaseInputTypeSet input_types = {TypeIndex::String, TypeIndex::Array, TypeIndex::String}; - Array vec1 = {Field("", 0), Field("", 0), Field("", 0)}; - Array vec2 = {Field("123", 3), Field("456", 3), Field("789", 3)}; - Array vec3 = {Field("", 0), Field("?", 1), Field("", 0)}; - Array vec4 = {Field("abc", 3), Field("", 0), Field("def", 3)}; - Array vec5 = {Field("abc", 3), Field("def", 3), Field("ghi", 3)}; + Array vec1 = {Field(String("", 0)), Field(String("", 0)), Field(String("", 0))}; + Array vec2 = {Field(String("123", 3)), Field(String("456", 3)), Field(String("789", 3))}; + Array vec3 = {Field(String("", 0)), Field(String("?", 1)), Field(String("", 0))}; + Array vec4 = {Field(String("abc", 3)), Field(String("", 0)), Field(String("def", 3))}; + Array vec5 = {Field(String("abc", 3)), Field(String("def", 3)), Field(String("ghi", 3))}; DataSet data_set = {{{std::string("-"), vec1}, std::string("--")}, {{std::string(""), vec2}, std::string("123456789")}, {{std::string("-"), vec3}, std::string("-?-")}, diff --git a/be/test/vec/function/table_function_test.cpp b/be/test/vec/function/table_function_test.cpp index a5c49dbdba94d6..43d37f6bf73fca 100644 --- a/be/test/vec/function/table_function_test.cpp +++ b/be/test/vec/function/table_function_test.cpp @@ -97,7 +97,7 @@ TEST_F(TableFunctionTest, vexplode_outer) { // explode_outer(Array) { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String}; - Array vec = 
{std::string("abc"), std::string(""), std::string("def")}; + Array vec = {Field(std::string("abc")), Field(std::string("")), Field(std::string("def"))}; InputDataSet input_set = {{Null()}, {Array()}, {vec}}; InputTypeSet output_types = {TypeIndex::String}; @@ -144,7 +144,7 @@ TEST_F(TableFunctionTest, vexplode) { // explode(Array) { InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String}; - Array vec = {std::string("abc"), std::string(""), std::string("def")}; + Array vec = {Field(std::string("abc")), Field(std::string("")), Field(std::string("def"))}; InputDataSet input_set = {{Null()}, {Array()}, {vec}}; InputTypeSet output_types = {TypeIndex::String}; diff --git a/be/test/vec/jsonb/serialize_test.cpp b/be/test/vec/jsonb/serialize_test.cpp index 82d8c4f394ab5a..47819c75072347 100644 --- a/be/test/vec/jsonb/serialize_test.cpp +++ b/be/test/vec/jsonb/serialize_test.cpp @@ -294,7 +294,7 @@ TEST(BlockSerializeTest, Struct) { DataTypePtr m = std::make_shared(std::make_shared()); DataTypePtr st = std::make_shared(std::vector {s, d, m}); Tuple t1, t2; - t1.push_back(String("amory cute")); + t1.push_back(Field(String("amory cute"))); t1.push_back(__int128_t(37)); t1.push_back(true); t2.push_back("null"); diff --git a/regression-test/data/datatype_p0/complex_types/test_map.out b/regression-test/data/datatype_p0/complex_types/test_map.out index 4ac971fb3a1590..03c9853b8e8535 100644 --- a/regression-test/data/datatype_p0/complex_types/test_map.out +++ b/regression-test/data/datatype_p0/complex_types/test_map.out @@ -14,3 +14,6 @@ 6 3 {"key3":"value3", "key33":"value33", "key3333":"value333"} 6 3 7 4 {"key4":"value4", "key44":"value44", "key444":"value444", "key4444":"value4444"} \N \N +-- !sql2 -- +3 true true true + diff --git a/regression-test/suites/datatype_p0/complex_types/test_map.groovy b/regression-test/suites/datatype_p0/complex_types/test_map.groovy index 4dd0272f517737..b985ef61008a64 100644 --- 
a/regression-test/suites/datatype_p0/complex_types/test_map.groovy +++ b/regression-test/suites/datatype_p0/complex_types/test_map.groovy @@ -51,4 +51,39 @@ suite("test_map") { qt_sql """ select * from test_map_table left join test_map_table_right on test_map_table.k1 = test_map_table_right.value order by 1,2,4,5; """ + + sql "DROP TABLE IF EXISTS `task_map_agg_with_bitmap`" + sql """ + CREATE TABLE `task_map_agg_with_bitmap` ( + `cache_key` varchar(65533) NOT NULL, + `result_cnt` int NULL COMMENT '人群包人数' + ) ENGINE = OLAP duplicate KEY(`cache_key`) COMMENT 'OLAP' DISTRIBUTED BY HASH(`cache_key`) BUCKETS 1 PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + sql 'insert into `task_map_agg_with_bitmap` values ("aa",null);' + sql 'insert into `task_map_agg_with_bitmap` values ("bb",null);' + sql 'insert into `task_map_agg_with_bitmap` values ("bb",1);' + sql 'insert into `task_map_agg_with_bitmap` values ("bb",2);' + sql 'insert into `task_map_agg_with_bitmap` values ("bb",3);' + + qt_sql2 """ + select bitmap_count(id_map['2024-11-03']) cnt, + bitmap_contains(id_map['2024-11-03'], 1) c1, + bitmap_contains(id_map['2024-11-03'], 2) c2, + bitmap_contains(id_map['2024-11-03'], 3) c3 + from ( + select + map_agg(tag_logymd, result) id_map + from + ( + select + '2024-11-03' tag_logymd, + bitmap_agg(result_cnt) result + from + `task_map_agg_with_bitmap` + ) t1 + ) t2; + """ }