Skip to content

Commit

Permalink
[fix](column_complex) wrong type of Field returned by ColumnComplex (a…
Browse files Browse the repository at this point in the history
…pache#43515)

### What problem does this PR solve?

`ColumnComplex::operator[](size_t n)` always return String Field type.

```
*** Query id: b73dc1a149a469b-ac1b822f8fe0a8a2 ***
*** is nereids: 1 ***
*** tablet id: 0 ***
*** Aborted at 1731047590 (unix time) try "date -d @1731047590" if you are using GNU date ***
*** Current BE git commitID: 55e92da ***
*** SIGSEGV address not mapped to object (@0x58) received by PID 2528792 (TID 2533139 OR 0x7f6add64b700) from PID 88; stack trace: ***
 0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at /mnt/disk1/doris/be/src/common/signal_handler.h:421
 1# 0x00007F6FEE12BB50 in /lib64/libc.so.6
 2# doris::BitmapValue::BitmapValue(doris::BitmapValue const&) at /mnt/disk1/doris/be/src/util/bitmap_value.h:850
 3# void std::vector<doris::BitmapValue, std::allocator<doris::BitmapValue> >::_M_realloc_insert<doris::BitmapValue const&>(__gnu_cxx::__normal_iterator<doris::BitmapValue*, std::vector<doris::BitmapValue, std::allocator<doris::BitmapValue> > >, doris::BitmapValue const&) in /mnt/disk1/doris/be/output/lib/doris_be
 4# doris::vectorized::ColumnNullable::insert(doris::vectorized::Field const&) at /mnt/disk1/doris/be/src/vec/columns/column_nullable.cpp:334
 5# doris::vectorized::AggregateFunctionMapAggData<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >::add(doris::vectorized::Field const&, doris::vectorized::Field const&) in /mnt/disk1/doris/be/output/lib/doris_be
 6# doris::vectorized::AggregateFunctionMapAgg<doris::vectorized::AggregateFunctionMapAggData<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >::deserialize_and_merge_from_column(char*, doris::vectorized::IColumn const&, doris::vectorized::Arena*) const at /mnt/disk1/doris/be/src/vec/aggregate_functions/aggregate_function_map.h:287
 7# doris::pipeline::AggSinkLocalState::_merge_without_key(doris::vectorized::Block*) at /mnt/disk1/doris/be/src/pipeline/exec/aggregation_sink_operator.cpp:389
 8# doris::pipeline::AggSinkLocalState::Executor<true, true>::execute(doris::pipeline::AggSinkLocalState*, doris::vectorized::Block*) at /mnt/disk1/doris/be/src/pipeline/exec/aggregation_sink_operator.h:73
 9# doris::pipeline::AggSinkOperatorX::sink(doris::RuntimeState*, doris::vectorized::Block*, bool) at /mnt/disk1/doris/be/src/pipeline/exec/aggregation_sink_operator.cpp:744
10# doris::pipeline::PipelineXTask::execute(bool*) at /mnt/disk1/doris/be/src/pipeline/pipeline_x/pipeline_x_task.cpp:332
11# doris::pipeline::TaskScheduler::_do_work(unsigned long) at /mnt/disk1/doris/be/src/pipeline/task_scheduler.cpp:347
12# doris::ThreadPool::dispatch_thread() in /mnt/disk1/doris/be/output/lib/doris_be
13# doris::Thread::supervise_thread(void*) at /mnt/disk1/doris/be/src/util/thread.cpp:499
14# start_thread in /lib64/libpthread.so.0
15# __clone in /lib64/libc.so.6
```
  • Loading branch information
mrhhsg authored Nov 12, 2024
1 parent b88b2e3 commit 55dbedf
Show file tree
Hide file tree
Showing 33 changed files with 188 additions and 109 deletions.
2 changes: 1 addition & 1 deletion be/src/exec/es/es_scroll_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -488,7 +488,7 @@ Status process_single_column(const rapidjson::Value& col, PrimitiveType sub_type
bool pure_doc_value, vectorized::Array& array) {
T val;
RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
array.push_back(val);
array.push_back(vectorized::Field(val));
return Status::OK();
}

Expand Down
5 changes: 4 additions & 1 deletion be/src/vec/columns/column_complex.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

#pragma once

#include <glog/logging.h>

#include <vector>

#include "olap/hll.h"
Expand Down Expand Up @@ -129,13 +131,14 @@ class ColumnComplexType final : public COWHelper<IColumn, ColumnComplexType<T>>
MutableColumnPtr clone_resized(size_t size) const override;

void insert(const Field& x) override {
DCHECK_EQ(x.get_type(), Field::TypeToEnum<T>::value);
const T& s = doris::vectorized::get<const T&>(x);
data.push_back(s);
}

Field operator[](size_t n) const override {
assert(n < size());
return {reinterpret_cast<const char*>(&data[n]), sizeof(data[n])};
return Field(data[n]);
}

void get(size_t n, Field& res) const override {
Expand Down
6 changes: 4 additions & 2 deletions be/src/vec/columns/column_fixed_length_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,13 @@ class ColumnFixedLengthObject final : public COWHelper<IColumn, ColumnFixedLengt
}

Field operator[](size_t n) const override {
return {_data.data() + n * _item_size, _item_size};
return Field(
String(reinterpret_cast<const char*>(_data.data() + n * _item_size), _item_size));
}

void get(size_t n, Field& res) const override {
res.assign_string(_data.data() + n * _item_size, _item_size);
res = Field(
String(reinterpret_cast<const char*>(_data.data() + n * _item_size), _item_size));
}

StringRef get_data_at(size_t n) const override {
Expand Down
4 changes: 2 additions & 2 deletions be/src/vec/columns/column_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {

Field operator[](size_t n) const override {
assert(n < size());
return Field(&chars[offset_at(n)], size_at(n));
return Field(String(reinterpret_cast<const char*>(&chars[offset_at(n)]), size_at(n)));
}

void get(size_t n, Field& res) const override {
Expand All @@ -132,7 +132,7 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {
res = JsonbField(reinterpret_cast<const char*>(&chars[offset_at(n)]), size_at(n));
return;
}
res.assign_string(&chars[offset_at(n)], size_at(n));
res = Field(String(reinterpret_cast<const char*>(&chars[offset_at(n)]), size_at(n)));
}

StringRef get_data_at(size_t n) const override {
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/common/schema_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,7 @@ Status extract(ColumnPtr source, const PathInData& path, MutableColumnPtr& dst)
: std::make_shared<DataTypeJsonb>();
ColumnsWithTypeAndName arguments {
{source, json_type, ""},
{type_string->create_column_const(1, Field(jsonpath.data(), jsonpath.size())),
{type_string->create_column_const(1, Field(String(jsonpath.data(), jsonpath.size()))),
type_string, ""}};
auto function =
SimpleFunctionFactory::instance().get_function("jsonb_extract", arguments, json_type);
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/core/field.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ void read_binary(Array& x, BufferReadable& buf) {
case Field::Types::String: {
std::string value;
doris::vectorized::read_string_binary(value, buf);
x.push_back(value);
x.push_back(Field(value));
break;
}
case Field::Types::JSONB: {
Expand Down
56 changes: 7 additions & 49 deletions be/src/vec/core/field.h
Original file line number Diff line number Diff line change
Expand Up @@ -452,43 +452,20 @@ class Field {

Field(Field&& rhs) { create(std::move(rhs)); }

// Make the constructor with a String parameter explicit to prevent accidentally creating a Field with the wrong string type.
// Other types don't require explicit construction to avoid extensive modifications.
template <typename T>
requires(!std::is_same_v<std::decay_t<T>, Field>)
Field(T&& rhs);

/// Create a string inplace.
Field(const char* data, size_t size) { create(data, size); }

Field(const unsigned char* data, size_t size) { create(data, size); }

/// NOTE In case when field already has string type, more direct assign is possible.
void assign_string(const char* data, size_t size) {
destroy();
create(data, size);
}

void assign_string(const unsigned char* data, size_t size) {
destroy();
create(data, size);
}

void assign_jsonb(const char* data, size_t size) {
destroy();
create_jsonb(data, size);
}

void assign_jsonb(const unsigned char* data, size_t size) {
destroy();
create_jsonb(data, size);
}
explicit(std::is_same_v<std::decay_t<T>, String>) Field(T&& rhs);

Field& operator=(const Field& rhs) {
if (this != &rhs) {
if (which != rhs.which) {
destroy();
create(rhs);
} else
} else {
assign(rhs); /// This assigns string or vector without deallocation of existing buffer.
}
}
return *this;
}
Expand All @@ -503,8 +480,9 @@ class Field {
if (which != rhs.which) {
destroy();
create(std::move(rhs));
} else
} else {
assign(std::move(rhs));
}
}
return *this;
}
Expand Down Expand Up @@ -731,7 +709,6 @@ class Field {
*ptr = std::forward<T>(x);
}

private:
void create(const Field& x) {
dispatch([this](auto& value) { create_concrete(value); }, x);
}
Expand All @@ -748,25 +725,6 @@ class Field {
dispatch([this](auto& value) { assign_concrete(std::move(value)); }, x);
}

void create(const char* data, size_t size) {
new (&storage) String(data, size);
which = Types::String;
}

void create(const unsigned char* data, size_t size) {
create(reinterpret_cast<const char*>(data), size);
}

void create_jsonb(const char* data, size_t size) {
new (&storage) JsonbField(data, size);
which = Types::JSONB;
}

void create_jsonb(const unsigned char* data, size_t size) {
new (&storage) JsonbField(reinterpret_cast<const char*>(data), size);
which = Types::JSONB;
}

ALWAYS_INLINE void destroy() {
if (which < Types::MIN_NON_POD) {
return;
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/data_types/data_type_fixed_length_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class DataTypeFixedLengthObject final : public IDataType {
return doris::FieldType::OLAP_FIELD_TYPE_NONE;
}

Field get_default() const override { return String(); }
Field get_default() const override { return Field(String()); }

[[noreturn]] Field get_field(const TExprNode& node) const override {
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/data_types/data_type_jsonb.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ class DataTypeJsonb final : public IDataType {
DCHECK_EQ(node.node_type, TExprNodeType::JSON_LITERAL);
DCHECK(node.__isset.json_literal);
JsonBinaryValue value(node.json_literal.value);
return String(value.value(), value.size());
return Field(String(value.value(), value.size()));
}

bool equals(const IDataType& rhs) const override;
Expand Down
4 changes: 2 additions & 2 deletions be/src/vec/data_types/data_type_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,10 @@ class DataTypeObject : public IDataType {

Field get_field(const TExprNode& node) const override {
if (node.__isset.string_literal) {
return node.string_literal.value;
return Field(node.string_literal.value);
}
if (node.node_type == TExprNodeType::NULL_LITERAL) {
return Field();
return {};
}
std::stringstream error_string;
node.printTo(error_string);
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/data_types/data_type_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ Status DataTypeString::from_string(ReadBuffer& rb, IColumn* column) const {
}

Field DataTypeString::get_default() const {
return String();
return Field(String());
}

MutableColumnPtr DataTypeString::create_column() const {
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/data_types/data_type_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ class DataTypeString : public IDataType {
Field get_field(const TExprNode& node) const override {
DCHECK_EQ(node.node_type, TExprNodeType::STRING_LITERAL);
DCHECK(node.__isset.string_literal);
return node.string_literal.value;
return Field(node.string_literal.value);
}

bool equals(const IDataType& rhs) const override;
Expand Down
5 changes: 3 additions & 2 deletions be/src/vec/data_types/serde/data_type_object_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "vec/common/assert_cast.h"
#include "vec/common/schema_util.h"
#include "vec/core/field.h"
#include "vec/core/types.h"

#ifdef __AVX2__
#include "util/jsonb_parser_simd.h"
Expand Down Expand Up @@ -117,11 +118,11 @@ void DataTypeObjectSerDe::read_one_cell_from_jsonb(IColumn& column, const JsonbV
Field field;
if (arg->isBinary()) {
const auto* blob = static_cast<const JsonbBlobVal*>(arg);
field.assign_jsonb(blob->getBlob(), blob->getBlobLen());
field = JsonbField(blob->getBlob(), blob->getBlobLen());
} else if (arg->isString()) {
// not a valid jsonb type, insert as string
const auto* str = static_cast<const JsonbStringVal*>(arg);
field.assign_string(str->getBlob(), str->getBlobLen());
field = Field(String(str->getBlob(), str->getBlobLen()));
} else {
throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Invalid jsonb type");
}
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/json/parse2column.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ void parse_json_to_variant(IColumn& column, const char* src, size_t length,
}
// Treat as string
PathInData root_path;
Field field(src, length);
Field field(String(src, length));
result = ParseResult {{root_path}, {field}};
}
auto& [paths, values] = *result;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -289,8 +289,8 @@ TEST_F(IndexCompactionTest, write_index_test) {
auto columns = block.mutate_columns();
for (const auto& row : data[i]) {
vectorized::Field key = Int32(row.key);
vectorized::Field v1 = row.word;
vectorized::Field v2 = row.url;
vectorized::Field v1(row.word);
vectorized::Field v2(row.url);
vectorized::Field v3 = Int32(row.num);
columns[0]->insert(key);
columns[1]->insert(v1);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -582,8 +582,8 @@ TEST_F(IndexCompactionDeleteTest, delete_index_test) {
auto columns = block.mutate_columns();
for (const auto& row : data[i]) {
vectorized::Field key = Int32(row.key);
vectorized::Field v1 = row.word;
vectorized::Field v2 = row.url;
vectorized::Field v1(row.word);
vectorized::Field v2(row.url);
vectorized::Field v3 = Int32(row.num);
columns[0]->insert(key);
columns[1]->insert(v1);
Expand Down
2 changes: 1 addition & 1 deletion be/test/vec/aggregate_functions/agg_min_max_by_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ TEST_P(AggMinMaxByTest, min_max_by_test) {
min_pair.first = str_val;
min_pair.second = i;
}
column_vector_key_str->insert(cast_to_nearest_field_type(str_val));
column_vector_key_str->insert(Field(cast_to_nearest_field_type(str_val)));
}

// Prepare test function and parameters.
Expand Down
2 changes: 1 addition & 1 deletion be/test/vec/columns/column_hash_func_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ TEST(HashFuncTest, StructTypeTestWithSepcificValueCrcHash) {

Tuple t;
t.push_back(Int64(1));
t.push_back(String("hello"));
t.push_back(Field(String("hello")));

DataTypePtr a = std::make_shared<DataTypeStruct>(dataTypes);
std::cout << a->get_name() << std::endl;
Expand Down
2 changes: 1 addition & 1 deletion be/test/vec/columns/column_nullable_test.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ inline MutableColumnPtr create_nested_column(size_t input_rows_count) {
if constexpr (std::is_integral_v<T>) {
column->insert(rand() % std::numeric_limits<T>::max());
} else if constexpr (std::is_same_v<T, String>) {
column->insert(generate_random_string(rand() % 512));
column->insert(Field(generate_random_string(rand() % 512)));
} else if constexpr (std::is_same_v<T, Decimal64>) {
column->insert(Int64(rand() % std::numeric_limits<Int64>::max()));
} else {
Expand Down
Loading

0 comments on commit 55dbedf

Please sign in to comment.