Skip to content

Commit

Permalink
[Fix](Variant) fix variant serialize to string
Browse files Browse the repository at this point in the history
in rare cases variant root will be empty string "", but the subcolumns may not all be null value
so we should not ignore to treat subcolumns's values
  • Loading branch information
eldenmoon committed Jan 16, 2025
1 parent 3df6086 commit 3a2ef3b
Show file tree
Hide file tree
Showing 11 changed files with 84 additions and 22 deletions.
34 changes: 30 additions & 4 deletions be/src/vec/columns/column_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1479,20 +1479,46 @@ Status ColumnObject::serialize_one_row_to_json_format(size_t row, rapidjson::Str
root.CopyFrom(*doc_structure, doc_structure->GetAllocator());
}
Arena mem_pool;

bool serialize_root = true; // Assume all subcolumns are null by default
for (const auto& subcolumn : subcolumns) {
if (subcolumn->data.is_root) {
continue; // Skip the root column
}

// If any non-root subcolumn is NOT null, set serialize_root to false and exit early
if (!assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>(
*subcolumn->data.get_finalized_column_ptr())
.is_null_at(row)) {
serialize_root = false;
break;
}
}
#ifndef NDEBUG
VLOG_DEBUG << "dump structure " << JsonFunctions::print_json_value(*doc_structure);
#endif
if (serialize_root && subcolumns.get_root()->is_scalar()) {
// only serialize root when all other subcolumns is null
RETURN_IF_ERROR(
subcolumns.get_root()->data.get_least_common_type_serde()->write_one_cell_to_json(
subcolumns.get_root()->data.get_finalized_column(), root,
doc_structure->GetAllocator(), mem_pool, row));
output->Clear();
rapidjson::Writer<rapidjson::StringBuffer> writer(*output);
root.Accept(writer);
return Status::OK();
}
// handle subcolumns exclude root node
for (const auto& subcolumn : subcolumns) {
if (subcolumn->data.is_root) {
continue;
}
RETURN_IF_ERROR(find_and_set_leave_value(
subcolumn->data.get_finalized_column_ptr().get(), subcolumn->path,
subcolumn->data.get_least_common_type_serde(),
subcolumn->data.get_least_common_type(),
subcolumn->data.least_common_type.get_base_type_id(), root,
doc_structure->GetAllocator(), mem_pool, row));
if (subcolumn->path.empty() && !root.IsObject()) {
// root was modified, only handle root node
break;
}
}
compact_null_values(root, doc_structure->GetAllocator());
if (root.IsNull() && is_null != nullptr) {
Expand Down
13 changes: 13 additions & 0 deletions be/src/vec/data_types/serde/data_type_nothing_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,19 @@ class DataTypeNothingSerde : public DataTypeSerDe {
return Status::NotSupported("deserialize_column_from_text_vector with type " +
column.get_name());
}
Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result,
rapidjson::Document::AllocatorType& allocator, Arena& mem_pool,
int64_t row_num) const override {
result.SetNull();
return Status::OK();
}

Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const override {
if (result.IsNull()) {
column.insert_default();
}
return Status::OK();
}

Status write_column_to_pb(const IColumn& column, PValues& result, int64_t start,
int64_t end) const override {
Expand Down

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions regression-test/data/variant_p0/agg.out
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
-- !sql7 --
1 {"a":1,"b":{"c":[{"a":1}]}} 59
1022 {"a":1,"b":{"f":17034,"g":1.111}} 12
1029 \N 12
1029 {"a":1,"b":{"c":1}} 12
1999 {"a":1,"b":{"c":1}} 11

-- !sql8 --
Expand All @@ -48,7 +48,7 @@
11 [123] 11
12 [123.2] 12
1022 {"a":1,"b":{"f":17034,"g":1.111}} 12
1029 \N 12
1029 {"a":1,"b":{"c":1}} 12
1999 {"a":1,"b":{"c":1}} 11
19921 {"a":1,"d":10} 11

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -225,8 +225,8 @@
{"c":[{"a":1}]} [{"a":1}]
{"c":[{"a":1}]} [{"a":1}]
{"c":1} 1
{} \N
{} \N
null \N
null \N

-- !sql_11 --
1 {"x":[1]}
Expand Down Expand Up @@ -329,8 +329,8 @@
{"c":[{"a":1}]} [{"a":1}]
{"c":[{"a":1}]} [{"a":1}]
{"c":1} 1
{} \N
{} \N
null \N
null \N

-- !sql_11 --
1 {"x":[1]}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ suite("regression_test_variant_github_events_p2", "nonConcurrent,p2"){
if (!isCloudMode()) {
// BUILD INDEX and expect state is FINISHED
sql """ BUILD INDEX idx_var ON github_events"""
state = wait_for_last_build_index_on_table_finish("github_events", timeout)
def state = wait_for_last_build_index_on_table_finish("github_events", timeout)
assertEquals("FINISHED", state)
}

Expand Down
8 changes: 4 additions & 4 deletions regression-test/suites/variant_log_data_p2/load.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -68,25 +68,25 @@ suite("regression_test_variant_logdata", "nonConcurrent,p2"){
"""
}
// 12. streamload remote file
table_name = "logdata"
def table_name = "logdata"
create_table.call(table_name, "DUPLICATE", "4")
// sql "set enable_two_phase_read_opt = false;"
// no sparse columns
set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "1.0")
set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "1")
load_json_data.call(table_name, """${getS3Url() + '/regression/load/logdata.json'}""")
qt_sql_32 """ select json_extract(v, "\$.json.parseFailed") from logdata where json_extract(v, "\$.json.parseFailed") != 'null' order by k limit 1;"""
qt_sql_32_1 """select cast(v['json']['parseFailed'] as string) from logdata where cast(v['json']['parseFailed'] as string) is not null and k = 162 limit 1;"""
sql "truncate table ${table_name}"

// 0.95 default ratio
set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95")
set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "1")
load_json_data.call(table_name, """${getS3Url() + '/regression/load/logdata.json'}""")
qt_sql_33 """ select json_extract(v,"\$.json.parseFailed") from logdata where json_extract(v,"\$.json.parseFailed") != 'null' order by k limit 1;"""
qt_sql_33_1 """select cast(v['json']['parseFailed'] as string) from logdata where cast(v['json']['parseFailed'] as string) is not null and k = 162 limit 1;"""
sql "truncate table ${table_name}"

// always sparse column
set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95")
set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "1")
load_json_data.call(table_name, """${getS3Url() + '/regression/load/logdata.json'}""")
qt_sql_34 """ select json_extract(v, "\$.json.parseFailed") from logdata where json_extract(v,"\$.json.parseFailed") != 'null' order by k limit 1;"""
sql "truncate table ${table_name}"
Expand Down
8 changes: 4 additions & 4 deletions regression-test/suites/variant_p0/agg.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ suite("regression_test_variant_agg"){
)
AGGREGATE KEY(`k`)
DISTRIBUTED BY HASH(k) BUCKETS 4
properties("replication_num" = "1", "disable_auto_compaction" = "false");
properties("replication_num" = "1", "disable_auto_compaction" = "true");
"""
sql """insert into var_agg values (1, '[1]', 1),(1, '{"a" : 1}', 1);"""
sql """insert into var_agg values (2, '[2]', 2),(1, '{"a" : [[[1]]]}', 2);"""
Expand All @@ -42,10 +42,10 @@ suite("regression_test_variant_agg"){
qt_sql1 "select k, cast(v['a'] as array<int>) from var_agg where size(cast(v['a'] as array<int>)) > 0 order by k, cast(v['a'] as string) asc"
qt_sql2 "select k, cast(v as int), cast(v['b'] as string) from var_agg where length(cast(v['b'] as string)) > 4 order by k, cast(v as string), cast(v['b'] as string) "
qt_sql3 "select k, v from var_agg order by k, cast(v as string) limit 5"
qt_sql4 "select v['b'], v['b']['c'], cast(v as int) from var_agg where cast(v['b'] as string) is not null and cast(v['b'] as string) != '{}' order by k,cast(v as string) desc limit 10000;"
qt_sql4 "select v['b'], v['b']['c'], cast(v as int) from var_agg where cast(v['b'] as string) is not null and length(v['b']) >4 order by k,cast(v as string) desc limit 10000;"
qt_sql5 "select v['b'] from var_agg where cast(v['b'] as int) > 0;"
qt_sql6 "select cast(v['b'] as string) from var_agg where cast(v['b'] as string) is not null and cast(v['b'] as string) != '{}' order by k, cast(v['b'] as string) "
qt_sql7 "select * from var_agg where cast(v['b'] as string) is not null and cast(v['b'] as string) != '{}' order by k, cast(v['b'] as string) "
qt_sql6 "select cast(v['b'] as string) from var_agg where cast(v['b'] as string) is not null and length(v['b']) >4 order by k, cast(v['b'] as string) "
qt_sql7 "select * from var_agg where cast(v['b'] as string) is not null and length(v['b']) >4 order by k, cast(v['b'] as string) "
qt_sql8 "select * from var_agg order by 1, cast(2 as string), 3"
sql "alter table var_agg drop column s"
sql """insert into var_agg select 5, '{"a" : 1234, "xxxx" : "fffff", "point" : 42000}' as json_str
Expand Down
2 changes: 1 addition & 1 deletion regression-test/suites/variant_p0/delete_update.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ suite("regression_test_variant_delete_and_update", "variant_type"){
)
UNIQUE KEY(`k`)
DISTRIBUTED BY HASH(k) BUCKETS 4
properties("replication_num" = "1", "enable_unique_key_merge_on_write" = "true", "store_row_column" = "true");
properties("replication_num" = "1", "enable_unique_key_merge_on_write" = "true");
"""
sql "insert into var_delete_update_mow select k, cast(v as string), cast(v as string) from var_delete_update"
sql "delete from ${table_name} where k = 1"
Expand Down
2 changes: 1 addition & 1 deletion regression-test/suites/variant_p0/desc.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,6 @@ suite("regression_test_variant_desc", "nonConcurrent"){
sql "desc large_tablets"
} finally {
// reset flags
set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95")
set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "1")
}
}
2 changes: 1 addition & 1 deletion regression-test/suites/variant_p2/load.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,6 @@ suite("load_p2", "variant_type,p2"){
qt_sql("select count() from github_events")
} finally {
// reset flags
set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95")
// set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95")
}
}

0 comments on commit 3a2ef3b

Please sign in to comment.