Skip to content

Commit

Permalink
[cherry-pick](jsonb) add a check for jsonb value to avoid invalid jso…
Browse files Browse the repository at this point in the history
…nb value write into segment file (#48729)

…ke select core (#48625)

fix invalid jsonb value write into segment file which make select core,
so we add a check for jsonb value when convert_to_olap which value will
be written into segment file
  • Loading branch information
amorynan authored Mar 6, 2025
1 parent 4296e11 commit 7b2899a
Show file tree
Hide file tree
Showing 12 changed files with 287 additions and 26 deletions.
4 changes: 2 additions & 2 deletions be/src/util/jsonb_document.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ class JsonbDocument {
static JsonbDocument* makeDocument(char* pb, uint32_t size, const JsonbValue* rval);

// create an JsonbDocument object from JSONB packed bytes
static JsonbDocument* createDocument(const char* pb, uint32_t size);
static JsonbDocument* checkAndCreateDocument(const char* pb, size_t size);

// create an JsonbValue from JSONB packed bytes
static JsonbValue* createValue(const char* pb, uint32_t size);
Expand Down Expand Up @@ -1138,7 +1138,7 @@ inline JsonbDocument* JsonbDocument::makeDocument(char* pb, uint32_t size, const
return doc;
}

inline JsonbDocument* JsonbDocument::createDocument(const char* pb, uint32_t size) {
inline JsonbDocument* JsonbDocument::checkAndCreateDocument(const char* pb, size_t size) {
if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) {
return nullptr;
}
Expand Down
2 changes: 1 addition & 1 deletion be/src/util/jsonb_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class JsonbToJson {

// get json string
const std::string to_json_string(const char* data, size_t size) {
JsonbDocument* pdoc = doris::JsonbDocument::createDocument(data, size);
JsonbDocument* pdoc = doris::JsonbDocument::checkAndCreateDocument(data, size);
if (!pdoc) {
LOG(FATAL) << "invalid json binary value: " << std::string_view(data, size);
}
Expand Down
3 changes: 2 additions & 1 deletion be/src/util/jsonb_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,8 @@ class JsonbWriterT {

OS_TYPE* getOutput() { return os_; }
JsonbDocument* getDocument() {
return JsonbDocument::createDocument(getOutput()->getBuffer(), getOutput()->getSize());
return JsonbDocument::checkAndCreateDocument(getOutput()->getBuffer(),
getOutput()->getSize());
}

JsonbValue* getValue() {
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/exprs/table_function/vexplode_json_array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ void VExplodeJsonArrayTableFunction<DataImpl>::process_row(size_t row_idx) {
StringRef text = _text_column->get_data_at(row_idx);
if (text.data != nullptr) {
if (WhichDataType(_text_datatype).is_json()) {
JsonbDocument* doc = JsonbDocument::createDocument(text.data, text.size);
JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(text.data, text.size);
if (doc && doc->getValue() && doc->getValue()->isArray()) {
auto* a = (ArrayVal*)doc->getValue();
if (a->numElem() > 0) {
Expand Down
4 changes: 2 additions & 2 deletions be/src/vec/exprs/table_function/vexplode_json_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ void VExplodeJsonObjectTableFunction::process_row(size_t row_idx) {

StringRef text = _json_object_column->get_data_at(row_idx);
if (text.data != nullptr) {
JsonbDocument* doc = JsonbDocument::createDocument(text.data, text.size);
if (UNLIKELY(!doc || !doc->getValue())) {
JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(text.data, text.size);
if (!doc || !doc->getValue()) [[unlikely]] {
// error jsonb, put null into output, cur_size = 0 , we will insert_default
return;
}
Expand Down
4 changes: 2 additions & 2 deletions be/src/vec/functions/function_cast.h
Original file line number Diff line number Diff line change
Expand Up @@ -714,7 +714,7 @@ struct ConvertImplGenericFromJsonb {
const bool is_dst_string = is_string_or_fixed_string(data_type_to);
for (size_t i = 0; i < size; ++i) {
const auto& val = col_from_string->get_data_at(i);
JsonbDocument* doc = JsonbDocument::createDocument(val.data, val.size);
JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(val.data, val.size);
if (UNLIKELY(!doc || !doc->getValue())) {
(*vec_null_map_to)[i] = 1;
col_to->insert_default();
Expand Down Expand Up @@ -862,7 +862,7 @@ struct ConvertImplFromJsonb {
}

// doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
JsonbDocument* doc = JsonbDocument::createDocument(val.data, val.size);
JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(val.data, val.size);
if (UNLIKELY(!doc || !doc->getValue())) {
null_map[i] = 1;
res[i] = 0;
Expand Down
17 changes: 9 additions & 8 deletions be/src/vec/functions/function_jsonb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -557,7 +557,7 @@ class FunctionJsonbKeys : public IFunction {
continue;
}
const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
JsonbDocument* doc = JsonbDocument::createDocument(l_raw, l_size);
JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw, l_size);
if (UNLIKELY(!doc || !doc->getValue())) {
dst_arr.clear();
return Status::InvalidArgument("jsonb data is invalid");
Expand Down Expand Up @@ -665,7 +665,7 @@ class FunctionJsonbExtractPath : public IFunction {
static ALWAYS_INLINE void inner_loop_impl(size_t i, Container& res, const char* l_raw_str,
int l_str_size, JsonbPath& path) {
// doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
JsonbDocument* doc = JsonbDocument::createDocument(l_raw_str, l_str_size);
JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size);
if (UNLIKELY(!doc || !doc->getValue())) {
return;
}
Expand Down Expand Up @@ -760,7 +760,7 @@ struct JsonbExtractStringImpl {
}

// doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
JsonbDocument* doc = JsonbDocument::createDocument(l_raw, l_size);
JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw, l_size);
if (UNLIKELY(!doc || !doc->getValue())) {
StringOP::push_null_string(i, res_data, res_offsets, null_map);
return;
Expand Down Expand Up @@ -886,7 +886,7 @@ struct JsonbExtractStringImpl {
writer->writeStartArray();

// doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
JsonbDocument* doc = JsonbDocument::createDocument(l_raw, l_size);
JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw, l_size);

for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
if (UNLIKELY(!doc || !doc->getValue())) {
Expand Down Expand Up @@ -1027,7 +1027,7 @@ struct JsonbExtractImpl {
}

// doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
JsonbDocument* doc = JsonbDocument::createDocument(l_raw_str, l_str_size);
JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size);
if (UNLIKELY(!doc || !doc->getValue())) {
null_map[i] = 1;
res[i] = 0;
Expand Down Expand Up @@ -1406,7 +1406,8 @@ struct JsonbLengthUtil {
}
auto jsonb_value = jsonb_data_column->get_data_at(i);
// doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
JsonbDocument* doc = JsonbDocument::createDocument(jsonb_value.data, jsonb_value.size);
JsonbDocument* doc =
JsonbDocument::checkAndCreateDocument(jsonb_value.data, jsonb_value.size);
JsonbValue* value = doc->getValue()->findValue(path, nullptr);
if (UNLIKELY(!value)) {
null_map->get_data()[i] = 1;
Expand Down Expand Up @@ -1541,9 +1542,9 @@ struct JsonbContainsUtil {
}
// doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
JsonbDocument* doc1 =
JsonbDocument::createDocument(jsonb_value1.data, jsonb_value1.size);
JsonbDocument::checkAndCreateDocument(jsonb_value1.data, jsonb_value1.size);
JsonbDocument* doc2 =
JsonbDocument::createDocument(jsonb_value2.data, jsonb_value2.size);
JsonbDocument::checkAndCreateDocument(jsonb_value2.data, jsonb_value2.size);

JsonbValue* value1 = doc1->getValue()->findValue(path, nullptr);
JsonbValue* value2 = doc2->getValue();
Expand Down
4 changes: 2 additions & 2 deletions be/src/vec/jsonb/serialize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ void JsonbSerializeUtil::jsonb_to_block(const DataTypeSerDeSPtrs& serdes, const
const std::unordered_map<uint32_t, uint32_t>& col_id_to_idx,
Block& dst,
const std::vector<std::string>& default_values) {
auto pdoc = JsonbDocument::createDocument(data, size);
auto pdoc = JsonbDocument::checkAndCreateDocument(data, size);
JsonbDocument& doc = *pdoc;
size_t num_rows = dst.rows();
size_t filled_columns = 0;
Expand Down Expand Up @@ -120,4 +120,4 @@ void JsonbSerializeUtil::jsonb_to_block(const DataTypeSerDeSPtrs& serdes, const
}
}

} // namespace doris::vectorized
} // namespace doris::vectorized
23 changes: 19 additions & 4 deletions be/src/vec/olap/olap_data_convertor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ OlapBlockDataConvertor::create_olap_column_data_convertor(const TabletColumn& co
return std::make_unique<OlapColumnDataConvertorDecimalV3<Decimal256>>();
}
case FieldType::OLAP_FIELD_TYPE_JSONB: {
return std::make_unique<OlapColumnDataConvertorVarChar>(true);
return std::make_unique<OlapColumnDataConvertorVarChar>(true, true);
}
case FieldType::OLAP_FIELD_TYPE_BOOL: {
return std::make_unique<OlapColumnDataConvertorSimple<vectorized::UInt8>>();
Expand Down Expand Up @@ -204,7 +204,10 @@ OlapBlockDataConvertor::create_olap_column_data_convertor(const TabletColumn& co
void OlapBlockDataConvertor::set_source_content(const vectorized::Block* block, size_t row_pos,
size_t num_rows) {
DCHECK(block && num_rows > 0 && row_pos + num_rows <= block->rows() &&
block->columns() == _convertors.size());
block->columns() == _convertors.size())
<< "block=" << block->dump_structure() << ", block rows=" << block->rows()
<< ", row_pos=" << row_pos << ", num_rows=" << num_rows
<< ", convertors.size=" << _convertors.size();
size_t cid = 0;
for (const auto& typed_column : *block) {
if (typed_column.column->size() != block->rows()) {
Expand Down Expand Up @@ -601,8 +604,8 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorChar::convert_to_olap() {

// class OlapBlockDataConvertor::OlapColumnDataConvertorVarChar
OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::OlapColumnDataConvertorVarChar(
bool check_length)
: _check_length(check_length) {}
bool check_length, bool is_jsonb)
: _check_length(check_length), _is_jsonb(is_jsonb) {}

void OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::set_source_column(
const ColumnWithTypeAndName& typed_column, size_t row_pos, size_t num_rows) {
Expand Down Expand Up @@ -646,6 +649,12 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::convert_to_olap(
"Not support string len over than "
"`string_type_length_soft_limit_bytes` in vec engine.");
}
// Make sure that the json binary data written in is the correct jsonb value.
if (_is_jsonb &&
!doris::JsonbDocument::checkAndCreateDocument(slice->data, slice->size)) {
return Status::InvalidArgument("invalid json binary value: {}",
std::string_view(slice->data, slice->size));
}
} else {
// TODO: this may not be necessary, check and remove later
slice->data = nullptr;
Expand All @@ -667,6 +676,12 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::convert_to_olap(
"Not support string len over than `string_type_length_soft_limit_bytes`"
" in vec engine.");
}
// Make sure that the json binary data written in is the correct jsonb value.
if (_is_jsonb &&
!doris::JsonbDocument::checkAndCreateDocument(slice->data, slice->size)) {
return Status::InvalidArgument("invalid json binary value: {}",
std::string_view(slice->data, slice->size));
}
string_offset = *offset_cur;
++slice;
++offset_cur;
Expand Down
4 changes: 3 additions & 1 deletion be/src/vec/olap/olap_data_convertor.h
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ class OlapBlockDataConvertor {

class OlapColumnDataConvertorVarChar : public OlapColumnDataConvertorBase {
public:
OlapColumnDataConvertorVarChar(bool check_length);
OlapColumnDataConvertorVarChar(bool check_length, bool is_jsonb = false);
~OlapColumnDataConvertorVarChar() override = default;

void set_source_column(const ColumnWithTypeAndName& typed_column, size_t row_pos,
Expand All @@ -209,6 +209,8 @@ class OlapBlockDataConvertor {

private:
bool _check_length;
bool _is_jsonb =
false; // Make sure that the json binary data written in is the correct jsonb value.
PaddedPODArray<Slice> _slice;
};

Expand Down
4 changes: 2 additions & 2 deletions be/test/vec/data_types/serde/data_type_serde_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ TEST(DataTypeSerDeTest, DataTypeRowStoreSerDeTest) {
jsonb_column->insert_data(jsonb_writer.getOutput()->getBuffer(),
jsonb_writer.getOutput()->getSize());
StringRef jsonb_data = jsonb_column->get_data_at(0);
auto pdoc = JsonbDocument::createDocument(jsonb_data.data, jsonb_data.size);
auto pdoc = JsonbDocument::checkAndCreateDocument(jsonb_data.data, jsonb_data.size);
JsonbDocument& doc = *pdoc;
for (auto it = doc->begin(); it != doc->end(); ++it) {
serde->read_one_cell_from_jsonb(*vec, it->value());
Expand Down Expand Up @@ -270,7 +270,7 @@ TEST(DataTypeSerDeTest, DataTypeRowStoreSerDeTest) {
jsonb_column->insert_data(jsonb_writer.getOutput()->getBuffer(),
jsonb_writer.getOutput()->getSize());
StringRef jsonb_data = jsonb_column->get_data_at(0);
auto pdoc = JsonbDocument::createDocument(jsonb_data.data, jsonb_data.size);
auto pdoc = JsonbDocument::checkAndCreateDocument(jsonb_data.data, jsonb_data.size);
JsonbDocument& doc = *pdoc;
for (auto it = doc->begin(); it != doc->end(); ++it) {
serde->read_one_cell_from_jsonb(*vec, it->value());
Expand Down
Loading

0 comments on commit 7b2899a

Please sign in to comment.