From 44ddcb5a636f8b7bd02a1479db0cb0f6e7e0f205 Mon Sep 17 00:00:00 2001 From: smellthemoon <64083300+smellthemoon@users.noreply.github.com> Date: Fri, 25 Oct 2024 17:19:28 +0800 Subject: [PATCH] fix: not check has_value before get value in JSON (#37128) https://github.com/milvus-io/milvus/issues/36236 also: https://github.com/milvus-io/milvus/issues/37113 Signed-off-by: lixinguo Co-authored-by: lixinguo --- internal/core/src/common/FieldData.cpp | 4 ++- internal/core/src/common/FieldDataInterface.h | 12 +++++--- internal/core/src/storage/Event.cpp | 4 ++- internal/core/unittest/test_data_codec.cpp | 29 +++++++++++++++++++ 4 files changed, 43 insertions(+), 6 deletions(-) diff --git a/internal/core/src/common/FieldData.cpp b/internal/core/src/common/FieldData.cpp index af089fa4696e3..69015dd2743c0 100644 --- a/internal/core/src/common/FieldData.cpp +++ b/internal/core/src/common/FieldData.cpp @@ -100,7 +100,7 @@ FieldDataImpl::FillFieldData( if (element_count == 0) { return; } - null_count = array->null_count(); + null_count_ = array->null_count(); switch (data_type_) { case DataType::BOOL: { AssertInfo(array->type()->id() == arrow::Type::type::BOOL, @@ -195,6 +195,8 @@ FieldDataImpl::FillFieldData( return FillFieldData(values.data(), element_count); } case DataType::JSON: { + // The code here is not referenced. + // A subclass named FieldDataJsonImpl is implemented, which overloads this function. AssertInfo(array->type()->id() == arrow::Type::type::BINARY, "inconsistent data type"); auto json_array = diff --git a/internal/core/src/common/FieldDataInterface.h b/internal/core/src/common/FieldDataInterface.h index 72aff36da8b82..5703acce16106 100644 --- a/internal/core/src/common/FieldDataInterface.h +++ b/internal/core/src/common/FieldDataInterface.h @@ -484,7 +484,7 @@ class FieldDataImpl : public FieldDataBase { int64_t get_null_count() const override { std::shared_lock lck(tell_mutex_); - return null_count; + return null_count_; } bool @@ -507,7 +507,7 @@ class FieldDataImpl : public FieldDataBase { // number of elements data_ can hold int64_t num_rows_; mutable std::shared_mutex num_rows_mutex_; - int64_t null_count{0}; + int64_t null_count_{0}; // number of actual elements in data_ size_t length_{}; mutable std::shared_mutex tell_mutex_; @@ -616,6 +616,7 @@ class FieldDataJsonImpl : public FieldDataImpl { if (n == 0) { return; } + null_count_ = array->null_count(); std::lock_guard lck(tell_mutex_); if (length_ + n > get_num_rows()) { @@ -624,8 +625,11 @@ class FieldDataJsonImpl : public FieldDataImpl { auto i = 0; for (const auto& json : *array) { - data_[length_ + i] = Json(simdjson::padded_string(json.value())); - i++; + if (!json.has_value()) { + i++; + continue; + } + data_[length_ + i++] = Json(simdjson::padded_string(json.value())); } if (IsNullable()) { auto valid_data = array->null_bitmap_data(); diff --git a/internal/core/src/storage/Event.cpp b/internal/core/src/storage/Event.cpp index b76657de3fd12..b35c92c22c155 100644 --- a/internal/core/src/storage/Event.cpp +++ b/internal/core/src/storage/Event.cpp @@ -273,10 +273,12 @@ BaseEventData::Serialize() { auto string_view = static_cast(field_data->RawValue(offset)) ->data(); + auto size = + field_data->is_valid(offset) ? string_view.size() : -1; payload_writer->add_one_binary_payload( reinterpret_cast( std::string(string_view).c_str()), - string_view.size()); + size); } break; } diff --git a/internal/core/unittest/test_data_codec.cpp b/internal/core/unittest/test_data_codec.cpp index 7831454c6a03d..1e836eeb3aa93 100644 --- a/internal/core/unittest/test_data_codec.cpp +++ b/internal/core/unittest/test_data_codec.cpp @@ -765,3 +765,32 @@ TEST(storage, InsertDataStringArrayNullable) { ASSERT_EQ(*new_payload->ValidData(), *valid_data); delete[] valid_data; } + +TEST(storage, InsertDataJsonNullable) { + FixedVector data = {Json(), + Json(simdjson::padded_string(std::string("A")))}; + auto field_data = + milvus::storage::CreateFieldData(storage::DataType::JSON, true); + uint8_t* valid_data = new uint8_t[1]{0x00}; + field_data->FillFieldData(data.data(), valid_data, data.size()); + + storage::InsertData insert_data(field_data); + storage::FieldDataMeta field_data_meta{100, 101, 102, 103}; + insert_data.SetFieldDataMeta(field_data_meta); + insert_data.SetTimestamps(0, 100); + + auto serialized_bytes = insert_data.Serialize(storage::StorageType::Remote); + std::shared_ptr serialized_data_ptr(serialized_bytes.data(), + [&](uint8_t*) {}); + auto new_insert_data = storage::DeserializeFileData( + serialized_data_ptr, serialized_bytes.size()); + ASSERT_EQ(new_insert_data->GetCodecType(), storage::InsertDataType); + ASSERT_EQ(new_insert_data->GetTimeRage(), + std::make_pair(Timestamp(0), Timestamp(100))); + auto new_payload = new_insert_data->GetFieldData(); + ASSERT_EQ(new_payload->get_data_type(), storage::DataType::JSON); + ASSERT_EQ(new_payload->get_num_rows(), data.size()); + ASSERT_EQ(new_payload->get_null_count(), 2); + ASSERT_EQ(*new_payload->ValidData(), *valid_data); + delete[] valid_data; +}