From 56c0d88d6b0d563d8b2197b4812911b3f26343bb Mon Sep 17 00:00:00 2001 From: Nick Crews Date: Thu, 6 Feb 2025 21:29:40 -0900 Subject: [PATCH] [C++]: support casting nullable fields to non-nullable if there are no null values Fixes https://github.com/apache/arrow/issues/33592 --- .../compute/kernels/scalar_cast_nested.cc | 25 ++++++++++------- .../arrow/compute/kernels/scalar_cast_test.cc | 27 ++++++++++--------- 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc index 0033d89fc40fc..58fbcd64c6084 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc @@ -363,10 +363,6 @@ struct CastStruct { const auto& in_field = in_type.field(in_field_index); // If there are more in_fields check if they match the out_field. if (in_field->name() == out_field->name()) { - if (in_field->nullable() && !out_field->nullable()) { - return Status::TypeError("cannot cast nullable field to non-nullable field: ", - in_type.ToString(), " ", out_type.ToString()); - } // Found matching in_field and out_field. fields_to_select[out_field_index++] = in_field_index; // Using the same in_field for multiple out_fields is not allowed. @@ -403,17 +399,26 @@ struct CastStruct { } int out_field_index = 0; - for (int field_index : fields_to_select) { - const auto& target_type = out->type()->field(out_field_index++)->type(); - if (field_index == kFillNullSentinel) { + for (int in_field_index : fields_to_select) { + const auto& in_field = in_type.field(in_field_index); + const auto& out_field = out_type.field(out_field_index++); + const auto& in_field_type = in_field->type(); + const auto& out_field_type = out_field->type(); + if (in_field_index == kFillNullSentinel) { ARROW_ASSIGN_OR_RAISE(auto nulls, - MakeArrayOfNull(target_type->GetSharedPtr(), batch.length)); + MakeArrayOfNull(out_field_type->GetSharedPtr(), batch.length)); out_array->child_data.push_back(nulls->data()); } else { - const auto& values = (in_array.child_data[field_index].ToArrayData()->Slice( + const auto& in_values = (in_array.child_data[in_field_index].ToArrayData()->Slice( in_array.offset, in_array.length)); + if (in_field->nullable() && !out_field->nullable() && in_values->GetNullCount() > 0) { + return Status::Invalid("field '", in_field->name(), + "' of type ", in_field_type->ToString(), + " has nulls. Can't cast to field '", out_field->name(), + "' of non-nullable type ", out_field_type->ToString()); + } ARROW_ASSIGN_OR_RAISE(Datum cast_values, - Cast(values, target_type, options, ctx->exec_context())); + Cast(in_values, out_field_type, options, ctx->exec_context())); DCHECK(cast_values.is_array()); out_array->child_data.push_back(cast_values.array()); } diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index 33a01425508e0..93857170836ae 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2943,7 +2943,7 @@ TEST(Cast, StructToDifferentNullabilityStruct) { CheckCast(src_non_nullable, dest3_nullable); } { - // But NOT OK to go from nullable to non-nullable... + // But when going from nullable to non-nullable, all data must be non-null... std::vector> fields_src_nullable = { std::make_shared("a", int8(), true), std::make_shared("b", int8(), true), @@ -2964,8 +2964,9 @@ TEST(Cast, StructToDifferentNullabilityStruct) { const auto dest1_non_nullable = arrow::struct_(fields_dest1_non_nullable); const auto options1_non_nullable = CastOptions::Safe(dest1_non_nullable); EXPECT_RAISES_WITH_MESSAGE_THAT( - TypeError, - ::testing::HasSubstr("cannot cast nullable field to non-nullable field"), + Invalid, + ::testing::HasSubstr( + "field 'a' of type int64 has nulls. Can't cast to field 'a' of non-nullable type int64"), Cast(src_nullable, options1_non_nullable)); std::vector> fields_dest2_non_nullable = { @@ -2974,18 +2975,20 @@ TEST(Cast, StructToDifferentNullabilityStruct) { const auto dest2_non_nullable = arrow::struct_(fields_dest2_non_nullable); const auto options2_non_nullable = CastOptions::Safe(dest2_non_nullable); EXPECT_RAISES_WITH_MESSAGE_THAT( - TypeError, - ::testing::HasSubstr("cannot cast nullable field to non-nullable field"), + Invalid, + ::testing::HasSubstr( + "field 'a' of type int64 has nulls. Can't cast to field 'a' of non-nullable type int64"), Cast(src_nullable, options2_non_nullable)); - std::vector> fields_dest3_non_nullable = { + std::shared_ptr c_dest_no_nulls; + c_dest_no_nulls = ArrayFromJSON(int64(), "[9, 11, 44]"); + std::vector> fields_dest_no_nulls = { std::make_shared("c", int64(), false)}; - const auto dest3_non_nullable = arrow::struct_(fields_dest3_non_nullable); - const auto options3_non_nullable = CastOptions::Safe(dest3_non_nullable); - EXPECT_RAISES_WITH_MESSAGE_THAT( - TypeError, - ::testing::HasSubstr("cannot cast nullable field to non-nullable field"), - Cast(src_nullable, options3_non_nullable)); + ASSERT_OK_AND_ASSIGN(auto dest_no_nulls, + StructArray::Make({c_dest_no_nulls}, fields_dest_no_nulls)); + const auto options3_non_nullable = + CastOptions::Safe(arrow::struct_(fields_dest_no_nulls)); + CheckCast(src_nullable, dest_no_nulls, options3_non_nullable); } }