From 4c00445a4eff8a48fb2fcd63ee952100834cfde3 Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Fri, 18 Oct 2024 09:13:19 +0200 Subject: [PATCH 1/7] Fixed copy semantic of non composed layouts --- .../layout/dictionary_encoded_array.hpp | 56 +++++++++++++++++++ src/arrow_array_schema_proxy.cpp | 2 +- test/test_dictionary_encoded_array.cpp | 23 ++++++++ test/test_null_array.cpp | 27 +++++++++ test/test_primitive_array.cpp | 27 +++++++++ test/test_variable_size_binary_array.cpp | 23 ++++++++ 6 files changed, 157 insertions(+), 1 deletion(-) diff --git a/include/sparrow/layout/dictionary_encoded_array.hpp b/include/sparrow/layout/dictionary_encoded_array.hpp index b5b2cf19..0028cdea 100644 --- a/include/sparrow/layout/dictionary_encoded_array.hpp +++ b/include/sparrow/layout/dictionary_encoded_array.hpp @@ -104,6 +104,12 @@ namespace sparrow explicit dictionary_encoded_array(arrow_proxy); + dictionary_encoded_array(const self_type&); + self_type& operator=(const self_type&); + + dictionary_encoded_array(self_type&&); + self_type& operator=(self_type&&); + size_type size() const; const_reference operator[](size_type i) const; @@ -139,6 +145,9 @@ namespace sparrow friend class array_wrapper_impl; }; + template + bool operator==(const dictionary_encoded_array& lhs, const dictionary_encoded_array& rhs); + /******************************************* * dictionary_encoded_array implementation * *******************************************/ @@ -152,6 +161,47 @@ namespace sparrow SPARROW_ASSERT_TRUE(data_type_is_integer(m_proxy.data_type())); } + template + dictionary_encoded_array::dictionary_encoded_array(const self_type& rhs) + : m_proxy(rhs.m_proxy) + , m_keys_layout(create_keys_layout(m_proxy)) + , p_values_layout(create_values_layout(m_proxy)) + { + } + + template + auto dictionary_encoded_array::operator=(const self_type& rhs) -> self_type& + { + if (this != &rhs) + { + m_proxy = rhs.m_proxy; + m_keys_layout = create_keys_layout(m_proxy); + p_values_layout = create_values_layout(m_proxy); + } + return *this; + } + + template + dictionary_encoded_array::dictionary_encoded_array(self_type&& rhs) + : m_proxy(std::move(rhs.m_proxy)) + , m_keys_layout(create_keys_layout(m_proxy)) + , p_values_layout(create_values_layout(m_proxy)) + { + } + + template + auto dictionary_encoded_array::operator=(self_type&& rhs) -> self_type& + { + if (this != &rhs) + { + using std::swap; + swap(m_proxy, rhs.m_proxy); + m_keys_layout = create_keys_layout(m_proxy); + p_values_layout = create_values_layout(m_proxy); + } + return *this; + } + template auto dictionary_encoded_array::size() const -> size_type { @@ -257,4 +307,10 @@ namespace sparrow { return m_proxy; } + + template + bool operator==(const dictionary_encoded_array& lhs, const dictionary_encoded_array& rhs) + { + return std::ranges::equal(lhs, rhs); + } } diff --git a/src/arrow_array_schema_proxy.cpp b/src/arrow_array_schema_proxy.cpp index 56ccd586..41ad516d 100644 --- a/src/arrow_array_schema_proxy.cpp +++ b/src/arrow_array_schema_proxy.cpp @@ -160,7 +160,7 @@ namespace sparrow { other.m_array = {}; other.m_schema = {}; - other.m_buffers.clear(); + other.reset(); update_buffers(); update_children(); update_dictionary(); diff --git a/test/test_dictionary_encoded_array.cpp b/test/test_dictionary_encoded_array.cpp index 869f0817..ae9c3697 100644 --- a/test/test_dictionary_encoded_array.cpp +++ b/test/test_dictionary_encoded_array.cpp @@ -71,6 +71,29 @@ namespace sparrow CHECK_NOTHROW(layout_type{make_arrow_proxy()}); } + TEST_CASE("copy") + { + layout_type ar(make_arrow_proxy()); + layout_type ar2(ar); + CHECK_EQ(ar, ar2); + + layout_type ar3(make_arrow_proxy()); + ar3 = ar; + CHECK_EQ(ar, ar3); + } + + TEST_CASE("move") + { + layout_type ar(make_arrow_proxy()); + layout_type ar2(ar); + layout_type ar3(std::move(ar)); + CHECK_EQ(ar2, ar3); + + layout_type ar4(make_arrow_proxy()); + ar4 = std::move(ar3); + CHECK_EQ(ar2, ar4); + } + TEST_CASE("size") { const layout_type dict(make_arrow_proxy()); diff --git a/test/test_null_array.cpp b/test/test_null_array.cpp index d8bb6437..4a1287e6 100644 --- a/test/test_null_array.cpp +++ b/test/test_null_array.cpp @@ -34,6 +34,33 @@ namespace sparrow CHECK_EQ(ar.size(), size); } + TEST_CASE("copy") + { + constexpr std::size_t size = 10u; + null_array ar(make_arrow_proxy(size)); + null_array ar2(ar); + CHECK_EQ(ar, ar2); + + null_array ar3(make_arrow_proxy(size + 2u)); + CHECK_NE(ar, ar3); + ar3 = ar; + CHECK_EQ(ar, ar3); + } + + TEST_CASE("move") + { + constexpr std::size_t size = 10u; + null_array ar(make_arrow_proxy(size)); + null_array ar2(ar); + null_array ar3(std::move(ar)); + CHECK_EQ(ar3, ar2); + + null_array ar4(make_arrow_proxy(size + 3u)); + CHECK_NE(ar4, ar2); + ar4 = std::move(ar3); + CHECK_EQ(ar2, ar4); + } + TEST_CASE("operator[]") { constexpr std::size_t size = 10u; diff --git a/test/test_primitive_array.cpp b/test/test_primitive_array.cpp index e84b4dff..63393552 100644 --- a/test/test_primitive_array.cpp +++ b/test/test_primitive_array.cpp @@ -35,6 +35,33 @@ namespace sparrow CHECK_EQ(ar.size(), size - offset); } + TEST_CASE("copy") + { + array_test_type ar(make_arrow_proxy(size, offset)); + array_test_type ar2(ar); + + CHECK_EQ(ar, ar2); + + array_test_type ar3(make_arrow_proxy(size + 3u, offset)); + CHECK_NE(ar, ar3); + ar3 = ar; + CHECK_EQ(ar, ar3); + } + + TEST_CASE("move") + { + array_test_type ar(make_arrow_proxy(size, offset)); + array_test_type ar2(ar); + + array_test_type ar3(std::move(ar)); + CHECK_EQ(ar2, ar3); + + array_test_type ar4(make_arrow_proxy(size + 3u, offset)); + CHECK_NE(ar2, ar4); + ar4 = std::move(ar2); + CHECK_EQ(ar3, ar4); + } + TEST_CASE("const operator[]") { auto pr = make_arrow_proxy(size, offset); diff --git a/test/test_variable_size_binary_array.cpp b/test/test_variable_size_binary_array.cpp index 7cbcdd0c..0f677823 100644 --- a/test/test_variable_size_binary_array.cpp +++ b/test/test_variable_size_binary_array.cpp @@ -71,6 +71,29 @@ namespace sparrow } } + TEST_CASE_FIXTURE(variable_size_binary_fixture, "copy") + { + layout_type ar(m_arrow_proxy); + layout_type ar2(ar); + CHECK_EQ(ar, ar2); + + layout_type ar3(std::move(m_arrow_proxy)); + ar3 = ar2; + CHECK_EQ(ar2, ar3); + } + + TEST_CASE_FIXTURE(variable_size_binary_fixture, "move") + { + layout_type ar(m_arrow_proxy); + layout_type ar2(ar); + layout_type ar3(std::move(ar)); + CHECK_EQ(ar2, ar3); + + layout_type ar4(std::move(m_arrow_proxy)); + ar4 = std::move(ar3); + CHECK_EQ(ar2, ar4); + } + TEST_CASE_FIXTURE(variable_size_binary_fixture, "size") { const layout_type array(std::move(m_arrow_proxy)); From a3a8a7622fb850841c91b3a659efbb6e34e1281e Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Mon, 21 Oct 2024 13:18:39 +0200 Subject: [PATCH 2/7] Fixed copy semantic of list arrays --- .../arrow_array_schema_utils.hpp | 3 + .../sparrow/layout/list_layout/list_array.hpp | 143 ++++++++++++++++-- test/external_array_data_creation.cpp | 27 ++-- test/external_array_data_creation.hpp | 12 +- test/test_list_array.cpp | 132 ++++++++++++---- 5 files changed, 251 insertions(+), 66 deletions(-) diff --git a/include/sparrow/arrow_interface/arrow_array_schema_utils.hpp b/include/sparrow/arrow_interface/arrow_array_schema_utils.hpp index 9c724885..28ad2917 100644 --- a/include/sparrow/arrow_interface/arrow_array_schema_utils.hpp +++ b/include/sparrow/arrow_interface/arrow_array_schema_utils.hpp @@ -62,6 +62,9 @@ namespace sparrow child->release(child); } } + // TODO: We assume Arrow structures allocated inside sparrow always use operator new + //delete child; + //t.children[i] = nullptr; } delete[] t.children; t.children = nullptr; diff --git a/include/sparrow/layout/list_layout/list_array.hpp b/include/sparrow/layout/list_layout/list_array.hpp index 3b5e492f..28c6a34c 100644 --- a/include/sparrow/layout/list_layout/list_array.hpp +++ b/include/sparrow/layout/list_layout/list_array.hpp @@ -127,11 +127,16 @@ namespace sparrow using const_reference = nullable; using iterator_tag = typename base_type::iterator_tag; - explicit list_array_crtp_base(arrow_proxy proxy); - const array_wrapper* raw_flat_array() const; array_wrapper* raw_flat_array(); + protected: + + explicit list_array_crtp_base(arrow_proxy proxy); + + list_array_crtp_base(const self_type&); + list_array_crtp_base& operator=(const self_type&); + private: using list_size_type = inner_types::list_size_type; @@ -144,6 +149,8 @@ namespace sparrow inner_reference value(size_type i); inner_const_reference value(size_type i) const; + cloning_ptr make_flat_array(); + // data members cloning_ptr p_flat_array; @@ -169,11 +176,16 @@ namespace sparrow explicit list_array_impl(arrow_proxy proxy); + list_array_impl(const self_type&); + list_array_impl& operator=(const self_type&); + private: static constexpr std::size_t OFFSET_BUFFER_INDEX = 1; std::pair offset_range(size_type i) const; + offset_type* make_list_offsets(); + offset_type* p_list_offsets; // friend classes @@ -195,12 +207,18 @@ namespace sparrow explicit list_view_array_impl(arrow_proxy proxy); + list_view_array_impl(const self_type&); + list_view_array_impl& operator=(const self_type&); + private: static constexpr std::size_t OFFSET_BUFFER_INDEX = 1; static constexpr std::size_t SIZES_BUFFER_INDEX = 2; std::pair offset_range(size_type i) const; + offset_type* make_list_offsets(); + offset_type* make_list_sizes(); + offset_type* p_list_offsets; offset_type* p_list_sizes; @@ -222,6 +240,9 @@ namespace sparrow explicit fixed_sized_list_array(arrow_proxy proxy); + fixed_sized_list_array(const self_type&) = default; + fixed_sized_list_array& operator=(const self_type&) = default; + private: static uint64_t list_size_from_format(const std::string_view format); @@ -234,13 +255,32 @@ namespace sparrow friend class list_array_crtp_base; }; + /*************************************** + * list_array_crtp_base implementation * + ***************************************/ + template list_array_crtp_base::list_array_crtp_base(arrow_proxy proxy) : base_type(std::move(proxy)) - , p_flat_array(array_factory(this->storage().children()[0].view())) + , p_flat_array(make_flat_array()) + { + } + + template + list_array_crtp_base::list_array_crtp_base(const self_type& rhs) + : base_type(rhs) + , p_flat_array(make_flat_array()) { } + template + auto list_array_crtp_base::operator=(const self_type& rhs) -> self_type& + { + base_type::operator=(rhs); + p_flat_array = make_flat_array(); + return *this; + } + template auto list_array_crtp_base::raw_flat_array() const -> const array_wrapper* { @@ -302,31 +342,68 @@ namespace sparrow return list_value{p_flat_array.get(), static_cast(r.first), static_cast(r.second)}; } + template + cloning_ptr list_array_crtp_base::make_flat_array() + { + return array_factory(this->storage().children()[0].view()); + } + + /********************************** + * list_array_impl implementation * + **********************************/ + #ifdef __GNUC__ # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wcast-align" #endif - template - inline list_array_impl::list_array_impl(arrow_proxy proxy) + list_array_impl::list_array_impl(arrow_proxy proxy) : base_type(std::move(proxy)) - , p_list_offsets(reinterpret_cast( - this->storage().buffers()[OFFSET_BUFFER_INDEX].data() + this->storage().offset() - )) + , p_list_offsets(make_list_offsets()) + { + } + + template + list_array_impl::list_array_impl(const self_type& rhs) + : base_type(rhs) + , p_list_offsets(make_list_offsets()) + { + } + + template + auto list_array_impl::operator=(const self_type& rhs) -> self_type& { + if (this != &rhs) + { + base_type::operator=(rhs); + p_list_offsets = make_list_offsets(); + } + return *this; } + #ifdef __GNUC__ # pragma GCC diagnostic pop #endif - template auto list_array_impl::offset_range(size_type i) const -> std::pair { return std::make_pair(p_list_offsets[i], p_list_offsets[i + 1]); } + template + auto list_array_impl::make_list_offsets() -> offset_type* + { + return reinterpret_cast( + this->storage().buffers()[OFFSET_BUFFER_INDEX].data() + this->storage().offset() + ); + } + + /*************************************** + * list_view_array_impl implementation * + ***************************************/ + #ifdef __GNUC__ # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wcast-align" @@ -335,15 +412,31 @@ namespace sparrow template inline list_view_array_impl::list_view_array_impl(arrow_proxy proxy) : base_type(std::move(proxy)) - , p_list_offsets(reinterpret_cast( - this->storage().buffers()[OFFSET_BUFFER_INDEX].data() + this->storage().offset() - )) - , p_list_sizes(reinterpret_cast( - this->storage().buffers()[SIZES_BUFFER_INDEX].data() + this->storage().offset() - )) + , p_list_offsets(make_list_offsets()) + , p_list_sizes(make_list_sizes()) + { + } + + template + list_view_array_impl::list_view_array_impl(const self_type& rhs) + : base_type(rhs) + , p_list_offsets(make_list_offsets()) + , p_list_sizes(make_list_sizes()) { } + template + auto list_view_array_impl::operator=(const self_type& rhs) -> self_type& + { + if (this != &rhs) + { + base_type::operator=(rhs); + p_list_offsets = make_list_offsets(); + p_list_sizes = make_list_sizes(); + } + return *this; + } + #ifdef __GNUC__ # pragma GCC diagnostic pop #endif @@ -356,6 +449,26 @@ namespace sparrow return std::make_pair(offset, offset + p_list_sizes[i]); } + template + auto list_view_array_impl::make_list_offsets() -> offset_type* + { + return reinterpret_cast( + this->storage().buffers()[OFFSET_BUFFER_INDEX].data() + this->storage().offset() + ); + } + + template + auto list_view_array_impl::make_list_sizes() -> offset_type* + { + return reinterpret_cast( + this->storage().buffers()[SIZES_BUFFER_INDEX].data() + this->storage().offset() + ); + } + + /***************************************** + * fixed_sized_list_array implementation * + *****************************************/ + inline auto fixed_sized_list_array::list_size_from_format(const std::string_view format) -> uint64_t { SPARROW_ASSERT(format.size() >= 3, "Invalid format string"); diff --git a/test/external_array_data_creation.cpp b/test/external_array_data_creation.cpp index 3baad5a0..122d6531 100644 --- a/test/external_array_data_creation.cpp +++ b/test/external_array_data_creation.cpp @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "sparrow/arrow_interface/arrow_array.hpp" +#include "sparrow/arrow_interface/arrow_schema.hpp" #include "external_array_data_creation.hpp" #ifdef __GNUC__ @@ -36,6 +38,7 @@ namespace sparrow::test for (std::int64_t i = 0; i < t->n_children; ++i) { t->children[i]->release(t->children[i]); + delete t->children[i]; } delete[] t->children; t->children = nullptr; @@ -64,8 +67,8 @@ namespace sparrow::test void fill_schema_and_array_for_list_layout( ArrowSchema& schema, ArrowArray& arr, - ArrowSchema & flat_value_schema, - ArrowArray & flat_value_arr, + ArrowSchema&& flat_value_schema, + ArrowArray&& flat_value_arr, const std::vector & list_lengths, const std::vector & false_postions, bool big_list @@ -76,7 +79,7 @@ namespace sparrow::test schema.n_children = 1; schema.children = new ArrowSchema*[1]; - schema.children[0] = &flat_value_schema; + schema.children[0] = new ArrowSchema(std::move(flat_value_schema)); schema.dictionary = nullptr; schema.release = &release_arrow_schema; @@ -97,7 +100,7 @@ namespace sparrow::test arr.buffers = const_cast(reinterpret_cast(buf)); arr.children = new ArrowArray*[1]; - arr.children[0] = &flat_value_arr; + arr.children[0] = new ArrowArray(std::move(flat_value_arr)); arr.dictionary = nullptr; arr.release = &release_arrow_array; @@ -110,8 +113,8 @@ namespace sparrow::test void fill_schema_and_array_for_fixed_size_list_layout( ArrowSchema& schema, ArrowArray& arr, - ArrowSchema & flat_value_schema, - ArrowArray & flat_value_arr, + ArrowSchema&& flat_value_schema, + ArrowArray&& flat_value_arr, const std::vector & false_postions, std::size_t list_size ){ @@ -126,7 +129,7 @@ namespace sparrow::test schema.n_children = 1; schema.children = new ArrowSchema*[1]; - schema.children[0] = &flat_value_schema; + schema.children[0] = new ArrowSchema(std::move(flat_value_schema)); schema.dictionary = nullptr; schema.release = &release_arrow_schema; @@ -144,7 +147,7 @@ namespace sparrow::test arr.buffers = const_cast(reinterpret_cast(buf)); arr.children = new ArrowArray*[1]; - arr.children[0] = &flat_value_arr; + arr.children[0] = new ArrowArray(std::move(flat_value_arr)); arr.dictionary = nullptr; arr.release = &release_arrow_array; @@ -155,8 +158,8 @@ namespace sparrow::test void fill_schema_and_array_for_list_view_layout( ArrowSchema& schema, ArrowArray& arr, - ArrowSchema & flat_value_schema, - ArrowArray & flat_value_arr, + ArrowSchema&& flat_value_schema, + ArrowArray&& flat_value_arr, const std::vector & list_lengths, const std::vector & false_postions, bool big_list @@ -167,7 +170,7 @@ namespace sparrow::test schema.n_children = 1; schema.children = new ArrowSchema*[1]; - schema.children[0] = &flat_value_schema; + schema.children[0] = new ArrowSchema(std::move(flat_value_schema)); schema.dictionary = nullptr; schema.release = &release_arrow_schema; @@ -213,7 +216,7 @@ namespace sparrow::test arr.buffers = const_cast(reinterpret_cast(buf)); arr.children = new ArrowArray*[1]; - arr.children[0] = &flat_value_arr; + arr.children[0] = new ArrowArray(std::move(flat_value_arr)); arr.dictionary = nullptr; arr.release = &release_arrow_array; diff --git a/test/external_array_data_creation.hpp b/test/external_array_data_creation.hpp index bcac8c69..5f96c162 100644 --- a/test/external_array_data_creation.hpp +++ b/test/external_array_data_creation.hpp @@ -245,8 +245,8 @@ namespace sparrow::test void fill_schema_and_array_for_list_layout( ArrowSchema& schema, ArrowArray& arr, - ArrowSchema& flat_value_schema, - ArrowArray& flat_value_arr, + ArrowSchema&& flat_value_schema, + ArrowArray&& flat_value_arr, const std::vector& list_lengths, const std::vector& false_postions, bool big_list @@ -255,8 +255,8 @@ namespace sparrow::test void fill_schema_and_array_for_list_view_layout( ArrowSchema& schema, ArrowArray& arr, - ArrowSchema & flat_value_schema, - ArrowArray & flat_value_arr, + ArrowSchema&& flat_value_schema, + ArrowArray&& flat_value_arr, const std::vector & list_lengths, const std::vector & false_postions, bool big_list @@ -265,8 +265,8 @@ namespace sparrow::test void fill_schema_and_array_for_fixed_size_list_layout( ArrowSchema& schema, ArrowArray& arr, - ArrowSchema & flat_value_schema, - ArrowArray & flat_value_arr, + ArrowSchema&& flat_value_schema, + ArrowArray&& flat_value_arr, const std::vector & false_postions, std::size_t list_size ); diff --git a/test/test_list_array.cpp b/test/test_list_array.cpp index 8cde65a2..64ee949d 100644 --- a/test/test_list_array.cpp +++ b/test/test_list_array.cpp @@ -22,6 +22,24 @@ namespace sparrow { + namespace test + { + template + arrow_proxy make_list_proxy(size_t n_flat, const std::vector& sizes) + { + // first we create a flat array of integers + ArrowArray flat_arr{}; + ArrowSchema flat_schema{}; + test::fill_schema_and_array(flat_schema, flat_arr, n_flat, 0/*offset*/, {}); + flat_schema.name = "the flat array"; + + ArrowArray arr{}; + ArrowSchema schema{}; + test::fill_schema_and_array_for_list_layout(schema, arr, std::move(flat_schema), std::move(flat_arr), sizes, {}, 0); + return arrow_proxy(std::move(arr), std::move(schema)); + } + } + TEST_SUITE("list_array") { TEST_CASE_TEMPLATE("list[T]",T, std::uint8_t, std::int32_t, float, double) @@ -36,21 +54,26 @@ namespace sparrow // vector of sizes std::vector sizes = {1, 2, 3, 4}; - // first we create a flat array of integers - ArrowArray flat_arr{}; - ArrowSchema flat_schema{}; - test::fill_schema_and_array(flat_schema, flat_arr, n_flat, 0/*offset*/, {}); - flat_schema.name = "the flat array"; + const std::size_t n_flat2 = 8; + std::vector sizes2 = {2, 4, 2}; - ArrowArray arr{}; - ArrowSchema schema{}; - test::fill_schema_and_array_for_list_layout(schema, arr, flat_schema, flat_arr, sizes, {}, 0); - arrow_proxy proxy(&arr, &schema); + arrow_proxy proxy = test::make_list_proxy(n_flat, sizes); // create a list array list_array list_arr(std::move(proxy)); REQUIRE(list_arr.size() == n); + SUBCASE("copy") + { + list_array list_arr2(list_arr); + CHECK_EQ(list_arr, list_arr2); + + list_array list_arr3(test::make_list_proxy(n_flat2, sizes2)); + CHECK_NE(list_arr3, list_arr); + list_arr3 = list_arr; + CHECK_EQ(list_arr3, list_arr); + } + SUBCASE("element-sizes") { for(std::size_t i = 0; i < n; ++i){ @@ -77,7 +100,7 @@ namespace sparrow } } - SUBCASE("consitency") + SUBCASE("consistency") { test::generic_consistency_test(list_arr); } @@ -111,6 +134,24 @@ namespace sparrow } } + namespace test + { + template + arrow_proxy make_list_view_proxy(size_t n_flat, const std::vector& sizes) + { + // first we create a flat array of integers + ArrowArray flat_arr{}; + ArrowSchema flat_schema{}; + test::fill_schema_and_array(flat_schema, flat_arr, n_flat, 0/*offset*/, {}); + flat_schema.name = "the flat array"; + + ArrowArray arr{}; + ArrowSchema schema{}; + test::fill_schema_and_array_for_list_view_layout(schema, arr, std::move(flat_schema), std::move(flat_arr), sizes, {}, 0); + return arrow_proxy(std::move(arr), std::move(schema)); + } + } + TEST_SUITE("list_view_array") { TEST_CASE_TEMPLATE("list_view_array[T]",T, std::uint8_t, std::int32_t, float, double) @@ -125,21 +166,26 @@ namespace sparrow // vector of sizes std::vector sizes = {1, 2, 3, 4}; - // first we create a flat array of integers - ArrowArray flat_arr{}; - ArrowSchema flat_schema{}; - test::fill_schema_and_array(flat_schema, flat_arr, n_flat, 0/*offset*/, {}); - flat_schema.name = "the flat array"; - - ArrowArray arr{}; - ArrowSchema schema{}; - test::fill_schema_and_array_for_list_view_layout(schema, arr, flat_schema, flat_arr, sizes, {}, 0); - arrow_proxy proxy(&arr, &schema); + const std::size_t n_flat2 = 8; + std::vector sizes2 = {2, 4, 2}; + + arrow_proxy proxy = test::make_list_view_proxy(n_flat, sizes); // create a list array list_view_array list_arr(std::move(proxy)); REQUIRE(list_arr.size() == n); + SUBCASE("copy") + { + list_view_array list_arr2(list_arr); + CHECK_EQ(list_arr, list_arr2); + + list_view_array list_arr3(test::make_list_view_proxy(n_flat2, sizes2)); + CHECK_NE(list_arr3, list_arr); + list_arr3 = list_arr; + CHECK_EQ(list_arr3, list_arr); + } + SUBCASE("element-sizes") { for(std::size_t i = 0; i < n; ++i){ @@ -167,7 +213,7 @@ namespace sparrow } } - SUBCASE("consitency") + SUBCASE("consistency") { test::generic_consistency_test(list_arr); } @@ -201,6 +247,23 @@ namespace sparrow } } + namespace test + { + template + arrow_proxy make_fixed_sized_list_proxy(size_t n_flat, size_t list_size) + { + // first we create a flat array of integers + ArrowArray flat_arr{}; + ArrowSchema flat_schema{}; + test::fill_schema_and_array(flat_schema, flat_arr, n_flat, 0/*offset*/, {}); + flat_schema.name = "the flat array"; + + ArrowArray arr{}; + ArrowSchema schema{}; + test::fill_schema_and_array_for_fixed_size_list_layout(schema, arr, std::move(flat_schema), std::move(flat_arr), {}, list_size); + return arrow_proxy(std::move(arr), std::move(schema)); + } + } TEST_SUITE("fixed_sized_list_array") { @@ -219,21 +282,24 @@ namespace sparrow CHECK(n == 4); + const std::size_t n_flat2 = 10; + const std::size_t list_size2 = 4; + + arrow_proxy proxy = test::make_fixed_sized_list_proxy(n_flat, list_size); - // first we create a flat array of integers - ArrowArray flat_arr{}; - ArrowSchema flat_schema{}; - test::fill_schema_and_array(flat_schema, flat_arr, n_flat, 0/*offset*/, {}); - flat_schema.name = "the flat array"; - + fixed_sized_list_array list_arr(std::move(proxy)); - ArrowArray arr{}; - ArrowSchema schema{}; - test::fill_schema_and_array_for_fixed_size_list_layout(schema, arr, flat_schema, flat_arr, {}, list_size); + SUBCASE("copy") + { + fixed_sized_list_array list_arr2(list_arr); + CHECK_EQ(list_arr, list_arr2); - arrow_proxy proxy(&arr, &schema); + fixed_sized_list_array list_arr3(test::make_fixed_sized_list_proxy(n_flat2, list_size2)); + CHECK_NE(list_arr3, list_arr); + list_arr3 = list_arr; + CHECK_EQ(list_arr3, list_arr); + } - fixed_sized_list_array list_arr(std::move(proxy)); SUBCASE("consitency") { test::generic_consistency_test(list_arr); @@ -266,7 +332,7 @@ namespace sparrow } } } - delete schema.format; + //delete schema.format; } } From 0f6ad388569e26b6542990152d8d37d5b0e90543 Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Mon, 21 Oct 2024 14:06:06 +0200 Subject: [PATCH 3/7] Fixed copy semantic of struct layout --- .../layout/struct_layout/struct_array.hpp | 37 ++++++++++++-- test/external_array_data_creation.cpp | 21 +++++--- test/external_array_data_creation.hpp | 4 +- test/test_list_array.cpp | 1 - test/test_struct_array.cpp | 48 +++++++++++++------ 5 files changed, 83 insertions(+), 28 deletions(-) diff --git a/include/sparrow/layout/struct_layout/struct_array.hpp b/include/sparrow/layout/struct_layout/struct_array.hpp index 8a34f3ae..fecbaf3b 100644 --- a/include/sparrow/layout/struct_layout/struct_array.hpp +++ b/include/sparrow/layout/struct_layout/struct_array.hpp @@ -70,11 +70,16 @@ namespace sparrow explicit struct_array(arrow_proxy proxy); + struct_array(const struct_array&); + struct_array& operator=(const struct_array&); + const array_wrapper* raw_child(std::size_t i) const; array_wrapper* raw_child(std::size_t i); private: + using children_type = std::vector>; + value_iterator value_begin(); value_iterator value_end(); const_value_iterator value_cbegin() const; @@ -82,8 +87,10 @@ namespace sparrow inner_reference value(size_type i); inner_const_reference value(size_type i) const; + children_type make_children(); + // data members - std::vector> m_children; + children_type m_children; // friend classes friend class array_crtp_base; @@ -95,12 +102,24 @@ namespace sparrow inline struct_array::struct_array(arrow_proxy proxy) : base_type(std::move(proxy)) - , m_children(this->storage().children().size(), nullptr) + , m_children(make_children()) + { + } + + inline struct_array::struct_array(const struct_array& rhs) + : base_type(rhs) + , m_children(make_children()) + { + } + + inline struct_array& struct_array::operator=(const struct_array& rhs) { - for (std::size_t i = 0; i < m_children.size(); ++i) + if (this != &rhs) { - m_children[i] = array_factory(this->storage().children()[i].view()); + base_type::operator=(rhs); + m_children = make_children(); } + return *this; } inline auto struct_array::raw_child(std::size_t i) const -> const array_wrapper* @@ -145,4 +164,14 @@ namespace sparrow { return struct_value{m_children, i}; } + + inline auto struct_array::make_children() -> children_type + { + children_type children(this->storage().children().size(), nullptr); + for (std::size_t i = 0; i < children.size(); ++i) + { + children[i] = array_factory(this->storage().children()[i].view()); + } + return children; + } } diff --git a/test/external_array_data_creation.cpp b/test/external_array_data_creation.cpp index 122d6531..0c08a9fa 100644 --- a/test/external_array_data_creation.cpp +++ b/test/external_array_data_creation.cpp @@ -226,8 +226,8 @@ namespace sparrow::test void fill_schema_and_array_for_struct_layout( ArrowSchema& schema, ArrowArray& arr, - std::vector & children_schemas, - std::vector & children_arrays, + std::vector&& children_schemas, + std::vector&& children_arrays, const std::vector & false_postions ) { @@ -237,10 +237,14 @@ namespace sparrow::test schema.n_children = static_cast(children_schemas.size()); schema.children = new ArrowSchema*[children_schemas.size()]; - for (std::size_t i = 0; i < children_schemas.size(); ++i) + std::transform(std::make_move_iterator(children_schemas.begin()), + std::make_move_iterator(children_schemas.end()), + schema.children, + [](auto&& child) { return new ArrowSchema(std::move(child)); }); + /*for (std::size_t i = 0; i < children_schemas.size(); ++i) { schema.children[i] = &children_schemas[i]; - } + }*/ schema.dictionary = nullptr; schema.release = &release_arrow_schema; @@ -259,10 +263,15 @@ namespace sparrow::test arr.buffers = const_cast(reinterpret_cast(buf)); arr.children = new ArrowArray*[children_arrays.size()]; - for (std::size_t i = 0; i < children_arrays.size(); ++i) + + std::transform(std::make_move_iterator(children_arrays.begin()), + std::make_move_iterator(children_arrays.end()), + arr.children, + [](auto&& child) { return new ArrowArray(std::move(child)); }); + /*for (std::size_t i = 0; i < children_arrays.size(); ++i) { arr.children[i] = &children_arrays[i]; - } + }*/ arr.dictionary = nullptr; arr.release = &release_arrow_array; diff --git a/test/external_array_data_creation.hpp b/test/external_array_data_creation.hpp index 5f96c162..d389cd99 100644 --- a/test/external_array_data_creation.hpp +++ b/test/external_array_data_creation.hpp @@ -275,8 +275,8 @@ namespace sparrow::test void fill_schema_and_array_for_struct_layout( ArrowSchema& schema, ArrowArray& arr, - std::vector& children_schemas, - std::vector& children_arrays, + std::vector&& children_schemas, + std::vector&& children_arrays, const std::vector& false_postions ); diff --git a/test/test_list_array.cpp b/test/test_list_array.cpp index 64ee949d..f2407ca6 100644 --- a/test/test_list_array.cpp +++ b/test/test_list_array.cpp @@ -286,7 +286,6 @@ namespace sparrow const std::size_t list_size2 = 4; arrow_proxy proxy = test::make_fixed_sized_list_proxy(n_flat, list_size); - fixed_sized_list_array list_arr(std::move(proxy)); SUBCASE("copy") diff --git a/test/test_struct_array.cpp b/test/test_struct_array.cpp index 2b56c85a..680d93e9 100644 --- a/test/test_struct_array.cpp +++ b/test/test_struct_array.cpp @@ -25,38 +25,56 @@ namespace sparrow { - TEST_SUITE("struct_array") - { - - TEST_CASE_TEMPLATE("struct[T, uint8]",T, std::uint8_t, std::int32_t, float, double) + namespace test + { + template + arrow_proxy make_struct_proxy(size_t n) { - using inner_scalar_type = T; - //using inner_nullable_type = nullable; - - // number of elements in the struct array - const std::size_t n = 4; - // n-children == 2 std::vector children_arrays(2); std::vector children_schemas(2); - test::fill_schema_and_array(children_schemas[0], children_arrays[0], n, 0/*offset*/, {}); + test::fill_schema_and_array(children_schemas[0], children_arrays[0], n, 0/*offset*/, {}); children_schemas[0].name = "item 0"; - test::fill_schema_and_array(children_schemas[1], children_arrays[1], n, 0/*offset*/, {}); + test::fill_schema_and_array(children_schemas[1], children_arrays[1], n, 0/*offset*/, {}); children_schemas[1].name = "item 1"; ArrowArray arr{}; ArrowSchema schema{}; - test::fill_schema_and_array_for_struct_layout(schema, arr, children_schemas, children_arrays, {}); - arrow_proxy proxy(&arr, &schema); + test::fill_schema_and_array_for_struct_layout(schema, arr, std::move(children_schemas), std::move(children_arrays), {}); + return arrow_proxy(std::move(arr), std::move(schema)); + } + } + TEST_SUITE("struct_array") + { + + TEST_CASE_TEMPLATE("struct[T, uint8]",T, std::uint8_t, std::int32_t, float, double) + { + using inner_scalar_type = T; + //using inner_nullable_type = nullable; + + // number of elements in the struct array + const std::size_t n = 4; + const std::size_t n2 = 3; // create a struct array + arrow_proxy proxy = test::make_struct_proxy(n); struct_array struct_arr(std::move(proxy)); REQUIRE(struct_arr.size() == n); + SUBCASE("copy") + { + struct_array struct_arr2(struct_arr); + CHECK_EQ(struct_arr, struct_arr2); + + struct_array struct_arr3(test::make_struct_proxy(n2)); + CHECK_NE(struct_arr3, struct_arr); + struct_arr3 = struct_arr; + CHECK_EQ(struct_arr3, struct_arr); + } SUBCASE("operator[]") { @@ -118,7 +136,7 @@ namespace sparrow CHECK(struct_arr[0] != struct_arr[1]); } - SUBCASE("consitency") + SUBCASE("consistency") { test::generic_consistency_test(struct_arr); } From b71a304bfbed9aa22aef7d8aa1c2760701e54e9f Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Mon, 21 Oct 2024 15:12:45 +0200 Subject: [PATCH 4/7] Fixed copy semantic of run_end_encoded_array --- .../run_end_encoded_array.hpp | 43 ++++++++- test/external_array_data_creation.cpp | 16 ++-- test/external_array_data_creation.hpp | 8 +- test/test_run_end_encoded_array.cpp | 95 +++++++++++-------- 4 files changed, 103 insertions(+), 59 deletions(-) diff --git a/include/sparrow/layout/run_end_encoded_layout/run_end_encoded_array.hpp b/include/sparrow/layout/run_end_encoded_layout/run_end_encoded_array.hpp index 5a4d1888..09c6a810 100644 --- a/include/sparrow/layout/run_end_encoded_layout/run_end_encoded_array.hpp +++ b/include/sparrow/layout/run_end_encoded_layout/run_end_encoded_array.hpp @@ -47,12 +47,15 @@ namespace sparrow { public: + using self_type = run_end_encoded_array; using size_type = std::size_t; using inner_value_type = array_traits::inner_value_type; using iterator = run_encoded_array_iterator; using const_iterator = run_encoded_array_iterator; SPARROW_API explicit run_end_encoded_array(arrow_proxy proxy); + SPARROW_API run_end_encoded_array(const self_type&); + SPARROW_API self_type& operator=(const self_type&); SPARROW_API array_traits::const_reference operator[](std::uint64_t i); SPARROW_API array_traits::const_reference operator[](std::uint64_t i) const; @@ -91,14 +94,40 @@ namespace sparrow friend class array_wrapper_impl; }; + SPARROW_API + bool operator==(const run_end_encoded_array& lhs, const run_end_encoded_array& rhs); + + /**************************************** + * run_end_encoded_array implementation * + ****************************************/ + inline run_end_encoded_array::run_end_encoded_array(arrow_proxy proxy) - : m_proxy(std::move(proxy)), - m_encoded_length(m_proxy.children()[0].length()), - p_acc_lengths_array(array_factory(m_proxy.children()[0].view())), - p_encoded_values_array(array_factory(m_proxy.children()[1].view())), - m_acc_lengths(run_end_encoded_array::get_acc_lengths_ptr(*p_acc_lengths_array)) + : m_proxy(std::move(proxy)) + , m_encoded_length(m_proxy.children()[0].length()) + , p_acc_lengths_array(array_factory(m_proxy.children()[0].view())) + , p_encoded_values_array(array_factory(m_proxy.children()[1].view())) + , m_acc_lengths(run_end_encoded_array::get_acc_lengths_ptr(*p_acc_lengths_array)) + { + } + + inline run_end_encoded_array::run_end_encoded_array(const self_type& rhs) + : run_end_encoded_array(rhs.m_proxy) + { + } + + inline auto run_end_encoded_array::operator=(const self_type& rhs) -> self_type& { + if (this != &rhs) + { + m_proxy = rhs.m_proxy; + m_encoded_length = rhs.m_encoded_length; + p_acc_lengths_array = array_factory(m_proxy.children()[0].view()); + p_encoded_values_array = array_factory(m_proxy.children()[1].view()); + m_acc_lengths = run_end_encoded_array::get_acc_lengths_ptr(*p_acc_lengths_array); + } + return *this; } + inline auto run_end_encoded_array::size() const -> size_type { return m_proxy.length(); @@ -164,4 +193,8 @@ namespace sparrow return const_iterator(this, size(), 0); } + inline bool operator==(const run_end_encoded_array& lhs, const run_end_encoded_array& rhs) + { + return std::ranges::equal(lhs, rhs); + } } // namespace sparrow diff --git a/test/external_array_data_creation.cpp b/test/external_array_data_creation.cpp index 0c08a9fa..26a2ea92 100644 --- a/test/external_array_data_creation.cpp +++ b/test/external_array_data_creation.cpp @@ -280,10 +280,10 @@ namespace sparrow::test void fill_schema_and_array_for_run_end_encoded( ArrowSchema& schema, ArrowArray& arr, - ArrowSchema & acc_length_schema, - ArrowArray & acc_length_arr, - ArrowSchema & value_schema, - ArrowArray & value_arr, + ArrowSchema&& acc_length_schema, + ArrowArray&& acc_length_arr, + ArrowSchema&& value_schema, + ArrowArray&& value_arr, std::size_t length ){ schema.format = "+r"; @@ -292,8 +292,8 @@ namespace sparrow::test schema.n_children = 2; schema.children = new ArrowSchema*[2]; - schema.children[0] = &acc_length_schema; - schema.children[1] = &value_schema; + schema.children[0] = new ArrowSchema(std::move(acc_length_schema)); + schema.children[1] = new ArrowSchema(std::move(value_schema)); schema.dictionary = nullptr; schema.release = &release_arrow_schema; @@ -310,8 +310,8 @@ namespace sparrow::test arr.buffers = nullptr; arr.children = new ArrowArray*[2]; - arr.children[0] = &acc_length_arr; - arr.children[1] = &value_arr; + arr.children[0] = new ArrowArray(std::move(acc_length_arr)); + arr.children[1] = new ArrowArray(std::move(value_arr)); arr.dictionary = nullptr; arr.release = &release_arrow_array; diff --git a/test/external_array_data_creation.hpp b/test/external_array_data_creation.hpp index d389cd99..e22ab827 100644 --- a/test/external_array_data_creation.hpp +++ b/test/external_array_data_creation.hpp @@ -283,10 +283,10 @@ namespace sparrow::test void fill_schema_and_array_for_run_end_encoded( ArrowSchema& schema, ArrowArray& arr, - ArrowSchema & acc_length_schema, - ArrowArray & acc_length_arr, - ArrowSchema & value_schema, - ArrowArray & value_arr, + ArrowSchema&& acc_length_schema, + ArrowArray&& acc_length_arr, + ArrowSchema&& value_schema, + ArrowArray&& value_arr, std::size_t length ); diff --git a/test/test_run_end_encoded_array.cpp b/test/test_run_end_encoded_array.cpp index 8aa0d1b6..df12ca69 100644 --- a/test/test_run_end_encoded_array.cpp +++ b/test/test_run_end_encoded_array.cpp @@ -24,16 +24,11 @@ namespace sparrow { - - TEST_SUITE("run_length_encoded") - { - - TEST_CASE("run_length_encoded") + namespace test + { + template + arrow_proxy make_run_end_encoded_proxy(std::size_t n, std::size_t child_length, bool alterate = false) { - using acc_type = std::uint32_t; - using inner_value_type = std::uint64_t; - - // lets encode the following: (length: 8) // [1,null,null, 42, 42, 42, null, 9] @@ -42,56 +37,62 @@ namespace sparrow // the accumulated lengths are: // [1,3,6,7,8] + // acc-values child - - - // number of elements in the run_length_encoded array - const std::size_t n = 8; - - // size of the children - const std::size_t child_length = 5; - - - - // n-children == 2 - std::vector children_arrays(2); - std::vector children_schemas(2); - - // schema for acc-values - test::fill_schema_and_array(children_schemas[0], children_arrays[0], child_length, 0/*offset*/, {}); - children_schemas[0].name = "acc"; - + ArrowSchema acc_schema; + ArrowArray acc_array; + test::fill_schema_and_array(acc_schema, acc_array, child_length, 0/*offset*/, {}); + acc_schema.name = "acc"; // fill acc-values buffer std::vector acc_values{1,3,6,7,8}; - auto ptr = reinterpret_cast(const_cast(children_arrays[0].buffers[1])); + if (alterate) + { + acc_values[3] = 2; + } + auto ptr = reinterpret_cast(const_cast(acc_array.buffers[1])); std::copy(acc_values.begin(), acc_values.end(), ptr); - // schema for values - test::fill_schema_and_array(children_schemas[1], children_arrays[1], child_length, 0/*offset*/, {1,3}); - children_schemas[1].name = "values"; + // values child + ArrowSchema values_schema; + ArrowArray values_array; + test::fill_schema_and_array(values_schema, values_array, child_length, 0/*offset*/, {1,3}); + values_schema.name = "values"; // fill values buffer std::vector values{1, 0, 42, 0, 9}; - auto ptr2 = reinterpret_cast(const_cast(children_arrays[1].buffers[1])); + auto ptr2 = reinterpret_cast(const_cast(values_array.buffers[1])); std::copy(values.begin(), values.end(), ptr2); - - ArrowArray arr{}; ArrowSchema schema{}; test::fill_schema_and_array_for_run_end_encoded( schema, arr, - children_schemas[0], - children_arrays[0], - children_schemas[1], - children_arrays[1], + std::move(acc_schema), + std::move(acc_array), + std::move(values_schema), + std::move(values_array), n ); - arrow_proxy proxy(&arr, &schema); + return arrow_proxy(std::move(arr), std::move(schema)); + } + } - run_end_encoded_array rle_array(std::move(proxy)); + TEST_SUITE("run_length_encoded") + { + TEST_CASE("run_length_encoded") + { + using acc_type = std::uint32_t; + using inner_value_type = std::uint64_t; + // number of elements in the run_length_encoded array + const std::size_t n = 8; + + // size of the children + const std::size_t child_length = 5; + + auto proxy = test::make_run_end_encoded_proxy(n, child_length); + run_end_encoded_array rle_array(std::move(proxy)); // check size REQUIRE(rle_array.size() == n); @@ -99,6 +100,18 @@ namespace sparrow std::vector expected_bitmap{1,0,0,1,1,1,0,1}; std::vector expected_values{1,0,0, 42,42, 42,0,9}; + SUBCASE("copy") + { + run_end_encoded_array rle_array2(rle_array); + CHECK_EQ(rle_array2, rle_array); + + run_end_encoded_array rle_array3( + test::make_run_end_encoded_proxy(n, child_length, true)); + CHECK_NE(rle_array3, rle_array); + rle_array3 = rle_array; + CHECK_EQ(rle_array3, rle_array); + } + SUBCASE("operator[]"){ //check elements for(std::size_t i=0; i Date: Tue, 22 Oct 2024 05:18:05 +0200 Subject: [PATCH 5/7] Fixed copy semantic of union layouts --- include/sparrow/layout/layout_utils.hpp | 56 +++++------ include/sparrow/layout/union_array.hpp | 124 ++++++++++++++++++++---- test/external_array_data_creation.cpp | 52 +++++----- test/external_array_data_creation.hpp | 8 +- test/test_union_array.cpp | 107 +++++++++++++------- 5 files changed, 229 insertions(+), 118 deletions(-) diff --git a/include/sparrow/layout/layout_utils.hpp b/include/sparrow/layout/layout_utils.hpp index 00ac6471..f97258a7 100644 --- a/include/sparrow/layout/layout_utils.hpp +++ b/include/sparrow/layout/layout_utils.hpp @@ -18,45 +18,31 @@ namespace sparrow::detail { - - template - class layout_functor_base - { - public: - using layout_type = LAYOUT_TYPE; - constexpr layout_functor_base() = default; - constexpr layout_functor_base& operator=(layout_functor_base&&) = default; - constexpr layout_functor_base(const layout_functor_base&) = default; - constexpr layout_functor_base(layout_functor_base&&) = default; - constexpr layout_functor_base& operator=(const layout_functor_base&) = default; - - constexpr layout_functor_base(layout_type * layout) - : p_layout(layout) - { - } - - protected: - layout_type * p_layout = nullptr; - }; - - // Functor to get the value of the layout at index i. // // This is usefull to create a iterator over the values of a layout. // This functor will be passed to the functor_index_iterator. template - class layout_value_functor : public layout_functor_base + class layout_value_functor { - public: - using base_type = layout_functor_base; - using base_type::base_type; - using base_type::operator=; + public: + using value_type = VALUE_TYPE; + using layout_type = LAYOUT_TYPE; + + constexpr explicit layout_value_functor(layout_type* layout = nullptr) + : p_layout(layout) + { + } value_type operator()(std::size_t i) const { return this->p_layout->value(i); } + + private: + + layout_type* p_layout; }; @@ -65,18 +51,26 @@ namespace sparrow::detail // This is usefull to create a iterator over the nullable-values of a layout. // This functor will be passed to the functor_index_iterator. template - class layout_bracket_functor : public layout_functor_base + class layout_bracket_functor { public: - using base_type = layout_functor_base; - using base_type::base_type; - using base_type::operator=; + using value_type = VALUE_TYPE; + using layout_type = LAYOUT_TYPE; + + constexpr explicit layout_bracket_functor(layout_type* layout = nullptr) + : p_layout(layout) + { + } value_type operator()(std::size_t i) const { return this->p_layout->operator[](i); } + + private: + + layout_type* p_layout; }; }; // namespace sparrow::detail diff --git a/include/sparrow/layout/union_array.hpp b/include/sparrow/layout/union_array.hpp index 76af9d3c..6c27e1ad 100644 --- a/include/sparrow/layout/union_array.hpp +++ b/include/sparrow/layout/union_array.hpp @@ -27,11 +27,9 @@ namespace sparrow { - class dense_union_array; class sparse_union_array; - namespace detail { template @@ -60,13 +58,16 @@ namespace sparrow class union_array_crtp_base : public crtp_base { public: + + using self_type = union_array_crtp_base; using derived_type = DERIVED; using inner_value_type = array_traits::inner_value_type; using value_type = array_traits::const_reference; - using iterator = functor_index_iterator>; - using const_iterator = functor_index_iterator>; + using functor_type = detail::layout_bracket_functor; + using const_functor_type = detail::layout_bracket_functor; + using iterator = functor_index_iterator; + using const_iterator = functor_index_iterator; - explicit union_array_crtp_base(arrow_proxy proxy); value_type operator[](std::size_t i) const; value_type operator[](std::size_t i); @@ -80,14 +81,22 @@ namespace sparrow const_iterator cend() const; protected: + using type_id_map = std::array; static type_id_map parse_type_id_map(std::string_view format_string); + using children_type = std::vector>; + children_type make_children(arrow_proxy& proxy); + + explicit union_array_crtp_base(arrow_proxy proxy); + union_array_crtp_base(const self_type& rhs); + self_type& operator=(const self_type& rhs); + arrow_proxy& get_arrow_proxy(); arrow_proxy m_proxy; const std::uint8_t * p_type_ids; - std::vector> m_children; + children_type m_children; // map from type-id to child-index std::array m_type_id_map; @@ -96,10 +105,19 @@ namespace sparrow friend class array_wrapper_impl; }; + template + bool operator==(const union_array_crtp_base& lhs, const union_array_crtp_base& rhs); + class dense_union_array : public union_array_crtp_base { public: + + using base_type = union_array_crtp_base; + explicit dense_union_array(arrow_proxy proxy); + dense_union_array(const dense_union_array& rhs); + dense_union_array& operator=(const dense_union_array& rhs); + private: std::size_t element_offset(std::size_t i) const; const std::int32_t * p_offsets; @@ -109,7 +127,11 @@ namespace sparrow class sparse_union_array : public union_array_crtp_base { public: - using union_array_crtp_base::union_array_crtp_base; + + using base_type = union_array_crtp_base; + + explicit sparse_union_array(arrow_proxy proxy); + private: std::size_t element_offset(std::size_t i) const; friend class union_array_crtp_base; @@ -132,6 +154,10 @@ namespace sparrow return ret; } + /**************************************** + * union_array_crtp_base implementation * + ****************************************/ + template arrow_proxy& union_array_crtp_base::get_arrow_proxy() { @@ -140,15 +166,30 @@ namespace sparrow template union_array_crtp_base::union_array_crtp_base(arrow_proxy proxy) - : m_proxy(std::move(proxy)), - p_type_ids(reinterpret_cast(m_proxy.buffers()[0/*index of type-ids*/].data())), - m_children(m_proxy.children().size(), nullptr), - m_type_id_map(parse_type_id_map(m_proxy.format())) + : m_proxy(std::move(proxy)) + , p_type_ids(reinterpret_cast(m_proxy.buffers()[0/*index of type-ids*/].data())) + , m_children(make_children(m_proxy)) + , m_type_id_map(parse_type_id_map(m_proxy.format())) + { + } + + template + union_array_crtp_base::union_array_crtp_base(const self_type& rhs) + : self_type(rhs.m_proxy) + { + } + + template + auto union_array_crtp_base::operator=(const self_type& rhs) -> self_type& { - for (std::size_t i = 0; i < m_children.size(); ++i) + if (this != &rhs) { - m_children[i] = array_factory(m_proxy.children()[i].view()); + m_proxy = rhs.m_proxy; + p_type_ids = reinterpret_cast(m_proxy.buffers()[0/*index of type-ids*/].data()); + m_children = make_children(m_proxy); + m_type_id_map = parse_type_id_map(m_proxy.format()); } + return *this; } template @@ -175,13 +216,13 @@ namespace sparrow template auto union_array_crtp_base::begin() -> iterator { - return iterator(detail::layout_bracket_functor{this}, 0); + return iterator(functor_type{&(this->derived_cast())}, 0); } template auto union_array_crtp_base::end() -> iterator { - return iterator(detail::layout_bracket_functor{this}, this->size()); + return iterator(functor_type{&(this->derived_cast())}, this->size()); } template @@ -199,25 +240,61 @@ namespace sparrow template auto union_array_crtp_base::cbegin() const -> const_iterator { - return const_iterator(detail::layout_bracket_functor{this}, 0); + return const_iterator(const_functor_type{&(this->derived_cast())}, 0); } template auto union_array_crtp_base::cend() const -> const_iterator { - return const_iterator(detail::layout_bracket_functor{this}, this->size()); + return const_iterator(const_functor_type{&(this->derived_cast())}, this->size()); } + template + auto union_array_crtp_base::make_children(arrow_proxy& proxy) -> children_type + { + children_type children(proxy.children().size(), nullptr); + for (std::size_t i = 0; i < children.size(); ++i) + { + children[i] = array_factory(proxy.children()[i].view()); + } + return children; + } + + template + bool operator==(const union_array_crtp_base& lhs, const union_array_crtp_base& rhs) + { + return std::ranges::equal(lhs, rhs); + } + + /************************************ + * dense_union_array implementation * + ************************************/ + #ifdef __GNUC__ # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wcast-align" #endif inline dense_union_array::dense_union_array(arrow_proxy proxy) - : union_array_crtp_base(std::move(proxy)), - p_offsets(reinterpret_cast(m_proxy.buffers()[1/*index of offsets*/].data())) + : base_type(std::move(proxy)) + , p_offsets(reinterpret_cast(m_proxy.buffers()[1/*index of offsets*/].data())) + { + } + + inline dense_union_array::dense_union_array(const dense_union_array& rhs) + : dense_union_array(rhs.m_proxy) { } + inline dense_union_array& dense_union_array::operator=(const dense_union_array& rhs) + { + if (this !=&rhs) + { + base_type::operator=(rhs); + p_offsets = reinterpret_cast(m_proxy.buffers()[1/*index of offsets*/].data()); + } + return *this; + } + #ifdef __GNUC__ # pragma GCC diagnostic pop #endif @@ -227,6 +304,15 @@ namespace sparrow return static_cast(p_offsets[i]) + m_proxy.offset(); } + /************************************* + * sparse_union_array implementation * + *************************************/ + + inline sparse_union_array::sparse_union_array(arrow_proxy proxy) + : base_type(std::move(proxy)) + { + } + inline std::size_t sparse_union_array::element_offset(std::size_t i) const { return i + m_proxy.offset(); diff --git a/test/external_array_data_creation.cpp b/test/external_array_data_creation.cpp index 26a2ea92..838761cd 100644 --- a/test/external_array_data_creation.cpp +++ b/test/external_array_data_creation.cpp @@ -241,10 +241,6 @@ namespace sparrow::test std::make_move_iterator(children_schemas.end()), schema.children, [](auto&& child) { return new ArrowSchema(std::move(child)); }); - /*for (std::size_t i = 0; i < children_schemas.size(); ++i) - { - schema.children[i] = &children_schemas[i]; - }*/ schema.dictionary = nullptr; schema.release = &release_arrow_schema; @@ -268,10 +264,6 @@ namespace sparrow::test std::make_move_iterator(children_arrays.end()), arr.children, [](auto&& child) { return new ArrowArray(std::move(child)); }); - /*for (std::size_t i = 0; i < children_arrays.size(); ++i) - { - arr.children[i] = &children_arrays[i]; - }*/ arr.dictionary = nullptr; arr.release = &release_arrow_array; @@ -321,10 +313,10 @@ namespace sparrow::test void fill_schema_and_array_for_sparse_union( ArrowSchema& schema, ArrowArray& arr, - std::vector & children_schemas, - std::vector & children_arrays, - const std::vector & type_ids, - const std::string & format + std::vector&& children_schemas, + std::vector&& children_arrays, + const std::vector& type_ids, + const std::string& format ){ schema.format = format.c_str(); schema.name = "test"; @@ -332,10 +324,10 @@ namespace sparrow::test schema.n_children = static_cast(children_schemas.size()); schema.children = new ArrowSchema*[children_schemas.size()]; - for (std::size_t i = 0; i < children_schemas.size(); ++i) - { - schema.children[i] = &children_schemas[i]; - } + std::transform(std::make_move_iterator(children_schemas.begin()), + std::make_move_iterator(children_schemas.end()), + schema.children, + [](auto&& child) { return new ArrowSchema(std::move(child)); }); schema.dictionary = nullptr; schema.release = &release_arrow_schema; @@ -355,10 +347,10 @@ namespace sparrow::test arr.buffers = const_cast(reinterpret_cast(buf)); arr.children = new ArrowArray*[children_arrays.size()]; - for (std::size_t i = 0; i < children_arrays.size(); ++i) - { - arr.children[i] = &children_arrays[i]; - } + std::transform(std::make_move_iterator(children_arrays.begin()), + std::make_move_iterator(children_arrays.end()), + arr.children, + [](auto&& child) { return new ArrowArray(std::move(child)); }); arr.dictionary = nullptr; arr.release = &release_arrow_array; @@ -367,8 +359,8 @@ namespace sparrow::test void fill_schema_and_array_for_dense_union( ArrowSchema& schema, ArrowArray& arr, - std::vector & children_schemas, - std::vector & children_arrays, + std::vector&& children_schemas, + std::vector&& children_arrays, const std::vector & type_ids, const std::vector & offsets, const std::string & format @@ -379,10 +371,10 @@ namespace sparrow::test schema.n_children = static_cast(children_schemas.size()); schema.children = new ArrowSchema*[children_schemas.size()]; - for (std::size_t i = 0; i < children_schemas.size(); ++i) - { - schema.children[i] = &children_schemas[i]; - } + std::transform(std::make_move_iterator(children_schemas.begin()), + std::make_move_iterator(children_schemas.end()), + schema.children, + [](auto&& child) { return new ArrowSchema(std::move(child)); }); schema.dictionary = nullptr; schema.release = &release_arrow_schema; @@ -407,10 +399,10 @@ namespace sparrow::test arr.buffers = const_cast(reinterpret_cast(buf)); arr.children = new ArrowArray*[children_arrays.size()]; - for (std::size_t i = 0; i < children_arrays.size(); ++i) - { - arr.children[i] = &children_arrays[i]; - } + std::transform(std::make_move_iterator(children_arrays.begin()), + std::make_move_iterator(children_arrays.end()), + arr.children, + [](auto&& child) { return new ArrowArray(std::move(child)); }); arr.dictionary = nullptr; arr.release = &release_arrow_array; diff --git a/test/external_array_data_creation.hpp b/test/external_array_data_creation.hpp index e22ab827..3529d749 100644 --- a/test/external_array_data_creation.hpp +++ b/test/external_array_data_creation.hpp @@ -293,8 +293,8 @@ namespace sparrow::test void fill_schema_and_array_for_sparse_union( ArrowSchema& schema, ArrowArray& arr, - std::vector & children_schemas, - std::vector & children_arrays, + std::vector&& children_schemas, + std::vector&& children_arrays, const std::vector & type_ids, const std::string & format ); @@ -302,8 +302,8 @@ namespace sparrow::test void fill_schema_and_array_for_dense_union( ArrowSchema& schema, ArrowArray& arr, - std::vector & children_schemas, - std::vector & children_arrays, + std::vector&& children_schemas, + std::vector&& children_arrays, const std::vector & type_ids, const std::vector & offsets, const std::string & format diff --git a/test/test_union_array.cpp b/test/test_union_array.cpp index ed62d15a..e599d94f 100644 --- a/test/test_union_array.cpp +++ b/test/test_union_array.cpp @@ -25,15 +25,10 @@ namespace sparrow { - TEST_SUITE("union") - { - - TEST_CASE("sparse_union") + namespace test + { + arrow_proxy make_sparse_union_proxy(const std::string& format_string, std::size_t n, bool altered = false) { - - const std::string format_string = "+us:3,4"; - const std::size_t n = 4; - std::vector children_arrays(2); std::vector children_schemas(2); @@ -43,22 +38,76 @@ namespace sparrow test::fill_schema_and_array(children_schemas[1], children_arrays[1], n, 0/*offset*/, {}); children_schemas[1].name = "item 1"; - ArrowArray arr{}; ArrowSchema schema{}; std::vector type_ids = {std::uint8_t(3), std::uint8_t(4), std::uint8_t(3), std::uint8_t(4)}; + if (altered) + { + type_ids[0] = std::uint8_t(4); + } test::fill_schema_and_array_for_sparse_union( - schema, arr, children_schemas, children_arrays, type_ids, format_string + schema, arr, std::move(children_schemas), std::move(children_arrays), type_ids, format_string ); - arrow_proxy proxy(&arr, &schema); + return arrow_proxy(std::move(arr), std::move(schema)); + } + + arrow_proxy make_dense_union_proxy(const std::string& format_string, std::size_t n_c, bool altered = false) + { + std::vector children_arrays(2); + std::vector children_schemas(2); + + test::fill_schema_and_array(children_schemas[0], children_arrays[0], n_c, 0/*offset*/, {}); + children_schemas[0].name = "item 0"; + + test::fill_schema_and_array(children_schemas[1], children_arrays[1], n_c, 0/*offset*/, {}); + children_schemas[1].name = "item 1"; + + ArrowArray arr{}; + ArrowSchema schema{}; + + std::vector type_ids = {std::uint8_t(3), std::uint8_t(4), std::uint8_t(3), std::uint8_t(4)}; + if (altered) + { + type_ids[0] = std::uint8_t(4); + } + std::vector offsets = {0,0,1,1}; + + test::fill_schema_and_array_for_dense_union( + schema, arr, std::move(children_schemas), std::move(children_arrays), type_ids, offsets, format_string + ); + + return arrow_proxy(std::move(arr), std::move(schema)); + } + } + + TEST_SUITE("union") + { + TEST_CASE("sparse_union") + { + + const std::string format_string = "+us:3,4"; + const std::size_t n = 4; + + auto proxy = test::make_sparse_union_proxy(format_string, n); sparse_union_array uarr(std::move(proxy)); REQUIRE(uarr.size() == n); + SUBCASE("copy") + { + sparse_union_array uarr2(uarr); + CHECK_EQ(uarr2, uarr); + + sparse_union_array uarr3(test::make_sparse_union_proxy(format_string, n, true)); + CHECK_NE(uarr3, uarr); + uarr3 = uarr; + CHECK_EQ(uarr3, uarr); + } + SUBCASE("operator[]") { for (std::size_t i = 0; i < n; ++i) @@ -125,39 +174,29 @@ namespace sparrow } } + TEST_CASE("dense_union") { - const std::string format_string = "+ud:3,4"; const std::size_t n_c = 2; const std::size_t n = 4; - std::vector children_arrays(2); - std::vector children_schemas(2); - - test::fill_schema_and_array(children_schemas[0], children_arrays[0], n_c, 0/*offset*/, {}); - children_schemas[0].name = "item 0"; - - test::fill_schema_and_array(children_schemas[1], children_arrays[1], n_c, 0/*offset*/, {}); - children_schemas[1].name = "item 1"; - - - ArrowArray arr{}; - ArrowSchema schema{}; - - std::vector type_ids = {std::uint8_t(3), std::uint8_t(4), std::uint8_t(3), std::uint8_t(4)}; - std::vector offsets = {0,0,1,1}; - - test::fill_schema_and_array_for_dense_union( - schema, arr, children_schemas, children_arrays, type_ids, offsets, format_string - ); - - arrow_proxy proxy(&arr, &schema); - + auto proxy = test::make_dense_union_proxy(format_string, n_c); dense_union_array uarr(std::move(proxy)); REQUIRE(uarr.size() == n); + SUBCASE("copy") + { + dense_union_array uarr2(uarr); + CHECK_EQ(uarr2, uarr); + + dense_union_array uarr3(test::make_dense_union_proxy(format_string, n_c, true)); + CHECK_NE(uarr3, uarr); + uarr3 = uarr; + CHECK_EQ(uarr3, uarr); + } + SUBCASE("operator[]") { for (std::size_t i = 0; i < n; ++i) From 5ec3b323d25403cf7249bec27f07f2f24418bf82 Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Tue, 22 Oct 2024 09:21:36 +0200 Subject: [PATCH 6/7] Fixed build on Exotic architectures --- .../sparrow/layout/list_layout/list_array.hpp | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/include/sparrow/layout/list_layout/list_array.hpp b/include/sparrow/layout/list_layout/list_array.hpp index 28c6a34c..8c4a0a15 100644 --- a/include/sparrow/layout/list_layout/list_array.hpp +++ b/include/sparrow/layout/list_layout/list_array.hpp @@ -382,10 +382,6 @@ namespace sparrow return *this; } -#ifdef __GNUC__ -# pragma GCC diagnostic pop -#endif - template auto list_array_impl::offset_range(size_type i) const -> std::pair { @@ -404,11 +400,6 @@ namespace sparrow * list_view_array_impl implementation * ***************************************/ -#ifdef __GNUC__ -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wcast-align" -#endif - template inline list_view_array_impl::list_view_array_impl(arrow_proxy proxy) : base_type(std::move(proxy)) @@ -437,10 +428,6 @@ namespace sparrow return *this; } -#ifdef __GNUC__ -# pragma GCC diagnostic pop -#endif - template inline auto list_view_array_impl::offset_range(size_type i) const -> std::pair @@ -465,6 +452,10 @@ namespace sparrow ); } +#ifdef __GNUC__ +# pragma GCC diagnostic pop +#endif + /***************************************** * fixed_sized_list_array implementation * *****************************************/ From 9bc199caad8a07bf49b1dff3f770f1477ac1dcbb Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Tue, 22 Oct 2024 10:14:10 +0200 Subject: [PATCH 7/7] Changes according to the review --- test/test_list_array.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_list_array.cpp b/test/test_list_array.cpp index f2407ca6..51e23d84 100644 --- a/test/test_list_array.cpp +++ b/test/test_list_array.cpp @@ -331,7 +331,6 @@ namespace sparrow } } } - //delete schema.format; } }