From 0982f4b1f093af34c9d0b52e11478202fbb1758a Mon Sep 17 00:00:00 2001 From: DerThorsten Date: Tue, 15 Oct 2024 15:43:02 +0200 Subject: [PATCH 01/14] cleanup --- include/sparrow/layout/array_base.hpp | 38 ++-- include/sparrow/layout/layout_utils.hpp | 56 +++-- .../run_end_encoded_iterator.hpp | 6 +- .../layout/union_layout/union_array.hpp | 192 ++++++++++++++++++ include/sparrow/utils/crtp_base.hpp | 41 ++++ test/CMakeLists.txt | 1 + test/external_array_data_creation.cpp | 46 +++++ test/external_array_data_creation.hpp | 8 + test/test_union_array.cpp | 76 +++++++ 9 files changed, 424 insertions(+), 40 deletions(-) create mode 100644 include/sparrow/layout/union_layout/union_array.hpp create mode 100644 include/sparrow/utils/crtp_base.hpp create mode 100644 test/test_union_array.cpp diff --git a/include/sparrow/layout/array_base.hpp b/include/sparrow/layout/array_base.hpp index 513cb96f..dda9d2a7 100644 --- a/include/sparrow/layout/array_base.hpp +++ b/include/sparrow/layout/array_base.hpp @@ -22,6 +22,7 @@ #include "sparrow/layout/layout_iterator.hpp" #include "sparrow/utils/nullable.hpp" #include "sparrow/utils/iterator.hpp" +#include "sparrow/utils/crtp_base.hpp" namespace sparrow { @@ -61,7 +62,7 @@ namespace sparrow * implements comme interface for arrays with a bitmap. */ template - class array_crtp_base + class array_crtp_base : public crtp_base { public: using self_type = array_crtp_base; @@ -127,8 +128,6 @@ namespace sparrow const_bitmap_iterator bitmap_begin() const; const_bitmap_iterator bitmap_end() const; - derived_type& derived_cast(); - const derived_type& derived_cast() const; private: @@ -155,13 +154,13 @@ namespace sparrow template auto array_crtp_base::begin() -> iterator { - return iterator(derived_cast().value_begin(), derived_cast().bitmap_begin()); + return iterator(this->derived_cast().value_begin(), this->derived_cast().bitmap_begin()); } template auto array_crtp_base::end() -> iterator { - return iterator(derived_cast().value_end(), derived_cast().bitmap_end()); + return iterator(this->derived_cast().value_end(), this->derived_cast().bitmap_end()); } template @@ -179,13 +178,13 @@ namespace sparrow template auto array_crtp_base::cbegin() const -> const_iterator { - return const_iterator(derived_cast().value_cbegin(), derived_cast().bitmap_begin()); + return const_iterator(this->derived_cast().value_cbegin(), this->derived_cast().bitmap_begin()); } template auto array_crtp_base::cend() const -> const_iterator { - return const_iterator(derived_cast().value_cend(), derived_cast().bitmap_end()); + return const_iterator(this->derived_cast().value_cend(), this->derived_cast().bitmap_end()); } template @@ -197,15 +196,15 @@ namespace sparrow template auto array_crtp_base::values() const -> const_value_range { - return const_value_range(derived_cast().value_cbegin(), derived_cast().value_cend()); + return const_value_range(this->derived_cast().value_cbegin(), this->derived_cast().value_cend()); } template auto array_crtp_base::operator[](size_type i) -> reference { return reference( - inner_reference(derived_cast().value(i)), - derived_cast().has_value(i) + inner_reference(this->derived_cast().value(i)), + this->derived_cast().has_value(i) ); } @@ -213,8 +212,8 @@ namespace sparrow auto array_crtp_base::operator[](size_type i) const -> const_reference { return const_reference( - inner_const_reference(derived_cast().value(i)), - derived_cast().has_value(i) + inner_const_reference(this->derived_cast().value(i)), + this->derived_cast().has_value(i) ); } @@ -259,7 +258,7 @@ namespace sparrow template auto array_crtp_base::bitmap_begin() -> bitmap_iterator { - return derived_cast().bitmap_begin_impl(); + return this->derived_cast().bitmap_begin_impl(); } template @@ -271,7 +270,7 @@ namespace sparrow template auto array_crtp_base::bitmap_begin() const -> const_bitmap_iterator { - return derived_cast().bitmap_begin_impl(); + return this->derived_cast().bitmap_begin_impl(); } template @@ -280,17 +279,6 @@ namespace sparrow return sparrow::next(bitmap_begin(), size()); } - template - auto array_crtp_base::derived_cast() -> derived_type& - { - return *static_cast(this); - } - - template - auto array_crtp_base::derived_cast() const -> const derived_type& - { - return *static_cast(this); - } template bool operator==(const array_crtp_base& lhs, const array_crtp_base& rhs) diff --git a/include/sparrow/layout/layout_utils.hpp b/include/sparrow/layout/layout_utils.hpp index ac32657a..871bf006 100644 --- a/include/sparrow/layout/layout_utils.hpp +++ b/include/sparrow/layout/layout_utils.hpp @@ -18,33 +18,65 @@ namespace sparrow::detail { + + template + class layout_functor_base + { + public: + using layout_type = LAYOUT_TYPE; + constexpr layout_functor_base() = default; + constexpr layout_functor_base& operator=(layout_functor_base&&) = default; + constexpr layout_functor_base(const layout_functor_base&) = default; + constexpr layout_functor_base(layout_functor_base&&) = default; + constexpr layout_functor_base& operator=(const layout_functor_base&) = default; + + constexpr layout_functor_base(layout_type * layout) + : p_layout(layout) + { + } + + protected: + layout_type * p_layout = nullptr; + }; + + // Functor to get the value of the layout at index i. // // This is usefull to create a iterator over the values of a layout. // This functor will be passed to the functor_index_iterator. template - class layout_value_functor + class layout_value_functor : public layout_functor_base { public: - using layout_type = LAYOUT_TYPE; + using base_type = layout_functor_base; + using base_type::base_type; + using base_type::operator=; using value_type = VALUE_TYPE; - constexpr layout_value_functor() = default; - constexpr layout_value_functor& operator=(layout_value_functor&&) = default; - constexpr layout_value_functor(const layout_value_functor&) = default; - constexpr layout_value_functor(layout_value_functor&&) = default; - constexpr layout_value_functor& operator=(const layout_value_functor&) = default; - constexpr layout_value_functor(layout_type * layout) - : p_layout(layout) + value_type operator()(std::size_t i) const { + return this->p_layout->value(i); } + }; + + + // Functor to get the optional-value of the layout at index i. + // + // This is usefull to create a iterator over the nullable-values of a layout. + // This functor will be passed to the functor_index_iterator. + template + class layout_bracket_functor : public layout_functor_base + { + public: + using base_type = layout_functor_base; + using base_type::base_type; + using base_type::operator=; + using value_type = VALUE_TYPE; value_type operator()(std::size_t i) const { - return p_layout->value(i); + return this->p_layout->operator[](i); } - private: - layout_type * p_layout = nullptr; }; }; // namespace sparrow::detail diff --git a/include/sparrow/layout/run_end_encoded_layout/run_end_encoded_iterator.hpp b/include/sparrow/layout/run_end_encoded_layout/run_end_encoded_iterator.hpp index 6fb1f3da..10a56357 100644 --- a/include/sparrow/layout/run_end_encoded_layout/run_end_encoded_iterator.hpp +++ b/include/sparrow/layout/run_end_encoded_layout/run_end_encoded_iterator.hpp @@ -39,12 +39,12 @@ namespace sparrow array_traits::const_reference > { - private: + private: using array_ptr_type = std::conditional_t; - public: + public: run_encoded_array_iterator() = default; run_encoded_array_iterator(array_ptr_type array_ptr, std::uint64_t index, std::uint64_t run_end_index); - private: + private: bool equal(const run_encoded_array_iterator& rhs) const; void increment(); array_traits::const_reference dereference() const; diff --git a/include/sparrow/layout/union_layout/union_array.hpp b/include/sparrow/layout/union_layout/union_array.hpp new file mode 100644 index 00000000..969b149c --- /dev/null +++ b/include/sparrow/layout/union_layout/union_array.hpp @@ -0,0 +1,192 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "sparrow/config/config.hpp" +#include "sparrow/layout/array_wrapper.hpp" +#include "sparrow/array_factory.hpp" +#include "sparrow/layout/layout_utils.hpp" +#include "sparrow/layout/nested_value_types.hpp" +#include "sparrow/utils/iterator.hpp" +#include "sparrow/utils/memory.hpp" +#include "sparrow/utils/nullable.hpp" +#include "sparrow/layout/dispatch_lib.hpp" +#include "sparrow/utils/crtp_base.hpp" +#include "sparrow/utils/functor_index_iterator.hpp" + + + + +namespace sparrow +{ + + + // helper crtp-base to have sparse and dense and dense union in the same file + template + class union_array_crtp_base : public crtp_base + { + public: + using derived_type = DERIVED; + using value_type = array_traits::const_reference; + using iterator = functor_index_iterator>; + using const_iterator = functor_index_iterator>; + + explicit union_array_crtp_base(arrow_proxy proxy); + value_type operator[](std::size_t i) const; + value_type operator[](std::size_t i); + + std::size_t size() const; + + iterator begin(); + iterator end(); + const_iterator begin() const; + const_iterator end() const; + const_iterator cbegin() const; + const_iterator cend() const; + + protected: + using type_id_map = std::array; + static type_id_map parse_type_id_map(std::string_view format_string) + { + type_id_map ret; + // remove +du: / +su: prefix + format_string.remove_prefix(4); + + // const std::string_view const_format_string = format_string; + + constexpr std::string_view delim { "," }; + std::size_t child_index = 0; + std::ranges::for_each(format_string | std::views::split(delim), [&](const auto& s) { + // convert s to uint8_t number + const auto as_int = std::atoi(std::string(s.begin(), s.end()).c_str()); + ret[static_cast(as_int)] = static_cast(child_index); + ++child_index; + }); + return ret; + } + + arrow_proxy m_proxy; + const std::uint8_t * p_type_ids; + std::vector> m_children; + + // map from type-id to child-index + std::array m_type_id_map; + + }; + + class dense_union_array : public union_array_crtp_base + { + public: + explicit dense_union_array(arrow_proxy proxy); + private: + std::size_t element_offset(std::size_t i) const; + const std::int32_t * p_offsets; + friend class union_array_crtp_base; + }; + + class sparse_union_array : public union_array_crtp_base + { + public: + using union_array_crtp_base::union_array_crtp_base; + private: + std::size_t element_offset(std::size_t i) const; + friend class union_array_crtp_base; + }; + + template + union_array_crtp_base::union_array_crtp_base(arrow_proxy proxy) + : m_proxy(std::move(proxy)), + p_type_ids(reinterpret_cast(m_proxy.buffers()[0].data())), + m_children(m_proxy.children().size(), nullptr), + m_type_id_map(parse_type_id_map(m_proxy.format())) + { + for (std::size_t i = 0; i < m_children.size(); ++i) + { + m_children[i] = array_factory(m_proxy.children()[i].view()); + } + } + + template + auto union_array_crtp_base::operator[](std::size_t i) const -> value_type + { + const auto type_id = static_cast(p_type_ids[i]); + const auto child_index = m_type_id_map[type_id]; + const auto offset = this->derived_cast().element_offset(i); + return array_element(*m_children[child_index], static_cast(offset)); + } + + template + auto union_array_crtp_base::operator[](std::size_t i) -> value_type + { + return static_cast(*this)[i]; + } + + template + std::size_t union_array_crtp_base::size() const + { + return m_proxy.length(); + } + + template + auto union_array_crtp_base::begin() -> iterator + { + return iterator(detail::layout_bracket_functor{this}, 0); + } + + template + auto union_array_crtp_base::end() -> iterator + { + return iterator(detail::layout_bracket_functor{this}, this->size()); + } + + template + auto union_array_crtp_base::begin() const -> const_iterator + { + return cbegin(); + } + + template + auto union_array_crtp_base::end() const -> const_iterator + { + return cend(); + } + + template + auto union_array_crtp_base::cbegin() const -> const_iterator + { + return const_iterator(detail::layout_bracket_functor{this}, 0); + } + + template + auto union_array_crtp_base::cend() const -> const_iterator + { + return const_iterator(detail::layout_bracket_functor{this}, this->size()); + } + + inline dense_union_array::dense_union_array(arrow_proxy proxy) + : union_array_crtp_base(std::move(proxy)), + p_offsets(reinterpret_cast(m_proxy.buffers()[1].data())) + { + } + inline std::size_t dense_union_array::element_offset(std::size_t i) const + { + return static_cast(p_offsets[i]) + static_cast(m_proxy.offset()); + } + + inline std::size_t sparse_union_array::element_offset(std::size_t i) const + { + return i + static_cast(m_proxy.offset()); + } +} \ No newline at end of file diff --git a/include/sparrow/utils/crtp_base.hpp b/include/sparrow/utils/crtp_base.hpp new file mode 100644 index 00000000..f47de12c --- /dev/null +++ b/include/sparrow/utils/crtp_base.hpp @@ -0,0 +1,41 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or mplied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + + +namespace sparrow +{ + template + class crtp_base + { + protected: + using derived_type = DERIVED; + + derived_type& derived_cast(); + const derived_type& derived_cast() const; + }; + + template + auto crtp_base::derived_cast() -> derived_type& + { + return static_cast(*this); + } + + template + auto crtp_base::derived_cast() const -> const derived_type& + { + return static_cast(*this); + } +} \ No newline at end of file diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index bfbf2163..11ef7cfb 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -70,6 +70,7 @@ set(SPARROW_TESTS_SOURCES test_utils.hpp test_variable_size_binary_array.cpp test_run_end_encoded_array.cpp + test_union_array.cpp ) set(test_target "test_sparrow_lib") add_executable(${test_target} ${SPARROW_TESTS_SOURCES}) diff --git a/test/external_array_data_creation.cpp b/test/external_array_data_creation.cpp index 4bca60a6..83835da2 100644 --- a/test/external_array_data_creation.cpp +++ b/test/external_array_data_creation.cpp @@ -300,5 +300,51 @@ namespace sparrow::test } + void fill_schema_and_array_for_sparse_union( + ArrowSchema& schema, + ArrowArray& arr, + std::vector & children_schemas, + std::vector & children_arrays, + const std::vector & type_ids, + const std::string & format + ){ + schema.format = format.c_str(); + schema.name = "test"; + schema.metadata = "test metadata"; + + schema.n_children = static_cast(children_schemas.size()); + schema.children = new ArrowSchema*[children_schemas.size()]; + for (std::size_t i = 0; i < children_schemas.size(); ++i) + { + schema.children[i] = &children_schemas[i]; + } + + schema.dictionary = nullptr; + schema.release = &release_arrow_schema; + + arr.length = static_cast(type_ids.size()); + + arr.null_count = 0; + arr.offset = 0; + + arr.n_buffers = 1; + std::uint8_t** buf = new std::uint8_t*[1]; + buf[0] = new std::uint8_t[type_ids.size()]; + std::copy(type_ids.begin(), type_ids.end(), buf[0]); + + arr.n_children = static_cast(children_arrays.size()); + + arr.buffers = const_cast(reinterpret_cast(buf)); + + arr.children = new ArrowArray*[children_arrays.size()]; + for (std::size_t i = 0; i < children_arrays.size(); ++i) + { + arr.children[i] = &children_arrays[i]; + } + + arr.dictionary = nullptr; + arr.release = &release_arrow_array; + } + } diff --git a/test/external_array_data_creation.hpp b/test/external_array_data_creation.hpp index cc5b7cc6..f53a1e3b 100644 --- a/test/external_array_data_creation.hpp +++ b/test/external_array_data_creation.hpp @@ -288,5 +288,13 @@ namespace sparrow::test std::size_t length ); + void fill_schema_and_array_for_sparse_union( + ArrowSchema& schema, + ArrowArray& arr, + std::vector & children_schemas, + std::vector & children_arrays, + const std::vector & type_ids, + const std::string & format + ); } diff --git a/test/test_union_array.cpp b/test/test_union_array.cpp new file mode 100644 index 00000000..2d10fa2e --- /dev/null +++ b/test/test_union_array.cpp @@ -0,0 +1,76 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "sparrow/layout/primitive_array.hpp" +#include "sparrow/utils/nullable.hpp" +#include "sparrow/layout/dispatch.hpp" +#include "doctest/doctest.h" + +#include "test_utils.hpp" +#include "../test/external_array_data_creation.hpp" + +#include "sparrow/layout/union_layout/union_array.hpp" + +namespace sparrow +{ + + TEST_SUITE("union") + { + + TEST_CASE("sparse_union") + { + + const std::string format_string = "+us:3,4"; + const std::size_t n = 4; + + std::vector children_arrays(2); + std::vector children_schemas(2); + + test::fill_schema_and_array(children_schemas[0], children_arrays[0], n, 0/*offset*/, {}); + children_schemas[0].name = "item 0"; + + test::fill_schema_and_array(children_schemas[1], children_arrays[1], n, 0/*offset*/, {}); + children_schemas[1].name = "item 1"; + + + ArrowArray arr{}; + ArrowSchema schema{}; + + std::vector type_ids = {std::uint8_t(3), std::uint8_t(4), std::uint8_t(3), std::uint8_t(4)}; + + test::fill_schema_and_array_for_sparse_union( + schema, arr, children_schemas, children_arrays, type_ids, format_string + ); + + arrow_proxy proxy(&arr, &schema); + + // create a sparse union array + sparse_union_array sparse_union_arr(std::move(proxy)); + + REQUIRE(sparse_union_arr.size() == n); + + SUBCASE("operator[]") + { + for (std::size_t i = 0; i < n; ++i) + { + const auto& val = sparse_union_arr[i]; + REQUIRE(val.has_value()); + } + } + } + } + + +} + From e6d8c6f41f58a6cfd56e9aafe7badbf6d726fe31 Mon Sep 17 00:00:00 2001 From: DerThorsten Date: Wed, 16 Oct 2024 08:10:46 +0200 Subject: [PATCH 02/14] cleanup --- .gitignore | 3 + test/external_array_data_creation.cpp | 52 +++++++++ test/external_array_data_creation.hpp | 10 ++ test/test_union_array.cpp | 162 +++++++++++++++++++++++++- 4 files changed, 223 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 0102ea9e..6b460a69 100644 --- a/.gitignore +++ b/.gitignore @@ -44,3 +44,6 @@ # Clangd cache .cache CMakeUserPresets.json + +# MacOS +.DS_Store diff --git a/test/external_array_data_creation.cpp b/test/external_array_data_creation.cpp index 83835da2..597082fb 100644 --- a/test/external_array_data_creation.cpp +++ b/test/external_array_data_creation.cpp @@ -346,5 +346,57 @@ namespace sparrow::test arr.release = &release_arrow_array; } + void fill_schema_and_array_for_dense_union( + ArrowSchema& schema, + ArrowArray& arr, + std::vector & children_schemas, + std::vector & children_arrays, + const std::vector & type_ids, + const std::vector & offsets, + const std::string & format + ){ + schema.format = format.c_str(); + schema.name = "test"; + schema.metadata = "test metadata"; + + schema.n_children = static_cast(children_schemas.size()); + schema.children = new ArrowSchema*[children_schemas.size()]; + for (std::size_t i = 0; i < children_schemas.size(); ++i) + { + schema.children[i] = &children_schemas[i]; + } + + schema.dictionary = nullptr; + schema.release = &release_arrow_schema; + + arr.length = static_cast(type_ids.size()); + + arr.null_count = 0; + arr.offset = 0; + + arr.n_buffers = 2; + std::uint8_t** buf = new std::uint8_t*[2]; + + buf[0] = new std::uint8_t[type_ids.size()]; + std::copy(type_ids.begin(), type_ids.end(), buf[0]); + + buf[1] = new std::uint8_t[offsets.size() * sizeof(std::int32_t)]; + std::copy(offsets.begin(), offsets.end(), reinterpret_cast(buf[1])); + + + arr.n_children = static_cast(children_arrays.size()); + + arr.buffers = const_cast(reinterpret_cast(buf)); + + arr.children = new ArrowArray*[children_arrays.size()]; + for (std::size_t i = 0; i < children_arrays.size(); ++i) + { + arr.children[i] = &children_arrays[i]; + } + + arr.dictionary = nullptr; + arr.release = &release_arrow_array; + } + } diff --git a/test/external_array_data_creation.hpp b/test/external_array_data_creation.hpp index f53a1e3b..6e3fa0dd 100644 --- a/test/external_array_data_creation.hpp +++ b/test/external_array_data_creation.hpp @@ -296,5 +296,15 @@ namespace sparrow::test const std::vector & type_ids, const std::string & format ); + + void fill_schema_and_array_for_dense_union( + ArrowSchema& schema, + ArrowArray& arr, + std::vector & children_schemas, + std::vector & children_arrays, + const std::vector & type_ids, + const std::vector & offsets, + const std::string & format + ); } diff --git a/test/test_union_array.cpp b/test/test_union_array.cpp index 2d10fa2e..fabd64c1 100644 --- a/test/test_union_array.cpp +++ b/test/test_union_array.cpp @@ -55,19 +55,173 @@ namespace sparrow arrow_proxy proxy(&arr, &schema); - // create a sparse union array - sparse_union_array sparse_union_arr(std::move(proxy)); + sparse_union_array uarr(std::move(proxy)); - REQUIRE(sparse_union_arr.size() == n); + REQUIRE(uarr.size() == n); SUBCASE("operator[]") { for (std::size_t i = 0; i < n; ++i) { - const auto& val = sparse_union_arr[i]; + const auto& val = uarr[i]; + REQUIRE(val.has_value()); + } + + // 0 + std::visit([](auto&& arg) { + using inner_type = std::decay_t< typename std::decay_t::value_type>; + if constexpr (std::is_same_v) + { + REQUIRE_EQ(0.0f, arg.value()); + } + else + { + CHECK(false); + } + + }, uarr[0]); + + // 1 + std::visit([](auto&& arg) { + using inner_type = std::decay_t< typename std::decay_t::value_type>; + if constexpr (std::is_same_v) + { + REQUIRE_EQ(1, arg.value()); + } + else + { + CHECK(false); + } + + }, uarr[1]); + + // 2 + std::visit([](auto&& arg) { + using inner_type = std::decay_t< typename std::decay_t::value_type>; + if constexpr (std::is_same_v) + { + REQUIRE_EQ(2.0f, arg.value()); + } + else + { + CHECK(false); + } + + }, uarr[2]); + + // 3 + std::visit([](auto&& arg) { + using inner_type = std::decay_t< typename std::decay_t::value_type>; + if constexpr (std::is_same_v) + { + REQUIRE_EQ(3, arg.value()); + } + else + { + CHECK(false); + } + + }, uarr[3]); + + } + } + TEST_CASE("dense_union") + { + + const std::string format_string = "+ud:3,4"; + const std::size_t n_c = 2; + const std::size_t n = 4; + + std::vector children_arrays(2); + std::vector children_schemas(2); + + test::fill_schema_and_array(children_schemas[0], children_arrays[0], n_c, 0/*offset*/, {}); + children_schemas[0].name = "item 0"; + + test::fill_schema_and_array(children_schemas[1], children_arrays[1], n_c, 0/*offset*/, {}); + children_schemas[1].name = "item 1"; + + + ArrowArray arr{}; + ArrowSchema schema{}; + + std::vector type_ids = {std::uint8_t(3), std::uint8_t(4), std::uint8_t(3), std::uint8_t(4)}; + std::vector offsets = {0,0,1,1}; + + test::fill_schema_and_array_for_dense_union( + schema, arr, children_schemas, children_arrays, type_ids, offsets, format_string + ); + + arrow_proxy proxy(&arr, &schema); + + dense_union_array uarr(std::move(proxy)); + + REQUIRE(uarr.size() == n); + + SUBCASE("operator[]") + { + for (std::size_t i = 0; i < n; ++i) + { + const auto& val = uarr[i]; REQUIRE(val.has_value()); } } + + // 0 + std::visit([](auto&& arg) { + using inner_type = std::decay_t< typename std::decay_t::value_type>; + if constexpr (std::is_same_v) + { + REQUIRE_EQ(0.0f, arg.value()); + } + else + { + CHECK(false); + } + + }, uarr[0]); + + // 1 + std::visit([](auto&& arg) { + using inner_type = std::decay_t< typename std::decay_t::value_type>; + if constexpr (std::is_same_v) + { + REQUIRE_EQ(0, arg.value()); + } + else + { + CHECK(false); + } + + }, uarr[1]); + + // 2 + std::visit([](auto&& arg) { + using inner_type = std::decay_t< typename std::decay_t::value_type>; + if constexpr (std::is_same_v) + { + REQUIRE_EQ(1.0f, arg.value()); + } + else + { + CHECK(false); + } + + }, uarr[2]); + + // 3 + std::visit([](auto&& arg) { + using inner_type = std::decay_t< typename std::decay_t::value_type>; + if constexpr (std::is_same_v) + { + REQUIRE_EQ(1, arg.value()); + } + else + { + CHECK(false); + } + + }, uarr[3]); } } From 80f856dce92d4b2c87df2e36dcfbb034e39d466c Mon Sep 17 00:00:00 2001 From: DerThorsten Date: Wed, 16 Oct 2024 08:18:28 +0200 Subject: [PATCH 03/14] renamed --- include/sparrow/layout/union_layout/union_array.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sparrow/layout/union_layout/union_array.hpp b/include/sparrow/layout/union_layout/union_array.hpp index 969b149c..72ef830b 100644 --- a/include/sparrow/layout/union_layout/union_array.hpp +++ b/include/sparrow/layout/union_layout/union_array.hpp @@ -22,7 +22,7 @@ #include "sparrow/utils/iterator.hpp" #include "sparrow/utils/memory.hpp" #include "sparrow/utils/nullable.hpp" -#include "sparrow/layout/dispatch_lib.hpp" +#include "sparrow/layout/array_helper.hpp" #include "sparrow/utils/crtp_base.hpp" #include "sparrow/utils/functor_index_iterator.hpp" From 33c01fb3fa7d76dd205cdfe2b6d1026b10ce316b Mon Sep 17 00:00:00 2001 From: DerThorsten Date: Wed, 16 Oct 2024 08:23:05 +0200 Subject: [PATCH 04/14] renamed --- include/sparrow/layout/union_layout/union_array.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sparrow/layout/union_layout/union_array.hpp b/include/sparrow/layout/union_layout/union_array.hpp index 72ef830b..191722b6 100644 --- a/include/sparrow/layout/union_layout/union_array.hpp +++ b/include/sparrow/layout/union_layout/union_array.hpp @@ -182,7 +182,7 @@ namespace sparrow } inline std::size_t dense_union_array::element_offset(std::size_t i) const { - return static_cast(p_offsets[i]) + static_cast(m_proxy.offset()); + return static_cast(p_offsets[i]) + m_proxy.offset(); } inline std::size_t sparse_union_array::element_offset(std::size_t i) const From 9c2b27868497a7c772db78175561714e2ca0c21f Mon Sep 17 00:00:00 2001 From: DerThorsten Date: Wed, 16 Oct 2024 08:27:33 +0200 Subject: [PATCH 05/14] renamed --- include/sparrow/layout/union_layout/union_array.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sparrow/layout/union_layout/union_array.hpp b/include/sparrow/layout/union_layout/union_array.hpp index 191722b6..6ecbbf7e 100644 --- a/include/sparrow/layout/union_layout/union_array.hpp +++ b/include/sparrow/layout/union_layout/union_array.hpp @@ -187,6 +187,6 @@ namespace sparrow inline std::size_t sparse_union_array::element_offset(std::size_t i) const { - return i + static_cast(m_proxy.offset()); + return i + m_proxy.offset(); } } \ No newline at end of file From d889e8fa5e6b947e26d5a1451e3bf34ff5abfe23 Mon Sep 17 00:00:00 2001 From: DerThorsten Date: Wed, 16 Oct 2024 08:37:10 +0200 Subject: [PATCH 06/14] align --- include/sparrow/layout/union_layout/union_array.hpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/sparrow/layout/union_layout/union_array.hpp b/include/sparrow/layout/union_layout/union_array.hpp index 6ecbbf7e..8c907dc2 100644 --- a/include/sparrow/layout/union_layout/union_array.hpp +++ b/include/sparrow/layout/union_layout/union_array.hpp @@ -175,11 +175,20 @@ namespace sparrow return const_iterator(detail::layout_bracket_functor{this}, this->size()); } + #ifdef __GNUC__ + # pragma GCC diagnostic push + # pragma GCC diagnostic ignored "-Wcast-align" + #endif inline dense_union_array::dense_union_array(arrow_proxy proxy) : union_array_crtp_base(std::move(proxy)), p_offsets(reinterpret_cast(m_proxy.buffers()[1].data())) { } + + #ifdef __GNUC__ + # pragma GCC diagnostic pop + #endif + inline std::size_t dense_union_array::element_offset(std::size_t i) const { return static_cast(p_offsets[i]) + m_proxy.offset(); From 217293006f7cd926794e149a57f19b7c8451617c Mon Sep 17 00:00:00 2001 From: DerThorsten Date: Wed, 16 Oct 2024 08:51:01 +0200 Subject: [PATCH 07/14] align --- test/external_array_data_creation.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/test/external_array_data_creation.cpp b/test/external_array_data_creation.cpp index 597082fb..3baad5a0 100644 --- a/test/external_array_data_creation.cpp +++ b/test/external_array_data_creation.cpp @@ -14,6 +14,12 @@ #include "external_array_data_creation.hpp" +#ifdef __GNUC__ +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wcast-align" +#endif + + namespace sparrow::test { namespace detail @@ -376,7 +382,7 @@ namespace sparrow::test arr.n_buffers = 2; std::uint8_t** buf = new std::uint8_t*[2]; - + buf[0] = new std::uint8_t[type_ids.size()]; std::copy(type_ids.begin(), type_ids.end(), buf[0]); @@ -399,4 +405,6 @@ namespace sparrow::test } } - +#ifdef __GNUC__ +# pragma GCC diagnostic pop +#endif From 5a09ad8079d2035707d0db34d381c8786016963c Mon Sep 17 00:00:00 2001 From: DerThorsten Date: Wed, 16 Oct 2024 09:09:19 +0200 Subject: [PATCH 08/14] cleanup --- include/sparrow/layout/layout_utils.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/sparrow/layout/layout_utils.hpp b/include/sparrow/layout/layout_utils.hpp index 871bf006..00ac6471 100644 --- a/include/sparrow/layout/layout_utils.hpp +++ b/include/sparrow/layout/layout_utils.hpp @@ -18,7 +18,7 @@ namespace sparrow::detail { - + template class layout_functor_base { @@ -67,7 +67,7 @@ namespace sparrow::detail template class layout_bracket_functor : public layout_functor_base { - public: + public: using base_type = layout_functor_base; using base_type::base_type; using base_type::operator=; From f17866e00669c4d16741b929965b6baf90497fa0 Mon Sep 17 00:00:00 2001 From: DerThorsten Date: Wed, 16 Oct 2024 09:13:09 +0200 Subject: [PATCH 09/14] cleanup --- .../layout/union_layout/union_array.hpp | 43 ++++++++----------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/include/sparrow/layout/union_layout/union_array.hpp b/include/sparrow/layout/union_layout/union_array.hpp index 8c907dc2..3f1e7545 100644 --- a/include/sparrow/layout/union_layout/union_array.hpp +++ b/include/sparrow/layout/union_layout/union_array.hpp @@ -19,21 +19,16 @@ #include "sparrow/array_factory.hpp" #include "sparrow/layout/layout_utils.hpp" #include "sparrow/layout/nested_value_types.hpp" -#include "sparrow/utils/iterator.hpp" #include "sparrow/utils/memory.hpp" -#include "sparrow/utils/nullable.hpp" #include "sparrow/layout/array_helper.hpp" #include "sparrow/utils/crtp_base.hpp" #include "sparrow/utils/functor_index_iterator.hpp" - - namespace sparrow { - - // helper crtp-base to have sparse and dense and dense union in the same file + // helper crtp-base to have sparse and dense and dense union share most of their code template class union_array_crtp_base : public crtp_base { @@ -58,24 +53,7 @@ namespace sparrow protected: using type_id_map = std::array; - static type_id_map parse_type_id_map(std::string_view format_string) - { - type_id_map ret; - // remove +du: / +su: prefix - format_string.remove_prefix(4); - - // const std::string_view const_format_string = format_string; - - constexpr std::string_view delim { "," }; - std::size_t child_index = 0; - std::ranges::for_each(format_string | std::views::split(delim), [&](const auto& s) { - // convert s to uint8_t number - const auto as_int = std::atoi(std::string(s.begin(), s.end()).c_str()); - ret[static_cast(as_int)] = static_cast(child_index); - ++child_index; - }); - return ret; - } + static type_id_map parse_type_id_map(std::string_view format_string); arrow_proxy m_proxy; const std::uint8_t * p_type_ids; @@ -105,6 +83,23 @@ namespace sparrow friend class union_array_crtp_base; }; + template + auto union_array_crtp_base::parse_type_id_map(std::string_view format_string) -> type_id_map + { + type_id_map ret; + // remove +du: / +su: prefix + format_string.remove_prefix(4); + + constexpr std::string_view delim { "," }; + std::size_t child_index = 0; + std::ranges::for_each(format_string | std::views::split(delim), [&](const auto& s) { + const auto as_int = std::atoi(std::string(s.begin(), s.end()).c_str()); + ret[static_cast(as_int)] = static_cast(child_index); + ++child_index; + }); + return ret; + } + template union_array_crtp_base::union_array_crtp_base(arrow_proxy proxy) : m_proxy(std::move(proxy)), From d833b2d2d3ad20f4a45cc109f89a21d54339dc0b Mon Sep 17 00:00:00 2001 From: DerThorsten Date: Wed, 16 Oct 2024 09:21:42 +0200 Subject: [PATCH 10/14] integrated in factories and dispatching --- include/sparrow/layout/dispatch.hpp | 5 + .../layout/union_layout/union_array.hpp | 196 ------------------ src/array_factory.cpp | 8 +- test/test_union_array.cpp | 2 +- 4 files changed, 12 insertions(+), 199 deletions(-) delete mode 100644 include/sparrow/layout/union_layout/union_array.hpp diff --git a/include/sparrow/layout/dispatch.hpp b/include/sparrow/layout/dispatch.hpp index ccd4d8ac..b6f083b2 100644 --- a/include/sparrow/layout/dispatch.hpp +++ b/include/sparrow/layout/dispatch.hpp @@ -25,6 +25,7 @@ #include "sparrow/layout/run_end_encoded_layout/run_end_encoded_array.hpp" #include "sparrow/layout/list_layout/list_array.hpp" #include "sparrow/layout/struct_layout/struct_array.hpp" +#include "sparrow/layout/union_array.hpp" #include "sparrow/types/data_traits.hpp" namespace sparrow @@ -105,6 +106,10 @@ namespace sparrow return func(unwrap_array(ar)); case data_type::STRUCT: return func(unwrap_array(ar)); + case data_type::DENSE_UNION: + return func(unwrap_array(ar)); + case data_type::SPARSE_UNION: + return func(unwrap_array(ar)); default: throw std::invalid_argument("array type not supported"); } diff --git a/include/sparrow/layout/union_layout/union_array.hpp b/include/sparrow/layout/union_layout/union_array.hpp deleted file mode 100644 index 3f1e7545..00000000 --- a/include/sparrow/layout/union_layout/union_array.hpp +++ /dev/null @@ -1,196 +0,0 @@ -// Copyright 2024 Man Group Operations Limited -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "sparrow/config/config.hpp" -#include "sparrow/layout/array_wrapper.hpp" -#include "sparrow/array_factory.hpp" -#include "sparrow/layout/layout_utils.hpp" -#include "sparrow/layout/nested_value_types.hpp" -#include "sparrow/utils/memory.hpp" -#include "sparrow/layout/array_helper.hpp" -#include "sparrow/utils/crtp_base.hpp" -#include "sparrow/utils/functor_index_iterator.hpp" - - -namespace sparrow -{ - - // helper crtp-base to have sparse and dense and dense union share most of their code - template - class union_array_crtp_base : public crtp_base - { - public: - using derived_type = DERIVED; - using value_type = array_traits::const_reference; - using iterator = functor_index_iterator>; - using const_iterator = functor_index_iterator>; - - explicit union_array_crtp_base(arrow_proxy proxy); - value_type operator[](std::size_t i) const; - value_type operator[](std::size_t i); - - std::size_t size() const; - - iterator begin(); - iterator end(); - const_iterator begin() const; - const_iterator end() const; - const_iterator cbegin() const; - const_iterator cend() const; - - protected: - using type_id_map = std::array; - static type_id_map parse_type_id_map(std::string_view format_string); - - arrow_proxy m_proxy; - const std::uint8_t * p_type_ids; - std::vector> m_children; - - // map from type-id to child-index - std::array m_type_id_map; - - }; - - class dense_union_array : public union_array_crtp_base - { - public: - explicit dense_union_array(arrow_proxy proxy); - private: - std::size_t element_offset(std::size_t i) const; - const std::int32_t * p_offsets; - friend class union_array_crtp_base; - }; - - class sparse_union_array : public union_array_crtp_base - { - public: - using union_array_crtp_base::union_array_crtp_base; - private: - std::size_t element_offset(std::size_t i) const; - friend class union_array_crtp_base; - }; - - template - auto union_array_crtp_base::parse_type_id_map(std::string_view format_string) -> type_id_map - { - type_id_map ret; - // remove +du: / +su: prefix - format_string.remove_prefix(4); - - constexpr std::string_view delim { "," }; - std::size_t child_index = 0; - std::ranges::for_each(format_string | std::views::split(delim), [&](const auto& s) { - const auto as_int = std::atoi(std::string(s.begin(), s.end()).c_str()); - ret[static_cast(as_int)] = static_cast(child_index); - ++child_index; - }); - return ret; - } - - template - union_array_crtp_base::union_array_crtp_base(arrow_proxy proxy) - : m_proxy(std::move(proxy)), - p_type_ids(reinterpret_cast(m_proxy.buffers()[0].data())), - m_children(m_proxy.children().size(), nullptr), - m_type_id_map(parse_type_id_map(m_proxy.format())) - { - for (std::size_t i = 0; i < m_children.size(); ++i) - { - m_children[i] = array_factory(m_proxy.children()[i].view()); - } - } - - template - auto union_array_crtp_base::operator[](std::size_t i) const -> value_type - { - const auto type_id = static_cast(p_type_ids[i]); - const auto child_index = m_type_id_map[type_id]; - const auto offset = this->derived_cast().element_offset(i); - return array_element(*m_children[child_index], static_cast(offset)); - } - - template - auto union_array_crtp_base::operator[](std::size_t i) -> value_type - { - return static_cast(*this)[i]; - } - - template - std::size_t union_array_crtp_base::size() const - { - return m_proxy.length(); - } - - template - auto union_array_crtp_base::begin() -> iterator - { - return iterator(detail::layout_bracket_functor{this}, 0); - } - - template - auto union_array_crtp_base::end() -> iterator - { - return iterator(detail::layout_bracket_functor{this}, this->size()); - } - - template - auto union_array_crtp_base::begin() const -> const_iterator - { - return cbegin(); - } - - template - auto union_array_crtp_base::end() const -> const_iterator - { - return cend(); - } - - template - auto union_array_crtp_base::cbegin() const -> const_iterator - { - return const_iterator(detail::layout_bracket_functor{this}, 0); - } - - template - auto union_array_crtp_base::cend() const -> const_iterator - { - return const_iterator(detail::layout_bracket_functor{this}, this->size()); - } - - #ifdef __GNUC__ - # pragma GCC diagnostic push - # pragma GCC diagnostic ignored "-Wcast-align" - #endif - inline dense_union_array::dense_union_array(arrow_proxy proxy) - : union_array_crtp_base(std::move(proxy)), - p_offsets(reinterpret_cast(m_proxy.buffers()[1].data())) - { - } - - #ifdef __GNUC__ - # pragma GCC diagnostic pop - #endif - - inline std::size_t dense_union_array::element_offset(std::size_t i) const - { - return static_cast(p_offsets[i]) + m_proxy.offset(); - } - - inline std::size_t sparse_union_array::element_offset(std::size_t i) const - { - return i + m_proxy.offset(); - } -} \ No newline at end of file diff --git a/src/array_factory.cpp b/src/array_factory.cpp index 80364152..d1c3f6c5 100644 --- a/src/array_factory.cpp +++ b/src/array_factory.cpp @@ -22,6 +22,8 @@ #include "sparrow/layout/null_array.hpp" #include "sparrow/layout/variable_size_binary_array.hpp" #include "sparrow/layout/run_end_encoded_layout/run_end_encoded_array.hpp" +#include "sparrow/layout/union_array.hpp" + namespace sparrow { namespace detail @@ -105,11 +107,13 @@ namespace sparrow return detail::make_wrapper_ptr>(std::move(proxy)); case data_type::RUN_ENCODED: return detail::make_wrapper_ptr(std::move(proxy)); + case data_type::DENSE_UNION: + return detail::make_wrapper_ptr(std::move(proxy)); + case data_type::SPARSE_UNION: + return detail::make_wrapper_ptr(std::move(proxy)); case data_type::FIXED_SIZE_BINARY: case data_type::TIMESTAMP: case data_type::MAP: - case data_type::DENSE_UNION: - case data_type::SPARSE_UNION: case data_type::DECIMAL: case data_type::FIXED_WIDTH_BINARY: throw std::runtime_error("not yet supported data type"); diff --git a/test/test_union_array.cpp b/test/test_union_array.cpp index fabd64c1..ed62d15a 100644 --- a/test/test_union_array.cpp +++ b/test/test_union_array.cpp @@ -20,7 +20,7 @@ #include "test_utils.hpp" #include "../test/external_array_data_creation.hpp" -#include "sparrow/layout/union_layout/union_array.hpp" +#include "sparrow/layout/union_array.hpp" namespace sparrow { From 97b13e9c61694bb72a30c501000295df05d773df Mon Sep 17 00:00:00 2001 From: DerThorsten Date: Wed, 16 Oct 2024 09:26:15 +0200 Subject: [PATCH 11/14] missing file --- include/sparrow/layout/union_array.hpp | 224 +++++++++++++++++++++++++ 1 file changed, 224 insertions(+) create mode 100644 include/sparrow/layout/union_array.hpp diff --git a/include/sparrow/layout/union_array.hpp b/include/sparrow/layout/union_array.hpp new file mode 100644 index 00000000..ef6f6f0a --- /dev/null +++ b/include/sparrow/layout/union_array.hpp @@ -0,0 +1,224 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "sparrow/config/config.hpp" +#include "sparrow/layout/array_wrapper.hpp" +#include "sparrow/array_factory.hpp" +#include "sparrow/layout/layout_utils.hpp" +#include "sparrow/layout/nested_value_types.hpp" +#include "sparrow/utils/memory.hpp" +#include "sparrow/layout/array_helper.hpp" +#include "sparrow/utils/crtp_base.hpp" +#include "sparrow/utils/functor_index_iterator.hpp" + + +namespace sparrow +{ + + class dense_union_array; + class sparse_union_array; + + + namespace detail + { + template + struct get_data_type_from_array; + + template<> + struct get_data_type_from_array + { + constexpr static sparrow::data_type get() + { + return sparrow::data_type::DENSE_UNION; + } + }; + template<> + struct get_data_type_from_array + { + constexpr static sparrow::data_type get() + { + return sparrow::data_type::SPARSE_UNION; + } + }; + } + + // helper crtp-base to have sparse and dense and dense union share most of their code + template + class union_array_crtp_base : public crtp_base + { + public: + using derived_type = DERIVED; + using inner_value_type = array_traits::inner_value_type; + using value_type = array_traits::const_reference; + using iterator = functor_index_iterator>; + using const_iterator = functor_index_iterator>; + + explicit union_array_crtp_base(arrow_proxy proxy); + value_type operator[](std::size_t i) const; + value_type operator[](std::size_t i); + + std::size_t size() const; + + iterator begin(); + iterator end(); + const_iterator begin() const; + const_iterator end() const; + const_iterator cbegin() const; + const_iterator cend() const; + + protected: + using type_id_map = std::array; + static type_id_map parse_type_id_map(std::string_view format_string); + + arrow_proxy m_proxy; + const std::uint8_t * p_type_ids; + std::vector> m_children; + + // map from type-id to child-index + std::array m_type_id_map; + + }; + + class dense_union_array : public union_array_crtp_base + { + public: + explicit dense_union_array(arrow_proxy proxy); + private: + std::size_t element_offset(std::size_t i) const; + const std::int32_t * p_offsets; + friend class union_array_crtp_base; + }; + + class sparse_union_array : public union_array_crtp_base + { + public: + using union_array_crtp_base::union_array_crtp_base; + private: + std::size_t element_offset(std::size_t i) const; + friend class union_array_crtp_base; + }; + + template + auto union_array_crtp_base::parse_type_id_map(std::string_view format_string) -> type_id_map + { + type_id_map ret; + // remove +du: / +su: prefix + format_string.remove_prefix(4); + + constexpr std::string_view delim { "," }; + std::size_t child_index = 0; + std::ranges::for_each(format_string | std::views::split(delim), [&](const auto& s) { + const auto as_int = std::atoi(std::string(s.begin(), s.end()).c_str()); + ret[static_cast(as_int)] = static_cast(child_index); + ++child_index; + }); + return ret; + } + + template + union_array_crtp_base::union_array_crtp_base(arrow_proxy proxy) + : m_proxy(std::move(proxy)), + p_type_ids(reinterpret_cast(m_proxy.buffers()[0].data())), + m_children(m_proxy.children().size(), nullptr), + m_type_id_map(parse_type_id_map(m_proxy.format())) + { + for (std::size_t i = 0; i < m_children.size(); ++i) + { + m_children[i] = array_factory(m_proxy.children()[i].view()); + } + } + + template + auto union_array_crtp_base::operator[](std::size_t i) const -> value_type + { + const auto type_id = static_cast(p_type_ids[i]); + const auto child_index = m_type_id_map[type_id]; + const auto offset = this->derived_cast().element_offset(i); + return array_element(*m_children[child_index], static_cast(offset)); + } + + template + auto union_array_crtp_base::operator[](std::size_t i) -> value_type + { + return static_cast(*this)[i]; + } + + template + std::size_t union_array_crtp_base::size() const + { + return m_proxy.length(); + } + + template + auto union_array_crtp_base::begin() -> iterator + { + return iterator(detail::layout_bracket_functor{this}, 0); + } + + template + auto union_array_crtp_base::end() -> iterator + { + return iterator(detail::layout_bracket_functor{this}, this->size()); + } + + template + auto union_array_crtp_base::begin() const -> const_iterator + { + return cbegin(); + } + + template + auto union_array_crtp_base::end() const -> const_iterator + { + return cend(); + } + + template + auto union_array_crtp_base::cbegin() const -> const_iterator + { + return const_iterator(detail::layout_bracket_functor{this}, 0); + } + + template + auto union_array_crtp_base::cend() const -> const_iterator + { + return const_iterator(detail::layout_bracket_functor{this}, this->size()); + } + + #ifdef __GNUC__ + # pragma GCC diagnostic push + # pragma GCC diagnostic ignored "-Wcast-align" + #endif + inline dense_union_array::dense_union_array(arrow_proxy proxy) + : union_array_crtp_base(std::move(proxy)), + p_offsets(reinterpret_cast(m_proxy.buffers()[1].data())) + { + } + + #ifdef __GNUC__ + # pragma GCC diagnostic pop + #endif + + inline std::size_t dense_union_array::element_offset(std::size_t i) const + { + return static_cast(p_offsets[i]) + m_proxy.offset(); + } + + inline std::size_t sparse_union_array::element_offset(std::size_t i) const + { + return i + m_proxy.offset(); + } +} \ No newline at end of file From 688b4dc984cf1f757519035b55155f52b24d34bc Mon Sep 17 00:00:00 2001 From: DerThorsten Date: Wed, 16 Oct 2024 09:27:35 +0200 Subject: [PATCH 12/14] comment --- include/sparrow/layout/union_array.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sparrow/layout/union_array.hpp b/include/sparrow/layout/union_array.hpp index ef6f6f0a..befaaf8d 100644 --- a/include/sparrow/layout/union_array.hpp +++ b/include/sparrow/layout/union_array.hpp @@ -131,7 +131,7 @@ namespace sparrow template union_array_crtp_base::union_array_crtp_base(arrow_proxy proxy) : m_proxy(std::move(proxy)), - p_type_ids(reinterpret_cast(m_proxy.buffers()[0].data())), + p_type_ids(reinterpret_cast(m_proxy.buffers()[0/*index of type-ids*/].data())), m_children(m_proxy.children().size(), nullptr), m_type_id_map(parse_type_id_map(m_proxy.format())) { From a009852e408efe1dc2b10d619f8888e768e280a1 Mon Sep 17 00:00:00 2001 From: DerThorsten Date: Wed, 16 Oct 2024 09:28:21 +0200 Subject: [PATCH 13/14] comment --- include/sparrow/layout/union_array.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sparrow/layout/union_array.hpp b/include/sparrow/layout/union_array.hpp index befaaf8d..84b56da0 100644 --- a/include/sparrow/layout/union_array.hpp +++ b/include/sparrow/layout/union_array.hpp @@ -204,7 +204,7 @@ namespace sparrow #endif inline dense_union_array::dense_union_array(arrow_proxy proxy) : union_array_crtp_base(std::move(proxy)), - p_offsets(reinterpret_cast(m_proxy.buffers()[1].data())) + p_offsets(reinterpret_cast(m_proxy.buffers()[1/*index of offsets*/].data())) { } From 6e10e01d54b40cb4fe55aa11b84ba34c78e9c0f9 Mon Sep 17 00:00:00 2001 From: DerThorsten Date: Wed, 16 Oct 2024 10:34:23 +0200 Subject: [PATCH 14/14] merged --- include/sparrow/layout/array_base.hpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/sparrow/layout/array_base.hpp b/include/sparrow/layout/array_base.hpp index 2ef7fcc2..bf84f9be 100644 --- a/include/sparrow/layout/array_base.hpp +++ b/include/sparrow/layout/array_base.hpp @@ -154,10 +154,10 @@ namespace sparrow template auto array_crtp_base::operator[](size_type i) -> reference { - SPARROW_ASSERT_TRUE(i < derived_cast().size()); + SPARROW_ASSERT_TRUE(i < this->derived_cast().size()); return reference( - inner_reference(derived_cast().value(i)), - derived_cast().has_value(i) + inner_reference(this->derived_cast().value(i)), + this->derived_cast().has_value(i) ); } @@ -219,6 +219,7 @@ namespace sparrow return const_value_range(this->derived_cast().value_cbegin(), this->derived_cast().value_cend()); } + template array_crtp_base::array_crtp_base(arrow_proxy proxy) : m_proxy(std::move(proxy)) , m_bitmap(make_bitmap())