From 0153cf3a04dfaf0cd5041cd3c3e82ffdd76e2030 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Wed, 9 Oct 2024 09:59:02 +0200 Subject: [PATCH] Add resizing method in primitive array --- CMakeLists.txt | 10 +- include/sparrow/arrow_array_schema_proxy.hpp | 163 ++++- .../arrow_array_schema_proxy_factory.hpp | 40 + .../arrow_array/private_data.hpp | 19 +- .../arrow_array_schema_factory.hpp | 12 +- .../arrow_array_schema_info_utils.hpp | 50 +- .../arrow_schema/private_data.hpp | 12 + include/sparrow/layout/array_base.hpp | 257 ++++++- include/sparrow/layout/primitive_array.hpp | 83 ++- include/sparrow/utils/algorithm.hpp | 2 - src/arrow_array_schema_proxy.cpp | 193 ++++- src/arrow_interface/arrow_array.cpp | 2 +- test/test_arrow_array_schema_proxy.cpp | 353 ++++++++- test/test_dictionary_encoded_array.cpp | 2 +- test/test_list_array.cpp | 125 ++-- test/test_primitive_array.cpp | 686 +++++++++++++++--- 16 files changed, 1761 insertions(+), 248 deletions(-) create mode 100644 include/sparrow/arrow_array_schema_proxy_factory.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index c94693c0..1a67bbcf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -120,10 +120,15 @@ set(SPARROW_HEADERS ${SPARROW_INCLUDE_DIR}/sparrow/arrow_interface/arrow_schema/smart_pointers.hpp # buffer ${SPARROW_INCLUDE_DIR}/sparrow/buffer/allocator.hpp - ${SPARROW_INCLUDE_DIR}/sparrow/buffer/buffer.hpp ${SPARROW_INCLUDE_DIR}/sparrow/buffer/buffer_adaptor.hpp ${SPARROW_INCLUDE_DIR}/sparrow/buffer/buffer_view.hpp + ${SPARROW_INCLUDE_DIR}/sparrow/buffer/buffer.hpp ${SPARROW_INCLUDE_DIR}/sparrow/buffer/dynamic_bitset.hpp + ${SPARROW_INCLUDE_DIR}/sparrow/buffer/dynamic_bitset/dynamic_bitset_base.hpp + ${SPARROW_INCLUDE_DIR}/sparrow/buffer/dynamic_bitset/bitset_iterator.hpp + ${SPARROW_INCLUDE_DIR}/sparrow/buffer/dynamic_bitset/bitset_reference.hpp + ${SPARROW_INCLUDE_DIR}/sparrow/buffer/dynamic_bitset/dynamic_bitset_view.hpp + ${SPARROW_INCLUDE_DIR}/sparrow/buffer/dynamic_bitset/dynamic_bitset.hpp # 
config ${SPARROW_INCLUDE_DIR}/sparrow/config/config.hpp ${SPARROW_INCLUDE_DIR}/sparrow/config/sparrow_version.hpp @@ -178,8 +183,7 @@ set(SPARROW_SRC ${SPARROW_SOURCE_DIR}/arrow_interface/arrow_schema.cpp ${SPARROW_SOURCE_DIR}/list_value.cpp ${SPARROW_SOURCE_DIR}/run_encoded_array.cpp - ${SPARROW_SOURCE_DIR}/struct_value.cpp -) + ${SPARROW_SOURCE_DIR}/struct_value.cpp) add_library(sparrow SHARED ${SPARROW_HEADERS} ${SPARROW_SRC}) # TODO: handle static lib, so name and versionning diff --git a/include/sparrow/arrow_array_schema_proxy.hpp b/include/sparrow/arrow_array_schema_proxy.hpp index da9b2203..abd45f53 100644 --- a/include/sparrow/arrow_array_schema_proxy.hpp +++ b/include/sparrow/arrow_array_schema_proxy.hpp @@ -18,12 +18,15 @@ #include #include "sparrow/arrow_interface/arrow_array/private_data.hpp" +#include "sparrow/arrow_interface/arrow_array_schema_info_utils.hpp" #include "sparrow/arrow_interface/arrow_schema/private_data.hpp" #include "sparrow/buffer/buffer_view.hpp" +#include "sparrow/buffer/dynamic_bitset/non_owning_dynamic_bitset.hpp" #include "sparrow/c_interface.hpp" #include "sparrow/config/config.hpp" #include "sparrow/types/data_type.hpp" + namespace sparrow { /** @@ -122,7 +125,9 @@ namespace sparrow [[nodiscard]] SPARROW_API size_t length() const; /** - * Set the length of the `ArrowArray`. + * Set the length of the `ArrowArray`. This method does not resize the buffers of the `ArrowArray`. + * You have to change the length before replacing/resizing the buffers to have the right sizes when + * calling `buffers()`. * @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow. * @param length The length to set. */ @@ -130,12 +135,12 @@ namespace sparrow [[nodiscard]] SPARROW_API int64_t null_count() const; /** - * Set the null count of the `ArrowArray`. + * Set the null count of the `ArrowArray`. This method does not change the bitmap. * @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow. 
* @param null_count The null count to set. */ SPARROW_API void set_null_count(int64_t null_count); - [[nodiscard]] SPARROW_API size_t offset() const; + [[nodiscard]] SPARROW_API size_t offset() const; /** * Set the offset of the `ArrowArray`. @@ -146,7 +151,8 @@ namespace sparrow [[nodiscard]] SPARROW_API size_t n_buffers() const; /** - * Set the number of buffers of the `ArrowArray`. + * Set the number of buffers of the `ArrowArray`. Resize the buffers vector of the `ArrowArray` + * private data. * @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow. * @param n_buffers The number of buffers to set. */ @@ -156,7 +162,8 @@ namespace sparrow [[nodiscard]] SPARROW_API std::vector>& buffers(); /** - * Set the buffer at the given index. + * Set the buffer at the given index. You have to call the `set_length` method before calling this + * method to have the right sizes when calling `buffers()`. * @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow. * @param index The index of the buffer to set. * @param buffer The buffer to set. @@ -164,13 +171,117 @@ namespace sparrow SPARROW_API void set_buffer(size_t index, const buffer_view& buffer); /** - * Set the buffer at the given index. + * Set the buffer at the given index. You have to call the `set_length` method before calling this + * method to have the right sizes when calling `buffers()`. * @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow. * @param index The index of the buffer to set. * @param buffer The buffer to set. */ SPARROW_API void set_buffer(size_t index, buffer&& buffer); + /** + * Resize the bitmap buffer of the `ArrowArray`. + * @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow. + * @exception `arrow_proxy_exception` If the array format does not support a validity bitmap. + * @param new_size The new size of the bitmap buffer. 
+ */ + SPARROW_API void resize_bitmap(size_t new_size); + + /** + * Insert a value in the bitmap buffer at the given index. + * @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow. + * @exception `arrow_proxy_exception` If the array format does not support a validity bitmap. + * @exception `std::out_of_range` If the index is greater than the length of the bitmap. + * @param index The index where to insert the value. Must be less than or equal to the length of the bitmap. + * @param value The value to insert. + * @return The index of the inserted value. + */ + SPARROW_API size_t insert_bitmap(size_t index, bool value); + + /** + * Insert several elements of the same value in the bitmap buffer at the given index. + * @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow. + * @exception `arrow_proxy_exception` If the array format does not support a validity bitmap. + * @exception `std::out_of_range` If the index is greater than the length of the bitmap. + * @param index The index where to insert the value. Must be less than or equal to the length of the bitmap. + * @param value The value to insert. + * @param count The number of times to insert the value. + * @return The index of the first inserted value. + */ + SPARROW_API size_t insert_bitmap(size_t index, bool value, size_t count); + + /** + * Insert several elements in the bitmap buffer at the given index. + * @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow. + * @exception `arrow_proxy_exception` If the array format does not support a validity bitmap. + * @exception `std::out_of_range` If the index is greater than the length of the bitmap. + * @param index The index where to insert the values. Must be less than or equal to the length of the bitmap. + * @param values The values to insert. + * @return The index of the first inserted value. 
+ */ + SPARROW_API size_t insert_bitmap(size_t index, std::initializer_list values); + + /** + * Insert several elements in the bitmap buffer at the given index. + * @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow. + * @exception `arrow_proxy_exception` If the array format does not support a validity bitmap. + * @exception `std::out_of_range` If the index is greater than the length of the bitmap. + * @param index The index where to insert the values. Must be less than or equal to the length of the bitmap. + * @param first The beginning of the range of values to insert. + * @param last The end of the range of values to insert. + * @return The index of the first inserted value. + */ + template + size_t insert_bitmap(size_t index, InputIt first, InputIt last); + + /** + * Insert several elements in the bitmap buffer at the given index. + * @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow. + * @exception `arrow_proxy_exception` If the array format does not support a validity bitmap. + * @exception `std::out_of_range` If the index is greater than the length of the bitmap. + * @param index The index where to insert the values. Must be less than or equal to the length of the bitmap. + * @param range The range of values to insert. + * @return The index of the first inserted value. + */ + template + size_t insert_bitmap(size_t index, const R& range); + + /** + * Erase a value in the bitmap buffer at the given index. + * @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow. + * @exception `arrow_proxy_exception` If the array format does not support a validity bitmap. + * @exception `std::out_of_range` If the index is greater than the length of the bitmap. + * @param index The index of the element to erase. Must be less than the length of the bitmap. + * @return The index of the erased value. 
+ */ + SPARROW_API size_t erase_bitmap(size_t index); + + /** + * Erase several elements in the bitmap buffer at the given index. + * @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow. + * @exception `arrow_proxy_exception` If the array format does not support a validity bitmap. + * @exception `std::out_of_range` If the index is greater than the length of the bitmap. + * @param index The index of the first value to erase. Must be less than the length of the bitmap. + * @param count The number of elements to erase. + * @return The index of the first erased value. + */ + SPARROW_API size_t erase_bitmap(size_t index, size_t count); + + /** + * Push a value at the end of the bitmap buffer. + * @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow. + * @exception `arrow_proxy_exception` If the array format does not support a validity bitmap. + * @param value The value to push. + */ + SPARROW_API void push_back_bitmap(bool value); + + /** + * Pop a value at the end of the bitmap buffer. + * @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow. + * @exception `arrow_proxy_exception` If the array format does not support a validity bitmap. + */ + SPARROW_API void pop_back_bitmap(); + /** * Add children. 
* @exception `arrow_proxy_exception` If the `ArrowArray` or `ArrowSchema` were not created with @@ -235,6 +346,11 @@ namespace sparrow [[nodiscard]] SPARROW_API ArrowSchema& schema(); [[nodiscard]] SPARROW_API const ArrowSchema& schema() const; + [[nodiscard]] SPARROW_API arrow_schema_private_data* get_schema_private_data(); + [[nodiscard]] SPARROW_API arrow_array_private_data* get_array_private_data(); + + SPARROW_API void update_buffers(); + private: std::variant m_array; @@ -254,7 +370,8 @@ namespace sparrow SPARROW_API void resize_children(size_t children_count); - void update_buffers(); + [[nodiscard]] SPARROW_API non_owning_dynamic_bitset get_non_owning_dynamic_bitset(); + void update_children(); void update_dictionary(); void update_null_count(); @@ -265,13 +382,12 @@ namespace sparrow void validate_array_and_schema() const; - arrow_schema_private_data* get_schema_private_data(); - arrow_array_private_data* get_array_private_data(); - [[nodiscard]] bool is_arrow_array_valid() const; [[nodiscard]] bool is_arrow_schema_valid() const; [[nodiscard]] bool is_proxy_valid() const; + [[nodiscard]] size_t get_null_count() const; + void swap(arrow_proxy& other) noexcept; }; @@ -298,4 +414,31 @@ namespace sparrow ); } } + + template + inline size_t arrow_proxy::insert_bitmap(size_t index, InputIt first, InputIt last) + { + if (!is_created_with_sparrow()) + { + throw arrow_proxy_exception("Cannot modify the bitmap on non-sparrow created ArrowArray"); + } + SPARROW_ASSERT_TRUE(has_bitmap(data_type())) + SPARROW_ASSERT_TRUE(first <= last) + SPARROW_ASSERT_TRUE(index <= length()) + + auto bitmap = get_non_owning_dynamic_bitset(); + const auto it = bitmap.insert(sparrow::next(bitmap.cbegin(), index), first, last); + return static_cast(std::distance(bitmap.begin(), it)); + } + + template + inline size_t arrow_proxy::insert_bitmap(size_t index, const R& range) + { + if (!is_created_with_sparrow()) + { + throw arrow_proxy_exception("Cannot modify the bitmap on 
non-sparrow created ArrowArray"); + } + SPARROW_ASSERT_TRUE(has_bitmap(data_type())) + return insert_bitmap(index, std::ranges::begin(range), std::ranges::end(range)); + } } diff --git a/include/sparrow/arrow_array_schema_proxy_factory.hpp b/include/sparrow/arrow_array_schema_proxy_factory.hpp new file mode 100644 index 00000000..30c86fdd --- /dev/null +++ b/include/sparrow/arrow_array_schema_proxy_factory.hpp @@ -0,0 +1,40 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include "sparrow/arrow_array_schema_proxy.hpp" +#include "sparrow/arrow_interface/arrow_array_schema_factory.hpp" +#include "sparrow/types/data_traits.hpp" + +namespace sparrow +{ + template + requires std::is_arithmetic_v> + && std::integral> + arrow_proxy make_primitive_arrow_proxy( + Values&& values, + Nulls&& nulls, + int64_t offset, + std::string_view name, + std::optional metadata + ) + { + using ValueType = std::ranges::range_value_t; + return arrow_proxy{ + make_primitive_arrow_array(std::forward(values), std::forward(nulls), offset), + make_primitive_arrow_schema(arrow_traits::type_id, name, metadata, std::nullopt) + }; + } +} diff --git a/include/sparrow/arrow_interface/arrow_array/private_data.hpp b/include/sparrow/arrow_interface/arrow_array/private_data.hpp index 4018288a..bd8d0244 100644 --- a/include/sparrow/arrow_interface/arrow_array/private_data.hpp +++ b/include/sparrow/arrow_interface/arrow_array/private_data.hpp @@ -41,11 +41,12 @@ namespace sparrow [[nodiscard]] constexpr BufferType& buffers() noexcept; [[nodiscard]] constexpr const BufferType& buffers() const noexcept; - + constexpr void resize_buffers(std::size_t size); void set_buffer(std::size_t index, buffer&& buffer); void set_buffer(std::size_t index, const buffer_view& buffer); constexpr void resize_buffer(std::size_t index, std::size_t size, std::uint8_t value); + constexpr void update_buffers_ptrs(); template [[nodiscard]] constexpr const T** buffers_ptrs() noexcept; @@ -62,8 +63,7 @@ namespace sparrow { } - [[nodiscard]] constexpr std::vector>& - arrow_array_private_data::buffers() noexcept + [[nodiscard]] constexpr std::vector>& arrow_array_private_data::buffers() noexcept { return m_buffers; } @@ -77,21 +77,21 @@ namespace sparrow constexpr void arrow_array_private_data::resize_buffers(std::size_t size) { m_buffers.resize(size); - m_buffers_pointers = to_raw_ptr_vec(m_buffers); + update_buffers_ptrs(); } inline void 
arrow_array_private_data::set_buffer(std::size_t index, buffer&& buffer) { SPARROW_ASSERT_TRUE(index < m_buffers.size()); m_buffers[index] = std::move(buffer); - m_buffers_pointers[index] = m_buffers[index].data(); + update_buffers_ptrs(); } inline void arrow_array_private_data::set_buffer(std::size_t index, const buffer_view& buffer) { SPARROW_ASSERT_TRUE(index < m_buffers.size()); m_buffers[index] = buffer; - m_buffers_pointers[index] = m_buffers[index].data(); + update_buffers_ptrs(); } constexpr void @@ -99,7 +99,7 @@ namespace sparrow { SPARROW_ASSERT_TRUE(index < m_buffers.size()); m_buffers[index].resize(size, value); - m_buffers_pointers[index] = m_buffers[index].data(); + update_buffers_ptrs(); } template @@ -107,4 +107,9 @@ namespace sparrow { return const_cast(reinterpret_cast(m_buffers_pointers.data())); } + + constexpr void arrow_array_private_data::update_buffers_ptrs() + { + m_buffers_pointers = to_raw_ptr_vec(m_buffers); + } } diff --git a/include/sparrow/arrow_interface/arrow_array_schema_factory.hpp b/include/sparrow/arrow_interface/arrow_array_schema_factory.hpp index 93492280..b4822ec8 100644 --- a/include/sparrow/arrow_interface/arrow_array_schema_factory.hpp +++ b/include/sparrow/arrow_interface/arrow_array_schema_factory.hpp @@ -55,7 +55,7 @@ namespace sparrow } template - requires(std::integral>) + requires(std::integral> && !std::same_as, bool>) buffer make_bitmap_buffer(size_t count, R&& nulls) { if (!std::ranges::empty(nulls)) @@ -98,6 +98,16 @@ namespace sparrow return make_arrow_array(length, null_count, offset, std::move(value_buffers), 0, nullptr, nullptr); } + inline ArrowSchema make_primitive_arrow_schema( + data_type data_type, + std::string_view name, + std::optional metadata, + std::optional arrow_flag + ) + { + return make_arrow_schema(data_type_to_format(data_type), name, metadata, arrow_flag, 0, nullptr, nullptr); + } + template < std::ranges::sized_range Keys, std::ranges::sized_range KeyNulls, diff --git 
a/include/sparrow/arrow_interface/arrow_array_schema_info_utils.hpp b/include/sparrow/arrow_interface/arrow_array_schema_info_utils.hpp index e6a247e4..25293fe1 100644 --- a/include/sparrow/arrow_interface/arrow_array_schema_info_utils.hpp +++ b/include/sparrow/arrow_interface/arrow_array_schema_info_utils.hpp @@ -26,7 +26,7 @@ namespace sparrow { /// @returns `true` if the number of buffers in an `ArrowArray` for a given data type is valid, `false` /// otherwise. - constexpr bool validate_buffers_count(data_type data_type, int64_t n_buffers) + constexpr bool validate_buffers_count(data_type data_type, int64_t n_buffers) { const std::size_t expected_buffer_count = get_expected_buffer_count(data_type); return static_cast(n_buffers) == expected_buffer_count; @@ -74,16 +74,17 @@ namespace sparrow } /// @returns `true` if the format of an `ArrowArray` for a given data type is valid, `false` otherwise. - inline bool validate_format_with_arrow_array(data_type , const ArrowArray& ) + inline bool validate_format_with_arrow_array(data_type, const ArrowArray&) { - return true; + return true; /* THE CODE BELOW MAKES WRONG ASSUMPTIONS AND NEEDS TO BE REFACTORED IN A SEPERATE PR*/ // const bool buffers_count_valid = validate_buffers_count(data_type, array.n_buffers); // // const bool children_count_valid = static_cast(array.n_children) // // == get_expected_children_count(data_type); - // //std::cout<<"child cound: "< get_buffer_types_from_data_type(data_type data_type) @@ -232,4 +232,42 @@ namespace sparrow mpl::unreachable(); } + constexpr bool has_bitmap(data_type dt) + { + switch (dt) + { + case data_type::BOOL: + case data_type::INT8: + case data_type::INT16: + case data_type::INT32: + case data_type::INT64: + case data_type::UINT8: + case data_type::UINT16: + case data_type::UINT32: + case data_type::UINT64: + case data_type::HALF_FLOAT: + case data_type::FLOAT: + case data_type::DOUBLE: + case data_type::TIMESTAMP: + case data_type::DECIMAL: + case data_type::LIST: + 
case data_type::STRUCT: + case data_type::MAP: + case data_type::STRING: + case data_type::BINARY: + case data_type::FIXED_SIZE_BINARY: + case data_type::FIXED_WIDTH_BINARY: + case data_type::LARGE_LIST: + case data_type::LIST_VIEW: + case data_type::LARGE_LIST_VIEW: + case data_type::FIXED_SIZED_LIST: + return true; + case data_type::NA: + case data_type::SPARSE_UNION: + case data_type::DENSE_UNION: + case data_type::RUN_ENCODED: + return false; + } + mpl::unreachable(); + } } diff --git a/include/sparrow/arrow_interface/arrow_schema/private_data.hpp b/include/sparrow/arrow_interface/arrow_schema/private_data.hpp index a6e2b6cd..22c3ca39 100644 --- a/include/sparrow/arrow_interface/arrow_schema/private_data.hpp +++ b/include/sparrow/arrow_interface/arrow_schema/private_data.hpp @@ -92,8 +92,20 @@ namespace sparrow { return std::string(t.cbegin(), t.cend()); } + else if constexpr (mpl::is_type_instance_of_v) + { + if (t.has_value()) + { + return to_optional_string(*t); + } + else + { + return std::nullopt; + } + } else { + static_assert(mpl::dependent_false::value, "to_optional_string: unsupported type."); mpl::unreachable(); } } diff --git a/include/sparrow/layout/array_base.hpp b/include/sparrow/layout/array_base.hpp index 81f117d9..fb327001 100644 --- a/include/sparrow/layout/array_base.hpp +++ b/include/sparrow/layout/array_base.hpp @@ -14,18 +14,31 @@ #pragma once +#include #include #include +#include #include "sparrow/arrow_array_schema_proxy.hpp" -#include "sparrow/buffer/dynamic_bitset.hpp" +#include "sparrow/buffer/dynamic_bitset/dynamic_bitset_view.hpp" #include "sparrow/layout/layout_iterator.hpp" -#include "sparrow/utils/nullable.hpp" -#include "sparrow/utils/iterator.hpp" #include "sparrow/utils/crtp_base.hpp" +#include "sparrow/utils/iterator.hpp" +#include "sparrow/utils/nullable.hpp" namespace sparrow { + /** + * Make a simple bitmap from an arrow proxy. 
+ */ + [[nodiscard]] inline dynamic_bitset_view make_simple_bitmap(arrow_proxy& arrow_proxy) + { + constexpr size_t bitmap_buffer_index = 0; + SPARROW_ASSERT_TRUE(arrow_proxy.buffers().size() > bitmap_buffer_index); + const auto bitmap_size = arrow_proxy.length() + arrow_proxy.offset(); + return {arrow_proxy.buffers()[bitmap_buffer_index].data(), bitmap_size}; + } + /** * Base class for array_inner_types specialization * @@ -54,6 +67,7 @@ namespace sparrow class array_crtp_base : public crtp_base { public: + using self_type = array_crtp_base; using derived_type = D; using inner_types = array_inner_types; @@ -78,13 +92,13 @@ namespace sparrow using iterator = layout_iterator; using const_iterator = layout_iterator; - + using value_iterator = typename inner_types::value_iterator; using const_value_iterator = typename inner_types::const_value_iterator; using const_value_range = std::ranges::subrange; - size_type size() const; + [[nodiscard]] size_type size() const; reference operator[](size_type i); const_reference operator[](size_type i) const; @@ -101,6 +115,22 @@ namespace sparrow const_bitmap_range bitmap() const; const_value_range values() const; + void resize(size_type new_size, const value_type& value); + + iterator insert(const_iterator pos, const value_type& value); + iterator insert(const_iterator pos, const value_type& value, size_type count); + iterator insert(const_iterator pos, std::initializer_list values); + template + iterator insert(const_iterator pos, InputIt first, InputIt last); + template + iterator insert(const_iterator pos, const R& range); + + iterator erase(const_iterator pos); + iterator erase(const_iterator first, const_iterator last); + + void push_back(const value_type& value); + void pop_back(); + protected: array_crtp_base(arrow_proxy); @@ -123,6 +153,9 @@ namespace sparrow const_bitmap_iterator bitmap_begin() const; const_bitmap_iterator bitmap_end() const; + const_bitmap_iterator bitmap_cbegin() const; + const_bitmap_iterator 
bitmap_cend() const; + private: arrow_proxy& get_arrow_proxy(); @@ -150,6 +183,9 @@ namespace sparrow using base_type = array_crtp_base; using bitmap_type = typename base_type::bitmap_type; + using bitmap_iterator = typename base_type::bitmap_iterator; + using const_bitmap_iterator = typename base_type::const_bitmap_iterator; + using size_type = typename base_type::size_type; protected: @@ -164,9 +200,21 @@ namespace sparrow bitmap_type& get_bitmap(); const bitmap_type& get_bitmap() const; + void resize_bitmap(size_type new_length); + + bitmap_iterator insert_bitmap(const_bitmap_iterator pos, bool value, size_type count); + + template + requires std::same_as::value_type, bool> + bitmap_iterator insert_bitmap(const_bitmap_iterator pos, InputIt first, InputIt last); + + bitmap_iterator erase_bitmap(const_bitmap_iterator pos, size_type count); + + void update(); + private: - static constexpr std::size_t m_bitmap_buffer_index = 0; + non_owning_dynamic_bitset get_non_owning_dynamic_bitset(); bitmap_type make_bitmap(); bitmap_type m_bitmap; @@ -186,10 +234,7 @@ namespace sparrow auto array_crtp_base::operator[](size_type i) -> reference { SPARROW_ASSERT_TRUE(i < this->derived_cast().size()); - return reference( - inner_reference(this->derived_cast().value(i)), - this->derived_cast().has_value(i) - ); + return reference(inner_reference(this->derived_cast().value(i)), this->derived_cast().has_value(i)); } template @@ -229,13 +274,13 @@ namespace sparrow template auto array_crtp_base::cbegin() const -> const_iterator { - return const_iterator(this->derived_cast().value_cbegin(), this->derived_cast().bitmap_begin()); + return const_iterator(this->derived_cast().value_cbegin(), bitmap_begin()); } template auto array_crtp_base::cend() const -> const_iterator { - return const_iterator(this->derived_cast().value_cend(), this->derived_cast().bitmap_end()); + return const_iterator(this->derived_cast().value_cend(), bitmap_end()); } template @@ -255,7 +300,7 @@ namespace sparrow : 
m_proxy(std::move(proxy)) { } - + template auto array_crtp_base::storage() -> arrow_proxy& { @@ -306,6 +351,18 @@ namespace sparrow return sparrow::next(bitmap_begin(), size()); } + template + auto array_crtp_base::bitmap_cbegin() const -> const_bitmap_iterator + { + return bitmap_begin(); + } + + template + auto array_crtp_base::bitmap_cend() const -> const_bitmap_iterator + { + return bitmap_end(); + } + template auto array_crtp_base::get_arrow_proxy() -> arrow_proxy& { @@ -318,6 +375,126 @@ namespace sparrow return std::ranges::equal(lhs, rhs); } + template + void array_crtp_base::resize(size_type new_length, const value_type& value) + { + this->derived_cast().resize_bitmap(new_length); + this->derived_cast().resize_values(new_length, value.get()); + m_proxy.set_length(new_length); // Must be done after resizing the bitmap and values + this->derived_cast().update(); + } + + template + auto array_crtp_base::insert(const_iterator pos, const value_type& value) -> iterator + { + return insert(pos, value, 1); + } + + template + auto array_crtp_base::insert(const_iterator pos, const value_type& value, size_type count) -> iterator + { + SPARROW_ASSERT_TRUE(pos >= cbegin()); + SPARROW_ASSERT_TRUE(pos <= cend()); + const size_t distance = static_cast(std::distance(cbegin(), pos)); + this->derived_cast().insert_bitmap(sparrow::next(this->bitmap_cbegin(), distance), value.has_value(), count); + this->derived_cast() + .insert_value(sparrow::next(this->derived_cast().value_cbegin(), distance), value.get(), count); + m_proxy.set_length(size() + count); // Must be done after resizing the bitmap and values + this->derived_cast().update(); + return sparrow::next(begin(), distance); + } + + template + auto array_crtp_base::insert(const_iterator pos, std::initializer_list values) -> iterator + { + return insert(pos, values.begin(), values.end()); + } + + template + template + auto array_crtp_base::insert(const_iterator pos, InputIt first, InputIt last) -> iterator + { + 
SPARROW_ASSERT_TRUE(pos >= cbegin()) + SPARROW_ASSERT_TRUE(pos <= cend()); + SPARROW_ASSERT_TRUE(first <= last); + const difference_type distance = std::distance(cbegin(), pos); + const auto validity_range = std::ranges::subrange(first, last) + | std::views::transform( + [](const value_type& obj) + { + return obj.has_value(); + } + ); + this->derived_cast().insert_bitmap( + sparrow::next(bitmap_cbegin(), distance), + validity_range.begin(), + validity_range.end() + ); + + const auto value_range = std::ranges::subrange(first, last) + | std::views::transform( + [](const value_type& obj) + { + return obj.get(); + } + ); + this->derived_cast().insert_values( + sparrow::next(this->derived_cast().value_cbegin(), distance), + value_range.begin(), + value_range.end() + ); + const difference_type count = std::distance(first, last); + m_proxy.set_length(size() + static_cast(count)); // Must be done after modifying the bitmap and values + this->derived_cast().update(); + return sparrow::next(begin(), distance); + } + + template + template + auto array_crtp_base::insert(const_iterator pos, const R& range) -> iterator + { + return insert(pos, std::ranges::begin(range), std::ranges::end(range)); + } + + template + auto array_crtp_base::erase(const_iterator pos) -> iterator + { + SPARROW_ASSERT_TRUE(cbegin() <= pos) + SPARROW_ASSERT_TRUE(pos < cend()); + return erase(pos, pos + 1); + } + + template + auto array_crtp_base::erase(const_iterator first, const_iterator last) -> iterator + { + SPARROW_ASSERT_TRUE(first <= last); + SPARROW_ASSERT_TRUE(cbegin() <= first) + SPARROW_ASSERT_TRUE(last <= cend()); + const difference_type first_index = std::distance(cbegin(), first); + if (first == last) + { + return sparrow::next(begin(), first_index); + } + const auto count = static_cast(std::distance(first, last)); + this->derived_cast().erase_bitmap(sparrow::next(bitmap_cbegin(), first_index), count); + this->derived_cast().erase_values(sparrow::next(this->derived_cast().value_cbegin(), 
first_index), count); + m_proxy.set_length(size() - count); // Must be done after modifying the bitmap and values + this->derived_cast().update(); + return sparrow::next(begin(), first_index); + } + + template + void array_crtp_base::push_back(const value_type& value) + { + insert(cend(), value); + } + + template + void array_crtp_base::pop_back() + { + erase(std::prev(cend())); + } + /************************************ * array_bitmap_base implementation * ************************************/ @@ -335,6 +512,7 @@ namespace sparrow , m_bitmap(make_bitmap()) { } + template array_bitmap_base& array_bitmap_base::operator=(const array_bitmap_base& rhs) { @@ -358,8 +536,57 @@ namespace sparrow template auto array_bitmap_base::make_bitmap() -> bitmap_type { - SPARROW_ASSERT_TRUE(this->storage().buffers().size() > m_bitmap_buffer_index); + static constexpr size_t bitmap_buffer_index = 0; + SPARROW_ASSERT_TRUE(this->storage().buffers().size() > bitmap_buffer_index); const auto bitmap_size = static_cast(this->storage().length() + this->storage().offset()); - return bitmap_type(this->storage().buffers()[m_bitmap_buffer_index].data(), bitmap_size); + return bitmap_type(this->storage().buffers()[bitmap_buffer_index].data(), bitmap_size); + } + + template + void array_bitmap_base::resize_bitmap(size_type new_length) + { + const size_t new_size = new_length + static_cast(this->storage().offset()); + this->storage().resize_bitmap(new_size); + } + + template + auto + array_bitmap_base::insert_bitmap(const_bitmap_iterator pos, bool value, size_type count) -> bitmap_iterator + { + SPARROW_ASSERT_TRUE(this->bitmap_cbegin() <= pos) + SPARROW_ASSERT_TRUE(pos <= this->bitmap_cend()) + const auto pos_index = static_cast(std::distance(this->bitmap_cbegin(), pos)); + const auto idx = this->storage().insert_bitmap(pos_index, value, count); + return sparrow::next(this->bitmap_begin(), idx); + } + + template + template + requires std::same_as::value_type, bool> + auto + 
array_bitmap_base::insert_bitmap(const_bitmap_iterator pos, InputIt first, InputIt last) -> bitmap_iterator + { + SPARROW_ASSERT_TRUE(this->bitmap_cbegin() <= pos) + SPARROW_ASSERT_TRUE(pos <= this->bitmap_cend()); + SPARROW_ASSERT_TRUE(first <= last); + const auto distance = static_cast(std::distance(this->bitmap_cbegin(), pos)); + const auto idx = this->storage().insert_bitmap(distance, first, last); + return sparrow::next(this->bitmap_begin(), idx); + } + + template + auto array_bitmap_base::erase_bitmap(const_bitmap_iterator pos, size_type count) -> bitmap_iterator + { + SPARROW_ASSERT_TRUE(this->bitmap_cbegin() <= pos) + SPARROW_ASSERT_TRUE(pos < this->bitmap_cend()) + const auto pos_idx = static_cast(std::distance(this->bitmap_cbegin(), pos)); + const auto idx = this->storage().erase_bitmap(pos_idx, count); + return sparrow::next(this->bitmap_begin(), idx); + } + + template + auto array_bitmap_base::update() -> void + { + m_bitmap = make_bitmap(); } } diff --git a/include/sparrow/layout/primitive_array.hpp b/include/sparrow/layout/primitive_array.hpp index 92cb7b8d..ac023aa7 100644 --- a/include/sparrow/layout/primitive_array.hpp +++ b/include/sparrow/layout/primitive_array.hpp @@ -14,13 +14,22 @@ #pragma once +#include + #include "sparrow/arrow_array_schema_proxy.hpp" +#include "sparrow/buffer/buffer_adaptor.hpp" #include "sparrow/layout/array_base.hpp" #include "sparrow/utils/iterator.hpp" #include "sparrow/utils/nullable.hpp" namespace sparrow -{ +{ + class run_end_encoded_array; + + template + concept iterator_of_type = std::input_iterator + && std::same_as::value_type, T>; + template class primitive_array; @@ -55,6 +64,8 @@ namespace sparrow using bitmap_type = typename base_type::bitmap_type; using bitmap_reference = typename base_type::bitmap_reference; using bitmap_const_reference = typename base_type::bitmap_const_reference; + using bitmap_iterator = typename base_type::bitmap_iterator; + using const_bitmap_iterator = typename 
base_type::const_bitmap_iterator; using value_type = nullable; using reference = nullable; using const_reference = nullable; @@ -69,6 +80,9 @@ namespace sparrow using const_value_iterator = typename base_type::const_value_iterator; using const_bitmap_range = typename base_type::const_bitmap_range; + using iterator = typename base_type::iterator; + using const_iterator = typename base_type::const_iterator; + explicit primitive_array(arrow_proxy); using base_type::size; @@ -89,6 +103,19 @@ namespace sparrow const_value_iterator value_cbegin() const; const_value_iterator value_cend() const; + // Modifiers + + buffer_adaptor&> get_data_buffer(); + + void resize_values(size_type new_length, inner_value_type value); + + value_iterator insert_value(const_value_iterator pos, inner_value_type value, size_type count); + + template InputIt> + value_iterator insert_values(const_value_iterator pos, InputIt first, InputIt last); + + value_iterator erase_values(const_value_iterator pos, size_type count); + static constexpr size_type DATA_BUFFER_INDEX = 1; friend class array_crtp_base; @@ -127,7 +154,7 @@ namespace sparrow primitive_array::primitive_array(arrow_proxy proxy) : base_type(std::move(proxy)) { - SPARROW_ASSERT_TRUE(detail::check_primitive_data_type(storage().data_type())); + SPARROW_ASSERT_TRUE(storage().data_type() == arrow_traits::type_id); } template @@ -181,4 +208,56 @@ namespace sparrow { return sparrow::next(value_cbegin(), size()); } + + template + buffer_adaptor&> primitive_array::get_data_buffer() + { + auto& buffers = storage().get_array_private_data()->buffers(); + return make_buffer_adaptor(buffers[DATA_BUFFER_INDEX]); + } + + template + void primitive_array::resize_values(size_type new_length, inner_value_type value) + { + const size_t new_size = new_length + static_cast(storage().offset()); + get_data_buffer().resize(new_size, value); + } + + template + auto primitive_array::insert_value(const_value_iterator pos, inner_value_type value, size_type count) + 
-> value_iterator + { + SPARROW_ASSERT_TRUE(value_cbegin() <= pos) + SPARROW_ASSERT_TRUE(pos <= value_cend()); + const auto distance = std::distance(value_cbegin(), sparrow::next(pos, storage().offset())); + get_data_buffer().insert(pos, count, value); + return sparrow::next(this->value_begin(), distance); + } + + template + template InputIt> + auto + primitive_array::insert_values(const_value_iterator pos, InputIt first, InputIt last) -> value_iterator + { + SPARROW_ASSERT_TRUE(value_cbegin() <= pos) + SPARROW_ASSERT_TRUE(pos <= value_cend()); + const auto distance = std::distance(value_cbegin(), sparrow::next(pos, storage().offset())); + get_data_buffer().insert(pos, first, last); + return sparrow::next(this->value_begin(), distance); + } + + template + auto primitive_array::erase_values(const_value_iterator pos, size_type count) -> value_iterator + { + SPARROW_ASSERT_TRUE(this->value_cbegin() <= pos) + SPARROW_ASSERT_TRUE(pos < this->value_cend()); + const size_type distance = static_cast( + std::distance(this->value_cbegin(), sparrow::next(pos, storage().offset())) + ); + auto data_buffer = get_data_buffer(); + const auto first = sparrow::next(data_buffer.cbegin(), distance); + const auto last = sparrow::next(first, count); + data_buffer.erase(first, last); + return sparrow::next(this->value_begin(), distance); + } } diff --git a/include/sparrow/utils/algorithm.hpp b/include/sparrow/utils/algorithm.hpp index 59873f1a..9f72d474 100644 --- a/include/sparrow/utils/algorithm.hpp +++ b/include/sparrow/utils/algorithm.hpp @@ -16,7 +16,6 @@ #include #include -#include #include "sparrow/config/config.hpp" @@ -123,5 +122,4 @@ namespace sparrow { return lexicographical_compare_three_way(r1, r2) == std::strong_ordering::less; } - } // namespace sparrow diff --git a/src/arrow_array_schema_proxy.cpp b/src/arrow_array_schema_proxy.cpp index 41ad516d..781b6aca 100644 --- a/src/arrow_array_schema_proxy.cpp +++ b/src/arrow_array_schema_proxy.cpp @@ -14,6 +14,8 @@ #include 
"sparrow/arrow_array_schema_proxy.hpp" +#include + #include "sparrow/arrow_interface/arrow_array.hpp" #include "sparrow/arrow_interface/arrow_array_schema_info_utils.hpp" #include "sparrow/arrow_interface/arrow_flag_utils.hpp" @@ -22,9 +24,10 @@ #include "sparrow/buffer/dynamic_bitset/dynamic_bitset_view.hpp" #include "sparrow/utils/contracts.hpp" - namespace sparrow { + static constexpr size_t bitmap_buffer_index = 0; + arrow_proxy arrow_proxy::view() { return arrow_proxy(&array(), &schema()); @@ -32,6 +35,12 @@ namespace sparrow void arrow_proxy::update_buffers() { + if (is_created_with_sparrow()) + { + get_array_private_data()->update_buffers_ptrs(); + array().buffers = get_array_private_data()->buffers_ptrs(); + array().n_buffers = static_cast(n_buffers()); + } m_buffers = get_arrow_array_buffers(array(), schema()); } @@ -272,8 +281,9 @@ namespace sparrow { throw arrow_proxy_exception("Cannot set name on non-sparrow created ArrowArray"); } - get_schema_private_data()->name() = name; - schema().name = get_schema_private_data()->name_ptr(); + auto private_data = get_schema_private_data(); + private_data->name() = name; + schema().name = private_data->name_ptr(); } [[nodiscard]] std::optional arrow_proxy::metadata() const @@ -291,8 +301,9 @@ namespace sparrow { throw arrow_proxy_exception("Cannot set metadata on non-sparrow created ArrowArray"); } - get_schema_private_data()->metadata() = metadata; - schema().metadata = get_schema_private_data()->metadata_ptr(); + auto private_data = get_schema_private_data(); + private_data->metadata() = metadata; + schema().metadata = private_data->metadata_ptr(); } [[nodiscard]] std::vector arrow_proxy::flags() const @@ -324,6 +335,8 @@ namespace sparrow throw arrow_proxy_exception("Cannot set length on non-sparrow created ArrowArray"); } array().length = static_cast(length); + update_buffers(); + update_null_count(); } [[nodiscard]] int64_t arrow_proxy::null_count() const @@ -370,8 +383,7 @@ namespace sparrow array().n_buffers 
= static_cast(n_buffers); arrow_array_private_data* private_data = get_array_private_data(); private_data->resize_buffers(n_buffers); - array().buffers = private_data->buffers_ptrs(); - array().n_buffers = static_cast(n_buffers); + update_buffers(); } [[nodiscard]] size_t arrow_proxy::n_children() const @@ -438,13 +450,19 @@ namespace sparrow arrow_schema_private_data* arrow_proxy::get_schema_private_data() { - SPARROW_ASSERT_TRUE(schema_created_with_sparrow()); + if (!schema_created_with_sparrow()) + { + throw arrow_proxy_exception("Cannot get schema private data on non-sparrow created ArrowArray"); + } return static_cast(schema().private_data); } arrow_array_private_data* arrow_proxy::get_array_private_data() { - SPARROW_ASSERT_TRUE(array_created_with_sparrow()); + if (!array_created_with_sparrow()) + { + throw arrow_proxy_exception("Cannot get array private data on non-sparrow created ArrowArray"); + } return static_cast(array().private_data); } @@ -465,9 +483,7 @@ namespace sparrow { throw arrow_proxy_exception("Cannot set buffer on non-sparrow created ArrowArray"); } - auto array_private_data = get_array_private_data(); - array_private_data->set_buffer(index, buffer); - array().buffers = array_private_data->buffers_ptrs(); + get_array_private_data()->set_buffer(index, buffer); update_null_count(); update_buffers(); } @@ -479,9 +495,7 @@ namespace sparrow { throw arrow_proxy_exception("Cannot set buffer on non-sparrow created ArrowArray"); } - auto array_private_data = get_array_private_data(); - array_private_data->set_buffer(index, std::move(buffer)); - array().buffers = array_private_data->buffers_ptrs(); + get_array_private_data()->set_buffer(index, std::move(buffer)); update_null_count(); update_buffers(); } @@ -622,7 +636,7 @@ namespace sparrow } const auto validity_index = std::distance(buffer_types.begin(), validity_it); auto& validity_buffer = buffers()[static_cast(validity_index)]; - const dynamic_bitset_view bitmap(validity_buffer.data(), 
validity_buffer.size()); + const dynamic_bitset_view bitmap(validity_buffer.data(), length() + offset()); const auto null_count = bitmap.null_count(); set_null_count(static_cast(null_count)); } @@ -654,4 +668,151 @@ namespace sparrow std::swap(m_children, other.m_children); std::swap(m_dictionary, other.m_dictionary); } + + [[nodiscard]] non_owning_dynamic_bitset arrow_proxy::get_non_owning_dynamic_bitset() + { + if (!array_created_with_sparrow()) + { + throw arrow_proxy_exception( + "Cannot get non owning dynamic bitset from a non-sparrow created ArrowArray or ArrowSchema" + ); + } + + SPARROW_ASSERT_TRUE(is_created_with_sparrow()) + SPARROW_ASSERT_TRUE(has_bitmap(data_type())) + auto private_data = static_cast(array().private_data); + auto& bitmap_buffer = private_data->buffers()[bitmap_buffer_index]; + const size_t current_size = length() + offset(); + non_owning_dynamic_bitset bitmap{&bitmap_buffer, current_size}; + return bitmap; + } + + void arrow_proxy::resize_bitmap(size_t new_size) + { + if (!array_created_with_sparrow()) + { + throw arrow_proxy_exception("Cannot resize bitmap on a non-sparrow created ArrowArray or ArrowSchema" + ); + } + SPARROW_ASSERT_TRUE(has_bitmap(data_type())) + auto bitmap = get_non_owning_dynamic_bitset(); + bitmap.resize(new_size, true); + update_buffers(); + } + + size_t arrow_proxy::insert_bitmap(size_t index, bool value) + { + if (!array_created_with_sparrow()) + { + throw arrow_proxy_exception( + "Cannot insert value in bitmap on a non-sparrow created ArrowArray or ArrowSchema" + ); + } + SPARROW_ASSERT_TRUE(has_bitmap(data_type())) + SPARROW_ASSERT_TRUE(std::cmp_less_equal(index, length())) + auto bitmap = get_non_owning_dynamic_bitset(); + auto it = bitmap.insert(sparrow::next(bitmap.cbegin(), index), value); + update_buffers(); + return std::distance(bitmap.begin(), it); + } + + size_t arrow_proxy::insert_bitmap(size_t index, bool value, size_t count) + { + if (!array_created_with_sparrow()) + { + throw 
arrow_proxy_exception( + "Cannot insert values in bitmap on a non-sparrow created ArrowArray or ArrowSchema" + ); + } + SPARROW_ASSERT_TRUE(has_bitmap(data_type())) + SPARROW_ASSERT_TRUE(std::cmp_less_equal(index, length())) + if (count == 0) + { + return index; + } + auto bitmap = get_non_owning_dynamic_bitset(); + auto it = bitmap.insert(sparrow::next(bitmap.cbegin(), index), count, value); + update_buffers(); + return std::distance(bitmap.begin(), it); + } + + size_t arrow_proxy::insert_bitmap(size_t index, std::initializer_list values) + { + if (!array_created_with_sparrow()) + { + throw arrow_proxy_exception( + "Cannot insert values in bitmap on a non-sparrow created ArrowArray or ArrowSchema" + ); + } + SPARROW_ASSERT_TRUE(has_bitmap(data_type())) + SPARROW_ASSERT_TRUE(std::cmp_less_equal(index, length())) + if (values.size() == 0) + { + return index; + } + auto bitmap = get_non_owning_dynamic_bitset(); + auto it = bitmap.insert(sparrow::next(bitmap.cbegin(), index), values.begin(), values.end()); + update_buffers(); + return std::distance(bitmap.begin(), it); + } + + size_t arrow_proxy::erase_bitmap(size_t index) + { + if (!array_created_with_sparrow()) + { + throw arrow_proxy_exception( + "Cannot erase values in bitmap on a non-sparrow created ArrowArray or ArrowSchema" + ); + } + SPARROW_ASSERT_TRUE(has_bitmap(data_type())) + SPARROW_ASSERT_TRUE(std::cmp_less(index, length())) + auto bitmap = get_non_owning_dynamic_bitset(); + auto it = bitmap.erase(sparrow::next(bitmap.cbegin(), index + offset())); + update_buffers(); + return std::distance(bitmap.begin(), it); + } + + size_t arrow_proxy::erase_bitmap(size_t index, size_t count) + { + if (!array_created_with_sparrow()) + { + throw arrow_proxy_exception( + "Cannot erase values in bitmap on a non-sparrow created ArrowArray or ArrowSchema" + ); + } + SPARROW_ASSERT_TRUE(has_bitmap(data_type())) + SPARROW_ASSERT_TRUE(std::cmp_less(index, length())) + auto bitmap = get_non_owning_dynamic_bitset(); + const auto 
it_first = sparrow::next(bitmap.cbegin(), index + offset()); + const auto it_last = sparrow::next(it_first, count); + const auto it = bitmap.erase(it_first, it_last); + update_buffers(); + return std::distance(bitmap.begin(), it); + } + + void arrow_proxy::push_back_bitmap(bool value) + { + if (!array_created_with_sparrow()) + { + throw arrow_proxy_exception( + "Cannot push_back value in bitmap on a non-sparrow created ArrowArray or ArrowSchema" + ); + } + SPARROW_ASSERT_TRUE(has_bitmap(data_type())) + insert_bitmap(length(), value); + update_buffers(); + } + + void arrow_proxy::pop_back_bitmap() + { + if (!array_created_with_sparrow()) + { + throw arrow_proxy_exception( + "Cannot pop_back value in bitmap on a non-sparrow created ArrowArray or ArrowSchema" + ); + } + SPARROW_ASSERT_TRUE(has_bitmap(data_type())) + erase_bitmap(length() - 1); + update_buffers(); + } } diff --git a/src/arrow_interface/arrow_array.cpp b/src/arrow_interface/arrow_array.cpp index d443d514..d81ebcb8 100644 --- a/src/arrow_interface/arrow_array.cpp +++ b/src/arrow_interface/arrow_array.cpp @@ -17,7 +17,6 @@ #include "sparrow/arrow_interface/arrow_array_schema_info_utils.hpp" #include "sparrow/types/data_type.hpp" - namespace sparrow { void release_arrow_array(ArrowArray* array) @@ -43,6 +42,7 @@ namespace sparrow buffers.reserve(buffer_count); const enum data_type data_type = format_to_data_type(schema.format); const auto buffers_type = get_buffer_types_from_data_type(data_type); + SPARROW_ASSERT_TRUE(buffers_type.size() == buffer_count); for (std::size_t i = 0; i < buffer_count; ++i) { const auto buffer_type = buffers_type[i]; diff --git a/test/test_arrow_array_schema_proxy.cpp b/test/test_arrow_array_schema_proxy.cpp index a1e5aedd..0fce182c 100644 --- a/test/test_arrow_array_schema_proxy.cpp +++ b/test/test_arrow_array_schema_proxy.cpp @@ -22,7 +22,6 @@ #include "arrow_array_schema_creation.hpp" #include "doctest/doctest.h" - TEST_SUITE("ArrowArrowSchemaProxy") { 
TEST_CASE("constructors") @@ -149,7 +148,7 @@ TEST_SUITE("ArrowArrowSchemaProxy") { auto [schema, array] = make_external_arrow_schema_and_array(); sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); - CHECK_THROWS_AS(proxy.set_format("U"), std::runtime_error); + CHECK_THROWS(proxy.set_format("U")); } } @@ -252,8 +251,8 @@ TEST_SUITE("ArrowArrowSchemaProxy") { auto [schema, array] = make_sparrow_arrow_schema_and_array(); sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); - proxy.set_length(20); - CHECK_EQ(proxy.length(), 20); + proxy.set_length(2); + CHECK_EQ(proxy.length(), 2); } SUBCASE("on external c structure") @@ -323,14 +322,15 @@ TEST_SUITE("ArrowArrowSchemaProxy") TEST_CASE("set_n_buffers") { - SUBCASE("on sparrow c structure") - { - auto [schema, array] = make_sparrow_arrow_schema_and_array(); - sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); - CHECK_EQ(proxy.n_children(), 0); - proxy.set_n_buffers(3); - CHECK_EQ(proxy.n_buffers(), 3); - } + // TODO: Deactivate because it can only be tested on Variable Binary View + // SUBCASE("on sparrow c structure") + // { + // auto [schema, array] = make_sparrow_arrow_schema_and_array(); + // sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + // CHECK_EQ(proxy.n_children(), 0); + // proxy.set_n_buffers(3); + // CHECK_EQ(proxy.n_buffers(), 3); + // } SUBCASE("on external c structure") { @@ -417,7 +417,9 @@ TEST_SUITE("ArrowArrowSchemaProxy") SUBCASE("on sparrow c structure") { auto array_schema_pair = make_sparrow_arrow_schema_and_array(); - std::array array_child_ptr{{{&array_schema_pair.second ,&array_schema_pair.first}}}; + std::array array_child_ptr{ + {{&array_schema_pair.second, &array_schema_pair.first}} + }; auto [schema, array] = make_sparrow_arrow_schema_and_array(); sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); @@ -427,6 +429,18 @@ TEST_SUITE("ArrowArrowSchemaProxy") CHECK_EQ(children.size(), 1); CHECK_EQ(children[0].format(), "C"); 
} + + SUBCASE("on external c structure") + { + auto array_schema_pair = make_external_arrow_schema_and_array(); + std::array array_child_ptr{ + {{&array_schema_pair.second, &array_schema_pair.first}} + }; + + auto [schema, array] = make_external_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + CHECK_THROWS_AS(proxy.add_children(array_child_ptr), std::runtime_error); + } } TEST_CASE("pop_children") @@ -436,14 +450,23 @@ TEST_SUITE("ArrowArrowSchemaProxy") auto [schema, array] = make_sparrow_arrow_schema_and_array(); sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); - auto array_schema_pair = make_sparrow_arrow_schema_and_array(); - std::array array_child_ptr{{{&array_schema_pair.second ,&array_schema_pair.first}}}; + auto array_schema_pair = make_sparrow_arrow_schema_and_array(); + std::array array_child_ptr{ + {{&array_schema_pair.second, &array_schema_pair.first}} + }; proxy.add_children(array_child_ptr); proxy.pop_children(1); const auto& children = proxy.children(); CHECK_EQ(children.size(), 0); CHECK_EQ(proxy.n_children(), 0); } + + SUBCASE("on external c structure") + { + auto [schema, array] = make_external_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + CHECK_THROWS_AS(proxy.pop_children(1), std::runtime_error); + } } TEST_CASE("dictionary") @@ -457,7 +480,7 @@ TEST_SUITE("ArrowArrowSchemaProxy") { SUBCASE("on sparrow c structure") { - auto array_schema_pair = make_sparrow_arrow_schema_and_array(); + auto array_schema_pair = make_sparrow_arrow_schema_and_array(); auto [schema, array] = make_sparrow_arrow_schema_and_array(); sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); @@ -498,4 +521,302 @@ TEST_SUITE("ArrowArrowSchemaProxy") const sparrow::arrow_proxy proxy_ext(std::move(array_ext), std::move(schema_ext)); CHECK_EQ(proxy_ext.private_data(), nullptr); } + + TEST_CASE("resize_bitmap") + { + SUBCASE("on sparrow c structure") + { + auto 
[schema, array] = make_sparrow_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + proxy.resize_bitmap(5); + const auto buffers = proxy.buffers(); + REQUIRE_EQ(buffers.size(), 2); + const sparrow::dynamic_bitset_view bitmap(buffers[0].data(), 5); + CHECK(bitmap.test(0)); + CHECK(bitmap.test(1)); + CHECK_FALSE(bitmap.test(2)); + CHECK_FALSE(bitmap.test(3)); + CHECK(bitmap.test(4)); + } + + SUBCASE("on external c structure") + { + auto [schema, array] = make_external_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + CHECK_THROWS_AS(proxy.resize_bitmap(5), std::runtime_error); + } + } + + TEST_CASE("insert_bitmap") + { + SUBCASE("with index and value") + { + SUBCASE("on sparrow c structure") + { + auto [schema, array] = make_sparrow_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + proxy.insert_bitmap(1, false); + const auto buffers = proxy.buffers(); + REQUIRE_EQ(buffers.size(), 2); + const sparrow::dynamic_bitset_view bitmap(buffers[0].data(), 7); + CHECK(bitmap.test(0)); + CHECK_FALSE(bitmap.test(1)); + CHECK(bitmap.test(2)); + CHECK_FALSE(bitmap.test(3)); + CHECK_FALSE(bitmap.test(4)); + CHECK(bitmap.test(5)); + CHECK(bitmap.test(6)); + } + + SUBCASE("on external c structure") + { + auto [schema, array] = make_external_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + CHECK_THROWS_AS(proxy.insert_bitmap(1, true), std::runtime_error); + } + } + + SUBCASE("with index, value and count") + { + SUBCASE("on sparrow c structure") + { + auto [schema, array] = make_sparrow_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + proxy.insert_bitmap(1, false, 2); + const auto buffers = proxy.buffers(); + REQUIRE_EQ(buffers.size(), 2); + const sparrow::dynamic_bitset_view bitmap(buffers[0].data(), 12); + CHECK(bitmap.test(0)); + CHECK_FALSE(bitmap.test(1)); + 
CHECK_FALSE(bitmap.test(2)); + CHECK(bitmap.test(3)); + CHECK_FALSE(bitmap.test(4)); + CHECK_FALSE(bitmap.test(5)); + CHECK(bitmap.test(6)); + CHECK(bitmap.test(7)); + CHECK(bitmap.test(8)); + CHECK(bitmap.test(9)); + CHECK(bitmap.test(10)); + CHECK(bitmap.test(11)); + } + + SUBCASE("on external c structure") + { + auto [schema, array] = make_external_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + CHECK_THROWS_AS(proxy.insert_bitmap(1, true, 2), std::runtime_error); + } + } + + SUBCASE("with index and initializer list") + { + SUBCASE("on sparrow c structure") + { + auto [schema, array] = make_sparrow_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + proxy.insert_bitmap(1, {false, true, false, true}); + const auto buffers = proxy.buffers(); + REQUIRE_EQ(buffers.size(), 2); + const sparrow::dynamic_bitset_view bitmap(buffers[0].data(), 14); + CHECK(bitmap.test(0)); + CHECK_FALSE(bitmap.test(1)); + CHECK(bitmap.test(2)); + CHECK_FALSE(bitmap.test(3)); + CHECK(bitmap.test(4)); + CHECK(bitmap.test(5)); + CHECK_FALSE(bitmap.test(6)); + CHECK_FALSE(bitmap.test(7)); + CHECK(bitmap.test(8)); + CHECK(bitmap.test(9)); + CHECK(bitmap.test(10)); + CHECK(bitmap.test(11)); + CHECK(bitmap.test(12)); + CHECK(bitmap.test(13)); + } + + SUBCASE("on external c structure") + { + auto [schema, array] = make_external_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + CHECK_THROWS_AS(proxy.insert_bitmap(1, {0, 1, 0, 1}), std::runtime_error); + } + } + + SUBCASE("with index and iterators") + { + SUBCASE("on sparrow c structure") + { + auto [schema, array] = make_sparrow_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + std::vector values{0, 1, 0, 1}; + proxy.insert_bitmap(1, values.begin(), values.end()); + const auto buffers = proxy.buffers(); + REQUIRE_EQ(buffers.size(), 2); + const 
sparrow::dynamic_bitset_view bitmap(buffers[0].data(), 14); + CHECK(bitmap.test(0)); + CHECK_FALSE(bitmap.test(1)); + CHECK(bitmap.test(2)); + CHECK_FALSE(bitmap.test(3)); + CHECK(bitmap.test(4)); + CHECK(bitmap.test(5)); + CHECK_FALSE(bitmap.test(6)); + CHECK_FALSE(bitmap.test(7)); + CHECK(bitmap.test(8)); + CHECK(bitmap.test(9)); + CHECK(bitmap.test(10)); + CHECK(bitmap.test(11)); + CHECK(bitmap.test(12)); + CHECK(bitmap.test(13)); + } + + SUBCASE("on external c structure") + { + auto [schema, array] = make_external_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + std::vector values{0, 1, 0, 1}; + CHECK_THROWS_AS( + proxy.insert_bitmap(1, values.begin(), values.end()), + std::runtime_error + ); + } + } + + SUBCASE("with index and range") + { + SUBCASE("on sparrow c structure") + { + auto [schema, array] = make_sparrow_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + std::vector values{false, true, false, true}; + proxy.insert_bitmap(1, values); + const auto buffers = proxy.buffers(); + REQUIRE_EQ(buffers.size(), 2); + const sparrow::dynamic_bitset_view bitmap(buffers[0].data(), 14); + CHECK(bitmap.test(0)); + CHECK_FALSE(bitmap.test(1)); + CHECK(bitmap.test(2)); + CHECK_FALSE(bitmap.test(3)); + CHECK(bitmap.test(4)); + CHECK(bitmap.test(5)); + CHECK_FALSE(bitmap.test(6)); + CHECK_FALSE(bitmap.test(7)); + CHECK(bitmap.test(8)); + CHECK(bitmap.test(9)); + CHECK(bitmap.test(10)); + CHECK(bitmap.test(11)); + CHECK(bitmap.test(12)); + CHECK(bitmap.test(13)); + } + + SUBCASE("on external c structure") + { + auto [schema, array] = make_external_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + std::vector values{0, 1, 0, 1}; + CHECK_THROWS_AS(proxy.insert_bitmap(1, values), std::runtime_error); + } + } + } + + TEST_CASE("erase_bitmap") + { + SUBCASE("with index") + { + SUBCASE("on sparrow c structure") + { + auto [schema, array] = 
make_sparrow_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + proxy.erase_bitmap(1); + const auto buffers = proxy.buffers(); + REQUIRE_EQ(buffers.size(), 2); + + } + + SUBCASE("on external c structure") + { + auto [schema, array] = make_external_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + CHECK_THROWS_AS(proxy.erase_bitmap(1), std::runtime_error); + } + } + + SUBCASE("with index and count") + { + SUBCASE("on sparrow c structure") + { + auto [schema, array] = make_sparrow_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + proxy.erase_bitmap(1, 2); + const auto buffers = proxy.buffers(); + REQUIRE_EQ(buffers.size(), 2); + } + + SUBCASE("on external c structure") + { + auto [schema, array] = make_external_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + CHECK_THROWS_AS(proxy.erase_bitmap(1, 2), std::runtime_error); + } + } + } + + TEST_CASE("push_back_bitmap") + { + SUBCASE("on sparrow c structure") + { + auto [schema, array] = make_sparrow_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + proxy.push_back_bitmap(1); + const auto buffers = proxy.buffers(); + REQUIRE_EQ(buffers.size(), 2); + const sparrow::dynamic_bitset_view bitmap(buffers[0].data(), 11); + CHECK(bitmap.test(0)); + CHECK(bitmap.test(1)); + CHECK_FALSE(bitmap.test(2)); + CHECK_FALSE(bitmap.test(3)); + CHECK(bitmap.test(4)); + CHECK(bitmap.test(5)); + CHECK(bitmap.test(6)); + CHECK(bitmap.test(7)); + CHECK(bitmap.test(8)); + CHECK(bitmap.test(9)); + CHECK(bitmap.test(10)); + } + + SUBCASE("on external c structure") + { + auto [schema, array] = make_external_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + CHECK_THROWS_AS(proxy.push_back_bitmap(1), std::runtime_error); + } + } + + TEST_CASE("pop_back_bitmap") + { + SUBCASE("on 
sparrow c structure") + { + auto [schema, array] = make_sparrow_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + proxy.pop_back_bitmap(); + const auto buffers = proxy.buffers(); + REQUIRE_EQ(buffers.size(), 2); + const sparrow::dynamic_bitset_view bitmap(buffers[0].data(), 9); + CHECK(bitmap.test(0)); + CHECK(bitmap.test(1)); + CHECK_FALSE(bitmap.test(2)); + CHECK_FALSE(bitmap.test(3)); + CHECK(bitmap.test(4)); + CHECK(bitmap.test(5)); + CHECK(bitmap.test(6)); + CHECK(bitmap.test(7)); + CHECK(bitmap.test(8)); + } + + SUBCASE("on external c structure") + { + auto [schema, array] = make_external_arrow_schema_and_array(); + sparrow::arrow_proxy proxy(std::move(array), std::move(schema)); + CHECK_THROWS_AS(proxy.pop_back_bitmap(), std::runtime_error); + } + } } diff --git a/test/test_dictionary_encoded_array.cpp b/test/test_dictionary_encoded_array.cpp index ae9c3697..d929b7bf 100644 --- a/test/test_dictionary_encoded_array.cpp +++ b/test/test_dictionary_encoded_array.cpp @@ -33,7 +33,7 @@ namespace sparrow static const std::array words{{"hello", "you", "are", "not", "prepared", "!", "?"}}; - arrow_proxy make_arrow_proxy() + inline arrow_proxy make_arrow_proxy() { constexpr std::array keys_nulls{1ULL, 5ULL}; const std::vector keys{0, 0, 1, 2, 3, 4, 2, 5, 0, 1, 2}; diff --git a/test/test_list_array.cpp b/test/test_list_array.cpp index 8cde65a2..f62ec60d 100644 --- a/test/test_list_array.cpp +++ b/test/test_list_array.cpp @@ -12,25 +12,25 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "sparrow/layout/primitive_array.hpp" #include "sparrow/layout/list_layout/list_array.hpp" +#include "sparrow/layout/primitive_array.hpp" #include "doctest/doctest.h" - -#include "test_utils.hpp" #include "external_array_data_creation.hpp" +#include "test_utils.hpp" + namespace sparrow { TEST_SUITE("list_array") - { - TEST_CASE_TEMPLATE("list[T]",T, std::uint8_t, std::int32_t, float, double) + { + TEST_CASE_TEMPLATE("list[T]", T, std::uint8_t, std::int32_t, float, double) { using inner_scalar_type = T; using inner_nullable_type = nullable; // number of elements in the flatted array - const std::size_t n_flat = 10; //1+2+3+4 + const std::size_t n_flat = 10; // 1+2+3+4 // number of elements in the list array const std::size_t n = 4; // vector of sizes @@ -39,13 +39,13 @@ namespace sparrow // first we create a flat array of integers ArrowArray flat_arr{}; ArrowSchema flat_schema{}; - test::fill_schema_and_array(flat_schema, flat_arr, n_flat, 0/*offset*/, {}); + test::fill_schema_and_array(flat_schema, flat_arr, n_flat, 0 /*offset*/, {}); flat_schema.name = "the flat array"; ArrowArray arr{}; ArrowSchema schema{}; test::fill_schema_and_array_for_list_layout(schema, arr, flat_schema, flat_arr, sizes, {}, 0); - arrow_proxy proxy(&arr, &schema); + arrow_proxy proxy(&arr, &schema); // create a list array list_array list_arr(std::move(proxy)); @@ -53,32 +53,39 @@ namespace sparrow SUBCASE("element-sizes") { - for(std::size_t i = 0; i < n; ++i){ + for (std::size_t i = 0; i < n; ++i) + { REQUIRE(list_arr[i].has_value()); CHECK(list_arr[i].value().size() == sizes[i]); } - } + } SUBCASE("element-values") { std::size_t flat_index = 0; - for(std::size_t i = 0; i < n; ++i){ + for (std::size_t i = 0; i < n; ++i) + { auto list = list_arr[i].value(); - for(std::size_t j = 0; j < sizes[i]; ++j){ - + for (std::size_t j = 0; j < sizes[i]; ++j) + { auto value_variant = list[j]; // visit the variant - std::visit([&](auto && value){ - if constexpr(std::is_same_v, 
inner_nullable_type>){ - CHECK(value == flat_index); - } - }, value_variant); + std::visit( + [&](auto&& value) + { + if constexpr (std::is_same_v, inner_nullable_type>) + { + CHECK(value == flat_index); + } + }, + value_variant + ); ++flat_index; } } } SUBCASE("consitency") - { + { test::generic_consistency_test(list_arr); } @@ -95,7 +102,7 @@ namespace sparrow REQUIRE(flat_values_casted.size() == n_flat); // check that flat values are "iota" - if constexpr(std::is_integral_v) + if constexpr (std::is_integral_v) { for(inner_scalar_type i = 0; i < static_cast(n_flat); ++i){ CHECK(flat_values_casted[static_cast(i)].value() == i); @@ -112,14 +119,14 @@ namespace sparrow } TEST_SUITE("list_view_array") - { - TEST_CASE_TEMPLATE("list_view_array[T]",T, std::uint8_t, std::int32_t, float, double) + { + TEST_CASE_TEMPLATE("list_view_array[T]", T, std::uint8_t, std::int32_t, float, double) { using inner_scalar_type = T; using inner_nullable_type = nullable; // number of elements in the flatted array - const std::size_t n_flat = 10; //1+2+3+4 + const std::size_t n_flat = 10; // 1+2+3+4 // number of elements in the list array const std::size_t n = 4; // vector of sizes @@ -128,7 +135,7 @@ namespace sparrow // first we create a flat array of integers ArrowArray flat_arr{}; ArrowSchema flat_schema{}; - test::fill_schema_and_array(flat_schema, flat_arr, n_flat, 0/*offset*/, {}); + test::fill_schema_and_array(flat_schema, flat_arr, n_flat, 0 /*offset*/, {}); flat_schema.name = "the flat array"; ArrowArray arr{}; @@ -142,7 +149,8 @@ namespace sparrow SUBCASE("element-sizes") { - for(std::size_t i = 0; i < n; ++i){ + for (std::size_t i = 0; i < n; ++i) + { REQUIRE(list_arr[i].has_value()); CHECK(list_arr[i].value().size() == sizes[i]); } @@ -151,24 +159,30 @@ namespace sparrow SUBCASE("element-values") { std::size_t flat_index = 0; - for(std::size_t i = 0; i < n; ++i){ + for (std::size_t i = 0; i < n; ++i) + { auto list = list_arr[i].value(); - for(std::size_t j = 0; j < sizes[i]; 
++j){ - + for (std::size_t j = 0; j < sizes[i]; ++j) + { auto value_variant = list[j]; // visit the variant - std::visit([&](auto && value){ - if constexpr(std::is_same_v, inner_nullable_type>){ - CHECK(value == flat_index); - } - }, value_variant); + std::visit( + [&](auto&& value) + { + if constexpr (std::is_same_v, inner_nullable_type>) + { + CHECK(value == flat_index); + } + }, + value_variant + ); ++flat_index; } } } - SUBCASE("consitency") - { + SUBCASE("consistency") + { test::generic_consistency_test(list_arr); } @@ -185,7 +199,7 @@ namespace sparrow REQUIRE(flat_values_casted.size() == n_flat); // check that flat values are "iota" - if constexpr(std::is_integral_v) + if constexpr (std::is_integral_v) { for(inner_scalar_type i = 0; i < static_cast(n_flat); ++i){ CHECK(flat_values_casted[static_cast(i)].value() == i); @@ -201,17 +215,15 @@ namespace sparrow } } - TEST_SUITE("fixed_sized_list_array") - { - TEST_CASE_TEMPLATE("fixed_sized_array_list[T]",T, std::uint8_t, std::int32_t, float, double) + { + TEST_CASE_TEMPLATE("fixed_sized_array_list[T]", T, std::uint8_t, std::int32_t, float, double) { - using inner_scalar_type = T; using inner_nullable_type = nullable; // number of elements in the flatted array - const std::size_t n_flat = 20; + const std::size_t n_flat = 20; // the size of each list = const std::size_t list_size = 5; @@ -223,7 +235,7 @@ namespace sparrow // first we create a flat array of integers ArrowArray flat_arr{}; ArrowSchema flat_schema{}; - test::fill_schema_and_array(flat_schema, flat_arr, n_flat, 0/*offset*/, {}); + test::fill_schema_and_array(flat_schema, flat_arr, n_flat, 0 /*offset*/, {}); flat_schema.name = "the flat array"; @@ -235,14 +247,15 @@ namespace sparrow fixed_sized_list_array list_arr(std::move(proxy)); SUBCASE("consitency") - { + { test::generic_consistency_test(list_arr); } REQUIRE(list_arr.size() == n); SUBCASE("element-sizes") { - for(std::size_t i = 0; i < list_arr.size(); ++i){ + for (std::size_t i = 0; i < 
list_arr.size(); ++i) + { REQUIRE(list_arr[i].has_value()); REQUIRE(list_arr[i].value().size() == list_size); } @@ -251,17 +264,23 @@ namespace sparrow SUBCASE("element-values") { std::size_t flat_index = 0; - for(std::size_t i = 0; i < n; ++i){ + for (std::size_t i = 0; i < n; ++i) + { auto list = list_arr[i].value(); - for(std::size_t j = 0; j < list.size(); ++j){ - + for (std::size_t j = 0; j < list.size(); ++j) + { auto value_variant = list[j]; // visit the variant - std::visit([&](auto && value){ - if constexpr(std::is_same_v, inner_nullable_type>){ - CHECK(value == flat_index); - } - }, value_variant); + std::visit( + [&](auto&& value) + { + if constexpr (std::is_same_v, inner_nullable_type>) + { + CHECK(value == flat_index); + } + }, + value_variant + ); ++flat_index; } } @@ -269,6 +288,4 @@ namespace sparrow delete schema.format; } } - } - diff --git a/test/test_primitive_array.cpp b/test/test_primitive_array.cpp index 63393552..3a5698ad 100644 --- a/test/test_primitive_array.cpp +++ b/test/test_primitive_array.cpp @@ -12,166 +12,624 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "../test/external_array_data_creation.hpp" -#include "doctest/doctest.h" +#include +#include +#include + +#include "sparrow/arrow_array_schema_proxy_factory.hpp" #include "sparrow/layout/primitive_array.hpp" +#include "doctest/doctest.h" namespace sparrow { - using scalar_value_type = std::int32_t; - using array_test_type = primitive_array; - using test::make_arrow_proxy; + + using testing_types = std::tuple< + std::int8_t, + std::uint8_t, + std::int16_t, + std::uint16_t, + std::int32_t, + std::uint32_t, + std::int64_t, + std::uint64_t, + float16_t, + float32_t, + float64_t>; TEST_SUITE("primitive_array") { - constexpr std::size_t size = 10u; - constexpr std::size_t offset = 1u; - - TEST_CASE("constructor") + TEST_CASE_TEMPLATE_DEFINE("", T, primitive_array_id) { - auto pr = make_arrow_proxy(size, offset); - array_test_type ar(std::move(pr)); - CHECK_EQ(ar.size(), size - offset); - } + const std::array values{1, 2, 3, 4, 5}; + constexpr std::array nulls{2}; + constexpr int64_t offset = 1; - TEST_CASE("copy") - { - array_test_type ar(make_arrow_proxy(size, offset)); - array_test_type ar2(ar); + auto make_array = [&nulls](R values_range) + { + return make_primitive_arrow_proxy(values_range, nulls, offset, "test", std::nullopt); + }; - CHECK_EQ(ar, ar2); + // Elements: 2, null, 4, 5 - array_test_type ar3(make_arrow_proxy(size + 3u, offset)); - CHECK_NE(ar, ar3); - ar3 = ar; - CHECK_EQ(ar, ar3); - } + using array_test_type = primitive_array; + array_test_type ar{make_array(values)}; - TEST_CASE("move") - { - array_test_type ar(make_arrow_proxy(size, offset)); - array_test_type ar2(ar); + SUBCASE("constructor") + { + CHECK_EQ(ar.size(), 4); + } - array_test_type ar3(std::move(ar)); - CHECK_EQ(ar2, ar3); + SUBCASE("const operator[]") + { + REQUIRE_EQ(ar.size(), 4); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK_FALSE(ar[1].has_value()); + CHECK_EQ(ar[1].get(), values[2]); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), values[3]); + 
CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), values[4]); + } - array_test_type ar4(make_arrow_proxy(size + 3u, offset)); - CHECK_NE(ar2, ar4); - ar4 = std::move(ar2); - CHECK_EQ(ar3, ar4); - } + SUBCASE("copy") + { + array_test_type ar2(ar); - TEST_CASE("const operator[]") - { - auto pr = make_arrow_proxy(size, offset); - std::vector ref(size - offset); - std::copy( - pr.buffers()[1].data() + offset, - pr.buffers()[1].data() + size, - ref.begin() - ); - array_test_type ar(std::move(pr)); - const array_test_type& car = ar; - for (std::size_t i = 0; i < ref.size(); ++i) - { - CHECK_EQ(ar[i], ref[i]); - CHECK_EQ(car[i], ref[i]); + CHECK_EQ(ar, ar2); + + array_test_type ar3(make_array(std::vector{1, 2, 3, 4, 5, 6, 7})); + CHECK_NE(ar, ar3); + ar3 = ar; + CHECK_EQ(ar, ar3); } - } - TEST_CASE("value_iterator_ordering") - { - array_test_type ar(make_arrow_proxy(size, offset)); - auto ar_values = ar.values(); - array_test_type::const_value_iterator citer = ar_values.begin(); - CHECK(citer < ar_values.end()); - } + SUBCASE("move") + { + array_test_type ar2(ar); - TEST_CASE("value_iterator_equality") - { - array_test_type ar(make_arrow_proxy(size, offset)); - auto ar_values = ar.values(); - array_test_type::const_value_iterator citer = ar_values.begin(); - for (std::size_t i = 0; i < ar.size(); ++i) + array_test_type ar3(std::move(ar)); + CHECK_EQ(ar2, ar3); + + array_test_type ar4(make_array(std::vector{1, 2, 3, 4, 5, 6, 7})); + CHECK_NE(ar2, ar4); + ar4 = std::move(ar2); + CHECK_EQ(ar3, ar4); + } + + SUBCASE("value_iterator_ordering") { - CHECK_EQ(*citer++, ar[i]); + auto ar_values = ar.values(); + auto citer = ar_values.begin(); + CHECK(citer < ar_values.end()); } - CHECK_EQ(citer, ar_values.end()); - } - TEST_CASE("const_value_iterator_ordering") - { - array_test_type ar(make_arrow_proxy(size, offset)); - auto ar_values = ar.values(); - array_test_type::const_value_iterator citer = ar_values.begin(); - CHECK(citer < ar_values.end()); - } + 
SUBCASE("value_iterator_equality") + { + const auto ar_values = ar.values(); + auto citer = ar_values.begin(); + CHECK_EQ(*citer, values[1]); + ++citer; + CHECK_EQ(*citer, values[2]); + ++citer; + CHECK_EQ(*citer, values[3]); + ++citer; + CHECK_EQ(*citer, values[4]); + ++citer; + CHECK_EQ(citer, ar_values.end()); + } - TEST_CASE("const_value_iterator_equality") - { - array_test_type ar(make_arrow_proxy(size, offset)); - auto ar_values = ar.values(); - for (std::size_t i = 0; i < ar.size(); ++i) + SUBCASE("const_value_iterator_ordering") { - ar[i] = static_cast(i); + auto ar_values = ar.values(); + auto citer = ar_values.begin(); + CHECK(citer < ar_values.end()); } - array_test_type::const_value_iterator citer = ar_values.begin(); - for (std::size_t i = 0; i < ar.size(); ++i, ++citer) + SUBCASE("const_value_iterator_equality") { - CHECK_EQ(*citer, i); + auto ar_values = ar.values(); + for (std::size_t i = 0; i < ar.size(); ++i) + { + if constexpr (std::same_as) + { + ar[i] = float16_t(static_cast(i)); + } + else + { + ar[i] = static_cast(i); + } + } + + auto citer = ar_values.begin(); + for (std::size_t i = 0; i < ar.size(); ++i, ++citer) + { + if constexpr (std::same_as) + { + CHECK_EQ(*citer, float16_t(static_cast(i))); + } + else + { + CHECK_EQ(*citer, i); + } + } } - } - TEST_CASE("const_bitmap_iterator_ordering") - { - array_test_type ar(make_arrow_proxy(size, offset)); - auto ar_bitmap = ar.bitmap(); - array_test_type::const_bitmap_iterator citer = ar_bitmap.begin(); - CHECK(citer < ar_bitmap.end()); - } + SUBCASE("const_bitmap_iterator_ordering") + { + const auto ar_bitmap = ar.bitmap(); + const auto citer = ar_bitmap.begin(); + CHECK(citer < ar_bitmap.end()); + } - TEST_CASE("const_bitmap_iterator_equality") - { - array_test_type ar(make_arrow_proxy(size, offset)); - auto ar_bitmap = ar.bitmap(); - for (std::size_t i = 0; i < ar.size(); ++i) + SUBCASE("const_bitmap_iterator_equality") { - if (i % 2 != 0) + auto ar_bitmap = ar.bitmap(); + for (std::size_t i = 
0; i < ar.size(); ++i) { - ar[i] = nullval; + if (i % 2 != 0) + { + ar[i] = nullval; + } + } + + auto citer = ar_bitmap.begin(); + for (std::size_t i = 0; i < ar.size(); ++i, ++citer) + { + CHECK_EQ(*citer, i % 2 == 0); } } - array_test_type::const_bitmap_iterator citer = ar_bitmap.begin(); - for (std::size_t i = 0; i < ar.size(); ++i, ++citer) + SUBCASE("iterator") { - CHECK_EQ(*citer, i % 2 == 0); + auto it = ar.begin(); + const auto end = ar.end(); + CHECK(it->has_value()); + CHECK_EQ(*it, values[1]); + ++it; + CHECK_FALSE(it->has_value()); + CHECK_EQ(*it, make_nullable(values[2], false)); + ++it; + CHECK(it->has_value()); + CHECK_EQ(*it, make_nullable(values[3])); + ++it; + CHECK(it->has_value()); + CHECK_EQ(*it, make_nullable(values[4])); + ++it; + + CHECK_EQ(it, end); + + const array_test_type ar_empty( + make_primitive_arrow_proxy(std::array{}, std::array{}, 0, "test", std::nullopt) + ); + CHECK_EQ(ar_empty.begin(), ar_empty.end()); } - } - TEST_CASE("iterator") - { - array_test_type ar(make_arrow_proxy(size, offset)); - auto it = ar.begin(); - auto end = ar.end(); + SUBCASE("resize") + { + const T new_value{99}; + ar.resize(7, make_nullable(99)); + REQUIRE_EQ(ar.size(), 7); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK_FALSE(ar[1].has_value()); + CHECK_EQ(ar[1].get(), values[2]); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), values[3]); + CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), values[4]); + CHECK(ar[4].has_value()); + CHECK_EQ(ar[4].get(), new_value); + CHECK(ar[5].has_value()); + CHECK_EQ(ar[5].get(), new_value); + CHECK(ar[6].has_value()); + CHECK_EQ(ar[6].get(), new_value); + } - for (std::size_t i = 0; i != ar.size(); ++it, ++i) + SUBCASE("insert") { - CHECK_EQ(*it, make_nullable(ar[i].value())); - CHECK(it->has_value()); + SUBCASE("with pos and value") + { + SUBCASE("at the beginning") + { + const auto pos = ar.cbegin(); + const T new_value{99}; + const auto iter = ar.insert(pos, make_nullable(99)); + 
CHECK_EQ(iter, ar.begin()); + REQUIRE_EQ(ar.size(), 5); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), new_value); + CHECK(ar[1].has_value()); + CHECK_EQ(ar[1].get(), values[1]); + CHECK_FALSE(ar[2].has_value()); + CHECK_EQ(ar[2].get(), values[2]); + CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), values[3]); + CHECK(ar[4].has_value()); + CHECK_EQ(ar[4].get(), values[4]); + } + + SUBCASE("in the middle") + { + const auto pos = sparrow::next(ar.cbegin(), 1); + const T new_value{99}; + const auto iter = ar.insert(pos, make_nullable(99)); + CHECK_EQ(iter, sparrow::next(ar.begin(), 1)); + REQUIRE_EQ(ar.size(), 5); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK(ar[1].has_value()); + CHECK_EQ(ar[1].get(), new_value); + CHECK_FALSE(ar[2].has_value()); + CHECK_EQ(ar[2].get(), values[2]); + CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), values[3]); + CHECK(ar[4].has_value()); + CHECK_EQ(ar[4].get(), values[4]); + } + + SUBCASE("at the end") + { + const auto pos = ar.cend(); + const T new_value{99}; + const auto iter = ar.insert(pos, make_nullable(99)); + CHECK_EQ(iter, ar.begin() + 4); + REQUIRE_EQ(ar.size(), 5); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK_FALSE(ar[1].has_value()); + CHECK_EQ(ar[1].get(), values[2]); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), values[3]); + CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), values[4]); + CHECK(ar[4].has_value()); + CHECK_EQ(ar[4].get(), new_value); + } + } + + SUBCASE("with pos, count and value") + { + SUBCASE("at the beginning") + { + const auto pos = ar.cbegin(); + const T new_value{99}; + const auto iter = ar.insert(pos, make_nullable(new_value), 3); + CHECK_EQ(iter, ar.begin()); + REQUIRE_EQ(ar.size(), 7); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), new_value); + CHECK(ar[1].has_value()); + CHECK_EQ(ar[1].get(), new_value); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), new_value); + CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), 
values[1]); + CHECK_FALSE(ar[4].has_value()); + CHECK_EQ(ar[4].get(), values[2]); + CHECK(ar[5].has_value()); + CHECK_EQ(ar[5].get(), values[3]); + CHECK(ar[6].has_value()); + CHECK_EQ(ar[6].get(), values[4]); + } + + SUBCASE("in the middle") + { + const auto pos = sparrow::next(ar.cbegin(), 1); + const T new_value{99}; + const auto iter = ar.insert(pos, make_nullable(new_value), 3); + CHECK_EQ(iter, sparrow::next(ar.begin(), 1)); + REQUIRE_EQ(ar.size(), 7); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK(ar[1].has_value()); + CHECK_EQ(ar[1].get(), new_value); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), new_value); + CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), new_value); + CHECK_FALSE(ar[4].has_value()); + CHECK_EQ(ar[4].get(), values[2]); + CHECK(ar[5].has_value()); + CHECK_EQ(ar[5].get(), values[3]); + CHECK(ar[6].has_value()); + CHECK_EQ(ar[6].get(), values[4]); + } + + SUBCASE("at the end") + { + const auto pos = ar.cend(); + const T new_value{99}; + const auto iter = ar.insert(pos, make_nullable(new_value), 3); + CHECK_EQ(iter, ar.begin() + 4); + REQUIRE_EQ(ar.size(), 7); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK_FALSE(ar[1].has_value()); + CHECK_EQ(ar[1].get(), values[2]); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), values[3]); + CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), values[4]); + CHECK(ar[4].has_value()); + CHECK_EQ(ar[4].get(), new_value); + CHECK(ar[5].has_value()); + CHECK_EQ(ar[5].get(), new_value); + CHECK(ar[6].has_value()); + CHECK_EQ(ar[6].get(), new_value); + } + } + + SUBCASE("with pos, first and last iterators") + { + SUBCASE("at the beginning") + { + const auto pos = ar.cbegin(); + const std::array, 3> new_values{ + make_nullable(99), + make_nullable(100), + make_nullable(101) + }; + const auto iter = ar.insert(pos, new_values); + CHECK_EQ(iter, ar.begin()); + REQUIRE_EQ(ar.size(), 7); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), new_values[0]); + 
CHECK(ar[1].has_value()); + CHECK_EQ(ar[1].get(), new_values[1]); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), new_values[2]); + CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), values[1]); + CHECK_FALSE(ar[4].has_value()); + CHECK(ar[5].has_value()); + CHECK_EQ(ar[5].get(), values[3]); + CHECK(ar[6].has_value()); + CHECK_EQ(ar[6].get(), values[4]); + } + + SUBCASE("in the middle") + { + const auto pos = sparrow::next(ar.cbegin(), 1); + const std::array, 3> new_values{ + make_nullable(99), + make_nullable(100), + make_nullable(101) + }; + const auto iter = ar.insert(pos, new_values); + CHECK_EQ(iter, sparrow::next(ar.begin(), 1)); + REQUIRE_EQ(ar.size(), 7); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK(ar[1].has_value()); + CHECK_EQ(ar[1].get(), new_values[0]); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), new_values[1]); + CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), new_values[2]); + CHECK_FALSE(ar[4].has_value()); + CHECK_EQ(ar[4].get(), values[2]); + CHECK(ar[5].has_value()); + CHECK_EQ(ar[5].get(), values[3]); + CHECK(ar[6].has_value()); + CHECK_EQ(ar[6].get(), values[4]); + } + + SUBCASE("at the end") + { + const auto pos = ar.cend(); + const std::array, 3> new_values{ + make_nullable(99), + make_nullable(100), + make_nullable(101) + }; + const auto iter = ar.insert(pos, new_values); + CHECK_EQ(iter, ar.begin() + 4); + REQUIRE_EQ(ar.size(), 7); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK_FALSE(ar[1].has_value()); + CHECK_EQ(ar[1].get(), values[2]); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), values[3]); + CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), values[4]); + CHECK(ar[4].has_value()); + CHECK_EQ(ar[4].get(), new_values[0]); + CHECK(ar[5].has_value()); + CHECK_EQ(ar[5].get(), new_values[1]); + CHECK(ar[6].has_value()); + CHECK_EQ(ar[6].get(), new_values[2]); + } + } + + SUBCASE("with pos and initializer list") + { + SUBCASE("at the beginning") + { + const auto pos = 
ar.cbegin(); + auto new_val_99 = make_nullable(99); + auto new_val_100 = make_nullable(100); + auto new_val_101 = make_nullable(101); + const auto iter = ar.insert(pos, {new_val_99, new_val_100, new_val_101}); + CHECK_EQ(iter, ar.begin()); + REQUIRE_EQ(ar.size(), 7); + CHECK_EQ(ar[0], new_val_99); + CHECK_EQ(ar[1], new_val_100); + CHECK_EQ(ar[2], new_val_101); + CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), values[1]); + CHECK_FALSE(ar[4].has_value()); + CHECK(ar[5].has_value()); + CHECK_EQ(ar[5].get(), values[3]); + CHECK(ar[6].has_value()); + } + + SUBCASE("in the middle") + { + const auto pos = sparrow::next(ar.cbegin(), 1); + auto new_val_99 = make_nullable(99); + auto new_val_100 = make_nullable(100); + auto new_val_101 = make_nullable(101); + const auto iter = ar.insert(pos, {new_val_99, new_val_100, new_val_101}); + CHECK_EQ(iter, sparrow::next(ar.begin(), 1)); + REQUIRE_EQ(ar.size(), 7); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK_EQ(ar[1], new_val_99); + CHECK_EQ(ar[2], new_val_100); + CHECK_EQ(ar[3], new_val_101); + CHECK_FALSE(ar[4].has_value()); + CHECK_EQ(ar[4].get(), values[2]); + CHECK(ar[5].has_value()); + CHECK_EQ(ar[5].get(), values[3]); + CHECK(ar[6].has_value()); + } + + SUBCASE("at the end") + { + const auto pos = ar.cend(); + auto new_val_99 = make_nullable(99); + auto new_val_100 = make_nullable(100); + auto new_val_101 = make_nullable(101); + const auto iter = ar.insert(pos, {new_val_99, new_val_100, new_val_101}); + CHECK_EQ(iter, ar.begin() + 4); + REQUIRE_EQ(ar.size(), 7); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK_FALSE(ar[1].has_value()); + CHECK_EQ(ar[1].get(), values[2]); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), values[3]); + CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), values[4]); + CHECK_EQ(ar[4], new_val_99); + CHECK_EQ(ar[5], new_val_100); + CHECK_EQ(ar[6], new_val_101); + } + } + + SUBCASE("with pos and range") + { + SUBCASE("at the beginning") + { + 
const auto pos = ar.cbegin(); + const std::array, 3> new_values{ + make_nullable(99), + make_nullable(100), + make_nullable(101) + }; + const auto iter = ar.insert(pos, new_values); + CHECK_EQ(iter, ar.begin()); + REQUIRE_EQ(ar.size(), 7); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), new_values[0]); + CHECK(ar[1].has_value()); + CHECK_EQ(ar[1].get(), new_values[1]); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), new_values[2]); + CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), values[1]); + CHECK_FALSE(ar[4].has_value()); + CHECK_EQ(ar[4].get(), values[2]); + CHECK(ar[5].has_value()); + CHECK_EQ(ar[5].get(), values[3]); + CHECK(ar[6].has_value()); + CHECK_EQ(ar[6].get(), values[4]); + } + } } - CHECK_EQ(it, end); + SUBCASE("erase") + { + SUBCASE("with pos") + { + SUBCASE("at the beginning") + { + const auto pos = ar.cbegin(); + const auto iter = ar.erase(pos); + CHECK_EQ(iter, ar.begin()); + REQUIRE_EQ(ar.size(), 3); + CHECK_FALSE(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[2]); + CHECK(ar[1].has_value()); + CHECK_EQ(ar[1].get(), values[3]); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), values[4]); + } + + SUBCASE("in the middle") + { + const auto pos = sparrow::next(ar.cbegin(), 1); + const auto iter = ar.erase(pos); + CHECK_EQ(iter, sparrow::next(ar.begin(), 1)); + REQUIRE_EQ(ar.size(), 3); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK(ar[1].has_value()); + CHECK_EQ(ar[1].get(), values[3]); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), values[4]); + } - for (auto v : ar) + SUBCASE("at the end") + { + const auto pos = std::prev(ar.cend()); + const auto iter = ar.erase(pos); + CHECK_EQ(iter, ar.begin() + 3); + REQUIRE_EQ(ar.size(), 3); + REQUIRE(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK_FALSE(ar[1].has_value()); + CHECK_EQ(ar[1].get(), values[2]); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), values[3]); + } + } + + SUBCASE("with iterators") + { + const auto pos = ar.cbegin() + 
1; + const auto iter = ar.erase(pos, pos + 2); + CHECK_EQ(iter, ar.begin() + 1); + REQUIRE_EQ(ar.size(), 2); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK(ar[1].has_value()); + CHECK_EQ(ar[1].get(), values[4]); + } + } + + SUBCASE("push_back") { - CHECK(v.has_value()); + const T new_value{99}; + ar.push_back(make_nullable(99)); + REQUIRE_EQ(ar.size(), 5); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK_FALSE(ar[1].has_value()); + CHECK_EQ(ar[1].get(), values[2]); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), values[3]); + CHECK(ar[3].has_value()); + CHECK_EQ(ar[3].get(), values[4]); + CHECK(ar[4].has_value()); + CHECK_EQ(ar[4].value(), new_value); } - array_test_type ar_empty(make_arrow_proxy(0, 0)); - CHECK_EQ(ar_empty.begin(), ar_empty.end()); + SUBCASE("pop_back") + { + ar.pop_back(); + REQUIRE_EQ(ar.size(), 3); + CHECK(ar[0].has_value()); + CHECK_EQ(ar[0].get(), values[1]); + CHECK_FALSE(ar[1].has_value()); + CHECK_EQ(ar[1].get(), values[2]); + CHECK(ar[2].has_value()); + CHECK_EQ(ar[2].get(), values[3]); + } } + TEST_CASE_TEMPLATE_APPLY(primitive_array_id, testing_types); } }