From 3f996ab7af24f895f75cbf5cec2ea6dadcc91f4a Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Fri, 19 Apr 2024 14:04:15 +0200 Subject: [PATCH] Implement typed_array (#65) --- CMakeLists.txt | 7 +- include/sparrow/algorithm.hpp | 126 ++++++ include/sparrow/array_data.hpp | 22 +- include/sparrow/config.hpp | 26 ++ include/sparrow/data_traits.hpp | 10 +- .../sparrow/details/3rdparty/float16_t.hpp | 14 +- include/sparrow/fixed_size_layout.hpp | 2 +- include/sparrow/typed_array.hpp | 365 ++++++++++++++++++ .../sparrow/variable_size_binary_layout.hpp | 2 + test/CMakeLists.txt | 11 +- test/array_data_creation.hpp | 120 ++++++ test/test_algorithm.cpp | 47 +++ test/test_array_data_creation.cpp | 55 +++ test/test_typed_array.cpp | 309 +++++++++++++++ test/test_variable_size_binary_layout.cpp | 2 +- 15 files changed, 1098 insertions(+), 20 deletions(-) create mode 100644 include/sparrow/algorithm.hpp create mode 100644 include/sparrow/config.hpp create mode 100644 include/sparrow/typed_array.hpp create mode 100644 test/array_data_creation.hpp create mode 100644 test/test_algorithm.cpp create mode 100644 test/test_array_data_creation.cpp create mode 100644 test/test_typed_array.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index ca3100d7..ce38f02c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,19 +59,22 @@ endif() # ===== set(SPARROW_HEADERS + ${SPARROW_INCLUDE_DIR}/sparrow/algorithm.hpp ${SPARROW_INCLUDE_DIR}/sparrow/allocator.hpp ${SPARROW_INCLUDE_DIR}/sparrow/array_data.hpp ${SPARROW_INCLUDE_DIR}/sparrow/buffer.hpp ${SPARROW_INCLUDE_DIR}/sparrow/buffer_view.hpp - ${SPARROW_INCLUDE_DIR}/sparrow/contracts.hpp - ${SPARROW_INCLUDE_DIR}/sparrow/data_type.hpp + ${SPARROW_INCLUDE_DIR}/sparrow/config.hpp + ${SPARROW_INCLUDE_DIR}/sparrow/contracts.hpp ${SPARROW_INCLUDE_DIR}/sparrow/data_traits.hpp + ${SPARROW_INCLUDE_DIR}/sparrow/data_type.hpp ${SPARROW_INCLUDE_DIR}/sparrow/dynamic_bitset.hpp ${SPARROW_INCLUDE_DIR}/sparrow/fixed_size_layout.hpp 
${SPARROW_INCLUDE_DIR}/sparrow/iterator.hpp ${SPARROW_INCLUDE_DIR}/sparrow/memory.hpp ${SPARROW_INCLUDE_DIR}/sparrow/mp_utils.hpp ${SPARROW_INCLUDE_DIR}/sparrow/sparrow_version.hpp + ${SPARROW_INCLUDE_DIR}/sparrow/typed_array.hpp ${SPARROW_INCLUDE_DIR}/sparrow/variable_size_binary_layout.hpp ${SPARROW_INCLUDE_DIR}/sparrow/details/3rdparty/float16_t.hpp diff --git a/include/sparrow/algorithm.hpp b/include/sparrow/algorithm.hpp new file mode 100644 index 00000000..63c1df12 --- /dev/null +++ b/include/sparrow/algorithm.hpp @@ -0,0 +1,126 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include +#include +#include "sparrow/config.hpp" + +namespace sparrow +{ +#if COMPILING_WITH_APPLE_CLANG + + template + concept OrdCategory = std::same_as || std::same_as + || std::same_as; + + template + concept LexicographicalComparable = requires(const R1& r1, const R2& r2, Cmp comp) { + { r1.cbegin() } -> std::input_or_output_iterator; + { r2.cbegin() } -> std::input_or_output_iterator; + OrdCategory; + }; + + template + requires LexicographicalComparable + constexpr auto lexicographical_compare_three_way_non_std(const R1& range1, const R2& range2, Cmp comp) + -> decltype(comp(*range1.cbegin(), *range1.cbegin())) + { + auto iter_1 = range1.cbegin(); + const auto end_1 = range1.cend(); + auto iter_2 = range2.cbegin(); + const auto end_2 = range2.cend(); + + while (true) + { + if (iter_1 == end_1) + { + return iter_2 == end_2 ? std::strong_ordering::equal : std::strong_ordering::less; + } + + if (iter_2 == end_2) + { + return std::strong_ordering::greater; + } + + if (const auto result = comp(*iter_1, *iter_2); result != 0) + { + return result; + } + + ++iter_1; + ++iter_2; + } + } +#endif + + template + constexpr auto lexicographical_compare_three_way(const R1& range1, const R2& range2, Cmp comp) + -> decltype(comp(*range1.cbegin(), *range2.cbegin())) + { +#if COMPILING_WITH_APPLE_CLANG + return lexicographical_compare_three_way_non_std(range1, range2, comp); +#else + return std::lexicographical_compare_three_way( + range1.cbegin(), + range1.cend(), + range2.cbegin(), + range2.cend(), + comp + ); +#endif + } + +#if COMPILING_WITH_APPLE_CLANG + struct compare_three_way + { + template + constexpr auto operator()(const T& t, const U& u) const noexcept -> std::partial_ordering + { + if (t < u) + { + return std::partial_ordering::less; + } + if (u < t) + { + return std::partial_ordering::greater; + } + return std::partial_ordering::equivalent; + } + }; +#endif + + template + constexpr auto lexicographical_compare_three_way(const R1& r1, 
const R2& r2) -> std::partial_ordering + { + return lexicographical_compare_three_way( + r1, + r2, +#if COMPILING_WITH_APPLE_CLANG + compare_three_way {} +#else + std::compare_three_way{} +#endif + ); + } + + template + constexpr auto lexicographical_compare(const R1& r1, const R2& r2) -> bool + { + return lexicographical_compare_three_way(r1, r2) == std::strong_ordering::less; + } + +} // namespace sparrow diff --git a/include/sparrow/array_data.hpp b/include/sparrow/array_data.hpp index 361cb30e..4c6dd91f 100644 --- a/include/sparrow/array_data.hpp +++ b/include/sparrow/array_data.hpp @@ -97,10 +97,10 @@ namespace sparrow bool operator==(const reference_proxy_base& lhs, std::nullopt_t); template - std::strong_ordering operator<=>(const reference_proxy_base& lhs, const reference_proxy_base& rhs); + auto operator<=>(const reference_proxy_base& lhs, const reference_proxy_base& rhs); template - std::strong_ordering operator<=>(const reference_proxy_base& lhs, const T& rhs); + std::partial_ordering operator<=>(const reference_proxy_base& lhs, const T& rhs); template std::strong_ordering operator<=>(const reference_proxy_base& lhs, std::nullopt_t); @@ -282,17 +282,27 @@ namespace sparrow } template - std::strong_ordering operator<=>(const reference_proxy_base& lhs, const reference_proxy_base& rhs) + auto operator<=>(const reference_proxy_base& lhs, const reference_proxy_base& rhs) { const D1& dlhs = lhs.derived_cast(); const D2& drhs = rhs.derived_cast(); - return (dlhs && drhs) ? (dlhs.value() <=> drhs.value()) : (dlhs.has_value() <=> drhs.has_value()); + + using TOrdering = decltype(dlhs.value() <=> drhs.value()); + if (dlhs && drhs) + { + return dlhs.value() <=> drhs.value(); + } + return TOrdering(dlhs.has_value() <=> drhs.has_value()); } template - std::strong_ordering operator<=>(const reference_proxy_base& lhs, const T& rhs) + std::partial_ordering operator<=>(const reference_proxy_base& lhs, const T& rhs) { - return lhs.derived_cast() ? 
(lhs.derived_cast().value() <=> rhs) : std::strong_ordering::less; + if (lhs.derived_cast()) + { + return lhs.derived_cast().value() <=> rhs; + } + return std::partial_ordering::less; } template diff --git a/include/sparrow/config.hpp b/include/sparrow/config.hpp new file mode 100644 index 00000000..d473737e --- /dev/null +++ b/include/sparrow/config.hpp @@ -0,0 +1,26 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#if defined(__apple_build_version__) +#define COMPILING_WITH_APPLE_CLANG 1 +#else +#define COMPILING_WITH_APPLE_CLANG 0 +#endif + +consteval bool is_apple_compiler() +{ + return static_cast(COMPILING_WITH_APPLE_CLANG); +} diff --git a/include/sparrow/data_traits.hpp b/include/sparrow/data_traits.hpp index 2920d5d6..cfd74b70 100644 --- a/include/sparrow/data_traits.hpp +++ b/include/sparrow/data_traits.hpp @@ -14,9 +14,9 @@ #pragma once -#include "data_type.hpp" -#include "fixed_size_layout.hpp" -#include "variable_size_binary_layout.hpp" +#include "sparrow/data_type.hpp" +#include "sparrow/fixed_size_layout.hpp" +#include "sparrow/variable_size_binary_layout.hpp" namespace sparrow { @@ -114,7 +114,7 @@ namespace sparrow { static constexpr data_type type_id = data_type::STRING; using value_type = std::string; - using default_layout = variable_size_binary_layout; // FIXME: this is incorrect, change when we have the right types + using default_layout = variable_size_binary_layout; 
// FIXME: this is incorrect, change when we have the right types }; template <> @@ -122,7 +122,7 @@ namespace sparrow { static constexpr data_type type_id = data_type::STRING; using value_type = std::vector; - using default_layout = variable_size_binary_layout, std::span>; // FIXME: this is incorrect, change when we have the right types + using default_layout = variable_size_binary_layout, const std::span>; // FIXME: this is incorrect, change when we have the right types }; namespace predicate diff --git a/include/sparrow/details/3rdparty/float16_t.hpp b/include/sparrow/details/3rdparty/float16_t.hpp index 3659ca24..e9f94618 100644 --- a/include/sparrow/details/3rdparty/float16_t.hpp +++ b/include/sparrow/details/3rdparty/float16_t.hpp @@ -3,7 +3,9 @@ // PLEASE UPDATE THIS COMMENT IF YOU REPLACE THIS // SEE README.md for rational //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - +// Modification from the original: +// - Added of the <=> operator +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #ifndef FLOAT16_T_HPP_INCLUDED_OSDIJSALKJS8OU4LKJAFSOIUASFD98U3LJKASFOIJFFDDDDDF #define FLOAT16_T_HPP_INCLUDED_OSDIJSALKJS8OU4LKJAFSOIUASFD98U3LJKASFOIJFFDDDDDF // @@ -11,6 +13,7 @@ // https://github.com/acgessler/half_float // https://github.com/x448/float16 // +#include #include #include #include @@ -786,6 +789,15 @@ namespace numeric return !( lhs == rhs ); } + // Added by Alexis Placet from Quantstack + constexpr std::partial_ordering operator <=> ( float16_t lhs, float16_t rhs ) noexcept + { + if ( lhs < rhs ) return std::partial_ordering::less; + if ( lhs > rhs ) return std::partial_ordering::greater; + if ( lhs == rhs ) return std::partial_ordering::equivalent; + return std::partial_ordering::unordered; + } + template std::basic_ostream& operator << ( std::basic_ostream& os, float16_t const& f 
) { diff --git a/include/sparrow/fixed_size_layout.hpp b/include/sparrow/fixed_size_layout.hpp index c1152531..0144393f 100644 --- a/include/sparrow/fixed_size_layout.hpp +++ b/include/sparrow/fixed_size_layout.hpp @@ -45,7 +45,7 @@ namespace sparrow using self_type = fixed_size_layout; using inner_value_type = T; using inner_reference = inner_value_type&; - using inner_const_reference = const inner_reference; + using inner_const_reference = const inner_value_type&; using bitmap_type = array_data::bitmap_type; using bitmap_reference = typename bitmap_type::reference; using bitmap_const_reference = typename bitmap_type::const_reference; diff --git a/include/sparrow/typed_array.hpp b/include/sparrow/typed_array.hpp new file mode 100644 index 00000000..81436570 --- /dev/null +++ b/include/sparrow/typed_array.hpp @@ -0,0 +1,365 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include + +#include "sparrow/algorithm.hpp" +#include "sparrow/array_data.hpp" +#include "sparrow/contracts.hpp" +#include "sparrow/data_traits.hpp" +#include "sparrow/data_type.hpp" +namespace sparrow +{ + template + requires is_arrow_base_type + class typed_array; + + template + std::partial_ordering operator<=>(const typed_array& ta1, const typed_array& ta2); + + template + bool operator==(const typed_array& ta1, const typed_array& ta2); + + /** + * A class template representing a typed array. 
+ * + * The `typed_array` class template provides a container interface over `array_data` for elements of a specific type `T`. + * Access to the elements is performed according to the layout `L` of the array. + * + * @tparam T The type of elements stored in the array. + * @tparam L The layout type of the array. Defaults to the default layout defined by the `arrow_traits` of `T`. + */ + template ::default_layout> + requires is_arrow_base_type + class typed_array + { + public: + + using layout_type = L; + + using reference = typename layout_type::reference; + using const_reference = typename layout_type::const_reference; + + using iterator = typename layout_type::iterator; + using const_iterator = typename layout_type::const_iterator; + + using size_type = typename layout_type::size_type; + using const_bitmap_range = typename layout_type::const_bitmap_range; + using const_value_range = typename layout_type::const_value_range; + + explicit typed_array(array_data data); + + // Element access + + ///@{ + /* + * Access specified element with bounds checking. + * + * Returns a reference to the element at the specified index \p i, with bounds checking. + * If \p i is not within the range of the container, an exception of type std::out_of_range is thrown. + * + * @param i The index of the element to access. + * @return A reference to the element at the specified index. + * @throws std::out_of_range if i is out of range. + */ + reference at(size_type i); + const_reference at(size_type i) const; + ///@} + + ///@{ + /* + * Access specified element. + * + * Returns a reference to the element at the specified index \p i. No bounds checking is performed. + * + * @param i The index of the element to access. + * @pre @p i must be less than the size of the container. + * @return A reference to the element at the specified index. + */ + reference operator[](size_type); + const_reference operator[](size_type) const; + ///@} + + ///@{ + /* + * Access the first element. 
+ * + * Returns a reference to the first element. + * @pre The container must not be empty (\see empty()). + * + * @return A reference to the first element. + */ + reference front(); + const_reference front() const; + ///@} + + ///@{ + /* + * Access the last element. + * + * Returns a reference to the last element. + * + * @pre The container must not be empty (\see empty()). + * + * @return A reference to the last element. + */ + reference back(); + const_reference back() const; + ///@} + + // Iterators + + ///@{ + /* Returns an iterator to the first element. + * If the container is empty, the returned iterator will be equal to end(). + * + * @return An iterator to the first element. + */ + iterator begin(); + const_iterator begin() const; + const_iterator cbegin() const; + ///@} + + ///@{ + /** + * Returns an iterator to the element following the last element. This element acts as a placeholder; attempting to access it results in undefined behavior. + * + * @return An iterator to the element following the last element of the container. + */ + iterator end(); + const_iterator end() const; + const_iterator cend() const; + ///@} + + /* + * @return A range of the bitmap. For each index position in this range, if `true` then there is a value at the same index position in the `values()` range, `false` means the value there is null. + */ + const_bitmap_range bitmap() const; + + /* + * @return A range of the values. + */ + const_value_range values() const; + + // Capacity + + /* + * @return true if the container is empty, false otherwise. + */ + bool empty() const; + + /* + * @return The number of elements in the container. 
+ */ + size_type size() const; + + // TODO: Add reserve, capacity, shrink_to_fit + + // Modifiers + + // TODO: Implement insert, erase, push_back, pop_back, clear, resize, swap + + friend std::partial_ordering operator<=>(const typed_array& ta1, const typed_array& ta2); + + friend bool operator==(const typed_array& ta1, const typed_array& ta2); + + private: + + array_data m_data; + layout_type m_layout; + }; + + // Constructors + template + requires is_arrow_base_type + typed_array::typed_array(array_data data) + : m_data(std::move(data)) + , m_layout(m_data) + { + } + + // Element access + + template + requires is_arrow_base_type + auto typed_array::at(size_type i) -> reference + { + if (i >= size()) + { + // TODO: Use our own format function + throw std::out_of_range( + "typed_array::at: index out of range for array of size " + std::to_string(size()) + + " at index " + std::to_string(i) + ); + } + return m_layout[i]; + } + + template + requires is_arrow_base_type + auto typed_array::at(size_type i) const -> const_reference + { + if (i >= size()) + { + // TODO: Use our own format function + throw std::out_of_range( + "typed_array::at: index out of range for array of size " + std::to_string(size()) + + " at index " + std::to_string(i) + ); + } + return m_layout[i]; + } + + template + requires is_arrow_base_type + auto typed_array::operator[](size_type i) -> reference + { + SPARROW_ASSERT_TRUE(i < size()); + return m_layout[i]; + } + + template + requires is_arrow_base_type + auto typed_array::operator[](size_type i) const -> const_reference + { + SPARROW_ASSERT_TRUE(i < size()); + return m_layout[i]; + } + + template + requires is_arrow_base_type + auto typed_array::front() -> reference + { + SPARROW_ASSERT_FALSE(empty()); + return m_layout[0]; + } + + template + requires is_arrow_base_type + auto typed_array::front() const -> const_reference + { + SPARROW_ASSERT_FALSE(empty()); + return m_layout[0]; + } + + template + requires is_arrow_base_type + auto 
typed_array::back() -> reference + { + SPARROW_ASSERT_FALSE(empty()); + return m_layout[size() - 1]; + } + + template + requires is_arrow_base_type + auto typed_array::back() const -> const_reference + { + SPARROW_ASSERT_FALSE(empty()); + return m_layout[size() - 1]; + } + + // Iterators + + template + requires is_arrow_base_type + auto typed_array::begin() -> iterator + { + return m_layout.begin(); + } + + template + requires is_arrow_base_type + auto typed_array::begin() const -> const_iterator + { + return m_layout.cbegin(); + } + + template + requires is_arrow_base_type + auto typed_array::end() -> iterator + { + return m_layout.end(); + } + + template + requires is_arrow_base_type + auto typed_array::end() const -> const_iterator + { + return m_layout.cend(); + } + + template + requires is_arrow_base_type + auto typed_array::cbegin() const -> const_iterator + { + return begin(); + } + + template + requires is_arrow_base_type + auto typed_array::cend() const -> const_iterator + { + return end(); + } + + template + requires is_arrow_base_type + auto typed_array::bitmap() const -> const_bitmap_range + { + return m_layout.bitmap(); + } + + template + requires is_arrow_base_type + auto typed_array::values() const -> const_value_range + { + return m_layout.values(); + } + + // Capacity + + template + requires is_arrow_base_type + bool typed_array::empty() const + { + return m_layout.size() == 0; + } + + template + requires is_arrow_base_type + auto typed_array::size() const -> size_type + { + return m_layout.size(); + } + + // Comparators + + template + requires is_arrow_base_type + auto operator<=>(const typed_array& ta1, const typed_array& ta2) -> std::partial_ordering + { + return lexicographical_compare_three_way(ta1, ta2); + } + + template + requires is_arrow_base_type + bool operator==(const typed_array& ta1, const typed_array& ta2) + { + return std::equal(ta1.cbegin(), ta1.cend(), ta2.cbegin(), ta2.cend()); + } + +} // namespace sparrow diff --git 
a/include/sparrow/variable_size_binary_layout.hpp b/include/sparrow/variable_size_binary_layout.hpp index 413faeb2..4f526fa2 100644 --- a/include/sparrow/variable_size_binary_layout.hpp +++ b/include/sparrow/variable_size_binary_layout.hpp @@ -111,6 +111,7 @@ namespace sparrow using bitmap_type = array_data::bitmap_type; using bitmap_const_reference = typename bitmap_type::const_reference; using value_type = std::optional; + using reference = const_reference_proxy; using const_reference = const_reference_proxy; using size_type = std::size_t; using iterator_tag = std::contiguous_iterator_tag; @@ -128,6 +129,7 @@ namespace sparrow using const_value_iterator = vs_binary_value_iterator; using const_bitmap_iterator = array_data::bitmap_type::const_iterator; + using iterator = layout_iterator; using const_iterator = layout_iterator; // // TODO: required by layout_iterator, replace them with the right types diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d7182ba9..1984f827 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -34,17 +34,20 @@ endif() set(SPARROW_TESTS_SOURCES main.cpp + test_algorithm.cpp test_allocator.cpp + test_array_data_creation.cpp test_array_data.cpp test_buffer.cpp + test_dictionary_encoded_layout.cpp test_dynamic_bitset.cpp - test_iterator.cpp test_fixed_size_layout.cpp - test_variable_size_binary_layout.cpp - test_dictionary_encoded_layout.cpp + test_iterator.cpp + test_memory.cpp test_mpl.cpp test_traits.cpp - test_memory.cpp + test_typed_array.cpp + test_variable_size_binary_layout.cpp ) set(test_target "test_sparrow_lib") add_executable(${test_target} ${SPARROW_TESTS_SOURCES}) diff --git a/test/array_data_creation.hpp b/test/array_data_creation.hpp new file mode 100644 index 00000000..f115dcd7 --- /dev/null +++ b/test/array_data_creation.hpp @@ -0,0 +1,120 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in 
compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include + +#include "sparrow/array_data.hpp" +#include "sparrow/data_traits.hpp" + +namespace sparrow::test +{ + + // Creates an array_data object for testing purposes. + // + // The bitmap is initialized with all bits set to true, except for the indices specified in the + // false_bitmap vector. The buffer is filled with values from 0 to n-1, where n is the size of the array. + // + // tparam T The type of the elements in the array. + // param n The size of the array. + // param offset The offset of the array. + // param false_bitmap A vector containing indices to set as false in the bitmap. + // return The created array_data object. + // throws std::invalid_argument If an index in false_bitmap is out of range. + template + sparrow::array_data + make_test_array_data(size_t n = 10, size_t offset = 0, const std::vector& false_bitmap = {}) + { + sparrow::array_data ad; + ad.type = sparrow::data_descriptor(sparrow::arrow_traits::type_id); + ad.bitmap = sparrow::dynamic_bitset(n, true); + for (const auto i : false_bitmap) + { + if (i >= n) + { + throw std::invalid_argument("Index out of range"); + } + ad.bitmap.set(i, false); + } + const size_t buffer_size = (n * sizeof(T)) / sizeof(uint8_t); + sparrow::buffer b(buffer_size); + for (uint8_t i = 0; i < n; ++i) + { + b.data()[i] = static_cast(i); + } + ad.buffers.push_back(b); + ad.length = n; + ad.offset = offset; + ad.child_data.emplace_back(); + return ad; + } + + // Creates an array_data object for testing with std::string elements. 
+ // + // param n The number of elements in the array. + // param offset The offset value for the array_data object. + // param false_bitmap A vector of indices to set as false in the bitmap. + // return The created array_data object. + // throws std::invalid_argument if any index in false_bitmap is out of range. + template <> + inline sparrow::array_data + make_test_array_data(size_t n, size_t offset, const std::vector& false_bitmap) + { + std::vector words; + for (size_t i = 0; i < n; ++i) + { + words.push_back(std::to_string(i)); + } + sparrow::array_data ad; + ad.bitmap.resize(n); + ad.buffers.resize(2); + ad.buffers[0].resize(sizeof(std::int64_t) * (n + 1)); + ad.buffers[1].resize(std::accumulate( + words.begin(), + words.end(), + size_t(0), + [](std::size_t res, const auto& s) + { + return res + s.size(); + } + )); + ad.buffers[0].data()[0] = 0u; + auto iter = ad.buffers[1].begin(); + const auto offset_func = [&ad]() + { + return ad.buffers[0].data(); + }; + for (size_t i = 0; i < words.size(); ++i) + { + offset_func()[i + 1] = offset_func()[i] + words[i].size(); + std::ranges::copy(words[i], iter); + iter += words[i].size(); + ad.bitmap.set(i, true); + } + + for (const auto i : false_bitmap) + { + if (i >= n) + { + throw std::invalid_argument("Index out of range"); + } + ad.bitmap.set(i, false); + } + + ad.length = n; + ad.offset = offset; + return ad; + } +} \ No newline at end of file diff --git a/test/test_algorithm.cpp b/test/test_algorithm.cpp new file mode 100644 index 00000000..3fdf9894 --- /dev/null +++ b/test/test_algorithm.cpp @@ -0,0 +1,47 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "sparrow/algorithm.hpp" + +#include "doctest/doctest.h" + +TEST_SUITE("algorithm") +{ + TEST_CASE("lexicographical_compare_three_way") + { + const std::vector v1 = {1, 2, 3}; + std::vector v2 = {1, 2, 3}; + CHECK_EQ(sparrow::lexicographical_compare_three_way(v1, v2), std::strong_ordering::equal); + + v2 = {1, 2, 4}; + CHECK_EQ(sparrow::lexicographical_compare_three_way(v1, v2), std::strong_ordering::less); + + v2 = {1, 2, 2}; + CHECK_EQ(sparrow::lexicographical_compare_three_way(v1, v2), std::strong_ordering::greater); + } + + TEST_CASE("lexicographical_compare_three_way with empty ranges") + { + const std::vector v1 = {1, 2, 3}; + const std::vector v2 = {}; + CHECK_EQ(sparrow::lexicographical_compare_three_way(v1, v2), std::strong_ordering::greater); + CHECK_EQ(sparrow::lexicographical_compare_three_way(v2, v1), std::strong_ordering::less); + + const std::vector v3 = {}; + CHECK_EQ(sparrow::lexicographical_compare_three_way(v2, v3), std::strong_ordering::equal); + } +} diff --git a/test/test_array_data_creation.cpp b/test/test_array_data_creation.cpp new file mode 100644 index 00000000..2ac5e91e --- /dev/null +++ b/test/test_array_data_creation.cpp @@ -0,0 +1,55 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "sparrow/algorithm.hpp" + +#include "array_data_creation.hpp" +#include "doctest/doctest.h" + +TEST_SUITE("array_data_creation") +{ + TEST_CASE("make_test_array_data") + { + SUBCASE("Default parameters") + { + constexpr size_t n = 10; + const sparrow::array_data data = sparrow::test::make_test_array_data(n); + CHECK_EQ(data.length, n); + CHECK_EQ(data.offset, 0); + for (size_t i = 0; i < n; i++) + { + CHECK(data.bitmap[i]); + } + } + + SUBCASE("Custom parameters") + { + constexpr size_t n = 5; + constexpr size_t offset = 2; + const std::vector false_bitmap = {1, 3}; + + const sparrow::array_data data = sparrow::test::make_test_array_data(n, offset, false_bitmap); + CHECK_EQ(data.length, n); + CHECK_EQ(data.offset, offset); + CHECK(data.bitmap[0]); + CHECK_FALSE(data.bitmap[1]); + CHECK(data.bitmap[2]); + CHECK_FALSE(data.bitmap[3]); + CHECK(data.bitmap[4]); + } + } +} \ No newline at end of file diff --git a/test/test_typed_array.cpp b/test/test_typed_array.cpp new file mode 100644 index 00000000..16c9af58 --- /dev/null +++ b/test/test_typed_array.cpp @@ -0,0 +1,309 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include // For Doctest +#include +#include +#include + +#include "sparrow/typed_array.hpp" + +#include "array_data_creation.hpp" +#include "doctest/doctest.h" + +using namespace sparrow; + +namespace +{ + template + constexpr O to_value_type(I i) + { + if constexpr (std::is_same_v) + { + return static_cast(i); + } + else if constexpr (std::is_arithmetic_v) + { + return static_cast(i); + } + else if constexpr (std::is_same_v) + { + return std::to_string(i); + } + } + + constexpr size_t n = 10; + constexpr size_t offset = 1; + const std::vector false_bitmap = {9}; +} + +TEST_SUITE("typed_array") +{ + TEST_CASE_TEMPLATE_DEFINE("all", T, all) + { + SUBCASE("constructor with parameter") + { + constexpr size_t n = 10; + constexpr size_t offset = 1; + const auto array_data = sparrow::test::make_test_array_data(n, offset); + const typed_array ta{array_data}; + CHECK_EQ(ta.size(), n - offset); + } + + // Element access + + SUBCASE("at") + { + const auto array_data = sparrow::test::make_test_array_data(n, offset, false_bitmap); + typed_array ta{array_data}; + for (typename typed_array::size_type i = 0; i < ta.size() - 1; ++i) + { + CHECK_EQ(ta.at(i).value(), to_value_type(i + offset)); + } + CHECK_FALSE(ta.at(false_bitmap[0] - offset).has_value()); + + CHECK_THROWS_AS(ta.at(ta.size()), std::out_of_range); + } + + SUBCASE("const at") + { + const auto array_data = sparrow::test::make_test_array_data(n, offset, false_bitmap); + const typed_array ta{array_data}; + for (typename typed_array::size_type i = 0; i < ta.size() - 1; 
++i) + { + CHECK_EQ(ta.at(i).value(), to_value_type(i + offset)); + } + CHECK_FALSE(ta.at(false_bitmap[0] - offset).has_value()); + + CHECK_THROWS_AS(ta.at(ta.size()), std::out_of_range); + } + + SUBCASE("operator[]") + { + const auto array_data = sparrow::test::make_test_array_data(n, offset, false_bitmap); + typed_array ta{array_data}; + for (typename typed_array::size_type i = 0; i < ta.size() - 1; ++i) + { + CHECK_EQ(ta[i].value(), to_value_type(i + 1)); + } + CHECK_FALSE(ta[ta.size() - 1].has_value()); + } + + SUBCASE("const operator[]") + { + const auto array_data = sparrow::test::make_test_array_data(n, offset, false_bitmap); + const typed_array ta{array_data}; + for (typename typed_array::size_type i = 0; i < ta.size() - 1; ++i) + { + CHECK_EQ(ta[i].value(), to_value_type(i + offset)); + } + CHECK_FALSE(ta[false_bitmap[0] - offset].has_value()); + } + + SUBCASE("front") + { + const auto array_data = sparrow::test::make_test_array_data(n, offset, false_bitmap); + typed_array ta{array_data}; + CHECK_EQ(ta.front().value(), to_value_type(1)); + } + + SUBCASE("const front") + { + const auto array_data = sparrow::test::make_test_array_data(n, offset, false_bitmap); + const typed_array ta{array_data}; + CHECK_EQ(ta.front().value(), to_value_type(1)); + } + + SUBCASE("back") + { + const auto array_data = sparrow::test::make_test_array_data(n, offset, false_bitmap); + typed_array ta{array_data}; + CHECK_FALSE(ta.back().has_value()); + } + + SUBCASE("const back") + { + const auto array_data = sparrow::test::make_test_array_data(n, offset, false_bitmap); + const typed_array ta{array_data}; + CHECK_FALSE(ta.back().has_value()); + } + + // Iterators + + SUBCASE("const iterators") + { + const auto array_data = sparrow::test::make_test_array_data(n, offset, false_bitmap); + const typed_array ta{array_data}; + + auto iter = ta.cbegin(); + CHECK(std::is_const_vvalue())>>); + auto iter_bis = ta.begin(); + CHECK(std::is_const_vvalue())>>); + CHECK_EQ(iter, iter_bis); + + 
const auto end = ta.cend(); + CHECK(std::is_const_vvalue())>>); + const auto end_bis = ta.end(); + CHECK(std::is_const_vvalue())>>); + CHECK_EQ(end, end_bis); + + for (typename typed_array::size_type i = 0; i < ta.size() - 1; ++iter, ++i) + { + REQUIRE(iter->has_value()); + CHECK_EQ(*iter, std::make_optional(ta[i].value())); + } + + CHECK_EQ(++iter, end); + + const auto array_data_empty = sparrow::test::make_test_array_data(0, 0); + const typed_array typed_array_empty(array_data_empty); + CHECK_EQ(typed_array_empty.cbegin(), typed_array_empty.cend()); + } + + SUBCASE("bitmap") + { + const auto array_data = sparrow::test::make_test_array_data(n, offset, false_bitmap); + const typed_array ta{array_data}; + const auto bitmap = ta.bitmap(); + REQUIRE_EQ(bitmap.size(), n - offset); + for (size_t i = 0; i < bitmap.size() - 1; ++i) + { + CHECK(bitmap[i]); + } + CHECK_FALSE(bitmap[8]); + } + + SUBCASE("values") + { + const auto array_data = sparrow::test::make_test_array_data(n, offset, false_bitmap); + const typed_array ta{array_data}; + const auto values = ta.values(); + CHECK_EQ(values.size(), n - offset); + for (size_t i = 0; i < values.size(); ++i) + { + CHECK_EQ(values[i], to_value_type(i + 1)); + } + } + + // Capacity + + SUBCASE("empty") + { + const auto array_data = sparrow::test::make_test_array_data(n, offset, false_bitmap); + const typed_array ta{array_data}; + CHECK_FALSE(ta.empty()); + + const auto array_data_empty = sparrow::test::make_test_array_data(0, 0); + const typed_array typed_array_empty(array_data_empty); + CHECK(typed_array_empty.empty()); + } + + SUBCASE("size") + { + const auto array_data = sparrow::test::make_test_array_data(n, offset, false_bitmap); + const typed_array ta{array_data}; + CHECK_EQ(ta.size(), n - offset); + } + + // Operators + + SUBCASE("<=>") + { + const auto array_data = sparrow::test::make_test_array_data(n, offset, false_bitmap); + const typed_array ta{array_data}; + CHECK_EQ(ta <=> ta, std::strong_ordering::equal); + + const 
auto array_data_less = sparrow::test::make_test_array_data(n - 1, offset - 1, {8}); + const typed_array typed_array_less(array_data_less); + CHECK_EQ(ta <=> typed_array_less, std::strong_ordering::greater); + CHECK_EQ(typed_array_less <=> ta, std::strong_ordering::less); + } + + SUBCASE("==") + { + const auto array_data = sparrow::test::make_test_array_data(n, offset, false_bitmap); + const typed_array ta{array_data}; + const typed_array ta_same{array_data}; + CHECK(ta == ta); + CHECK(ta == ta_same); + + const auto array_data_less = sparrow::test::make_test_array_data(n - 1, offset - 1, {8}); + const ::typed_array ta_less{array_data_less}; + CHECK_FALSE(ta == ta_less); + CHECK_FALSE(ta_less == ta); + } + + SUBCASE("!=") + { + const auto array_data = sparrow::test::make_test_array_data(n, offset, false_bitmap); + const typed_array ta{array_data}; + const typed_array ta_same{array_data}; + CHECK_FALSE(ta != ta); + CHECK_FALSE(ta != ta_same); + + const auto array_data_less = sparrow::test::make_test_array_data(n - 1, offset - 1, {8}); + const typed_array ta_less{array_data_less}; + CHECK(ta != ta_less); + CHECK(ta_less != ta); + } + + SUBCASE("<") + { + const auto array_data = sparrow::test::make_test_array_data(n, offset, false_bitmap); + const typed_array ta{array_data}; + const typed_array ta_same{array_data}; + CHECK_FALSE(ta < ta); + CHECK_FALSE(ta < ta_same); + + const auto array_data_less = sparrow::test::make_test_array_data(n - 1, offset - 1, {8}); + const typed_array ta_less{array_data_less}; + CHECK_FALSE(ta < ta_less); + CHECK(ta_less < ta); + } + + SUBCASE(">") + { + const auto array_data = sparrow::test::make_test_array_data(n, offset, false_bitmap); + const typed_array ta{array_data}; + const typed_array ta_same{array_data}; + CHECK_FALSE(ta > ta); + CHECK_FALSE(ta > ta_same); + + const auto array_data_less = sparrow::test::make_test_array_data(n - 1, offset - 1, {8}); + const ::typed_array ta_less{array_data_less}; + CHECK(ta > ta_less); + 
CHECK_FALSE(ta_less > ta); + } + } + + TEST_CASE_TEMPLATE_INVOKE( + all, + bool, + std::uint8_t, + std::int8_t, + std::uint16_t, + std::int16_t, + std::uint32_t, + std::int32_t, + std::uint64_t, + std::int64_t, + std::string, + float16_t, + float32_t, + float64_t + ); +} diff --git a/test/test_variable_size_binary_layout.cpp b/test/test_variable_size_binary_layout.cpp index 44e24b5b..7ae8e0cb 100644 --- a/test/test_variable_size_binary_layout.cpp +++ b/test/test_variable_size_binary_layout.cpp @@ -34,7 +34,7 @@ namespace sparrow m_data.buffers[1].resize(std::accumulate( words, words + nb_words, - 0u, + size_t(0), [](std::size_t res, const auto& s) { return res + s.size();