From dcab0186d01362e835737a4a9b8f6b04e5c7a699 Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Fri, 5 Apr 2024 17:04:39 +0200 Subject: [PATCH] Implement typed_array --- CMakeLists.txt | 1 + include/sparrow/data_traits.hpp | 6 +- include/sparrow/fixed_size_layout.hpp | 5 +- include/sparrow/typed_array.hpp | 357 +++++++++++++++++ .../sparrow/variable_size_binary_layout.hpp | 2 + test/CMakeLists.txt | 1 + test/test_typed_array.cpp | 371 ++++++++++++++++++ 7 files changed, 736 insertions(+), 7 deletions(-) create mode 100644 include/sparrow/typed_array.hpp create mode 100644 test/test_typed_array.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index f16b8717..bd6d7ae7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -70,6 +70,7 @@ set(SPARROW_HEADERS ${SPARROW_INCLUDE_DIR}/sparrow/memory.hpp ${SPARROW_INCLUDE_DIR}/sparrow/mp_utils.hpp ${SPARROW_INCLUDE_DIR}/sparrow/sparrow_version.hpp + ${SPARROW_INCLUDE_DIR}/sparrow/typed_array.hpp ${SPARROW_INCLUDE_DIR}/sparrow/variable_size_binary_layout.hpp ${SPARROW_INCLUDE_DIR}/sparrow/details/3rdparty/float16_t.hpp diff --git a/include/sparrow/data_traits.hpp b/include/sparrow/data_traits.hpp index 2920d5d6..0a704979 100644 --- a/include/sparrow/data_traits.hpp +++ b/include/sparrow/data_traits.hpp @@ -14,9 +14,9 @@ #pragma once -#include "data_type.hpp" -#include "fixed_size_layout.hpp" -#include "variable_size_binary_layout.hpp" +#include "sparrow/data_type.hpp" +#include "sparrow/fixed_size_layout.hpp" +#include "sparrow/variable_size_binary_layout.hpp" namespace sparrow { diff --git a/include/sparrow/fixed_size_layout.hpp b/include/sparrow/fixed_size_layout.hpp index f3a4866e..3ae28aea 100644 --- a/include/sparrow/fixed_size_layout.hpp +++ b/include/sparrow/fixed_size_layout.hpp @@ -14,10 +14,7 @@ #pragma once -#include #include -#include -#include #include "sparrow/array_data.hpp" #include "sparrow/buffer.hpp" @@ -45,7 +42,7 @@ namespace sparrow using self_type = fixed_size_layout; using inner_value_type = T; using 
inner_reference = inner_value_type&; - using inner_const_reference = const inner_reference; + using inner_const_reference = const inner_value_type&; using bitmap_type = array_data::bitmap_type; using bitmap_reference = typename bitmap_type::reference; using bitmap_const_reference = typename bitmap_type::const_reference; diff --git a/include/sparrow/typed_array.hpp b/include/sparrow/typed_array.hpp new file mode 100644 index 00000000..e24ef6fa --- /dev/null +++ b/include/sparrow/typed_array.hpp @@ -0,0 +1,357 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include + +#include "sparrow/array_data.hpp" +#include "sparrow/data_traits.hpp" + +namespace sparrow +{ + template ::default_layout> + class typed_array + { + public: + + using layout_type = L; + + using reference = typename layout_type::reference; + using const_reference = typename layout_type::const_reference; + + using iterator = typename layout_type::iterator; + using const_iterator = typename layout_type::const_iterator; + + using size_type = typename layout_type::size_type; + using const_bitmap_range = typename layout_type::const_bitmap_range; + using const_value_range = typename layout_type::const_value_range; + + explicit typed_array(array_data data); + + // Element access + + ///@{ + /* + * @brief Access specified element with bounds checking.
+ * + * @description Returns a reference to the element at the specified index \p i, with bounds checking. + * If \p i is not within the range of the container, an exception of type std::out_of_range is thrown. + * + * @param i The index of the element to access. + * @return A reference to the element at the specified index. + * @throws std::out_of_range if i is out of range. + */ + reference at(size_type i); + const_reference at(size_type i) const; + ///@} + + ///@{ + /* + * @brief Access specified element. + * + * @description Returns a reference to the element at the specified index \p i. No bounds checking is + * performed. + * + * @param i The index of the element to access. + * @return A reference to the element at the specified index. + */ + reference operator[](size_type); + const_reference operator[](size_type) const; + ///@} + + ///@{ + /* + * @brief Access the first element. + * + * @description Returns a reference to the first element. + * Calling front on an empty container is undefined. + * + * @return A reference to the first element. + */ + reference front(); + const_reference front() const; + ///@} + + ///@{ + /* + * @brief Access the last element. + * + * @description Returns a reference to the last element. + * Calling back on an empty container is undefined. + * + * @return A reference to the last element. + */ + reference back(); + const_reference back() const; + ///@} + + // Iterators + + ///@{ + /* + * @brief Returns an iterator to the beginning. + * + * @description Returns an iterator to the first element of the vector. + * If the vector is empty, the returned iterator will be equal to end(). + * + * @return An iterator to the beginning. + */ + iterator begin(); + const_iterator cbegin() const; + ///@} + + ///@{ + /** + * @brief Returns an iterator to the end. + * + * @description Returns an iterator to the element following the last element of the vector. 
+ * This element acts as a placeholder; attempting to access it results in undefined behavior. + * + * @return An iterator to the end. + */ + iterator end(); + const_iterator cend() const; + ///@} + + /* + * @brief Returns a range of the bitmap. + * + * @return A range of the bitmap. + */ + const_bitmap_range bitmap() const; + + /* + * @brief Returns a range of the values. + * + * @return A range of the values. + */ + const_value_range values() const; + + // Capacity + + /* + * @brief Checks whether the container is empty. + * + * @description Returns whether the container is empty (i.e. whether its size is 0). + * + * @return true if the container is empty, false otherwise. + */ + bool empty() const; + + /* + * @brief Returns the number of elements. + * + * @description Returns the number of elements in the container, i.e. std::distance(begin(), end()). + * + * @return The number of elements in the container. + */ + size_type size() const; + + // TODO: Add reserve, capacity, shrink_to_fit + + // Modifiers + + // TODO: Implement insert, erase, push_back, pop_back, clear, resize, swap + + std::strong_ordering operator<=>(const typed_array& other) const; + + bool operator==(const typed_array& other) const; + + bool operator!=(const typed_array& other) const; + + bool operator<(const typed_array& other) const; + + bool operator>(const typed_array& other) const; + + bool operator<=(const typed_array& other) const; + + bool operator>=(const typed_array& other) const; + + private: + + array_data m_data; + layout_type m_layout; + }; + + // Constructors + template + typed_array::typed_array(array_data data) + : m_data(std::move(data)) + , m_layout(m_data) + { + } + + // Element access + + template + auto typed_array::at(size_type i) -> reference + { + if (i >= size()) + { + throw std::out_of_range( + "typed_array::at: index out of range for array of size " + std::to_string(size()) + + " at index " + std::to_string(i) + ); + } + return m_layout[i]; + } + + template + auto 
typed_array::at(size_type i) const -> const_reference + { + if (i >= size()) + { + throw std::out_of_range( + "typed_array::at: index out of range for array of size " + std::to_string(size()) + + " at index " + std::to_string(i) + ); + } + return m_layout[i]; + } + + template + auto typed_array::operator[](size_type i) -> reference + { + return m_layout[i]; + } + + template + auto typed_array::operator[](size_type i) const -> const_reference + { + return m_layout[i]; + } + + template + auto typed_array::front() -> reference + { + return m_layout[0]; + } + + template + auto typed_array::front() const -> const_reference + { + return m_layout[0]; + } + + template + auto typed_array::back() -> reference + { + return m_layout[size() - 1]; + } + + template + auto typed_array::back() const -> const_reference + { + return m_layout[size() - 1]; + } + + // Iterators + + template + auto typed_array::begin() -> iterator + { + return m_layout.begin(); + } + + template + auto typed_array::end() -> iterator + { + return m_layout.end(); + } + + template + auto typed_array::cbegin() const -> const_iterator + { + return m_layout.cbegin(); + } + + template + auto typed_array::cend() const -> const_iterator + { + return m_layout.cend(); + } + + template + auto typed_array::bitmap() const -> const_bitmap_range + { + return m_layout.bitmap(); + } + + template + auto typed_array::values() const -> const_value_range + { + return m_layout.values(); + } + + // Capacity + + template + bool typed_array::empty() const + { + return m_layout.size() == 0; + } + + template + auto typed_array::size() const -> size_type + { + return m_layout.size(); + } + + // Comparators + + template + auto typed_array::operator<=>(const typed_array& other) const -> std::strong_ordering + { + return std::lexicographical_compare_three_way(cbegin(), cend(), other.cbegin(), other.cend()); + } + + template + bool typed_array::operator==(const typed_array& other) const + { + return std::equal(cbegin(), cend(), 
other.cbegin(), other.cend()); + } + + template + bool typed_array::operator!=(const typed_array& other) const + { + return !(*this == other); + } + + template + bool typed_array::operator<(const typed_array& other) const + { + return std::lexicographical_compare(cbegin(), cend(), other.cbegin(), other.cend()); + } + + template + bool typed_array::operator>(const typed_array& other) const + { + return other < *this; + } + + template + bool typed_array::operator<=(const typed_array& other) const + { + return !(other < *this); + } + + template + bool typed_array::operator>=(const typed_array& other) const + { + return !(*this < other); + } + +} // namespace sparrow diff --git a/include/sparrow/variable_size_binary_layout.hpp b/include/sparrow/variable_size_binary_layout.hpp index 5c257dfc..a1806600 100644 --- a/include/sparrow/variable_size_binary_layout.hpp +++ b/include/sparrow/variable_size_binary_layout.hpp @@ -110,6 +110,7 @@ namespace sparrow using bitmap_type = array_data::bitmap_type; using bitmap_const_reference = typename bitmap_type::const_reference; using value_type = std::optional; + using reference = const_reference_proxy; using const_reference = const_reference_proxy; using size_type = std::size_t; using iterator_tag = std::contiguous_iterator_tag; @@ -127,6 +128,7 @@ namespace sparrow using const_value_iterator = vs_binary_value_iterator; using const_bitmap_iterator = array_data::bitmap_type::const_iterator; + using iterator = layout_iterator; using const_iterator = layout_iterator; // // TODO: required by layout_iterator, replace them with the right types diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d7182ba9..a16ea2ca 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -45,6 +45,7 @@ set(SPARROW_TESTS_SOURCES test_mpl.cpp test_traits.cpp test_memory.cpp + test_typed_array.cpp ) set(test_target "test_sparrow_lib") add_executable(${test_target} ${SPARROW_TESTS_SOURCES}) diff --git a/test/test_typed_array.cpp 
b/test/test_typed_array.cpp new file mode 100644 index 00000000..bec389e2 --- /dev/null +++ b/test/test_typed_array.cpp @@ -0,0 +1,371 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include // For Doctest +#include +#include + +#include "sparrow/typed_array.hpp" + +#include "doctest/doctest.h" + +using namespace sparrow; + +namespace +{ + template + array_data make_test_array_data(size_t n = 10, size_t offset = 0) + { + array_data ad; + ad.type = data_descriptor(arrow_traits::type_id); + ad.bitmap = dynamic_bitset(n, true); + if (n >= 10) + { + ad.bitmap.set(9, false); + } + size_t buffer_size = (n * sizeof(T)) / sizeof(uint8_t); + buffer b(buffer_size); + if constexpr (std::is_same_v) + { + std::fill(b.data(), b.data() + n, 0x01); + if (n > 0) + { + b.data()[0] = 0x00; + } + } + else + { + std::iota(b.data(), b.data() + n, static_cast(0)); + } + + ad.buffers.push_back(b); + ad.length = n; + ad.offset = offset; + ad.child_data.emplace_back(); + return ad; + } + + template <> + array_data make_test_array_data(size_t n, size_t offset) + { + std::vector words; + for (size_t i = 0; i < n; ++i) + { + words.push_back(std::to_string(i)); + } + array_data ad; + ad.bitmap.resize(n); + ad.buffers.resize(2); + ad.buffers[0].resize(sizeof(std::int64_t) * (n + 1)); + ad.buffers[1].resize(std::accumulate( + words.begin(), + words.end(), + size_t(0), + [](std::size_t res, const auto& s) + { +
return res + s.size(); + } + )); + ad.buffers[0].data()[0] = 0u; + auto iter = ad.buffers[1].begin(); + const auto offset_func = [&ad]() + { + return ad.buffers[0].data(); + }; + for (size_t i = 0; i < words.size(); ++i) + { + offset_func()[i + 1] = offset_func()[i] + words[i].size(); + std::ranges::copy(words[i], iter); + iter += words[i].size(); + ad.bitmap.set(i, true); + } + if (n > 0) + { + ad.bitmap.set(n - 1, false); + } + + ad.length = n; + ad.offset = offset; + return ad; + } + + template + typed_array make_typed_array_type() + { + auto array_data = make_test_array_data(10, 1); + return typed_array(std::move(array_data)); + } + + template + O to_value_type(I i) + { + if constexpr (std::is_arithmetic_v) + { + return static_cast(i); + } + else if constexpr (std::is_same_v) + { + return std::to_string(i); + } + } + +} + +TEST_SUITE("typed_array") +{ + TEST_CASE_TEMPLATE_DEFINE("scalar", T, scalar) + { + // Element access + + SUBCASE("constructor with parameter") + { + const auto array_data = make_test_array_data(10, 1); + typed_array typed_array(array_data); + CHECK_EQ(typed_array.size(), 9); + } + + SUBCASE("at") + { + const auto array_data = make_test_array_data(10, 1); + typed_array typed_array(array_data); + for (typename ::typed_array::size_type i = 0; i < typed_array.size() - 1; ++i) + { + CHECK_EQ(typed_array.at(i).value(), to_value_type(i + 1)); + } + CHECK_FALSE(typed_array.at(typed_array.size() - 1).has_value()); + + CHECK_THROWS_AS(typed_array.at(typed_array.size()), std::out_of_range); + } + + SUBCASE("const at") + { + const auto array_data = make_test_array_data(10, 1); + const typed_array typed_array(array_data); + for (typename ::typed_array::size_type i = 0; i < typed_array.size() - 1; ++i) + { + CHECK_EQ(typed_array.at(i).value(), to_value_type(i + 1)); + } + CHECK_FALSE(typed_array.at(typed_array.size() - 1).has_value()); + + CHECK_THROWS_AS(typed_array.at(typed_array.size()), std::out_of_range); + } + + SUBCASE("operator[]") + { + const 
auto array_data = make_test_array_data(10, 1); + typed_array typed_array(array_data); + for (typename ::typed_array::size_type i = 0; i < typed_array.size() - 1; ++i) + { + CHECK_EQ(typed_array[i].value(), to_value_type(i + 1)); + } + CHECK_FALSE(typed_array[typed_array.size() - 1].has_value()); + } + + SUBCASE("const operator[]") + { + const auto array_data = make_test_array_data(10, 1); + const typed_array typed_array(array_data); + for (typename ::typed_array::size_type i = 0; i < typed_array.size() - 1; ++i) + { + CHECK_EQ(typed_array[i].value(), to_value_type(i + 1)); + } + CHECK_FALSE(typed_array[typed_array.size() - 1].has_value()); + } + + SUBCASE("front") + { + const auto array_data = make_test_array_data(10, 1); + typed_array typed_array(array_data); + CHECK_EQ(typed_array.front().value(), to_value_type(1)); + } + + SUBCASE("const front") + { + const auto array_data = make_test_array_data(10, 1); + const typed_array typed_array(array_data); + CHECK_EQ(typed_array.front().value(), to_value_type(1)); + } + + SUBCASE("back") + { + const auto array_data = make_test_array_data(10, 1); + typed_array typed_array(array_data); + CHECK_FALSE(typed_array.back().has_value()); + } + + SUBCASE("const back") + { + const auto array_data = make_test_array_data(10, 1); + const typed_array typed_array(array_data); + CHECK_FALSE(typed_array.back().has_value()); + } + + // Iterators + + // SUBCASE("const iterators") + // { + // const auto array_data = make_test_array_data(10, 1); + // const typed_array typed_array(array_data); + + // auto iter = typed_array.cbegin(); + // const auto end = typed_array.cend(); + + // for (typename ::typed_array::size_type i = 0; i != typed_array.size() - 1; ++iter, ++i) + // { + // REQUIRE(iter->has_value()); + // CHECK_EQ(*iter, std::make_optional(typed_array[i].value())); + // } + + // CHECK_EQ(++iter, end); + + // const auto array_data_empty = make_test_array_data(0, 0); + // const ::typed_array typed_array_empty(array_data_empty); + // 
CHECK_EQ(typed_array_empty.cbegin(), typed_array_empty.cend()); + // } + + SUBCASE("bitmap") + { + const auto array_data = make_test_array_data(10, 1); + const typed_array typed_array(array_data); + const auto bitmap = typed_array.bitmap(); + CHECK_EQ(bitmap.size(), 9); + for (size_t i = 0; i < bitmap.size() - 1; ++i) + { + CHECK(bitmap[i]); + } + CHECK_FALSE(bitmap[8]); + } + + SUBCASE("values") + { + const auto array_data = make_test_array_data(10, 1); + const typed_array typed_array(array_data); + const auto values = typed_array.values(); + CHECK_EQ(values.size(), 9); + for (size_t i = 0; i < values.size(); ++i) + { + CHECK_EQ(values[i], to_value_type(i + 1)); + } + } + + // Capacity + + SUBCASE("empty") + { + const auto array_data = make_test_array_data(10, 1); + typed_array typed_array(array_data); + CHECK_FALSE(typed_array.empty()); + + const auto array_data_empty = make_test_array_data(0, 0); + const ::typed_array typed_array_empty(array_data_empty); + } + + SUBCASE("size") + { + const auto array_data = make_test_array_data(10, 1); + typed_array typed_array(array_data); + CHECK_EQ(typed_array.size(), 9); + } + + // Operators + + SUBCASE("<=>") + { + const auto array_data = make_test_array_data(10, 1); + const ::typed_array typed_array(array_data); + const ::typed_array typed_array_copy(array_data); + CHECK_EQ(typed_array <=> typed_array, std::strong_ordering::equal); + + const auto array_data_less = make_test_array_data(9, 0); + const ::typed_array typed_array_less(array_data_less); + CHECK_EQ(typed_array <=> typed_array_less, std::strong_ordering::greater); + CHECK_EQ(typed_array_less <=> typed_array, std::strong_ordering::less); + } + + SUBCASE("==") + { + const auto array_data = make_test_array_data(10, 1); + const ::typed_array typed_array(array_data); + const ::typed_array typed_array_copy(array_data); + CHECK(typed_array == typed_array); + CHECK(typed_array == typed_array_copy); + + const auto array_data_less = make_test_array_data(9, 0); + const 
::typed_array typed_array_less(array_data_less); + CHECK_FALSE(typed_array == typed_array_less); + CHECK_FALSE(typed_array_less == typed_array); + } + + SUBCASE("!=") + { + const auto array_data = make_test_array_data(10, 1); + const ::typed_array typed_array(array_data); + const ::typed_array typed_array_copy(array_data); + CHECK_FALSE(typed_array != typed_array); + CHECK_FALSE(typed_array != typed_array_copy); + + const auto array_data_less = make_test_array_data(9, 0); + const ::typed_array typed_array_less(array_data_less); + CHECK(typed_array != typed_array_less); + CHECK(typed_array_less != typed_array); + } + + SUBCASE("<") + { + const auto array_data = make_test_array_data(10, 1); + const ::typed_array typed_array(array_data); + const ::typed_array typed_array_copy(array_data); + CHECK_FALSE(typed_array < typed_array); + CHECK_FALSE(typed_array < typed_array_copy); + + const auto array_data_less = make_test_array_data(9, 0); + const ::typed_array typed_array_less(array_data_less); + CHECK_FALSE(typed_array < typed_array_less); + CHECK(typed_array_less < typed_array); + } + + SUBCASE(">") + { + const auto array_data = make_test_array_data(10, 1); + const ::typed_array typed_array(array_data); + const ::typed_array typed_array_copy(array_data); + CHECK_FALSE(typed_array > typed_array); + CHECK_FALSE(typed_array > typed_array_copy); + + const auto array_data_less = make_test_array_data(9, 0); + const ::typed_array typed_array_less(array_data_less); + CHECK(typed_array > typed_array_less); + CHECK_FALSE(typed_array_less > typed_array); + } + } + + TEST_CASE_TEMPLATE_INVOKE( + scalar, + bool, + std::uint8_t, + std::int8_t, + std::uint16_t, + std::int16_t, + std::uint32_t, + std::int32_t, + std::uint64_t, + std::int64_t, + std::string + // float16_t + // float32_t, + // float64_t + ); +}