Skip to content

Commit

Permalink
Add resizing method in primitive array
Browse files Browse the repository at this point in the history
  • Loading branch information
Alex-PLACET committed Oct 21, 2024
1 parent 68335e5 commit 0153cf3
Show file tree
Hide file tree
Showing 16 changed files with 1,761 additions and 248 deletions.
10 changes: 7 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -120,10 +120,15 @@ set(SPARROW_HEADERS
${SPARROW_INCLUDE_DIR}/sparrow/arrow_interface/arrow_schema/smart_pointers.hpp
# buffer
${SPARROW_INCLUDE_DIR}/sparrow/buffer/allocator.hpp
${SPARROW_INCLUDE_DIR}/sparrow/buffer/buffer.hpp
${SPARROW_INCLUDE_DIR}/sparrow/buffer/buffer_adaptor.hpp
${SPARROW_INCLUDE_DIR}/sparrow/buffer/buffer_view.hpp
${SPARROW_INCLUDE_DIR}/sparrow/buffer/buffer.hpp
${SPARROW_INCLUDE_DIR}/sparrow/buffer/dynamic_bitset.hpp
${SPARROW_INCLUDE_DIR}/sparrow/buffer/dynamic_bitset/dynamic_bitset_base.hpp
${SPARROW_INCLUDE_DIR}/sparrow/buffer/dynamic_bitset/bitset_iterator.hpp
${SPARROW_INCLUDE_DIR}/sparrow/buffer/dynamic_bitset/bitset_reference.hpp
${SPARROW_INCLUDE_DIR}/sparrow/buffer/dynamic_bitset/dynamic_bitset_view.hpp
${SPARROW_INCLUDE_DIR}/sparrow/buffer/dynamic_bitset/dynamic_bitset.hpp
# config
${SPARROW_INCLUDE_DIR}/sparrow/config/config.hpp
${SPARROW_INCLUDE_DIR}/sparrow/config/sparrow_version.hpp
Expand Down Expand Up @@ -178,8 +183,7 @@ set(SPARROW_SRC
${SPARROW_SOURCE_DIR}/arrow_interface/arrow_schema.cpp
${SPARROW_SOURCE_DIR}/list_value.cpp
${SPARROW_SOURCE_DIR}/run_encoded_array.cpp
${SPARROW_SOURCE_DIR}/struct_value.cpp
)
${SPARROW_SOURCE_DIR}/struct_value.cpp)

add_library(sparrow SHARED ${SPARROW_HEADERS} ${SPARROW_SRC})
# TODO: handle static lib, so name and versioning
Expand Down
163 changes: 153 additions & 10 deletions include/sparrow/arrow_array_schema_proxy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,15 @@
#include <string_view>

#include "sparrow/arrow_interface/arrow_array/private_data.hpp"
#include "sparrow/arrow_interface/arrow_array_schema_info_utils.hpp"
#include "sparrow/arrow_interface/arrow_schema/private_data.hpp"
#include "sparrow/buffer/buffer_view.hpp"
#include "sparrow/buffer/dynamic_bitset/non_owning_dynamic_bitset.hpp"
#include "sparrow/c_interface.hpp"
#include "sparrow/config/config.hpp"
#include "sparrow/types/data_type.hpp"


namespace sparrow
{
/**
Expand Down Expand Up @@ -122,20 +125,22 @@ namespace sparrow
[[nodiscard]] SPARROW_API size_t length() const;

/**
* Set the length of the `ArrowArray`. This method does not resize the buffers of the `ArrowArray`.
* You have to change the length before replacing/resizing the buffers to have the right sizes when
* calling `buffers()`.
* @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow.
* @param length The length to set.
*/
SPARROW_API void set_length(size_t length);
[[nodiscard]] SPARROW_API int64_t null_count() const;

/**
* Set the null count of the `ArrowArray`. This method does not change the bitmap.
* @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow.
* @param null_count The null count to set.
*/
SPARROW_API void set_null_count(int64_t null_count);
/**
 * @return The offset of the `ArrowArray`.
 */
[[nodiscard]] SPARROW_API size_t offset() const;

/**
* Set the offset of the `ArrowArray`.
Expand All @@ -146,7 +151,8 @@ namespace sparrow
[[nodiscard]] SPARROW_API size_t n_buffers() const;

/**
* Set the number of buffers of the `ArrowArray`. Resize the buffers vector of the `ArrowArray`
* private data.
* @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow.
* @param n_buffers The number of buffers to set.
*/
Expand All @@ -156,21 +162,126 @@ namespace sparrow
[[nodiscard]] SPARROW_API std::vector<sparrow::buffer_view<uint8_t>>& buffers();

/**
* Set the buffer at the given index. You have to call the `set_length` method before calling this
* method to have the right sizes when calling `buffers()`.
* @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow.
* @param index The index of the buffer to set.
* @param buffer The buffer to set.
*/
SPARROW_API void set_buffer(size_t index, const buffer_view<uint8_t>& buffer);

/**
* Set the buffer at the given index. You have to call the `set_length` method before calling this
* method to have the right sizes when calling `buffers()`.
* @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow.
* @param index The index of the buffer to set.
* @param buffer The buffer to set.
*/
SPARROW_API void set_buffer(size_t index, buffer<uint8_t>&& buffer);

/**
* Resize the bitmap buffer of the `ArrowArray`.
* @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow.
* @exception `arrow_proxy_exception` If the array format does not support a validity bitmap.
* @param new_size The new size of the bitmap buffer.
*/
SPARROW_API void resize_bitmap(size_t new_size);

/**
* Insert a value in the bitmap buffer at the given index.
* @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow.
* @exception `arrow_proxy_exception` If the array format does not support a validity bitmap.
* @exception `std::out_of_range` If the index is greater than the length of the bitmap.
* @param index The index where to insert the value. Must be less than the length of the bitmap.
* @param value The value to insert.
* @return The index of the inserted value.
*/
SPARROW_API size_t insert_bitmap(size_t index, bool value);

/**
* Insert several elements with the same value in the bitmap buffer at the given index.
* @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow.
* @exception `arrow_proxy_exception` If the array format does not support a validity bitmap.
* @exception `std::out_of_range` If the index is greater than the length of the bitmap.
* @param index The index where to insert the value. Must be less than the length of the bitmap.
* @param value The value to insert.
* @param count The number of times to insert the value.
* @return The index of the first inserted value.
*/
SPARROW_API size_t insert_bitmap(size_t index, bool value, size_t count);

/**
* Insert several elements in the bitmap buffer at the given index.
* @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow.
* @exception `arrow_proxy_exception` If the array format does not support a validity bitmap.
* @exception `std::out_of_range` If the index is greater than the length of the bitmap.
* @param index The index where to insert the values. Must be less than the length of the bitmap.
* @param values The values to insert.
* @return The index of the first inserted value.
*/
SPARROW_API size_t insert_bitmap(size_t index, std::initializer_list<bool> values);

/**
* Insert several elements in the bitmap buffer at the given index.
* @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow.
* @exception `arrow_proxy_exception` If the array format does not support a validity bitmap.
* @exception `std::out_of_range` If the index is greater than the length of the bitmap.
* @param index The index where to insert the values. Must be less than or equal to the length of the bitmap.
* @param first The beginning of the range of values to insert.
* @param last The end of the range of values to insert.
* @return The index of the first inserted value.
*/
template <std::input_iterator InputIt>
size_t insert_bitmap(size_t index, InputIt first, InputIt last);

/**
* Insert several elements in the bitmap buffer at the given index.
* @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow.
* @exception `arrow_proxy_exception` If the array format does not support a validity bitmap.
* @exception `std::out_of_range` If the index is greater than the length of the bitmap.
* @param index The index where to insert the values. Must be less than or equal to the length of the bitmap.
* @param range The range of values to insert.
* @return The index of the first inserted value.
*/
template <std::ranges::input_range R>
size_t insert_bitmap(size_t index, const R& range);

/**
* Erase a value in the bitmap buffer at the given index.
* @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow.
* @exception `arrow_proxy_exception` If the array format does not support a validity bitmap.
* @exception `std::out_of_range` If the index is greater than the length of the bitmap.
* @param index The index of the element to erase. Must be less than the length of the bitmap.
* @return The index of the erased value.
*/
SPARROW_API size_t erase_bitmap(size_t index);

/**
* Erase several elements in the bitmap buffer at the given index.
* @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow.
* @exception `arrow_proxy_exception` If the array format does not support a validity bitmap.
* @exception `std::out_of_range` If the index is greater than the length of the bitmap.
* @param index The index of the first value to erase. Must be less than the length of the bitmap.
* @param count The number of elements to erase.
* @return The index of the first erased value.
*/
SPARROW_API size_t erase_bitmap(size_t index, size_t count);

/**
* Push a value at the end of the bitmap buffer.
* @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow.
* @exception `arrow_proxy_exception` If the array format does not support a validity bitmap.
* @param value The value to push.
*/
SPARROW_API void push_back_bitmap(bool value);

/**
* Pop a value at the end of the bitmap buffer.
* @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow.
* @exception `arrow_proxy_exception` If the array format does not support a validity bitmap.
*/
SPARROW_API void pop_back_bitmap();

/**
* Add children.
* @exception `arrow_proxy_exception` If the `ArrowArray` or `ArrowSchema` were not created with
Expand Down Expand Up @@ -235,6 +346,11 @@ namespace sparrow
[[nodiscard]] SPARROW_API ArrowSchema& schema();
[[nodiscard]] SPARROW_API const ArrowSchema& schema() const;

/**
 * @return A pointer to the `arrow_schema_private_data` associated with the schema.
 *         NOTE(review): presumably only meaningful when the `ArrowSchema` was created
 *         with sparrow - confirm against the implementation.
 */
[[nodiscard]] SPARROW_API arrow_schema_private_data* get_schema_private_data();
[[nodiscard]] SPARROW_API arrow_array_private_data* get_array_private_data();

SPARROW_API void update_buffers();

private:

std::variant<ArrowArray*, ArrowArray> m_array;
Expand All @@ -254,7 +370,8 @@ namespace sparrow

SPARROW_API void resize_children(size_t children_count);

void update_buffers();
[[nodiscard]] SPARROW_API non_owning_dynamic_bitset<uint8_t> get_non_owning_dynamic_bitset();

void update_children();
void update_dictionary();
void update_null_count();
Expand All @@ -265,13 +382,12 @@ namespace sparrow

void validate_array_and_schema() const;

arrow_schema_private_data* get_schema_private_data();
arrow_array_private_data* get_array_private_data();

[[nodiscard]] bool is_arrow_array_valid() const;
[[nodiscard]] bool is_arrow_schema_valid() const;
[[nodiscard]] bool is_proxy_valid() const;

[[nodiscard]] size_t get_null_count() const;

void swap(arrow_proxy& other) noexcept;
};

Expand All @@ -298,4 +414,31 @@ namespace sparrow
);
}
}

template <std::input_iterator InputIt>
inline size_t arrow_proxy::insert_bitmap(size_t index, InputIt first, InputIt last)
{
if (!is_created_with_sparrow())
{
throw arrow_proxy_exception("Cannot modify the bitmap on non-sparrow created ArrowArray");
}
SPARROW_ASSERT_TRUE(has_bitmap(data_type()))
SPARROW_ASSERT_TRUE(first <= last)
SPARROW_ASSERT_TRUE(index <= length())

auto bitmap = get_non_owning_dynamic_bitset();
const auto it = bitmap.insert(sparrow::next(bitmap.cbegin(), index), first, last);
return static_cast<size_t>(std::distance(bitmap.begin(), it));
}

/**
 * Range flavour of `insert_bitmap`: forwards the begin/end of `range` to the
 * iterator-pair overload.
 *
 * @exception `arrow_proxy_exception` If the `ArrowArray` was not created with sparrow.
 * @param index The position where to insert the values.
 * @param range The values to insert.
 * @return The value returned by the iterator-pair overload.
 */
template <std::ranges::input_range R>
inline size_t arrow_proxy::insert_bitmap(size_t index, const R& range)
{
    if (is_created_with_sparrow())
    {
        SPARROW_ASSERT_TRUE(has_bitmap(data_type()))
        return insert_bitmap(index, std::ranges::begin(range), std::ranges::end(range));
    }
    // Only arrays created with sparrow may have their bitmap modified.
    throw arrow_proxy_exception("Cannot modify the bitmap on non-sparrow created ArrowArray");
}
}
40 changes: 40 additions & 0 deletions include/sparrow/arrow_array_schema_proxy_factory.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Copyright 2024 Man Group Operations Limited
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "sparrow/arrow_array_schema_proxy.hpp"
#include "sparrow/arrow_interface/arrow_array_schema_factory.hpp"
#include "sparrow/types/data_traits.hpp"

namespace sparrow
{
/**
 * Build an `arrow_proxy` from a range of arithmetic values and a range of integral nulls.
 *
 * The array is produced by `make_primitive_arrow_array` and the schema by
 * `make_primitive_arrow_schema`, using the `type_id` that `arrow_traits` associates with
 * the value type of `Values`.
 *
 * @param values The values, forwarded to `make_primitive_arrow_array`.
 * @param nulls The nulls, forwarded to `make_primitive_arrow_array`.
 * @param offset The offset passed to `make_primitive_arrow_array`.
 * @param name The name passed to `make_primitive_arrow_schema`.
 * @param metadata The optional metadata passed to `make_primitive_arrow_schema`.
 * @return The `arrow_proxy` constructed from the created array and schema.
 */
template <std::ranges::sized_range Values, std::ranges::sized_range Nulls>
    requires std::is_arithmetic_v<std::ranges::range_value_t<Values>>
             && std::integral<std::ranges::range_value_t<Nulls>>
arrow_proxy make_primitive_arrow_proxy(
    Values&& values,
    Nulls&& nulls,
    int64_t offset,
    std::string_view name,
    std::optional<std::string_view> metadata
)
{
    using value_type = std::ranges::range_value_t<Values>;

    // The array is created before the schema, as in a braced initializer list.
    auto array = make_primitive_arrow_array(std::forward<Values>(values), std::forward<Nulls>(nulls), offset);
    auto schema = make_primitive_arrow_schema(arrow_traits<value_type>::type_id, name, metadata, std::nullopt);

    return arrow_proxy{std::move(array), std::move(schema)};
}
}
19 changes: 12 additions & 7 deletions include/sparrow/arrow_interface/arrow_array/private_data.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,12 @@ namespace sparrow

[[nodiscard]] constexpr BufferType& buffers() noexcept;
[[nodiscard]] constexpr const BufferType& buffers() const noexcept;

constexpr void resize_buffers(std::size_t size);
void set_buffer(std::size_t index, buffer<std::uint8_t>&& buffer);
void set_buffer(std::size_t index, const buffer_view<std::uint8_t>& buffer);
constexpr void resize_buffer(std::size_t index, std::size_t size, std::uint8_t value);
constexpr void update_buffers_ptrs();

template <class T>
[[nodiscard]] constexpr const T** buffers_ptrs() noexcept;
Expand All @@ -62,8 +63,7 @@ namespace sparrow
{
}

/**
 * Mutable access to the buffers stored in this private data.
 * @return A reference to the member vector of buffers.
 */
[[nodiscard]] constexpr std::vector<buffer<std::uint8_t>>& arrow_array_private_data::buffers() noexcept
{
    return m_buffers;
}
Expand All @@ -77,34 +77,39 @@ namespace sparrow
/**
 * Resize the vector of buffers and refresh the cached raw pointers.
 * @param size The new number of buffers.
 */
constexpr void arrow_array_private_data::resize_buffers(std::size_t size)
{
    m_buffers.resize(size);
    // Single source of truth for rebuilding the raw pointers vector.
    update_buffers_ptrs();
}

/**
 * Replace the buffer at the given index by moving `buffer` into it, then refresh the
 * cached raw pointers.
 * @param index The index of the buffer to replace. Asserted to be less than the number of buffers.
 * @param buffer The buffer to move in.
 */
inline void arrow_array_private_data::set_buffer(std::size_t index, buffer<std::uint8_t>&& buffer)
{
    SPARROW_ASSERT_TRUE(index < m_buffers.size());
    m_buffers[index] = std::move(buffer);
    // update_buffers_ptrs() reassigns the whole pointers vector, so no separate
    // per-index pointer write is needed.
    update_buffers_ptrs();
}

/**
 * Replace the buffer at the given index by assigning `buffer` to it, then refresh the
 * cached raw pointers.
 * @param index The index of the buffer to replace. Asserted to be less than the number of buffers.
 * @param buffer The view assigned to the stored buffer.
 */
inline void arrow_array_private_data::set_buffer(std::size_t index, const buffer_view<std::uint8_t>& buffer)
{
    SPARROW_ASSERT_TRUE(index < m_buffers.size());
    m_buffers[index] = buffer;
    // update_buffers_ptrs() reassigns the whole pointers vector, so no separate
    // per-index pointer write is needed.
    update_buffers_ptrs();
}

/**
 * Resize the buffer at the given index, then refresh the cached raw pointers.
 * @param index The index of the buffer to resize. Asserted to be less than the number of buffers.
 * @param size The new size of the buffer.
 * @param value The value forwarded to the buffer's `resize` call.
 */
constexpr void
arrow_array_private_data::resize_buffer(std::size_t index, std::size_t size, std::uint8_t value)
{
    SPARROW_ASSERT_TRUE(index < m_buffers.size());
    m_buffers[index].resize(size, value);
    // update_buffers_ptrs() reassigns the whole pointers vector, so no separate
    // per-index pointer write is needed.
    update_buffers_ptrs();
}

/**
 * Expose the cached raw pointers as an array of pointers to const `T`.
 * @tparam T The element type the pointers are reinterpreted as.
 * @return The data of the pointers vector, reinterpreted as `T**` and const-qualified.
 */
template <class T>
[[nodiscard]] constexpr const T** arrow_array_private_data::buffers_ptrs() noexcept
{
    auto raw_pointers = m_buffers_pointers.data();
    T** typed_pointers = reinterpret_cast<T**>(raw_pointers);
    return const_cast<const T**>(typed_pointers);
}

/**
 * Reassign `m_buffers_pointers` with the result of `to_raw_ptr_vec<std::uint8_t>(m_buffers)`.
 * NOTE(review): presumably yields one raw data pointer per buffer - confirm in `to_raw_ptr_vec`.
 */
constexpr void arrow_array_private_data::update_buffers_ptrs()
{
m_buffers_pointers = to_raw_ptr_vec<std::uint8_t>(m_buffers);
}
}
Loading

0 comments on commit 0153cf3

Please sign in to comment.