From 88732c3cb2228696ccbf71a74bde3cd2b3520c8f Mon Sep 17 00:00:00 2001
From: Johan Mabille
Date: Tue, 12 Mar 2024 15:44:45 +0100
Subject: [PATCH] Added data_type and array_data structures

---
Review notes:
 * Line breaks were restored; the patch had been collapsed onto three lines.
 * The template arguments in array_data.hpp were missing and have been
   restored from the block_type / buffer_type aliases; please confirm them
   against buffer.hpp and dynamic_bitset.hpp.
 * array_data.hpp now includes <cstdint> and <vector> itself, and `null` is
   declared `inline constexpr` with an explicit initializer.
 * Hunk sizes and the diffstat reflect these amendments; the "index" blob
   hashes were not regenerated and predate them.

 CMakeLists.txt                     |  2 +
 include/sparrow/array_data.hpp     | 54 +++++++++++++++++++++++
 include/sparrow/data_type.hpp      | 70 ++++++++++++++++++++++++++++++
 include/sparrow/dynamic_bitset.hpp |  1 +
 4 files changed, 127 insertions(+)
 create mode 100644 include/sparrow/array_data.hpp
 create mode 100644 include/sparrow/data_type.hpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d2dedf3d..2c2796d8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -50,7 +50,9 @@ OPTION(BUILD_TESTS "sparrow test suite" OFF)
 # =====
 
 set(SPARROW_HEADERS
+    ${SPARROW_INCLUDE_DIR}/sparrow/array_data.hpp
     ${SPARROW_INCLUDE_DIR}/sparrow/buffer.hpp
+    ${SPARROW_INCLUDE_DIR}/sparrow/data_type.hpp
     ${SPARROW_INCLUDE_DIR}/sparrow/dynamic_bitset.hpp
     ${SPARROW_INCLUDE_DIR}/sparrow/iterator.hpp
     ${SPARROW_INCLUDE_DIR}/sparrow/sparrow_version.hpp
diff --git a/include/sparrow/array_data.hpp b/include/sparrow/array_data.hpp
new file mode 100644
index 00000000..141e58fd
--- /dev/null
+++ b/include/sparrow/array_data.hpp
@@ -0,0 +1,54 @@
+// Copyright 2024 Man Group Operations Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <vector>
+
+#include "sparrow/buffer.hpp"
+#include "sparrow/data_type.hpp"
+#include "sparrow/dynamic_bitset.hpp"
+
+namespace sparrow
+{
+    // Plain aggregate grouping everything that describes one array: its
+    // type descriptor, length and offset, a bitmap, the remaining buffers
+    // and the nested child arrays.
+    struct array_data
+    {
+        using block_type = std::uint8_t;
+        using bitmap_type = dynamic_bitset<block_type>;
+        using buffer_type = buffer<block_type>;
+
+        data_descriptor type;
+        std::int64_t length = 0;
+        std::int64_t offset = 0;
+        // bitmap buffer and null_count
+        bitmap_type bitmap;
+        // Other buffers
+        std::vector<buffer_type> buffers;
+        // array_data is still incomplete here; std::vector allows that since C++17
+        std::vector<array_data> child_data;
+    };
+
+    // Empty tag type; `null` below is a constant instance of it.
+    struct null_type
+    {
+    };
+    // `inline` yields a single definition shared by every translation unit
+    // that includes this header; the explicit `{}` keeps the const object
+    // well-formed on compilers that insist on an initializer.
+    inline constexpr null_type null{};
+}
diff --git a/include/sparrow/data_type.hpp b/include/sparrow/data_type.hpp
new file mode 100644
index 00000000..8565ee06
--- /dev/null
+++ b/include/sparrow/data_type.hpp
@@ -0,0 +1,70 @@
+// Copyright 2024 Man Group Operations Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+namespace sparrow
+{
+    // TODO: does not support all types specified by the Arrow specification
+    // yet
+    enum class data_type
+    {
+        NA = 0,
+        BOOL,
+        UINT8,
+        INT8,
+        UINT16,
+        INT16,
+        UINT32,
+        INT32,
+        UINT64,
+        INT64,
+        HALF_FLOAT,
+        FLOAT,
+        DOUBLE,
+        // UTF8 variable-length string
+        STRING,
+        // Variable-length bytes (no guarantee of UTF8-ness)
+        BINARY,
+        // Fixed-size binary. Each value occupies the same number of bytes
+        FIXED_SIZE_BINARY
+    };
+
+    // For now, a tiny wrapper around data_type
+    // More data and functions to come
+    class data_descriptor
+    {
+    public:
+
+        // Default-constructs a descriptor whose id is data_type::UINT8.
+        constexpr data_descriptor()
+            : data_descriptor(data_type::UINT8)
+        {
+        }
+
+        // Explicit so that a bare data_type never converts implicitly.
+        constexpr explicit data_descriptor(data_type id)
+            : m_id(id)
+        {
+        }
+
+        // Returns the data_type this descriptor was constructed with.
+        constexpr data_type id() const { return m_id; }
+
+    private:
+
+        data_type m_id;
+    };
+}
+
diff --git a/include/sparrow/dynamic_bitset.hpp b/include/sparrow/dynamic_bitset.hpp
index 7479059b..9bc0615b 100644
--- a/include/sparrow/dynamic_bitset.hpp
+++ b/include/sparrow/dynamic_bitset.hpp
@@ -1,3 +1,4 @@
+// Copyright 2024 Man Group Operations Limited
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.