From d32a07a1fed8b0b0e7235854f75c4daacc14b290 Mon Sep 17 00:00:00 2001 From: Maximilian Kleinert Date: Wed, 17 Jan 2024 22:03:04 +0100 Subject: [PATCH] Added documentation Installing pyarrow in ci workflow Split Table, RecordBatch and Field/Schema bindings into separate headers --- .github/workflows/ci.yml | 5 ++ cmake/nanobind-config.cmake | 4 +- docs/index.rst | 1 + docs/pyarrow.rst | 76 +++++++++++++++++++ include/nanobind/pyarrow/record_batch.h | 28 +++++++ include/nanobind/pyarrow/table.h | 28 +++++++ .../nanobind/pyarrow/{tabular.h => type.h} | 19 +---- tests/test_pyarrow.cpp | 4 +- 8 files changed, 145 insertions(+), 20 deletions(-) create mode 100644 docs/pyarrow.rst create mode 100644 include/nanobind/pyarrow/record_batch.h create mode 100644 include/nanobind/pyarrow/table.h rename include/nanobind/pyarrow/{tabular.h => type.h} (55%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bc168616..8e8a5c40 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -53,6 +53,11 @@ jobs: run: | python -m pip install numpy scipy + - name: Install pyarrow + if: ${{ !startsWith(matrix.python, 'pypy') && !contains(matrix.python, 'alpha') }} + run: | + python -m pip install pyarrow + - name: Configure run: > cmake -S . 
-B build -DNB_TEST_STABLE_ABI=ON -DNB_TEST_SHARED_BUILD="$(python3 -c 'import sys; print(int(sys.version_info.minor>=11))')" diff --git a/cmake/nanobind-config.cmake b/cmake/nanobind-config.cmake index 8b7356ba..8f00c101 100644 --- a/cmake/nanobind-config.cmake +++ b/cmake/nanobind-config.cmake @@ -159,10 +159,12 @@ function (nanobind_build_library TARGET_NAME) ${NB_DIR}/include/nanobind/pyarrow/chunked_array.h ${NB_DIR}/include/nanobind/pyarrow/datatype.h ${NB_DIR}/include/nanobind/pyarrow/pyarrow_import.h + ${NB_DIR}/include/nanobind/pyarrow/record_batch.h ${NB_DIR}/include/nanobind/pyarrow/scalar.h ${NB_DIR}/include/nanobind/pyarrow/sparse_tensor.h - ${NB_DIR}/include/nanobind/pyarrow/tabular.h + ${NB_DIR}/include/nanobind/pyarrow/table.h ${NB_DIR}/include/nanobind/pyarrow/tensor.h + ${NB_DIR}/include/nanobind/pyarrow/type.h ${NB_DIR}/src/buffer.h ${NB_DIR}/src/hash.h ${NB_DIR}/src/nb_internals.h diff --git a/docs/index.rst b/docs/index.rst index ed27f366..6e03e9ae 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -122,6 +122,7 @@ The nanobind logo was designed by `AndoTwin Studio classes exceptions ndarray_index + pyarrow packaging utilities diff --git a/docs/pyarrow.rst b/docs/pyarrow.rst new file mode 100644 index 00000000..1f27d0b6 --- /dev/null +++ b/docs/pyarrow.rst @@ -0,0 +1,76 @@ +.. _pyarrow: + +PyArrow Bindings +================ + +nanobind can exchange ``pyarrow`` objects via a ``std::shared_ptr<..>``. To get started, you have to + +.. code-block:: cpp + + #include + +and make sure to call the following `pyarrow initialization `__ at the top of your module definition + +.. code-block:: cpp + + NB_MODULE(test_pyarrow_ext, m) { + static nanobind::detail::pyarrow::ImportPyarrow module; + // ... + } + +The type caster headers are structured in a similar way to the headers in ``pyarrow`` (``array_primitive.h``, ``array_binary.h``, etc.) itself: + +.. 
list-table:: + :widths: 42 48 + :header-rows: 1 + + * - Types + - Type caster header + * - ``Array``, ``DoubleArray``, ``Int64Array``, ... + - ``#include `` + * - ``BinaryArray``, ``LargeBinaryArray``, ``StringArray``, ``LargeStringArray``, ``FixedSizeBinaryArray`` + - ``#include `` + * - ``ListArray``, ``LargeListArray``, ``MapArray``, ``FixedSizeListArray``, ``StructArray``, ``UnionArray``, ``SparseUnionArray``, ``DenseUnionArray`` + - ``#include `` + * - ``ChunkedArray`` + - ``#include `` + * - ``Table`` + - ``#include `` + * - ``RecordBatch`` + - ``#include `` + * - ``Field``, ``Schema`` + - ``#include `` + * - ``Scalars`` + - ``#include `` + * - ``DataTypes`` + - ``#include `` + * - ``Buffer``, ``ResizableBuffer``, ``MutableBuffer`` + - ``#include `` + * - ``Tensor``, ``NumericTensor<..>`` + - ``#include `` + * - ``SparseCOOTensor``, ``SparseCSCMatrix``, ``SparseCSFTensor``, ``SparseCSRMatrix`` + - ``#include `` + +**Example**: The following code snippet shows how to create bindings for a ``pyarrow.DoubleArray``: + +.. code-block:: cpp + + #include + #include + + #include + #include + + namespace nb = nanobind; + + NB_MODULE(test_pyarrow_ext, m) { + static nb::detail::pyarrow::ImportPyarrow module; + m.def("my_pyarrow_function", [](std::shared_ptr arr) { + auto data = arr->data()->Copy(); + return std::make_shared(std::move(data)); + } + ); + } + +If you want to consume the ``C++`` artifacts as distributed by the ``PyPI`` ``pyarrow`` package in your own ``CMake`` +project, please have a look at `FindPyArrow.cmake `__. \ No newline at end of file diff --git a/include/nanobind/pyarrow/record_batch.h b/include/nanobind/pyarrow/record_batch.h new file mode 100644 index 00000000..627bf959 --- /dev/null +++ b/include/nanobind/pyarrow/record_batch.h @@ -0,0 +1,28 @@ +/* + nanobind/pyarrow/record_batch.h: conversion between arrow and pyarrow + + Copyright (c) 2024 Maximilian Kleinert and + Wenzel Jakob + + All rights reserved. 
Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ +#pragma once + +#include +#include +#include +#include + +NAMESPACE_BEGIN(NB_NAMESPACE) +NAMESPACE_BEGIN(detail) + +template<> +struct pyarrow::pyarrow_caster_name_trait { + static constexpr auto Name = const_name("RecordBatch"); +}; +template<> +struct type_caster> : pyarrow::pyarrow_caster {}; + +NAMESPACE_END(detail) +NAMESPACE_END(NB_NAMESPACE) \ No newline at end of file diff --git a/include/nanobind/pyarrow/table.h b/include/nanobind/pyarrow/table.h new file mode 100644 index 00000000..6598b6d9 --- /dev/null +++ b/include/nanobind/pyarrow/table.h @@ -0,0 +1,28 @@ +/* + nanobind/pyarrow/table.h: conversion between arrow and pyarrow + + Copyright (c) 2024 Maximilian Kleinert and + Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ +#pragma once + +#include +#include +#include +#include + +NAMESPACE_BEGIN(NB_NAMESPACE) +NAMESPACE_BEGIN(detail) + +template<> +struct pyarrow::pyarrow_caster_name_trait { + static constexpr auto Name = const_name("Table"); +}; +template<> +struct type_caster> : pyarrow::pyarrow_caster {}; + +NAMESPACE_END(detail) +NAMESPACE_END(NB_NAMESPACE) \ No newline at end of file diff --git a/include/nanobind/pyarrow/tabular.h b/include/nanobind/pyarrow/type.h similarity index 55% rename from include/nanobind/pyarrow/tabular.h rename to include/nanobind/pyarrow/type.h index bc55781d..a20bd26d 100644 --- a/include/nanobind/pyarrow/tabular.h +++ b/include/nanobind/pyarrow/type.h @@ -1,5 +1,5 @@ /* - nanobind/pyarrow/tabular.h: conversion between arrow and pyarrow + nanobind/pyarrow/type.h: conversion between arrow and pyarrow Copyright (c) 2024 Maximilian Kleinert and Wenzel Jakob @@ -12,28 +12,11 @@ #include #include #include -#include -#include #include NAMESPACE_BEGIN(NB_NAMESPACE) NAMESPACE_BEGIN(detail) -template<> -struct 
pyarrow::pyarrow_caster_name_trait { - static constexpr auto Name = const_name("Table"); -}; -template<> -struct type_caster> : pyarrow::pyarrow_caster {}; - -template<> -struct pyarrow::pyarrow_caster_name_trait { - static constexpr auto Name = const_name("RecordBatch"); -}; -template<> -struct type_caster> : pyarrow::pyarrow_caster {}; - - template<> struct pyarrow::pyarrow_caster_name_trait { static constexpr auto Name = const_name("Schema"); diff --git a/tests/test_pyarrow.cpp b/tests/test_pyarrow.cpp index 1154643a..aff93d59 100644 --- a/tests/test_pyarrow.cpp +++ b/tests/test_pyarrow.cpp @@ -6,7 +6,9 @@ #include #include #include -#include +#include +#include +#include #include #include #include