diff --git a/apis/python/setup.py b/apis/python/setup.py index 4b6214911f..8f141e9204 100644 --- a/apis/python/setup.py +++ b/apis/python/setup.py @@ -320,6 +320,7 @@ def run(self): "src/tiledbsoma/soma_object.cc", "src/tiledbsoma/soma_dataframe.cc", "src/tiledbsoma/soma_point_cloud_dataframe.cc", + "src/tiledbsoma/soma_geometry_dataframe.cc", "src/tiledbsoma/soma_dense_ndarray.cc", "src/tiledbsoma/soma_sparse_ndarray.cc", "src/tiledbsoma/soma_group.cc", diff --git a/apis/python/src/tiledbsoma/__init__.py b/apis/python/src/tiledbsoma/__init__.py index 43da8e9c69..bc520b0ba8 100644 --- a/apis/python/src/tiledbsoma/__init__.py +++ b/apis/python/src/tiledbsoma/__init__.py @@ -175,6 +175,7 @@ get_storage_engine, show_package_versions, ) +from ._geometry_dataframe import GeometryDataFrame from ._indexer import IntIndexer, tiledbsoma_build_index from ._measurement import Measurement from ._multiscale_image import MultiscaleImage @@ -209,6 +210,7 @@ "DoesNotExistError", "Experiment", "ExperimentAxisQuery", + "GeometryDataFrame", "get_implementation_version", "get_implementation", "get_SOMA_version", diff --git a/apis/python/src/tiledbsoma/_dataframe.py b/apis/python/src/tiledbsoma/_dataframe.py index 7b8794bc7c..ac11239bed 100644 --- a/apis/python/src/tiledbsoma/_dataframe.py +++ b/apis/python/src/tiledbsoma/_dataframe.py @@ -27,7 +27,7 @@ from . import _arrow_types, _util from . import pytiledbsoma as clib -from ._constants import SOMA_JOINID +from ._constants import SOMA_GEOMETRY, SOMA_JOINID from ._exception import SOMAError, map_exception_for_create from ._read_iters import ManagedQuery, TableReadIter from ._soma_array import SOMAArray @@ -790,7 +790,9 @@ def write( def _canonicalize_schema( - schema: pa.Schema, index_column_names: Sequence[str] + schema: pa.Schema, + index_column_names: Sequence[str], + required_columns: Sequence[str] = [SOMA_JOINID], ) -> pa.Schema: """Turns an Arrow schema into the canonical version and checks for errors. @@ -807,13 +809,33 @@ def _canonicalize_schema( raise ValueError( f"{SOMA_JOINID} field must be of type Arrow int64 but is {joinid_type}" ) - else: + elif SOMA_JOINID in required_columns: # add SOMA_JOINID schema = schema.append(pa.field(SOMA_JOINID, pa.int64())) + if SOMA_GEOMETRY in schema.names: + geometry_type = schema.field(SOMA_GEOMETRY).type + if geometry_type != pa.binary() and geometry_type != pa.large_binary(): + raise ValueError( + f"{SOMA_GEOMETRY} field must be of type Arrow binary or large_binary but is {geometry_type}" + ) + schema.set( + schema.get_field_index(SOMA_GEOMETRY), + schema.field(SOMA_GEOMETRY).with_metadata({"dtype": "WKB"}), + ) + elif SOMA_GEOMETRY in required_columns: + # add SOMA_GEOMETRY + schema = schema.append( + pa.field(SOMA_GEOMETRY, pa.large_binary(), metadata={"dtype": "WKB"}) + ) + # verify no illegal use of soma_ prefix for field_name in schema.names: - if field_name.startswith("soma_") and field_name != SOMA_JOINID: + if ( + field_name.startswith("soma_") + and field_name != SOMA_JOINID + and field_name != SOMA_GEOMETRY + ): raise ValueError( f"DataFrame schema may not contain fields with name prefix ``soma_``: got ``{field_name}``" ) @@ -821,7 +843,11 @@ def _canonicalize_schema( # verify that all index_column_names are present in the schema schema_names_set = set(schema.names) for index_column_name in index_column_names: - if index_column_name.startswith("soma_") and index_column_name != SOMA_JOINID: + if ( + index_column_name.startswith("soma_") + and index_column_name != SOMA_JOINID + and index_column_name != SOMA_GEOMETRY + ): raise ValueError( f'index_column_name other than "soma_joinid" must not begin with "soma_"; got "{index_column_name}"' ) @@ -864,7 +890,7 @@ def _fill_out_slot_soma_domain( index_column_name: str, pa_type: pa.DataType, dtype: Any, -) -> Tuple[Tuple[Any, Any], bool]: +) -> Tuple[Tuple[Any, Any], Union[bool, Tuple[bool, ...]]]: """Helper function for _build_tiledb_schema. Given a user-specified domain for a dimension slot -- which may be ``None``, or a two-tuple of which either element may be ``None`` -- return either what the user specified (if adequate) or @@ -873,6 +899,37 @@ def _fill_out_slot_soma_domain( Returns a boolean for whether the underlying datatype's max range was used. """ saturated_range = False + if index_column_name == SOMA_GEOMETRY: + # SOMA_GEOMETRY domain should be either a list of None or a list of tuple[float, float] + axes_lo = [] + axes_hi = [] + if isinstance(slot_domain, list): + f64info: NPFInfo = np.finfo(np.float64) + saturated_multi_range = [] + for axis_domain in slot_domain: + if axis_domain is None: + axes_lo.append(f64info.min) + axes_hi.append(f64info.max) + saturated_multi_range.append(True) + elif not isinstance(axis_domain, tuple) or len(axis_domain) != 2: + raise ValueError("Axis domain should be a tuple[float, float]") + else: + if np.issubdtype(type(axis_domain[0]), NPFloating) or np.issubdtype( + type(axis_domain[1]), NPFloating + ): + raise ValueError("Axis domain should be a tuple[float, float]") + + axes_lo.append(axis_domain[0]) + axes_hi.append(axis_domain[1]) + saturated_multi_range.append(False) + slot_domain = tuple(axes_lo), tuple(axes_hi) + else: + raise ValueError( + f"{SOMA_GEOMETRY} domain should be either a list of None or a list of tuple[float, float]" + ) + + return (slot_domain, tuple(saturated_multi_range)) + if slot_domain is not None: # User-specified; go with it when possible if ( @@ -1007,6 +1064,9 @@ def _find_extent_for_domain( if isinstance(dtype, np.dtype) and dtype.itemsize == 1: extent = 1 + if index_column_name == SOMA_GEOMETRY: + return extent + # Core string dims have no extent and no (core) domain. We return "" here # simply so we can pass libtiledbsoma "" for domain and extent, while it # will (and must) ignore these when creating the TileDB schema. @@ -1053,9 +1113,26 @@ def _find_extent_for_domain( # extent exceeds max value representable by domain type. Reduce domain max # by 1 tile extent to allow for expansion. def _revise_domain_for_extent( - domain: Tuple[Any, Any], extent: Any, saturated_range: bool + domain: Tuple[Any, Any], extent: Any, saturated_range: Union[bool, Tuple[bool, ...]] ) -> Tuple[Any, Any]: - if saturated_range: + if isinstance(saturated_range, tuple): + # Handle SOMA_GEOMETRY domain with is tuple[list[float], list[float]] + if isinstance(domain[1], tuple): + if len(saturated_range) != len(domain[1]): + raise ValueError( + "Internal error: Saturatin flag length does not match domain size" + ) + + return ( + domain[0], + [ + (dim_max - extent) if saturated_range[idx] else dim_max + for idx, dim_max in enumerate(domain[1]) + ], + ) + + raise ValueError("Expected a complex domain") + elif saturated_range: return (domain[0], domain[1] - extent) else: return domain diff --git a/apis/python/src/tiledbsoma/_geometry_dataframe.py b/apis/python/src/tiledbsoma/_geometry_dataframe.py index c41a5b72d3..a3d51be6d5 100644 --- a/apis/python/src/tiledbsoma/_geometry_dataframe.py +++ b/apis/python/src/tiledbsoma/_geometry_dataframe.py @@ -15,16 +15,39 @@ from somacore import CoordinateSpace, CoordinateTransform, options from typing_extensions import Self -from ._constants import SPATIAL_DISCLAIMER -from ._dataframe import Domain +from tiledbsoma._tdb_handles import GeometryDataFrameWrapper +from tiledbsoma.options._soma_tiledb_context import _validate_soma_tiledb_context +from tiledbsoma.options._tiledb_create_write_options import TileDBCreateOptions + +from . import _arrow_types, _util +from . import pytiledbsoma as clib +from ._constants import ( + SOMA_COORDINATE_SPACE_METADATA_KEY, + SOMA_GEOMETRY, + SOMA_JOINID, + SPATIAL_DISCLAIMER, +) +from ._dataframe import ( + Domain, + _canonicalize_schema, + _fill_out_slot_soma_domain, + _find_extent_for_domain, + _revise_domain_for_extent, +) +from ._exception import SOMAError, map_exception_for_create from ._read_iters import TableReadIter +from ._spatial_dataframe import SpatialDataFrame +from ._spatial_util import ( + coordinate_space_from_json, + coordinate_space_to_json, +) from ._types import OpenTimestamp from .options import SOMATileDBContext _UNBATCHED = options.BatchSize() -class GeometryDataFrame(somacore.GeometryDataFrame): +class GeometryDataFrame(SpatialDataFrame, somacore.GeometryDataFrame): """A specialized SOMA object for storing complex geometries with spatial indexing. The ``GeometryDataFrame`` class is designed to store and manage geometric shapes @@ -35,7 +58,8 @@ class GeometryDataFrame(somacore.GeometryDataFrame): Experimental. """ - __slots__ = () + __slots__ = ("_coord_space",) + _wrapper_type = GeometryDataFrameWrapper # Lifecycle @@ -85,7 +109,205 @@ def create( Experimental. """ warnings.warn(SPATIAL_DISCLAIMER) - raise NotImplementedError() + + # Get coordinate space axis data. + if isinstance(coordinate_space, CoordinateSpace): + axis_names = tuple(axis.name for axis in coordinate_space) + axis_units = tuple(axis.unit for axis in coordinate_space) + else: + axis_names = tuple(coordinate_space) + axis_units = tuple(len(axis_names) * [None]) + + index_column_names = ( + SOMA_GEOMETRY, + SOMA_JOINID, + ) + + context = _validate_soma_tiledb_context(context) + schema = _canonicalize_schema( + schema, index_column_names, [SOMA_JOINID, SOMA_GEOMETRY] + ) + + # SOMA-to-core mappings: + # + # Before the current-domain feature was enabled (possible after core 2.25): + # + # * SOMA domain <-> core domain, AKA "max domain" which is a name we'll use for clarity + # * core current domain did not exist + # + # After the current-domain feature was enabled: + # + # * SOMA max_domain <-> core domain + # * SOMA domain <-> core current domain + # + # As far as the user is concerned, the SOMA-level domain is the only + # thing they see and care about. Before 2.25 support, it was immutable + # (since it was implemented by core domain). After 2.25 support, it is + # mutable/up-resizeable (since it is implemented by core current domain). + + # At this point shift from API terminology "domain" to specifying a soma_ or core_ + # prefix for these variables. This is crucial to avoid developer confusion. + soma_domain = domain + domain = None + + # Check if domain has the right size (number of axis + 1 for SOMA_JOINID) + + if soma_domain is None: + soma_domain = tuple(None for _ in index_column_names) + else: + ndom = len(soma_domain) + nidx = len(index_column_names) + if ndom != nidx: + raise ValueError( + f"if domain is specified, it must have the same length as " + f"index_column_names; got {ndom} != {nidx}" + ) + + mutable_soma_domain = list(soma_domain) + soma_geometry_domain = mutable_soma_domain[ + index_column_names.index(SOMA_GEOMETRY) + ] + + if soma_geometry_domain is None: + soma_geometry_domain = [None for _ in axis_names] + elif not isinstance(soma_geometry_domain, list): + raise ValueError( + f"'{SOMA_GEOMETRY}' domain should be a list of tuple[float, float]" + ) + elif len(soma_geometry_domain) != len(axis_names): + raise ValueError( + f"Dimension mishmatch between '{SOMA_GEOMETRY}' domain and coordinate system" + ) + + mutable_soma_domain[index_column_names.index(SOMA_GEOMETRY)] = ( + soma_geometry_domain + ) + soma_domain = tuple(mutable_soma_domain) + + index_column_schema = [] + index_column_data = {} + + for index_column_name, slot_soma_domain in zip(index_column_names, soma_domain): + pa_field = schema.field(index_column_name) + dtype = _arrow_types.tiledb_type_from_arrow_type( + pa_field.type, is_indexed_column=True + ) + + (slot_core_current_domain, saturated_cd) = _fill_out_slot_soma_domain( + slot_soma_domain, False, index_column_name, pa_field.type, dtype + ) + + if index_column_name == SOMA_GEOMETRY: + (slot_core_max_domain, saturated_md) = _fill_out_slot_soma_domain( + [None for _ in axis_names], + True, + index_column_name, + pa_field.type, + dtype, + ) + else: + (slot_core_max_domain, saturated_md) = _fill_out_slot_soma_domain( + None, True, index_column_name, pa_field.type, dtype + ) + + extent = _find_extent_for_domain( + index_column_name, + TileDBCreateOptions.from_platform_config(platform_config), + dtype, + slot_core_current_domain, + ) + + # Necessary to avoid core array-creation error "Reduce domain max by + # 1 tile extent to allow for expansion." + slot_core_current_domain = _revise_domain_for_extent( + slot_core_current_domain, extent, saturated_cd + ) + slot_core_max_domain = _revise_domain_for_extent( + slot_core_max_domain, extent, saturated_md + ) + + # Here is our Arrow data API for communicating schema info between + # Python/R and C++ libtiledbsoma: + # + # [0] core max domain lo + # [1] core max domain hi + # [2] core extent parameter + # If present, these next two signal to use the current-domain feature: + # [3] core current domain lo + # [4] core current domain hi + if index_column_name == SOMA_GEOMETRY: + # SOMA_GEOMETRY has a specific schema + index_column_schema.append( + pa.field( + pa_field.name, + pa.struct({axis: pa.float64() for axis in axis_names}), + ) + ) + index_column_data[pa_field.name] = [ + [ + (axis, slot_core_max_domain[0][idx]) + for idx, axis in enumerate(axis_names) + ], + [ + (axis, slot_core_max_domain[1][idx]) + for idx, axis in enumerate(axis_names) + ], + [(axis, extent) for axis in axis_names], + [ + (axis, slot_core_current_domain[0][idx]) + for idx, axis in enumerate(axis_names) + ], + [ + (axis, slot_core_current_domain[1][idx]) + for idx, axis in enumerate(axis_names) + ], + ] + else: + index_column_schema.append(pa_field) + index_column_data[pa_field.name] = [ + *slot_core_max_domain, + extent, + *slot_core_current_domain, + ] + + index_column_info = pa.RecordBatch.from_pydict( + index_column_data, schema=pa.schema(index_column_schema) + ) + + plt_cfg = _util.build_clib_platform_config(platform_config) + timestamp_ms = context._open_timestamp_ms(tiledb_timestamp) + try: + clib.SOMAGeometryDataFrame.create( + uri, + schema=schema, + index_column_info=index_column_info, + axis_names=axis_names, + axis_units=axis_units, + ctx=context.native_context, + platform_config=plt_cfg, + timestamp=(0, timestamp_ms), + ) + except SOMAError as e: + raise map_exception_for_create(e, uri) from None + + return cls( + cls._wrapper_type.open(uri, "w", context, tiledb_timestamp), + _dont_call_this_use_create_or_open_instead="tiledbsoma-internal-code", + ) + + def __init__( + self, + handle: GeometryDataFrameWrapper, + **kwargs: Any, + ): + super().__init__(handle, **kwargs) + + # Get and validate coordinate space. + try: + coord_space = self.metadata[SOMA_COORDINATE_SPACE_METADATA_KEY] + except KeyError as ke: + raise SOMAError("Missing coordinate space metadata") from ke + self._coord_space = coordinate_space_from_json(coord_space) # Data operations @@ -196,24 +418,6 @@ def write( # Metadata operations - @property - def schema(self) -> pa.Schema: - """The schema of the data in this dataframe. - - Lifecycle: - Experimental. - """ - raise NotImplementedError() - - @property - def index_column_names(self) -> Tuple[str, ...]: - """The names of the index (dimension) columns. - - Lifecycle: - Experimental. - """ - raise NotImplementedError() - @property def axis_names(self) -> Tuple[str, ...]: """The names of the axes of the coordinate space the data is defined on. @@ -221,7 +425,7 @@ def axis_names(self) -> Tuple[str, ...]: Lifecycle: Experimental. """ - raise NotImplementedError() + return self._coord_space.axis_names @property def coordinate_space(self) -> CoordinateSpace: @@ -230,7 +434,7 @@ def coordinate_space(self) -> CoordinateSpace: Lifecycle: Experimental. """ - raise NotImplementedError() + return self._coord_space @coordinate_space.setter def coordinate_space(self, value: CoordinateSpace) -> None: @@ -239,16 +443,14 @@ def coordinate_space(self, value: CoordinateSpace) -> None: Lifecycle: Experimental. """ - raise NotImplementedError() - - @property - def domain(self) -> Tuple[Tuple[Any, Any], ...]: - """The allowable range of values in each index column. - - Returns: a tuple of minimum and maximum values, inclusive, - storable on each index column of the dataframe. - - Lifecycle: - Experimental. - """ - raise NotImplementedError() + if self._coord_space is not None: + if value.axis_names != self._coord_space.axis_names: + raise ValueError( + f"Cannot change axis names of a geometry dataframe. Existing " + f"axis names are {self._coord_space.axis_names}. New coordinate " + f"space has axis names {value.axis_names}." + ) + self.metadata[SOMA_COORDINATE_SPACE_METADATA_KEY] = coordinate_space_to_json( + value + ) + self._coord_space = value diff --git a/apis/python/src/tiledbsoma/_scene.py b/apis/python/src/tiledbsoma/_scene.py index cc19511aa1..196ce357ac 100644 --- a/apis/python/src/tiledbsoma/_scene.py +++ b/apis/python/src/tiledbsoma/_scene.py @@ -213,7 +213,7 @@ def _set_transform_to_element( # Either set the new coordinate space or check the axes of the current # coordinate space the element is defined on. if coordinate_space is None: - elem_axis_names: Tuple[str, ...] = elem.coordinate_space.axis_names # type: ignore[attr-defined] + elem_axis_names: Tuple[str, ...] = elem.coordinate_space.axis_names if elem_axis_names != transform.output_axes: raise ValueError( f"The name of transform output axes, {transform.output_axes}, do " @@ -221,7 +221,7 @@ def _set_transform_to_element( f" space, {elem_axis_names}." ) else: - elem.coordinate_space = coordinate_space # type: ignore[attr-defined] + elem.coordinate_space = coordinate_space # Set the transform metadata and return the multisclae image. coll.metadata[f"soma_scene_registry_{key}"] = transform_to_json(transform) diff --git a/apis/python/src/tiledbsoma/_tdb_handles.py b/apis/python/src/tiledbsoma/_tdb_handles.py index 9934a1e5fb..81ecc76fe9 100644 --- a/apis/python/src/tiledbsoma/_tdb_handles.py +++ b/apis/python/src/tiledbsoma/_tdb_handles.py @@ -45,6 +45,7 @@ clib.SOMAArray, clib.SOMADataFrame, clib.SOMAPointCloudDataFrame, + clib.SOMAGeometryDataFrame, clib.SOMASparseNDArray, clib.SOMADenseNDArray, clib.SOMAGroup, @@ -77,6 +78,7 @@ def open( "somagroup": clib.SOMAGroup.open, "somadataframe": clib.SOMADataFrame.open, "somapointclouddataframe": clib.SOMAPointCloudDataFrame.open, + "somageometrydataframe": clib.SOMAGeometryDataFrame.open, "somadensendarray": clib.SOMADenseNDArray.open, "somasparsendarray": clib.SOMASparseNDArray.open, "somacollection": clib.SOMACollection.open, @@ -105,6 +107,7 @@ def open( _type_to_class = { "somadataframe": DataFrameWrapper, "somapointclouddataframe": PointCloudDataFrameWrapper, + "somageometrydataframe": GeometryDataFrameWrapper, "somadensendarray": DenseNDArrayWrapper, "somasparsendarray": SparseNDArrayWrapper, "somacollection": CollectionWrapper, @@ -638,6 +641,19 @@ def write(self, values: pa.RecordBatch) -> None: self._handle.write(values) +class GeometryDataFrameWrapper(SOMAArrayWrapper[clib.SOMAGeometryDataFrame]): + """Wrapper around a Pybind11 SOMAGeometryDataFrame handle.""" + + _WRAPPED_TYPE = clib.SOMAGeometryDataFrame + + @property + def count(self) -> int: + return int(self._handle.count) + + def write(self, values: pa.RecordBatch) -> None: + self._handle.write(values) + + class DenseNDArrayWrapper(SOMAArrayWrapper[clib.SOMADenseNDArray]): """Wrapper around a Pybind11 DenseNDArrayWrapper handle.""" diff --git a/apis/python/src/tiledbsoma/pytiledbsoma.cc b/apis/python/src/tiledbsoma/pytiledbsoma.cc index 8d5514f7b4..3f3e1a5090 100644 --- a/apis/python/src/tiledbsoma/pytiledbsoma.cc +++ b/apis/python/src/tiledbsoma/pytiledbsoma.cc @@ -21,6 +21,7 @@ void load_soma_object(py::module&); void load_soma_array(py::module&); void load_soma_dataframe(py::module&); void load_soma_point_cloud_dataframe(py::module&); +void load_soma_geometry_dataframe(py::module&); void load_soma_dense_ndarray(py::module&); void load_soma_sparse_ndarray(py::module&); void load_soma_group(py::module&); @@ -166,6 +167,7 @@ PYBIND11_MODULE(pytiledbsoma, m) { load_soma_dense_ndarray(m); load_soma_sparse_ndarray(m); load_soma_point_cloud_dataframe(m); + load_soma_geometry_dataframe(m); load_soma_group(m); load_soma_collection(m); load_query_condition(m); diff --git a/apis/python/src/tiledbsoma/soma_geometry_dataframe.cc b/apis/python/src/tiledbsoma/soma_geometry_dataframe.cc new file mode 100644 index 0000000000..157526697c --- /dev/null +++ b/apis/python/src/tiledbsoma/soma_geometry_dataframe.cc @@ -0,0 +1,141 @@ +/** + * @file soma_point_cloud_dataframe.cc + * + * @section LICENSE + * + * Licensed under the MIT License. + * Copyright (c) TileDB, Inc. and The Chan Zuckerberg Initiative Foundation + * + * @section DESCRIPTION + * + * This file defines the SOMAGeometryDataFrame bindings. + */ + +#include +#include +#include +#include +#include + +#include + +#include "common.h" + +namespace libtiledbsomacpp { + +namespace py = pybind11; +using namespace py::literals; +using namespace tiledbsoma; + +void load_soma_geometry_dataframe(py::module& m) { + py::class_( + m, "SOMAGeometryDataFrame") + + .def_static( + "create", + [](std::string_view uri, + py::object py_schema, + py::object index_column_info, + std::vector axis_names, + std::vector> axis_units, + std::shared_ptr context, + PlatformConfig platform_config, + std::optional> timestamp) { + ArrowSchema schema; + uintptr_t schema_ptr = (uintptr_t)(&schema); + py_schema.attr("_export_to_c")(schema_ptr); + + // Please see + // https://github.com/single-cell-data/TileDB-SOMA/issues/2869 + // for the reasoning here. + // + // TL;DR: + // * The table has an `ArrowSchema`; each of its children + // is also an `ArrowSchema`. + // * Arrow fields are nullable by default in the user API. + // * There is a field-level nullability flag, _and_ users + // can set a "nullable" metadata as well. + // * In the absence of metadata, respect the flag we get. + // * In the present of metdata with "nullable", let that + // override. + + auto metadata = py_schema.attr("metadata"); + if (py::hasattr(metadata, "get")) { + for (int64_t i = 0; i < schema.n_children; ++i) { + auto child = schema.children[i]; + auto val = metadata.attr("get")( + py::str(child->name).attr("encode")("utf-8")); + + if (!val.is(py::none()) && + val.cast() == "nullable") { + child->flags |= ARROW_FLAG_NULLABLE; + } + } + } + + ArrowSchema index_column_schema; + ArrowArray index_column_array; + uintptr_t + index_column_schema_ptr = (uintptr_t)(&index_column_schema); + uintptr_t + index_column_array_ptr = (uintptr_t)(&index_column_array); + index_column_info.attr("_export_to_c")( + index_column_array_ptr, index_column_schema_ptr); + + SOMACoordinateSpace coord_space{axis_names, axis_units}; + + try { + SOMAGeometryDataFrame::create( + uri, + std::make_unique(schema), + ArrowTable( + std::make_unique(index_column_array), + std::make_unique(index_column_schema)), + coord_space, + context, + platform_config, + timestamp); + } catch (const std::out_of_range& e) { + throw py::type_error(e.what()); + } catch (const std::exception& e) { + TPY_ERROR_LOC(e.what()); + } + schema.release(&schema); + }, + "uri"_a, + py::kw_only(), + "schema"_a, + "index_column_info"_a, + "axis_names"_a, + "axis_units"_a, + "ctx"_a, + "platform_config"_a, + "timestamp"_a = py::none()) + + .def_static( + "open", + py::overload_cast< + std::string_view, + OpenMode, + std::shared_ptr, + std::vector, + ResultOrder, + std::optional>>( + &SOMAGeometryDataFrame::open), + "uri"_a, + "mode"_a, + "context"_a, + py::kw_only(), + "column_names"_a = py::tuple(), + "result_order"_a = ResultOrder::automatic, + "timestamp"_a = py::none()) + + .def_static("exists", &SOMAGeometryDataFrame::exists) + .def_property_readonly( + "index_column_names", &SOMAGeometryDataFrame::index_column_names) + .def_property_readonly( + "count", + &SOMAGeometryDataFrame::count, + py::call_guard()); +} +} // namespace libtiledbsomacpp diff --git a/apis/python/src/tiledbsoma/soma_object.cc b/apis/python/src/tiledbsoma/soma_object.cc index 288a263856..afbbb7ca6e 100644 --- a/apis/python/src/tiledbsoma/soma_object.cc +++ b/apis/python/src/tiledbsoma/soma_object.cc @@ -68,9 +68,12 @@ void load_soma_object(py::module& m) { if (soma_obj_type == "somadataframe") return py::cast(dynamic_cast(*soma_obj)); - if (soma_obj_type == "somapointclouddataframe") + else if (soma_obj_type == "somapointclouddataframe") return py::cast( dynamic_cast(*soma_obj)); + else if (soma_obj_type == "somageometrydataframe") + return py::cast( + dynamic_cast(*soma_obj)); else if (soma_obj_type == "somasparsendarray") return py::cast( dynamic_cast(*soma_obj)); diff --git a/apis/python/tests/test_geometry_dataframe.py b/apis/python/tests/test_geometry_dataframe.py new file mode 100644 index 0000000000..eb8dde08a2 --- /dev/null +++ b/apis/python/tests/test_geometry_dataframe.py @@ -0,0 +1,57 @@ +import numpy as np +import pyarrow as pa +import pytest + +import tiledbsoma as soma + + +@pytest.mark.parametrize("domain", [None, [[(-1000, 1000), (0, 100)], [0, 100]]]) +def test_geometry_domain(tmp_path, domain): + uri = tmp_path.as_uri() + + asch = pa.schema([("quality", pa.float32())]) + + with soma.GeometryDataFrame.create(uri, schema=asch, domain=domain) as geom: + soma_domain = geom.domain + + assert len(soma_domain) == 2 + + for idx, name in enumerate(geom.index_column_names): + if name == "soma_geometry": + f64info = np.finfo(np.float64) + + assert len(soma_domain[idx][0]) == 2 + assert len(soma_domain[idx][1]) == 2 + + for axis_idx, axis_name in enumerate(geom.coordinate_space.axis_names): + assert soma_domain[idx][0][axis_name] == ( + f64info.min if domain is None else domain[0][axis_idx][0] + ) + + for axis_idx, axis_name in enumerate(geom.coordinate_space.axis_names): + assert soma_domain[idx][1][axis_name] == ( + f64info.max if domain is None else domain[0][axis_idx][1] + ) + + +def test_geometry_coordinate_space(tmp_path): + uri = tmp_path.as_uri() + + asch = pa.schema([("quality", pa.float32())]) + + with soma.GeometryDataFrame.create(uri, schema=asch) as geom: + assert len(geom.coordinate_space) == 2 + assert geom.coordinate_space.axis_names == ("x", "y") + assert geom.coordinate_space.axes == (soma.Axis(name="x"), soma.Axis(name="y")) + + # Axis names do not match + with pytest.raises(ValueError): + geom.coordinate_space = soma.CoordinateSpace( + [soma.Axis(name="a"), soma.Axis(name="y")] + ) + + geom.coordinate_space = soma.CoordinateSpace( + [soma.Axis(name="x", unit="m"), soma.Axis(name="y", unit="in")] + ) + assert geom.coordinate_space[0] == soma.Axis(name="x", unit="m") + assert geom.coordinate_space[1] == soma.Axis(name="y", unit="in") diff --git a/libtiledbsoma/src/soma/soma_array.cc b/libtiledbsoma/src/soma/soma_array.cc index 9020c311d7..4e8b20071f 100644 --- a/libtiledbsoma/src/soma/soma_array.cc +++ b/libtiledbsoma/src/soma/soma_array.cc @@ -224,37 +224,9 @@ void SOMAArray::fill_columns() { TILEDB_SOMA_SCHEMA_KEY, uri())); } - - auto soma_schema_extension_raw = get_metadata(TILEDB_SOMA_SCHEMA_KEY) - .value(); - auto data = static_cast( - std::get<2>(soma_schema_extension_raw)); - auto soma_schema_extension = data != nullptr ? - nlohmann::json::parse(std::string( - data, - std::get<1>( - soma_schema_extension_raw))) : - nlohmann::json::object(); - - if (!soma_schema_extension.contains(TILEDB_SOMA_SCHEMA_COL_KEY)) { - throw TileDBSOMAError(std::format( - "[SOMAArray][fill_columns] Missing '{}' key from '{}'", - TILEDB_SOMA_SCHEMA_COL_KEY, - TILEDB_SOMA_SCHEMA_KEY)); - } - - columns_ = SOMAColumn::deserialize( - soma_schema_extension.value( - TILEDB_SOMA_SCHEMA_COL_KEY, nlohmann::json::array()), - *ctx_->tiledb_ctx(), - *arr_); - - } else { - // Non-geometry dataframes have trivially constructible columns and do - // not require a schema - columns_ = SOMAColumn::deserialize( - nlohmann::json::array(), *ctx_->tiledb_ctx(), *arr_); } + + columns_ = SOMAColumn::deserialize(*ctx_->tiledb_ctx(), *arr_, metadata_); } const std::string SOMAArray::uri() const { @@ -509,11 +481,12 @@ ArrowTable SOMAArray::_get_core_domainish(enum Domainish which_kind) { for (const auto& column : columns_ | std::views::filter( [](const auto& col) { return col->isIndexColumn(); })) { - arrow_schema->children[child_index] = column->arrow_schema_slot( - *ctx_, *arr_); - arrow_array->children[child_index] = column->arrow_domain_slot( + auto kind_domain_slot = column->arrow_domain_slot( *ctx_, *arr_, which_kind); + arrow_array->children[child_index] = kind_domain_slot.first; + arrow_schema->children[child_index] = kind_domain_slot.second; + ++child_index; } diff --git a/libtiledbsoma/src/soma/soma_array.h b/libtiledbsoma/src/soma/soma_array.h index 57a745334b..bb277d5a35 100644 --- a/libtiledbsoma/src/soma/soma_array.h +++ b/libtiledbsoma/src/soma/soma_array.h @@ -624,8 +624,13 @@ class SOMAArray : public SOMAObject { * @return std::unique_ptr Schema */ std::unique_ptr arrow_schema() const { - return ArrowAdapter::arrow_schema_from_tiledb_array( - ctx_->tiledb_ctx(), arr_); + auto schema = ArrowAdapter::make_arrow_schema_parent(columns_.size()); + + for (size_t i = 0; i < columns_.size(); ++i) { + schema->children[i] = columns_[i]->arrow_schema_slot(*ctx_, *arr_); + } + + return schema; } /** diff --git a/libtiledbsoma/src/soma/soma_attribute.cc b/libtiledbsoma/src/soma/soma_attribute.cc index 2c57768abf..2bb048797e 100644 --- a/libtiledbsoma/src/soma/soma_attribute.cc +++ b/libtiledbsoma/src/soma/soma_attribute.cc @@ -15,7 +15,10 @@ namespace tiledbsoma { std::shared_ptr SOMAAttribute::deserialize( - const nlohmann::json& soma_schema, const Context& ctx, const Array& array) { + const nlohmann::json& soma_schema, + const Context& ctx, + const Array& array, + const std::map&) { if (!soma_schema.contains(TILEDB_SOMA_SCHEMA_COL_ATTR_KEY)) { throw TileDBSOMAError( "[SOMAAttribute][deserialize] Missing required field " @@ -136,7 +139,7 @@ std::any SOMAAttribute::_core_current_domain_slot(NDRectangle&) const { name())); } -ArrowArray* SOMAAttribute::arrow_domain_slot( +std::pair SOMAAttribute::arrow_domain_slot( const SOMAContext&, Array&, enum Domainish) const { throw TileDBSOMAError(std::format( "[SOMAAttribute][arrow_domain_slot] Column with name {} is not an " @@ -145,7 +148,7 @@ ArrowArray* SOMAAttribute::arrow_domain_slot( } ArrowSchema* SOMAAttribute::arrow_schema_slot( - const SOMAContext& ctx, Array& array) { + const SOMAContext& ctx, Array& array) const { return ArrowAdapter::arrow_schema_from_tiledb_attribute( attribute, *ctx.tiledb_ctx(), array) .release(); diff --git a/libtiledbsoma/src/soma/soma_attribute.h b/libtiledbsoma/src/soma/soma_attribute.h index ef6b6c23f3..6fcbbd88fd 100644 --- a/libtiledbsoma/src/soma/soma_attribute.h +++ b/libtiledbsoma/src/soma/soma_attribute.h @@ -35,7 +35,8 @@ class SOMAAttribute : public SOMAColumn { static std::shared_ptr deserialize( const nlohmann::json& soma_schema, const Context& ctx, - const Array& array); + const Array& array, + const std::map& metadata); /** * Create a ``SOMAAttribute`` shared pointer from an Arrow schema @@ -96,13 +97,13 @@ class SOMAAttribute : public SOMAColumn { return std::vector({enumeration.value()}); } - ArrowArray* arrow_domain_slot( + std::pair arrow_domain_slot( const SOMAContext& ctx, Array& array, enum Domainish kind) const override; ArrowSchema* arrow_schema_slot( - const SOMAContext& ctx, Array& array) override; + const SOMAContext& ctx, Array& array) const override; void serialize(nlohmann::json&) const override; diff --git a/libtiledbsoma/src/soma/soma_column.cc b/libtiledbsoma/src/soma/soma_column.cc index 3062eabf6c..7f41986ff6 100644 --- a/libtiledbsoma/src/soma/soma_column.cc +++ b/libtiledbsoma/src/soma/soma_column.cc @@ -26,22 +26,46 @@ std::map SOMAColumn::deserialiser_map = { SOMAGeometryColumn::deserialize}}; std::vector> SOMAColumn::deserialize( - const nlohmann::json& soma_schema_columns, const Context& ctx, - const Array& array) { + const Array& array, + const std::map& metadata) { std::vector> columns; + nlohmann::json soma_schema_columns = nlohmann::json::array(); + + if (metadata.contains(TILEDB_SOMA_SCHEMA_KEY)) { + auto soma_schema_extension_raw = metadata.at(TILEDB_SOMA_SCHEMA_KEY); + auto data = static_cast( + std::get<2>(soma_schema_extension_raw)); + auto soma_schema_extension = data != nullptr ? + nlohmann::json::parse(std::string( + data, + std::get<1>( + soma_schema_extension_raw))) : + nlohmann::json::object(); + + if (!soma_schema_extension.contains(TILEDB_SOMA_SCHEMA_COL_KEY)) { + throw TileDBSOMAError(std::format( + "[SOMAArray][fill_columns] Missing '{}' key from '{}'", + TILEDB_SOMA_SCHEMA_COL_KEY, + TILEDB_SOMA_SCHEMA_KEY)); + } + + soma_schema_columns = soma_schema_extension.value( + TILEDB_SOMA_SCHEMA_COL_KEY, nlohmann::json::array()); + } + if (!soma_schema_columns.empty()) { for (auto& column : soma_schema_columns) { auto type = column[TILEDB_SOMA_SCHEMA_COL_TYPE_KEY] .template get(); - auto col = deserialiser_map[type](column, ctx, array); + auto col = deserialiser_map[type](column, ctx, array, metadata); if (col) { // Deserialized column can be null in case the array is modified // and the column no longer exists. - columns.push_back(deserialiser_map[type](column, ctx, array)); + columns.push_back(col); } } diff --git a/libtiledbsoma/src/soma/soma_column.h b/libtiledbsoma/src/soma/soma_column.h index 397759f696..4610a32d22 100644 --- a/libtiledbsoma/src/soma/soma_column.h +++ b/libtiledbsoma/src/soma/soma_column.h @@ -40,9 +40,9 @@ class SOMAColumn { //=================================================================== static std::vector> deserialize( - const nlohmann::json& soma_schema, const Context& ctx, - const Array& array); + const Array& array, + const std::map& metadata); //=================================================================== //= public non-static @@ -121,7 +121,7 @@ class SOMAColumn { * @param array * @param which_kind */ - virtual ArrowArray* arrow_domain_slot( + virtual std::pair arrow_domain_slot( const SOMAContext& ctx, Array& array, enum Domainish which_kind) const = 0; @@ -133,7 +133,7 @@ class SOMAColumn { * @param array */ virtual ArrowSchema* arrow_schema_slot( - const SOMAContext& ctx, Array& array) = 0; + const SOMAContext& ctx, Array& array) const = 0; /** * Get the domain kind of the SOMAColumn. @@ -531,7 +531,10 @@ class SOMAColumn { private: typedef std::shared_ptr (*Factory)( - const nlohmann::json&, const Context&, const Array&); + const nlohmann::json&, + const Context&, + const Array&, + const std::map&); static std::map deserialiser_map; }; diff --git a/libtiledbsoma/src/soma/soma_dataframe.cc b/libtiledbsoma/src/soma/soma_dataframe.cc index 1a1eff4136..3ab970e130 100644 --- a/libtiledbsoma/src/soma/soma_dataframe.cc +++ b/libtiledbsoma/src/soma/soma_dataframe.cc @@ -14,6 +14,7 @@ #include "soma_dataframe.h" #include #include "../utils/logger.h" +#include "soma_coordinates.h" namespace tiledbsoma { using namespace tiledb; @@ -34,6 +35,7 @@ void SOMADataFrame::create( ctx->tiledb_ctx(), schema, index_columns, + std::nullopt, "SOMADataFrame", true, platform_config); diff --git a/libtiledbsoma/src/soma/soma_dense_ndarray.cc b/libtiledbsoma/src/soma/soma_dense_ndarray.cc index 76ec8444ad..52c0e89d50 100644 --- a/libtiledbsoma/src/soma/soma_dense_ndarray.cc +++ b/libtiledbsoma/src/soma/soma_dense_ndarray.cc @@ -11,6 +11,7 @@ * This file defines the SOMADenseNDArray class. */ #include "soma_dense_ndarray.h" +#include "soma_coordinates.h" namespace tiledbsoma { using namespace tiledb; @@ -65,6 +66,7 @@ void SOMADenseNDArray::create( ctx->tiledb_ctx(), schema, index_columns, + std::nullopt, "SOMADenseNDArray", false, platform_config); diff --git a/libtiledbsoma/src/soma/soma_dimension.cc b/libtiledbsoma/src/soma/soma_dimension.cc index d87fe9028f..2ae71923c2 100644 --- a/libtiledbsoma/src/soma/soma_dimension.cc +++ b/libtiledbsoma/src/soma/soma_dimension.cc @@ -17,7 +17,10 @@ namespace tiledbsoma { std::shared_ptr SOMADimension::deserialize( - const nlohmann::json& soma_schema, const Context&, const Array& array) { + const nlohmann::json& soma_schema, + const Context&, + const Array& array, + const std::map&) { if (!soma_schema.contains(TILEDB_SOMA_SCHEMA_COL_DIM_KEY)) { throw TileDBSOMAError( "[SOMADimension][deserialize] Missing required field " @@ -949,8 +952,10 @@ std::any SOMADimension::_core_current_domain_slot(NDRectangle& ndrect) const { } } -ArrowArray* SOMADimension::arrow_domain_slot( +std::pair SOMADimension::arrow_domain_slot( const SOMAContext& ctx, Array& array, enum Domainish kind) const { + ArrowArray* arrow_array; + switch (domain_type().value()) { case TILEDB_DATETIME_YEAR: case TILEDB_DATETIME_MONTH: @@ -975,39 +980,50 @@ ArrowArray* SOMADimension::arrow_domain_slot( case TILEDB_TIME_FS: case TILEDB_TIME_AS: case TILEDB_INT64: - return ArrowAdapter::make_arrow_array_child( + arrow_array = ArrowAdapter::make_arrow_array_child( domain_slot(ctx, array, kind)); + break; case TILEDB_UINT64: - return ArrowAdapter::make_arrow_array_child( + arrow_array = ArrowAdapter::make_arrow_array_child( domain_slot(ctx, array, kind)); + break; case TILEDB_INT32: - return ArrowAdapter::make_arrow_array_child( + arrow_array = ArrowAdapter::make_arrow_array_child( domain_slot(ctx, array, kind)); + break; case TILEDB_UINT32: - return ArrowAdapter::make_arrow_array_child( + arrow_array = ArrowAdapter::make_arrow_array_child( domain_slot(ctx, array, kind)); + break; case TILEDB_INT16: - return ArrowAdapter::make_arrow_array_child( + arrow_array = ArrowAdapter::make_arrow_array_child( domain_slot(ctx, array, kind)); + break; case TILEDB_UINT16: - return ArrowAdapter::make_arrow_array_child( + arrow_array = ArrowAdapter::make_arrow_array_child( domain_slot(ctx, array, kind)); + break; case TILEDB_INT8: - return ArrowAdapter::make_arrow_array_child( + arrow_array = ArrowAdapter::make_arrow_array_child( domain_slot(ctx, array, kind)); + break; case TILEDB_UINT8: - return ArrowAdapter::make_arrow_array_child( + arrow_array = ArrowAdapter::make_arrow_array_child( domain_slot(ctx, array, kind)); + break; case TILEDB_FLOAT64: - return ArrowAdapter::make_arrow_array_child( + arrow_array = ArrowAdapter::make_arrow_array_child( domain_slot(ctx, array, kind)); + break; case TILEDB_FLOAT32: - return ArrowAdapter::make_arrow_array_child( + arrow_array = ArrowAdapter::make_arrow_array_child( domain_slot(ctx, array, kind)); + break; case TILEDB_STRING_ASCII: case TILEDB_STRING_UTF8: - return ArrowAdapter::make_arrow_array_child_string( + arrow_array = ArrowAdapter::make_arrow_array_child_string( domain_slot(ctx, array, kind)); + break; default: throw TileDBSOMAError(std::format( "[SOMADimension][arrow_domain_slot] dim {} has unhandled " @@ -1016,9 +1032,12 @@ ArrowArray* SOMADimension::arrow_domain_slot( name(), tiledb::impl::type_to_str(domain_type().value()))); } + + return std::make_pair(arrow_array, arrow_schema_slot(ctx, array)); } -ArrowSchema* SOMADimension::arrow_schema_slot(const SOMAContext&, Array&) { +ArrowSchema* SOMADimension::arrow_schema_slot( + const SOMAContext&, Array&) const { return ArrowAdapter::arrow_schema_from_tiledb_dimension(dimension) .release(); } diff --git a/libtiledbsoma/src/soma/soma_dimension.h b/libtiledbsoma/src/soma/soma_dimension.h index abf512c8fe..97584c658d 100644 --- a/libtiledbsoma/src/soma/soma_dimension.h +++ b/libtiledbsoma/src/soma/soma_dimension.h @@ -36,7 +36,8 @@ class SOMADimension : public SOMAColumn { static std::shared_ptr deserialize( const nlohmann::json& soma_schema, const Context& ctx, - const Array& array); + const Array& array, + const std::map& metadata); static std::shared_ptr create( std::shared_ptr ctx, @@ -89,13 +90,13 @@ class SOMADimension : public SOMAColumn { return std::nullopt; } - ArrowArray* arrow_domain_slot( + std::pair arrow_domain_slot( const SOMAContext& ctx, Array& array, enum Domainish kind) const override; ArrowSchema* arrow_schema_slot( - const SOMAContext& ctx, Array& array) override; + const SOMAContext& ctx, Array& array) const override; void serialize(nlohmann::json&) const override; diff --git a/libtiledbsoma/src/soma/soma_geometry_column.cc b/libtiledbsoma/src/soma/soma_geometry_column.cc index 537d3ecc71..b89471b011 100644 --- a/libtiledbsoma/src/soma/soma_geometry_column.cc +++ b/libtiledbsoma/src/soma/soma_geometry_column.cc @@ -15,7 +15,10 @@ namespace tiledbsoma { std::shared_ptr SOMAGeometryColumn::deserialize( - const nlohmann::json& soma_schema, const Context&, const Array& array) { + const nlohmann::json& soma_schema, + const Context&, + const Array& array, + const std::map& metadata) { if (!soma_schema.contains(TILEDB_SOMA_SCHEMA_COL_DIM_KEY)) { throw TileDBSOMAError( "[SOMAGeometryColumn][deserialize] Missing required field " @@ -58,7 +61,19 @@ std::shared_ptr SOMAGeometryColumn::deserialize( auto attribute = array.schema().attribute(attribute_names[0]); - return std::make_shared(dimensions, attribute); + if (!metadata.contains(SOMA_COORDINATE_SPACE_KEY)) { + throw TileDBSOMAError(std::format( + "[SOMAGeometryColumn][deserialize] Missing required '{}' " + "metadata key.", + SOMA_COORDINATE_SPACE_KEY)); + } + + auto coordinate_space = std::apply( + SOMACoordinateSpace::from_metadata, + metadata.at(SOMA_COORDINATE_SPACE_KEY)); + + return std::make_shared( + dimensions, attribute, coordinate_space); } std::shared_ptr SOMAGeometryColumn::create( @@ -66,6 +81,7 @@ std::shared_ptr SOMAGeometryColumn::create( ArrowSchema* schema, ArrowSchema* spatial_schema, ArrowArray* spatial_array, + const SOMACoordinateSpace& coordinate_space, const std::string& soma_type, std::string_view type_metadata, PlatformConfig platform_config) { @@ -74,7 +90,7 @@ std::shared_ptr SOMAGeometryColumn::create( throw TileDBSOMAError(std::format( "[SOMAGeometryColumn] " "Unkwown type metadata for `{}`: " - "Expected 'WKB', got {}", + "Expected 'WKB', got '{}'", SOMA_GEOMETRY_COLUMN_NAME, type_metadata)); } @@ -107,7 +123,7 @@ std::shared_ptr SOMAGeometryColumn::create( ctx, schema, type_metadata, platform_config); return std::make_shared( - SOMAGeometryColumn(dims, attribute.first)); + SOMAGeometryColumn(dims, attribute.first, coordinate_space)); } void SOMAGeometryColumn::_set_dim_points( @@ -464,13 +480,49 @@ std::any SOMAGeometryColumn::_core_current_domain_slot( std::make_pair(min, max)); } -ArrowArray* SOMAGeometryColumn::arrow_domain_slot( +std::pair SOMAGeometryColumn::arrow_domain_slot( const SOMAContext& ctx, Array& array, enum Domainish kind) const { switch (domain_type().value()) { - case TILEDB_FLOAT64: - return ArrowAdapter::make_arrow_array_child_var( - domain_slot>(ctx, array, kind)); - break; + case TILEDB_FLOAT64: { + auto parent_schema = ArrowAdapter::make_arrow_schema_parent( + TDB_DIM_PER_SPATIAL_AXIS, name()); + auto parent_array = ArrowAdapter::make_arrow_array_parent( + TDB_DIM_PER_SPATIAL_AXIS); + + parent_array->length = 2; + parent_array->n_buffers = 1; + parent_array->buffers = (const void**)malloc(sizeof(void*)); + parent_array->buffers[0] = nullptr; + + auto kind_domain = domain_slot>( + ctx, array, kind); + + for (size_t i = 0; i < TDB_DIM_PER_SPATIAL_AXIS; ++i) { + // Generate coordinate space axie schema + auto child_schema = static_cast( + malloc(sizeof(ArrowSchema))); + child_schema->format = strdup( + ArrowAdapter::to_arrow_format(TILEDB_FLOAT64).data()); + child_schema->name = strdup( + coordinate_space.axis(i).name.c_str()); + child_schema->metadata = nullptr; + child_schema->flags = 0; + child_schema->n_children = 0; + child_schema->children = nullptr; + child_schema->dictionary = nullptr; + child_schema->release = &ArrowAdapter::release_schema; + child_schema->private_data = nullptr; + + parent_schema->children[i] = child_schema; + + parent_array->children[i] = + ArrowAdapter::make_arrow_array_child(std::vector( + {kind_domain.first[i], kind_domain.second[i]})); + } + + return std::make_pair( + parent_array.release(), parent_schema.release()); + } break; default: throw TileDBSOMAError(std::format( "[SOMAGeometryColumn][arrow_domain_slot] dim {} has unhandled " @@ -482,7 +534,7 @@ ArrowArray* SOMAGeometryColumn::arrow_domain_slot( } ArrowSchema* SOMAGeometryColumn::arrow_schema_slot( - const SOMAContext& ctx, Array& array) { + const SOMAContext& ctx, Array& array) const { return ArrowAdapter::arrow_schema_from_tiledb_attribute( attribute, *ctx.tiledb_ctx(), array) .release(); diff --git a/libtiledbsoma/src/soma/soma_geometry_column.h b/libtiledbsoma/src/soma/soma_geometry_column.h index 1b8a0e7624..b9409aeeea 100644 --- a/libtiledbsoma/src/soma/soma_geometry_column.h +++ b/libtiledbsoma/src/soma/soma_geometry_column.h @@ -28,6 +28,7 @@ #include #include "soma_column.h" +#include "soma_coordinates.h" namespace tiledbsoma { @@ -43,20 +44,26 @@ class SOMAGeometryColumn : public SOMAColumn { static std::shared_ptr deserialize( const nlohmann::json& soma_schema, const Context& ctx, - const Array& array); + const Array& array, + const std::map&); static std::shared_ptr create( std::shared_ptr ctx, ArrowSchema* schema, ArrowSchema* spatial_schema, ArrowArray* spatial_array, + const SOMACoordinateSpace& coordinate_space, const std::string& soma_type, std::string_view type_metadata, PlatformConfig platform_config); - SOMAGeometryColumn(std::vector dimensions, Attribute attribute) + SOMAGeometryColumn( + std::vector dimensions, + Attribute attribute, + SOMACoordinateSpace coordinate_space) : dimensions(dimensions) - , attribute(attribute){}; + , attribute(attribute) + , coordinate_space(coordinate_space){}; inline std::string name() const override { return SOMA_GEOMETRY_COLUMN_NAME; @@ -97,16 +104,20 @@ class SOMAGeometryColumn : public SOMAColumn { return std::nullopt; } - ArrowArray* arrow_domain_slot( + std::pair arrow_domain_slot( const SOMAContext& ctx, Array& array, enum Domainish kind) const override; ArrowSchema* arrow_schema_slot( - const SOMAContext& ctx, Array& array) override; + const SOMAContext& ctx, Array& array) const override; void serialize(nlohmann::json&) const override; + inline SOMACoordinateSpace get_coordinate_space() const { + return coordinate_space; + } + protected: void _set_dim_points( const std::unique_ptr& query, @@ -148,6 +159,7 @@ class SOMAGeometryColumn : public SOMAColumn { const size_t TDB_DIM_PER_SPATIAL_AXIS = 2; std::vector dimensions; Attribute attribute; + SOMACoordinateSpace coordinate_space; /** * Compute the usable domain limits. If the array has a current domain then diff --git a/libtiledbsoma/src/soma/soma_geometry_dataframe.cc b/libtiledbsoma/src/soma/soma_geometry_dataframe.cc index ec2a2eb063..b19612e259 100644 --- a/libtiledbsoma/src/soma/soma_geometry_dataframe.cc +++ b/libtiledbsoma/src/soma/soma_geometry_dataframe.cc @@ -16,6 +16,7 @@ #include "../geometry/operators/envelope.h" #include "../geometry/operators/io/write.h" #include "../utils/util.h" +#include "soma_geometry_column.h" #include #include @@ -31,7 +32,6 @@ void SOMAGeometryDataFrame::create( std::string_view uri, const std::unique_ptr& schema, const ArrowTable& index_columns, - const ArrowTable& spatial_columns, const SOMACoordinateSpace& coordinate_space, std::shared_ptr ctx, PlatformConfig platform_config, @@ -41,10 +41,10 @@ void SOMAGeometryDataFrame::create( ctx->tiledb_ctx(), schema, index_columns, + std::make_optional(coordinate_space), "SOMAGeometryDataFrame", true, - platform_config, - spatial_columns); + platform_config); auto array = SOMAArray::_create( ctx, diff --git a/libtiledbsoma/src/soma/soma_geometry_dataframe.h b/libtiledbsoma/src/soma/soma_geometry_dataframe.h index 22a4746a92..d712de5a8a 100644 --- a/libtiledbsoma/src/soma/soma_geometry_dataframe.h +++ b/libtiledbsoma/src/soma/soma_geometry_dataframe.h @@ -49,7 +49,6 @@ class SOMAGeometryDataFrame : virtual public SOMAArray { std::string_view uri, const std::unique_ptr& schema, const ArrowTable& index_columns, - const ArrowTable& spatial_columns, const SOMACoordinateSpace& coordinate_space, std::shared_ptr ctx, PlatformConfig platform_config = PlatformConfig(), diff --git a/libtiledbsoma/src/soma/soma_point_cloud_dataframe.cc b/libtiledbsoma/src/soma/soma_point_cloud_dataframe.cc index 84c331e903..a028e266ab 100644 --- a/libtiledbsoma/src/soma/soma_point_cloud_dataframe.cc +++ b/libtiledbsoma/src/soma/soma_point_cloud_dataframe.cc @@ -36,6 +36,7 @@ void SOMAPointCloudDataFrame::create( ctx->tiledb_ctx(), schema, index_columns, + std::make_optional(coordinate_space), "SOMAPointCloudDataFrame", true, platform_config); diff --git a/libtiledbsoma/src/soma/soma_sparse_ndarray.cc b/libtiledbsoma/src/soma/soma_sparse_ndarray.cc index 889aa5ce4f..4e83a78f2a 100644 --- a/libtiledbsoma/src/soma/soma_sparse_ndarray.cc +++ b/libtiledbsoma/src/soma/soma_sparse_ndarray.cc @@ -12,6 +12,7 @@ */ #include "soma_sparse_ndarray.h" +#include "soma_coordinates.h" namespace tiledbsoma { using namespace tiledb; @@ -66,6 +67,7 @@ void SOMASparseNDArray::create( ctx->tiledb_ctx(), schema, index_columns, + std::nullopt, "SOMASparseNDArray", true, platform_config); diff --git a/libtiledbsoma/src/utils/arrow_adapter.cc b/libtiledbsoma/src/utils/arrow_adapter.cc index 827730fe49..5f30371448 100644 --- a/libtiledbsoma/src/utils/arrow_adapter.cc +++ b/libtiledbsoma/src/utils/arrow_adapter.cc @@ -19,10 +19,7 @@ #include "util.h" #include "../soma/soma_attribute.h" -#include "../soma/soma_dimension.h" -#include "../soma/soma_geometry_column.h" - -#include "../soma/soma_attribute.h" +#include "../soma/soma_coordinates.h" #include "../soma/soma_dimension.h" #include "../soma/soma_geometry_column.h" @@ -479,7 +476,7 @@ std::unique_ptr ArrowAdapter::arrow_schema_from_tiledb_dimension( } std::unique_ptr ArrowAdapter::arrow_schema_from_tiledb_attribute( - Attribute& attribute, const Context& ctx, const Array& tiledb_array) { + const Attribute& attribute, const Context& ctx, const Array& tiledb_array) { std::unique_ptr arrow_schema = std::make_unique(); arrow_schema->format = strdup( ArrowAdapter::to_arrow_format(attribute.type()).data()); @@ -502,9 +499,10 @@ std::unique_ptr ArrowAdapter::arrow_schema_from_tiledb_attribute( "name {}", arrow_schema->format, arrow_schema->name)); - + // We shouldn;t have to cast constness away. Maybe missing const qualifier + // from AttributeExperimental::get_enumeration_name auto enmr_name = AttributeExperimental::get_enumeration_name( - ctx, attribute); + ctx, const_cast(attribute)); if (enmr_name.has_value()) { auto enmr = ArrayExperimental::get_enumeration( ctx, tiledb_array, attribute.name()); @@ -877,10 +875,10 @@ ArrowAdapter::tiledb_schema_from_arrow_schema( std::shared_ptr ctx, const std::unique_ptr& arrow_schema, const ArrowTable& index_column_info, + const std::optional& coordinate_space, std::string soma_type, bool is_sparse, - PlatformConfig platform_config, - const ArrowTable& spatial_column_info) { + PlatformConfig platform_config) { auto& index_column_array = index_column_info.first; auto& index_column_schema = index_column_info.second; @@ -940,13 +938,13 @@ ArrowAdapter::tiledb_schema_from_arrow_schema( if (strcmp(child->name, index_column_schema->children[i]->name) == 0) { if (strcmp(child->name, SOMA_GEOMETRY_COLUMN_NAME.c_str()) == - 0 && - spatial_column_info.first.get() != nullptr) { + 0) { columns.push_back(SOMAGeometryColumn::create( ctx, child, - spatial_column_info.second.get(), - spatial_column_info.first.get(), + index_column_schema->children[i], + index_column_array->children[i], + coordinate_space.value(), soma_type, type_metadata, platform_config)); @@ -1053,23 +1051,28 @@ ArrowAdapter::tiledb_schema_from_arrow_schema( continue; } - if (column->name() == SOMA_GEOMETRY_COLUMN_NAME) { - std::vector cdslot; - for (int64_t j = 0; j < spatial_column_info.first->n_children; - ++j) { - cdslot.push_back(ArrowAdapter::get_table_any_column<2>( - spatial_column_info.first->children[j], - spatial_column_info.second->children[j], - 3)); - } - - column->set_current_domain_slot(ndrect, cdslot); - } else { - column->set_current_domain_slot( - ndrect, - get_table_any_column_by_name<2>( - index_column_info, column->name(), 3)); - } + column->set_current_domain_slot( + ndrect, + get_table_any_column_by_name<2>( + index_column_info, column->name(), 3)); + + // if (column->name() == SOMA_GEOMETRY_COLUMN_NAME) { + // std::vector cdslot; + // for (int64_t j = 0; j < spatial_column_info.first->n_children; + // ++j) { + // cdslot.push_back(ArrowAdapter::get_table_any_column<2>( + // spatial_column_info.first->children[j], + // spatial_column_info.second->children[j], + // 3)); + // } + + // column->set_current_domain_slot(ndrect, cdslot); + // } else { + // column->set_current_domain_slot( + // ndrect, + // get_table_any_column_by_name<2>( + // index_column_info, column->name(), 3)); + // } } current_domain.set_ndrectangle(ndrect); @@ -1121,6 +1124,54 @@ Dimension ArrowAdapter::tiledb_dimension_from_arrow_schema( return dim; } +Dimension ArrowAdapter::tiledb_dimension_from_arrow_schema_ext( + std::shared_ptr ctx, + ArrowSchema* schema, + ArrowArray* array, + std::string soma_type, + std::string_view type_metadata, + std::string prefix, + std::string suffix, + PlatformConfig platform_config) { + if (strcmp(schema->format, "+l") != 0) { + throw TileDBSOMAError( + std::format("[tiledb_dimension_from_arrow_schema_ext] Schema " + "should be of type list.")); + } + + if (schema->n_children != 1) { + throw TileDBSOMAError( + std::format("[tiledb_dimension_from_arrow_schema_ext] Schema " + "should have exactly 1 child")); + } + + auto type = ArrowAdapter::to_tiledb_format( + schema->children[0]->format, type_metadata); + + if (ArrowAdapter::arrow_is_var_length_type(schema->format)) { + type = TILEDB_STRING_ASCII; + } + + auto col_name = prefix + std::string(schema->name) + suffix; + + FilterList filter_list = ArrowAdapter::_create_dim_filter_list( + col_name, platform_config, soma_type, ctx); + + if (array->length != 5) { + throw TileDBSOMAError(std::format( + "ArrowAdapter: unexpected length {} != 5 for name " + "'{}'", + array->length, + col_name)); + } + + const void* buff = array->children[0]->buffers[1]; + auto dim = ArrowAdapter::_create_dim(type, col_name, buff, ctx); + dim.set_filter_list(filter_list); + + return dim; +} + std::pair> ArrowAdapter::tiledb_attribute_from_arrow_schema( std::shared_ptr ctx, @@ -1611,10 +1662,10 @@ std::unique_ptr ArrowAdapter::make_arrow_schema( } std::unique_ptr ArrowAdapter::make_arrow_schema_parent( - size_t num_columns) { + size_t num_columns, std::string_view name) { auto arrow_schema = std::make_unique(); arrow_schema->format = strdup("+s"); // structure, i.e. non-leaf node - arrow_schema->name = strdup("parent"); + arrow_schema->name = strdup(name.data()); arrow_schema->metadata = nullptr; arrow_schema->flags = 0; arrow_schema->n_children = static_cast( diff --git a/libtiledbsoma/src/utils/arrow_adapter.h b/libtiledbsoma/src/utils/arrow_adapter.h index 29bc0159b2..ccfc671ee3 100644 --- a/libtiledbsoma/src/utils/arrow_adapter.h +++ b/libtiledbsoma/src/utils/arrow_adapter.h @@ -58,6 +58,7 @@ using namespace tiledb; using json = nlohmann::json; class ColumnBuffer; +class SOMACoordinateSpace; /** * @brief The ArrowBuffer holds a shared pointer to a ColumnBuffer, which @@ -366,7 +367,9 @@ class ArrowAdapter { * @return ArrowSchema */ static std::unique_ptr arrow_schema_from_tiledb_attribute( - Attribute& attribute, const Context& ctx, const Array& tiledb_array); + const Attribute& attribute, + const Context& ctx, + const Array& tiledb_array); /** * @brief Get members of the TileDB Schema in the form of a @@ -406,12 +409,10 @@ class ArrowAdapter { std::shared_ptr ctx, const std::unique_ptr& arrow_schema, const ArrowTable& index_column_info, + const std::optional& coordinate_space, std::string soma_type, bool is_sparse = true, - PlatformConfig platform_config = PlatformConfig(), - const ArrowTable& spatial_column_info = { - std::unique_ptr(nullptr), - std::unique_ptr(nullptr)}); + PlatformConfig platform_config = PlatformConfig()); /** * @brief Get a TileDB dimension from an Arrow schema. @@ -428,6 +429,26 @@ class ArrowAdapter { std::string suffix = std::string(), PlatformConfig platform_config = PlatformConfig()); + /** + * @brief Get a TileDB dimension from an Arrow schema. + * + * @remarks This is a list variation which expects a schemaand a data array + * to describe a list instead of a simple columns. Used especialy with + * nested domains where it is described by a struct and each nested + * dimension is described by a list. + * + * @return std::pair The TileDB dimension. + */ + static Dimension tiledb_dimension_from_arrow_schema_ext( + std::shared_ptr ctx, + ArrowSchema* schema, + ArrowArray* array, + std::string soma_type, + std::string_view type_metadata, + std::string prefix = std::string(), + std::string suffix = std::string(), + PlatformConfig platform_config = PlatformConfig()); + /** * @brief Get a TileDB attribute with its enumeration from an Arrow schema. * @@ -498,7 +519,7 @@ class ArrowAdapter { * ArrowSchema. This constructs the parent and not the children. */ static std::unique_ptr make_arrow_schema_parent( - size_t num_columns); + size_t num_columns, std::string_view name = "parent"); /** * @brief Creates a nanoarrow ArrowArray which accommodates @@ -909,8 +930,9 @@ class ArrowAdapter { ArrowArray* selected_array = arrow_array->children[column_index]; ArrowSchema* selected_schema = arrow_schema->children[column_index]; - // Complex domain - if (selected_array->n_children != 0) { + if (strcmp(selected_schema->format, "+s") == 0) { + // Complex domain like the ones required by `GeometryColumn` expect + // a struct containing lists of values for (int64_t i = 0; i < selected_schema->n_children; ++i) { ArrowArray* array = selected_array->children[i]; ArrowSchema* schema = selected_schema->children[i]; diff --git a/libtiledbsoma/test/common.cc b/libtiledbsoma/test/common.cc index 6c75251de5..8176c56022 100644 --- a/libtiledbsoma/test/common.cc +++ b/libtiledbsoma/test/common.cc @@ -17,7 +17,8 @@ namespace helper { static std::unique_ptr _create_index_cols_info_array( - const std::vector& dim_infos); + const std::vector& dim_infos, + std::optional coordinate_space = std::nullopt); // Core PR: https://github.com/TileDB-Inc/TileDB/pull/5303 bool have_dense_current_domain_support() { @@ -64,7 +65,8 @@ bool have_dense_current_domain_support() { std::pair, ArrowTable> create_arrow_schema_and_index_columns( const std::vector& dim_infos, - const std::vector& attr_infos) { + const std::vector& attr_infos, + std::optional coordinate_space) { int ndim = dim_infos.size(); int nattr = attr_infos.size(); int nfield = ndim + nattr; @@ -84,8 +86,10 @@ create_arrow_schema_and_index_columns( auto arrow_schema = ArrowAdapter::make_arrow_schema( names, tiledb_datatypes); - auto index_cols_info_schema = create_index_cols_info_schema(dim_infos); - auto index_cols_info_array = _create_index_cols_info_array(dim_infos); + auto index_cols_info_schema = create_index_cols_info_schema( + dim_infos, coordinate_space); + auto index_cols_info_array = _create_index_cols_info_array( + dim_infos, coordinate_space); return std::pair( std::move(arrow_schema), @@ -112,7 +116,8 @@ ArrowTable create_column_index_info(const std::vector& dim_infos) { } std::unique_ptr create_index_cols_info_schema( - const std::vector& dim_infos) { + const std::vector& dim_infos, + std::optional coordinate_space) { auto ndim = dim_infos.size(); std::vector names(ndim); @@ -127,18 +132,31 @@ std::unique_ptr create_index_cols_info_schema( auto schema = ArrowAdapter::make_arrow_schema(names, tiledb_datatypes); for (size_t i = 0; i < static_cast(schema->n_children); ++i) { - if (strcmp(schema->children[i]->name, "soma_geometry")) { - nanoarrow::UniqueBuffer buffer; - ArrowMetadataBuilderInit(buffer.get(), nullptr); - ArrowMetadataBuilderAppend( - buffer.get(), - ArrowCharView("dtype"), - ArrowCharView( - dim_infos[i].tiledb_datatype == TILEDB_GEOM_WKB ? "WKB" : - "WKT")); - ArrowSchemaSetMetadata( - schema->children[i], - std::string((char*)buffer->data, buffer->size_bytes).c_str()); + if (strcmp(schema->children[i]->name, "soma_geometry") == 0) { + // Recreate schema for WKB domain (struct of floats) + auto geometry_schema = ArrowAdapter::make_arrow_schema_parent( + coordinate_space->size(), "soma_geometry"); + for (size_t j = 0; j < coordinate_space->size(); ++j) { + ArrowSchema* dim_schema = (ArrowSchema*)malloc( + sizeof(ArrowSchema)); + auto arrow_type_name = ArrowAdapter::tdb_to_arrow_type( + TILEDB_FLOAT64); + dim_schema->name = strdup( + coordinate_space->axis(j).name.c_str()); + dim_schema->format = strdup(arrow_type_name.c_str()); + dim_schema->metadata = nullptr; + dim_schema->flags = 0; + dim_schema->n_children = 0; // leaf node + dim_schema->children = nullptr; // leaf node + dim_schema->dictionary = nullptr; + dim_schema->release = &ArrowAdapter::release_schema; + dim_schema->private_data = nullptr; + + geometry_schema->children[j] = dim_schema; + } + + schema->release(schema->children[i]); + schema->children[i] = geometry_schema.release(); } } @@ -146,7 +164,8 @@ std::unique_ptr create_index_cols_info_schema( } static std::unique_ptr _create_index_cols_info_array( - const std::vector& dim_infos) { + const std::vector& dim_infos, + std::optional coordinate_space) { int ndim = dim_infos.size(); auto index_cols_info_array = ArrowAdapter::make_arrow_array_parent(ndim); @@ -179,7 +198,23 @@ static std::unique_ptr _create_index_cols_info_array( } else if (info.tiledb_datatype == TILEDB_GEOM_WKB) { // No domain can be set for WKB. The domain will be set to the // individual spatial axes. - dim_array = ArrowAdapter::make_arrow_array_child_binary(); + dim_array = ArrowAdapter::make_arrow_array_parent( + coordinate_space->size()) + .release(); + dim_array->n_buffers = 1; + dim_array->buffers = (const void**)malloc(sizeof(void*)); + dim_array->buffers[0] = nullptr; + dim_array->length = 5; + for (size_t j = 0; j < coordinate_space->size(); ++j) { + std::vector dom( + {0, + (double_t)CORE_DOMAIN_MAX, + 1, + 0, + (double_t)info.dim_max}); + dim_array->children[j] = ArrowAdapter::make_arrow_array_child( + dom); + } } else if (info.tiledb_datatype == TILEDB_FLOAT64) { // domain big; current_domain small std::vector dom( diff --git a/libtiledbsoma/test/common.h b/libtiledbsoma/test/common.h index ea9d4ce85b..dcf82416b4 100644 --- a/libtiledbsoma/test/common.h +++ b/libtiledbsoma/test/common.h @@ -68,7 +68,8 @@ struct AttrInfo { std::pair, ArrowTable> create_arrow_schema_and_index_columns( const std::vector& dim_infos, - const std::vector& attr_infos); + const std::vector& attr_infos, + std::optional coordinate_space = std::nullopt); ArrowTable create_column_index_info(const std::vector& dim_infos); @@ -78,7 +79,8 @@ std::string to_arrow_format(tiledb_datatype_t tiledb_datatype); bool have_dense_current_domain_support(); std::unique_ptr create_index_cols_info_schema( - const std::vector& dim_infos); + const std::vector& dim_infos, + std::optional coordinate_space = std::nullopt); } // namespace helper #endif diff --git a/libtiledbsoma/test/unit_soma_array.cc b/libtiledbsoma/test/unit_soma_array.cc index efb15f0c22..b5f7e92e4b 100644 --- a/libtiledbsoma/test/unit_soma_array.cc +++ b/libtiledbsoma/test/unit_soma_array.cc @@ -466,7 +466,7 @@ TEST_CASE("SOMAArray: Write and read back Boolean") { schema.add_attribute(attr); schema.set_allows_dups(true); - SOMAArray::create(ctx, uri, std::move(schema), "NONE", ""); + SOMAArray::create(ctx, uri, std::move(schema), "NONE"); auto soma_array = SOMAArray::open(OpenMode::write, uri, ctx); auto arrow_schema = std::make_unique(); diff --git a/libtiledbsoma/test/unit_soma_column.cc b/libtiledbsoma/test/unit_soma_column.cc index 36d526ba5c..4a11b83179 100644 --- a/libtiledbsoma/test/unit_soma_column.cc +++ b/libtiledbsoma/test/unit_soma_column.cc @@ -14,6 +14,7 @@ #include #include #include +#include #include "../src/soma/soma_attribute.h" #include "../src/soma/soma_column.h" #include "../src/soma/soma_dimension.h" @@ -342,33 +343,30 @@ TEST_CASE_METHOD( ned_str = ArrowAdapter::get_table_string_column_by_name( non_empty_domain, "mystring"); - std::vector - ned_str_col = ArrowAdapter::get_array_string_column( - columns[0]->arrow_domain_slot( - *ctx_, raw_array, Domainish::kind_non_empty_domain), - columns[0]->arrow_schema_slot(*ctx_, raw_array)); + std::vector ned_str_col = std::apply( + ArrowAdapter::get_array_string_column, + columns[0]->arrow_domain_slot( + *ctx_, raw_array, Domainish::kind_non_empty_domain)); ArrowTable soma_domain = sdf->get_soma_domain(); std::vector dom_str = ArrowAdapter::get_table_string_column_by_name( soma_domain, "mystring"); - std::vector - dom_str_col = ArrowAdapter::get_array_string_column( - columns[0]->arrow_domain_slot( - *ctx_, raw_array, Domainish::kind_core_current_domain), - columns[0]->arrow_schema_slot(*ctx_, raw_array)); + std::vector dom_str_col = std::apply( + ArrowAdapter::get_array_string_column, + columns[0]->arrow_domain_slot( + *ctx_, raw_array, Domainish::kind_core_current_domain)); ArrowTable soma_maxdomain = sdf->get_soma_maxdomain(); std::vector maxdom_str = ArrowAdapter::get_table_string_column_by_name( soma_maxdomain, "mystring"); - std::vector - maxdom_str_col = ArrowAdapter::get_array_string_column( - columns[0]->arrow_domain_slot( - *ctx_, raw_array, Domainish::kind_core_domain), - columns[0]->arrow_schema_slot(*ctx_, raw_array)); + std::vector maxdom_str_col = std::apply( + ArrowAdapter::get_array_string_column, + columns[0]->arrow_domain_slot( + *ctx_, raw_array, Domainish::kind_core_domain)); REQUIRE(ned_str == std::vector({"", ""})); @@ -501,33 +499,30 @@ TEST_CASE_METHOD( ned_str = ArrowAdapter::get_table_string_column_by_name( non_empty_domain, "mystring"); - std::vector - ned_str_col = ArrowAdapter::get_array_string_column( - columns[0]->arrow_domain_slot( - *ctx_, raw_array, Domainish::kind_non_empty_domain), - columns[0]->arrow_schema_slot(*ctx_, raw_array)); + std::vector ned_str_col = std::apply( + ArrowAdapter::get_array_string_column, + columns[0]->arrow_domain_slot( + *ctx_, raw_array, Domainish::kind_non_empty_domain)); ArrowTable soma_domain = sdf->get_soma_domain(); std::vector dom_str = ArrowAdapter::get_table_string_column_by_name( soma_domain, "mystring"); - std::vector - dom_str_col = ArrowAdapter::get_array_string_column( - columns[0]->arrow_domain_slot( - *ctx_, raw_array, Domainish::kind_core_current_domain), - columns[0]->arrow_schema_slot(*ctx_, raw_array)); + std::vector dom_str_col = std::apply( + ArrowAdapter::get_array_string_column, + columns[0]->arrow_domain_slot( + *ctx_, raw_array, Domainish::kind_core_current_domain)); ArrowTable soma_maxdomain = sdf->get_soma_maxdomain(); std::vector maxdom_str = ArrowAdapter::get_table_string_column_by_name( soma_maxdomain, "mystring"); - std::vector - maxdom_str_col = ArrowAdapter::get_array_string_column( - columns[0]->arrow_domain_slot( - *ctx_, raw_array, Domainish::kind_core_domain), - columns[0]->arrow_schema_slot(*ctx_, raw_array)); + std::vector maxdom_str_col = std::apply( + ArrowAdapter::get_array_string_column, + columns[0]->arrow_domain_slot( + *ctx_, raw_array, Domainish::kind_core_domain)); REQUIRE(ned_str == std::vector({"", ""})); diff --git a/libtiledbsoma/test/unit_soma_geometry_dataframe.cc b/libtiledbsoma/test/unit_soma_geometry_dataframe.cc index bd2f263337..c2d39a863f 100644 --- a/libtiledbsoma/test/unit_soma_geometry_dataframe.cc +++ b/libtiledbsoma/test/unit_soma_geometry_dataframe.cc @@ -40,20 +40,6 @@ TEST_CASE("SOMAGeometryDataFrame: basic", "[SOMAGeometryDataFrame]") { .string_lo = "N/A", .string_hi = "N/A"})}); - std::vector spatial_dim_infos( - {helper::DimInfo( - {.name = "x", - .tiledb_datatype = TILEDB_FLOAT64, - .dim_max = 200, - .string_lo = "N/A", - .string_hi = "N/A"}), - helper::DimInfo( - {.name = "y", - .tiledb_datatype = TILEDB_FLOAT64, - .dim_max = 100, - .string_lo = "N/A", - .string_hi = "N/A"})}); - std::vector attr_infos({helper::AttrInfo( {.name = "quality", .tiledb_datatype = TILEDB_FLOAT64})}); @@ -62,17 +48,14 @@ TEST_CASE("SOMAGeometryDataFrame: basic", "[SOMAGeometryDataFrame]") { // Create the geometry dataframe. auto [schema, index_columns] = - helper::create_arrow_schema_and_index_columns(dim_infos, attr_infos); - auto spatial_columns = helper::create_column_index_info(spatial_dim_infos); + helper::create_arrow_schema_and_index_columns( + dim_infos, attr_infos, coord_space); SOMAGeometryDataFrame::create( uri, std::move(schema), ArrowTable( std::move(index_columns.first), std::move(index_columns.second)), - ArrowTable( - std::move(spatial_columns.first), - std::move(spatial_columns.second)), coord_space, ctx, platform_config, @@ -98,8 +81,6 @@ TEST_CASE("SOMAGeometryDataFrame: basic", "[SOMAGeometryDataFrame]") { std::vector expected_index_column_names = { dim_infos[0].name, dim_infos[1].name}; - std::vector expected_spatial_column_names = { - spatial_dim_infos[0].name, spatial_dim_infos[1].name}; REQUIRE(soma_geometry->index_column_names() == expected_index_column_names); REQUIRE(soma_geometry->coordinate_space() == coord_space); REQUIRE(soma_geometry->nnz() == 0); @@ -130,35 +111,18 @@ TEST_CASE("SOMAGeometryDataFrame: Roundtrip", "[SOMAGeometryDataFrame]") { .string_lo = "N/A", .string_hi = "N/A"})}); - std::vector spatial_dim_infos( - {helper::DimInfo( - {.name = "x", - .tiledb_datatype = TILEDB_FLOAT64, - .dim_max = 200, - .string_lo = "N/A", - .string_hi = "N/A"}), - helper::DimInfo( - {.name = "y", - .tiledb_datatype = TILEDB_FLOAT64, - .dim_max = 100, - .string_lo = "N/A", - .string_hi = "N/A"})}); - std::vector attr_infos({helper::AttrInfo( {.name = "quality", .tiledb_datatype = TILEDB_FLOAT64})}); auto [schema, index_columns] = - helper::create_arrow_schema_and_index_columns(dim_infos, attr_infos); - auto spatial_columns = helper::create_column_index_info(spatial_dim_infos); + helper::create_arrow_schema_and_index_columns( + dim_infos, attr_infos, coord_space); SOMAGeometryDataFrame::create( uri, std::move(schema), ArrowTable( std::move(index_columns.first), std::move(index_columns.second)), - ArrowTable( - std::move(spatial_columns.first), - std::move(spatial_columns.second)), coord_space, ctx, platform_config, @@ -263,25 +227,13 @@ TEST_CASE("SOMAGeometryDataFrame: Roundtrip", "[SOMAGeometryDataFrame]") { while (auto batch = soma_geometry->read_next()) { auto arrbuf = batch.value(); auto d0span = arrbuf->at(dim_infos[0].name)->data(); - auto d1span = arrbuf - ->at( - SOMA_GEOMETRY_DIMENSION_PREFIX + - spatial_dim_infos[0].name + "__min") + auto d1span = arrbuf->at(SOMA_GEOMETRY_DIMENSION_PREFIX + "x__min") ->data(); - auto d2span = arrbuf - ->at( - SOMA_GEOMETRY_DIMENSION_PREFIX + - spatial_dim_infos[0].name + "__max") + auto d2span = arrbuf->at(SOMA_GEOMETRY_DIMENSION_PREFIX + "x__max") ->data(); - auto d3span = arrbuf - ->at( - SOMA_GEOMETRY_DIMENSION_PREFIX + - spatial_dim_infos[1].name + "__min") + auto d3span = arrbuf->at(SOMA_GEOMETRY_DIMENSION_PREFIX + "y__min") ->data(); - auto d4span = arrbuf - ->at( - SOMA_GEOMETRY_DIMENSION_PREFIX + - spatial_dim_infos[1].name + "__max") + auto d4span = arrbuf->at(SOMA_GEOMETRY_DIMENSION_PREFIX + "y__max") ->data(); auto wkbs = arrbuf->at(dim_infos[1].name)->binaries(); auto a0span = arrbuf->at(attr_infos[0].name)->data();