diff --git a/benchmarks/benchmarks/anndata.py b/benchmarks/benchmarks/anndata.py new file mode 100644 index 000000000..15fb485a2 --- /dev/null +++ b/benchmarks/benchmarks/anndata.py @@ -0,0 +1,33 @@ +from __future__ import annotations + +import tracemalloc + +import numpy as np + +from .utils import gen_adata + + +class GarbargeCollectionSuite: + runs = 10 + + # custom because `memory_profiler` is a line-by-line profiler (also: https://github.com/pythonprofilers/memory_profiler/issues/402) + def track_peakmem_garbage_collection(self, *_): + def display_top(snapshot, key_type="lineno"): + snapshot = snapshot.filter_traces( + ( + tracemalloc.Filter(False, ""), + tracemalloc.Filter(False, ""), + ) + ) + top_stats = snapshot.statistics(key_type) + total = sum(stat.size for stat in top_stats) + return total + + total = np.zeros(self.runs) + tracemalloc.start() + for i in range(self.runs): + data = gen_adata(10000, 10000, "X-csc") # noqa: F841 + snapshot = tracemalloc.take_snapshot() + total[i] = display_top(snapshot) + tracemalloc.stop() + return max(total) diff --git a/docs/_templates/autosummary/class.rst b/docs/_templates/autosummary/class.rst index b4e7370aa..8fe1d69d0 100644 --- a/docs/_templates/autosummary/class.rst +++ b/docs/_templates/autosummary/class.rst @@ -13,7 +13,7 @@ .. autosummary:: :toctree: . {% for item in attributes %} - ~{{ fullname }}.{{ item }} + ~{{ name }}.{{ item }} {%- endfor %} {% endif %} {% endblock %} @@ -26,7 +26,7 @@ :toctree: . {% for item in methods %} {%- if item != '__init__' %} - ~{{ fullname }}.{{ item }} + ~{{ name }}.{{ item }} {%- endif -%} {%- endfor %} {% endif %} diff --git a/docs/conf.py b/docs/conf.py index f943fbb60..5b1b95f30 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -27,7 +27,7 @@ # default settings templates_path = ["_templates"] html_static_path = ["_static"] -source_suffix = [".rst", ".md"] +source_suffix = {".rst": "restructuredtext", ".md": "markdown"} master_doc = "index" default_role = "literal" exclude_patterns = [ diff --git a/src/anndata/_core/aligned_mapping.py b/src/anndata/_core/aligned_mapping.py index 90a99dedc..26c89d9f2 100644 --- a/src/anndata/_core/aligned_mapping.py +++ b/src/anndata/_core/aligned_mapping.py @@ -2,9 +2,10 @@ import warnings from abc import ABC, abstractmethod -from collections.abc import Collection, MutableMapping, Sequence +from collections.abc import MutableMapping, Sequence from copy import copy -from typing import TYPE_CHECKING, TypeVar, Union +from dataclasses import dataclass +from typing import TYPE_CHECKING, Generic, TypeVar, Union import numpy as np import pandas as pd @@ -12,15 +13,15 @@ from .._warnings import ExperimentalFeatureWarning, ImplicitModificationWarning from ..compat import AwkArray -from ..utils import axis_len, deprecated, warn_once +from ..utils import axis_len, convert_to_dict, deprecated, warn_once from .access import ElementRef from .index import _subset from .storage import coerce_array from .views import as_view, view_update if TYPE_CHECKING: - from collections.abc import Iterator, Mapping - from typing import ClassVar, Literal + from collections.abc import Callable, Iterable, Iterator, Mapping + from typing import ClassVar, Literal, Self from .anndata import AnnData from .raw import Raw @@ -28,13 +29,15 @@ OneDIdx = Union[Sequence[int], Sequence[bool], slice] TwoDIdx = tuple[OneDIdx, OneDIdx] - -I = TypeVar("I", OneDIdx, TwoDIdx, covariant=True) # TODO: pd.DataFrame only allowed in AxisArrays? -V = Union[pd.DataFrame, spmatrix, np.ndarray] +Value = Union[pd.DataFrame, spmatrix, np.ndarray] + +P = TypeVar("P", bound="AlignedMappingBase") +"""Parent mapping an AlignedView is based on.""" +I = TypeVar("I", OneDIdx, TwoDIdx) -class AlignedMapping(MutableMapping, ABC): +class AlignedMappingBase(MutableMapping[str, Value], ABC): """\ An abstract base class for Mappings containing array-like values aligned to either one or both AnnData axes. @@ -43,19 +46,22 @@ class AlignedMapping(MutableMapping, ABC): _allow_df: ClassVar[bool] """If this mapping supports heterogeneous DataFrames""" - _view_class: ClassVar[type[AlignedViewMixin]] + _view_class: ClassVar[type[AlignedView]] """The view class for this aligned mapping.""" - _actual_class: ClassVar[type[AlignedActualMixin]] + _actual_class: ClassVar[type[AlignedActual]] """The actual class (which has it’s own data) for this aligned mapping.""" + _parent: AnnData | Raw + """The parent object that this mapping is aligned to.""" + def __repr__(self): return f"{type(self).__name__} with keys: {', '.join(self.keys())}" def _ipython_key_completions_(self) -> list[str]: return list(self.keys()) - def _validate_value(self, val: V, key: str) -> V: + def _validate_value(self, val: Value, key: str) -> Value: """Raises an error if value is invalid""" if isinstance(val, AwkArray): warn_once( @@ -91,34 +97,27 @@ def _validate_value(self, val: V, key: str) -> V: @abstractmethod def attrname(self) -> str: """What attr for the AnnData is this?""" - pass @property @abstractmethod def axes(self) -> tuple[Literal[0, 1], ...]: """Which axes of the parent is this aligned to?""" - pass @property @abstractmethod - def is_view(self) -> bool: - pass + def is_view(self) -> bool: ... @property def parent(self) -> AnnData | Raw: return self._parent - def copy(self): - d = self._actual_class(self.parent, self._axis) - for k, v in self.items(): - if isinstance(v, AwkArray): - # Shallow copy since awkward array buffers are immutable - d[k] = copy(v) - else: - d[k] = v.copy() - return d + def copy(self) -> dict[str, Value]: + # Shallow copy for awkward array since their buffers are immutable + return { + k: copy(v) if isinstance(v, AwkArray) else v.copy() for k, v in self.items() + } - def _view(self, parent: AnnData, subset_idx: I): + def _view(self, parent: AnnData, subset_idx: I) -> AlignedView[Self, I]: """Returns a subset copy-on-write view of the object.""" return self._view_class(self, parent, subset_idx) @@ -127,25 +126,37 @@ def as_dict(self) -> dict: return dict(self) -class AlignedViewMixin: +class AlignedView(AlignedMappingBase, Generic[P, I]): + is_view: ClassVar[Literal[True]] = True + + # override docstring parent: AnnData """Reference to parent AnnData view""" attrname: str """What attribute in the parent is this?""" - parent_mapping: Mapping[str, V] + parent_mapping: P """The object this is a view of.""" - is_view = True + subset_idx: I + """The subset of the parent to view.""" + + def __init__(self, parent_mapping: P, parent_view: AnnData, subset_idx: I): + self.parent_mapping = parent_mapping + self._parent = parent_view + self.subset_idx = subset_idx + if hasattr(parent_mapping, "_axis"): + # LayersBase has no _axis, the rest does + self._axis = parent_mapping._axis # type: ignore - def __getitem__(self, key: str) -> V: + def __getitem__(self, key: str) -> Value: return as_view( _subset(self.parent_mapping[key], self.subset_idx), ElementRef(self.parent, self.attrname, (key,)), ) - def __setitem__(self, key: str, value: V): + def __setitem__(self, key: str, value: Value) -> None: value = self._validate_value(value, key) # Validate before mutating warnings.warn( f"Setting element `.{self.attrname}['{key}']` of view, " @@ -156,7 +167,7 @@ def __setitem__(self, key: str, value: V): with view_update(self.parent, self.attrname, ()) as new_mapping: new_mapping[key] = value - def __delitem__(self, key: str): + def __delitem__(self, key: str) -> None: if key not in self: raise KeyError( "'{key!r}' not found in view of {self.attrname}" @@ -180,16 +191,22 @@ def __len__(self) -> int: return len(self.parent_mapping) -class AlignedActualMixin: - _data: dict[str, V] +class AlignedActual(AlignedMappingBase): + is_view: ClassVar[Literal[False]] = False + + _data: MutableMapping[str, Value] """Underlying mapping to the data""" - is_view = False + def __init__(self, parent: AnnData | Raw, *, store: MutableMapping[str, Value]): + self._parent = parent + self._data = store + for k, v in self._data.items(): + self._data[k] = self._validate_value(v, k) - def __getitem__(self, key: str) -> V: + def __getitem__(self, key: str) -> Value: return self._data[key] - def __setitem__(self, key: str, value: V): + def __setitem__(self, key: str, value: Value): value = self._validate_value(value, key) self._data[key] = value @@ -206,14 +223,16 @@ def __len__(self) -> int: return len(self._data) -class AxisArraysBase(AlignedMapping): +class AxisArraysBase(AlignedMappingBase): """\ Mapping of key→array-like, where array-like is aligned to an axis of parent AnnData. """ - _allow_df = True - _dimnames = ("obs", "var") + _allow_df: ClassVar = True + _dimnames: ClassVar = ("obs", "var") + + _axis: Literal[0, 1] @property def attrname(self) -> str: @@ -229,12 +248,6 @@ def dim(self) -> str: """Name of the dimension this aligned to.""" return self._dimnames[self._axis] - def flipped(self) -> AxisArraysBase: - """Transpose.""" - new = self.copy() - new.dimension = abs(self._axis - 1) - return new - def to_df(self) -> pd.DataFrame: """Convert to pandas dataframe.""" df = pd.DataFrame(index=self.dim_names) @@ -244,12 +257,8 @@ def to_df(self) -> pd.DataFrame: df[f"{key}{icolumn + 1}"] = column return df - def _validate_value(self, val: V, key: str) -> V: - if ( - hasattr(val, "index") - and isinstance(val.index, Collection) - and not val.index.equals(self.dim_names) - ): + def _validate_value(self, val: Value, key: str) -> Value: + if isinstance(val, pd.DataFrame) and not val.index.equals(self.dim_names): # Could probably also re-order index if it’s contained try: pd.testing.assert_index_equal(val.index, self.dim_names) @@ -266,89 +275,61 @@ def dim_names(self) -> pd.Index: return (self.parent.obs_names, self.parent.var_names)[self._axis] -class AxisArrays(AlignedActualMixin, AxisArraysBase): +class AxisArrays(AlignedActual, AxisArraysBase): def __init__( self, parent: AnnData | Raw, - axis: int, - vals: Mapping | AxisArraysBase | None = None, + *, + axis: Literal[0, 1], + store: MutableMapping[str, Value] | AxisArraysBase, ): - self._parent = parent if axis not in {0, 1}: raise ValueError() self._axis = axis - self._data = dict() - if vals is not None: - self.update(vals) + super().__init__(parent, store=store) -class AxisArraysView(AlignedViewMixin, AxisArraysBase): - def __init__( - self, - parent_mapping: AxisArraysBase, - parent_view: AnnData, - subset_idx: OneDIdx, - ): - self.parent_mapping = parent_mapping - self._parent = parent_view - self.subset_idx = subset_idx - self._axis = parent_mapping._axis +class AxisArraysView(AlignedView[AxisArraysBase, OneDIdx], AxisArraysBase): + pass AxisArraysBase._view_class = AxisArraysView AxisArraysBase._actual_class = AxisArrays -class LayersBase(AlignedMapping): +class LayersBase(AlignedMappingBase): """\ Mapping of key: array-like, where array-like is aligned to both axes of the parent anndata. """ - _allow_df = False - attrname = "layers" - axes = (0, 1) + _allow_df: ClassVar = False + attrname: ClassVar[Literal["layers"]] = "layers" + axes: ClassVar[tuple[Literal[0], Literal[1]]] = (0, 1) - # TODO: I thought I had a more elegant solution to overriding this... - def copy(self) -> Layers: - d = self._actual_class(self.parent) - for k, v in self.items(): - d[k] = v.copy() - return d - -class Layers(AlignedActualMixin, LayersBase): - def __init__(self, parent: AnnData, vals: Mapping | None = None): - self._parent = parent - self._data = dict() - if vals is not None: - self.update(vals) +class Layers(AlignedActual, LayersBase): + pass -class LayersView(AlignedViewMixin, LayersBase): - def __init__( - self, - parent_mapping: LayersBase, - parent_view: AnnData, - subset_idx: TwoDIdx, - ): - self.parent_mapping = parent_mapping - self._parent = parent_view - self.subset_idx = subset_idx +class LayersView(AlignedView[LayersBase, TwoDIdx], LayersBase): + pass LayersBase._view_class = LayersView LayersBase._actual_class = Layers -class PairwiseArraysBase(AlignedMapping): +class PairwiseArraysBase(AlignedMappingBase): """\ Mapping of key: array-like, where both axes of array-like are aligned to one axis of the parent anndata. """ - _allow_df = False - _dimnames = ("obs", "var") + _allow_df: ClassVar = False + _dimnames: ClassVar = ("obs", "var") + + _axis: Literal[0, 1] @property def attrname(self) -> str: @@ -357,7 +338,7 @@ def attrname(self) -> str: @property def axes(self) -> tuple[Literal[0], Literal[0]] | tuple[Literal[1], Literal[1]]: """Axes of the parent this is aligned to""" - return self._axis, self._axis + return self._axis, self._axis # type: ignore @property def dim(self) -> str: @@ -365,34 +346,85 @@ def dim(self) -> str: return self._dimnames[self._axis] -class PairwiseArrays(AlignedActualMixin, PairwiseArraysBase): +class PairwiseArrays(AlignedActual, PairwiseArraysBase): def __init__( self, parent: AnnData, - axis: int, - vals: Mapping | None = None, + *, + axis: Literal[0, 1], + store: MutableMapping[str, Value], ): - self._parent = parent if axis not in {0, 1}: raise ValueError() self._axis = axis - self._data = dict() - if vals is not None: - self.update(vals) + super().__init__(parent, store=store) -class PairwiseArraysView(AlignedViewMixin, PairwiseArraysBase): - def __init__( - self, - parent_mapping: PairwiseArraysBase, - parent_view: AnnData, - subset_idx: OneDIdx, - ): - self.parent_mapping = parent_mapping - self._parent = parent_view - self.subset_idx = (subset_idx, subset_idx) - self._axis = parent_mapping._axis +class PairwiseArraysView(AlignedView[PairwiseArraysBase, OneDIdx], PairwiseArraysBase): + pass PairwiseArraysBase._view_class = PairwiseArraysView PairwiseArraysBase._actual_class = PairwiseArrays + + +AlignedMapping = Union[ + AxisArrays, AxisArraysView, Layers, LayersView, PairwiseArrays, PairwiseArraysView +] +T = TypeVar("T", bound=AlignedMapping) +"""Pair of types to be aligned.""" + + +@dataclass +class AlignedMappingProperty(property, Generic[T]): + """A :class:`property` that creates an ephemeral AlignedMapping. + + The actual data is stored as `f'_{self.name}'` in the parent object. + """ + + name: str + """Name of the attribute in the parent object.""" + cls: type[T] + """Concrete type that will be constructed.""" + axis: Literal[0, 1] | None = None + """Axis of the parent to align to.""" + + def construct(self, obj: AnnData, *, store: MutableMapping[str, Value]) -> T: + if self.axis is None: + return self.cls(obj, store=store) + return self.cls(obj, axis=self.axis, store=store) + + @property + def fget(self) -> Callable[[], None]: + """Fake fget for sphinx-autodoc-typehints.""" + + def fake(): ... + + fake.__annotations__ = { + "return": Union[self.cls._actual_class, self.cls._view_class] + } + return fake + + def __get__(self, obj: None | AnnData, objtype: type | None = None) -> T: + if obj is None: + # When accessed from the class, e.g. via `AnnData.obs`, + # this needs to return a `property` instance, e.g. for Sphinx + return self # type: ignore + if not obj.is_view: + return self.construct(obj, store=getattr(obj, f"_{self.name}")) + parent_anndata = obj._adata_ref + idxs = (obj._oidx, obj._vidx) + parent: AlignedMapping = getattr(parent_anndata, self.name) + return parent._view(obj, tuple(idxs[ax] for ax in parent.axes)) + + def __set__( + self, obj: AnnData, value: Mapping[str, Value] | Iterable[tuple[str, Value]] + ) -> None: + value = convert_to_dict(value) + _ = self.construct(obj, store=value) # Validate + if obj.is_view: + obj._init_as_actual(obj.copy()) + setattr(obj, f"_{self.name}", value) + + def __delete__(self, obj) -> None: + setattr(obj, self.name, dict()) diff --git a/src/anndata/_core/anndata.py b/src/anndata/_core/anndata.py index 7c621ade6..7da29e4a4 100644 --- a/src/anndata/_core/anndata.py +++ b/src/anndata/_core/anndata.py @@ -26,10 +26,10 @@ from .._settings import settings from ..compat import DaskArray, SpArray, ZarrArray, _move_adj_mtx from ..logging import anndata_logger as logger -from ..utils import axis_len, convert_to_dict, deprecated, ensure_df_homogeneous +from ..utils import axis_len, deprecated, ensure_df_homogeneous from .access import ElementRef from .aligned_df import _gen_dataframe -from .aligned_mapping import AxisArrays, Layers, PairwiseArrays +from .aligned_mapping import AlignedMappingProperty, AxisArrays, Layers, PairwiseArrays from .file_backing import AnnDataFileManager, to_memory from .index import _normalize_indices, _subset, get_vector from .raw import Raw @@ -286,11 +286,6 @@ def _init_as_view(self, adata_ref: AnnData, oidx: Index, vidx: Index): # views on attributes of adata_ref obs_sub = adata_ref.obs.iloc[oidx] var_sub = adata_ref.var.iloc[vidx] - self._obsm = adata_ref.obsm._view(self, (oidx,)) - self._varm = adata_ref.varm._view(self, (vidx,)) - self._layers = adata_ref.layers._view(self, (oidx, vidx)) - self._obsp = adata_ref.obsp._view(self, oidx) - self._varp = adata_ref.varp._view(self, vidx) # fix categories uns = copy(adata_ref._uns) if settings.should_remove_unused_categories: @@ -434,12 +429,11 @@ def _init_as_actual( # unstructured annotations self.uns = uns or OrderedDict() - # TODO: Think about consequences of making obsm a group in hdf - self._obsm = AxisArrays(self, 0, vals=convert_to_dict(obsm)) - self._varm = AxisArrays(self, 1, vals=convert_to_dict(varm)) + self.obsm = obsm + self.varm = varm - self._obsp = PairwiseArrays(self, 0, vals=convert_to_dict(obsp)) - self._varp = PairwiseArrays(self, 1, vals=convert_to_dict(varp)) + self.obsp = obsp + self.varp = varp # Backwards compat for connectivities matrices in uns["neighbors"] _move_adj_mtx({"uns": self._uns, "obsp": self._obsp}) @@ -464,7 +458,7 @@ def _init_as_actual( self._clean_up_old_format(uns) # layers - self._layers = Layers(self, layers) + self.layers = layers def __sizeof__(self, show_stratified=None, with_disk: bool = False) -> int: def get_size(X) -> int: @@ -656,45 +650,34 @@ def X(self, value: np.ndarray | sparse.spmatrix | SpArray | None): def X(self): self.X = None - @property - def layers(self) -> Layers | LayersView: - """\ - Dictionary-like object with values of the same dimensions as :attr:`X`. - - Layers in AnnData are inspired by loompy’s :ref:`loomlayers`. + layers: AlignedMappingProperty[Layers | LayersView] = AlignedMappingProperty( + "layers", Layers + ) + """\ + Dictionary-like object with values of the same dimensions as :attr:`X`. - Return the layer named `"unspliced"`:: + Layers in AnnData are inspired by loompy’s :ref:`loomlayers`. - adata.layers["unspliced"] + Return the layer named `"unspliced"`:: - Create or replace the `"spliced"` layer:: + adata.layers["unspliced"] - adata.layers["spliced"] = ... + Create or replace the `"spliced"` layer:: - Assign the 10th column of layer `"spliced"` to the variable a:: + adata.layers["spliced"] = ... - a = adata.layers["spliced"][:, 10] + Assign the 10th column of layer `"spliced"` to the variable a:: - Delete the `"spliced"` layer:: + a = adata.layers["spliced"][:, 10] - del adata.layers["spliced"] + Delete the `"spliced"` layer:: - Return layers’ names:: + del adata.layers["spliced"] - adata.layers.keys() - """ - return self._layers + Return layers’ names:: - @layers.setter - def layers(self, value): - layers = Layers(self, vals=convert_to_dict(value)) - if self.is_view: - self._init_as_actual(self.copy()) - self._layers = layers - - @layers.deleter - def layers(self): - self.layers = dict() + adata.layers.keys() + """ @property def raw(self) -> Raw: @@ -803,7 +786,7 @@ def _set_dim_index(self, value: pd.Index, attr: str): if self.is_view: self._init_as_actual(self.copy()) getattr(self, attr).index = value - for v in getattr(self, f"{attr}m").values(): + for v in getattr(self, f"_{attr}m").values(): if isinstance(v, pd.DataFrame): v.index = value @@ -877,97 +860,53 @@ def uns(self, value: MutableMapping): def uns(self): self.uns = OrderedDict() - @property - def obsm(self) -> AxisArrays | AxisArraysView: - """\ - Multi-dimensional annotation of observations - (mutable structured :class:`~numpy.ndarray`). - - Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` - of length `n_obs`. - Is sliced with `data` and `obs` but behaves otherwise like a :term:`mapping`. - """ - return self._obsm - - @obsm.setter - def obsm(self, value): - obsm = AxisArrays(self, 0, vals=convert_to_dict(value)) - if self.is_view: - self._init_as_actual(self.copy()) - self._obsm = obsm - - @obsm.deleter - def obsm(self): - self.obsm = dict() - - @property - def varm(self) -> AxisArrays | AxisArraysView: - """\ - Multi-dimensional annotation of variables/features - (mutable structured :class:`~numpy.ndarray`). - - Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` - of length `n_vars`. - Is sliced with `data` and `var` but behaves otherwise like a :term:`mapping`. - """ - return self._varm - - @varm.setter - def varm(self, value): - varm = AxisArrays(self, 1, vals=convert_to_dict(value)) - if self.is_view: - self._init_as_actual(self.copy()) - self._varm = varm - - @varm.deleter - def varm(self): - self.varm = dict() - - @property - def obsp(self) -> PairwiseArrays | PairwiseArraysView: - """\ - Pairwise annotation of observations, - a mutable mapping with array-like values. + obsm: AlignedMappingProperty[AxisArrays | AxisArraysView] = AlignedMappingProperty( + "obsm", AxisArrays, 0 + ) + """\ + Multi-dimensional annotation of observations + (mutable structured :class:`~numpy.ndarray`). - Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` - whose first two dimensions are of length `n_obs`. - Is sliced with `data` and `obs` but behaves otherwise like a :term:`mapping`. - """ - return self._obsp + Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` + of length `n_obs`. + Is sliced with `data` and `obs` but behaves otherwise like a :term:`mapping`. + """ - @obsp.setter - def obsp(self, value): - obsp = PairwiseArrays(self, 0, vals=convert_to_dict(value)) - if self.is_view: - self._init_as_actual(self.copy()) - self._obsp = obsp + varm: AlignedMappingProperty[AxisArrays | AxisArraysView] = AlignedMappingProperty( + "varm", AxisArrays, 1 + ) + """\ + Multi-dimensional annotation of variables/features + (mutable structured :class:`~numpy.ndarray`). - @obsp.deleter - def obsp(self): - self.obsp = dict() + Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` + of length `n_vars`. + Is sliced with `data` and `var` but behaves otherwise like a :term:`mapping`. + """ - @property - def varp(self) -> PairwiseArrays | PairwiseArraysView: - """\ - Pairwise annotation of variables/features, - a mutable mapping with array-like values. + obsp: AlignedMappingProperty[PairwiseArrays | PairwiseArraysView] = ( + AlignedMappingProperty("obsp", PairwiseArrays, 0) + ) + """\ + Pairwise annotation of observations, + a mutable mapping with array-like values. - Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` - whose first two dimensions are of length `n_var`. - Is sliced with `data` and `var` but behaves otherwise like a :term:`mapping`. - """ - return self._varp + Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` + whose first two dimensions are of length `n_obs`. + Is sliced with `data` and `obs` but behaves otherwise like a :term:`mapping`. + """ - @varp.setter - def varp(self, value): - varp = PairwiseArrays(self, 1, vals=convert_to_dict(value)) - if self.is_view: - self._init_as_actual(self.copy()) - self._varp = varp + varp: AlignedMappingProperty[PairwiseArrays | PairwiseArraysView] = ( + AlignedMappingProperty("varp", PairwiseArrays, 1) + ) + """\ + Pairwise annotation of variables/features, + a mutable mapping with array-like values. - @varp.deleter - def varp(self): - self.varp = dict() + Stores for each key a two or higher-dimensional :class:`~numpy.ndarray` + whose first two dimensions are of length `n_var`. + Is sliced with `data` and `var` but behaves otherwise like a :term:`mapping`. + """ def obs_keys(self) -> list[str]: """List keys of observation annotation :attr:`obs`.""" @@ -979,11 +918,11 @@ def var_keys(self) -> list[str]: def obsm_keys(self) -> list[str]: """List keys of observation annotation :attr:`obsm`.""" - return list(self._obsm.keys()) + return list(self.obsm.keys()) def varm_keys(self) -> list[str]: """List keys of variable annotation :attr:`varm`.""" - return list(self._varm.keys()) + return list(self.varm.keys()) def uns_keys(self) -> list[str]: """List keys of unstructured annotation.""" @@ -1262,10 +1201,10 @@ def transpose(self) -> AnnData: obs=self.var, var=self.obs, uns=self._uns, - obsm=self._varm, - varm=self._obsm, - obsp=self._varp, - varp=self._obsp, + obsm=self.varm, + varm=self.obsm, + obsp=self.varp, + varp=self.obsp, filename=self.filename, ) @@ -1441,8 +1380,8 @@ def to_memory(self, copy=False) -> AnnData: Params ------ - copy: - Whether the arrays that are already in-memory should be copied. + copy + Whether the arrays that are already in-memory should be copied. Example ------- @@ -1822,24 +1761,22 @@ def _check_dimensions(self, key=None): else: key = {key} if "obsm" in key: - obsm = self._obsm if ( - not all([axis_len(o, 0) == self.n_obs for o in obsm.values()]) - and len(obsm.dim_names) != self.n_obs + not all([axis_len(o, 0) == self.n_obs for o in self.obsm.values()]) + and len(self.obsm.dim_names) != self.n_obs ): raise ValueError( "Observations annot. `obsm` must have number of rows of `X`" - f" ({self.n_obs}), but has {len(obsm)} rows." + f" ({self.n_obs}), but has {len(self.obsm)} rows." ) if "varm" in key: - varm = self._varm if ( - not all([axis_len(v, 0) == self.n_vars for v in varm.values()]) - and len(varm.dim_names) != self.n_vars + not all([axis_len(v, 0) == self.n_vars for v in self.varm.values()]) + and len(self.varm.dim_names) != self.n_vars ): raise ValueError( "Variables annot. `varm` must have number of columns of `X`" - f" ({self.n_vars}), but has {len(varm)} rows." + f" ({self.n_vars}), but has {len(self.varm)} rows." ) def write_h5ad( diff --git a/src/anndata/_core/file_backing.py b/src/anndata/_core/file_backing.py index d283a1dfd..6346100ba 100644 --- a/src/anndata/_core/file_backing.py +++ b/src/anndata/_core/file_backing.py @@ -1,5 +1,6 @@ from __future__ import annotations +import weakref from collections.abc import Mapping from functools import singledispatch from pathlib import Path @@ -27,13 +28,26 @@ def __init__( filename: PathLike | None = None, filemode: Literal["r", "r+"] | None = None, ): - self._adata = adata + self._adata_ref = weakref.ref(adata) self.filename = filename self._filemode = filemode self._file = None if filename: self.open() + def __getstate__(self): + state = self.__dict__.copy() + state["_adata_ref"] = state["_adata_ref"]() + return state + + def __setstate__(self, state): + self.__dict__ = state.copy() + self.__dict__["_adata_ref"] = weakref.ref(state["_adata_ref"]) + + @property + def _adata(self): + return self._adata_ref() + def __repr__(self) -> str: if self.filename is None: return "Backing file manager: no file is set." diff --git a/src/anndata/_core/raw.py b/src/anndata/_core/raw.py index 8070ec224..7237c06b4 100644 --- a/src/anndata/_core/raw.py +++ b/src/anndata/_core/raw.py @@ -9,21 +9,25 @@ from ..compat import CupyArray, CupySparseMatrix from .aligned_df import _gen_dataframe -from .aligned_mapping import AxisArrays +from .aligned_mapping import AlignedMappingProperty, AxisArrays from .index import _normalize_index, _subset, get_vector, unpack_index from .sparse_dataset import sparse_dataset if TYPE_CHECKING: from collections.abc import Mapping, Sequence + from typing import ClassVar from scipy import sparse + from .aligned_mapping import AxisArraysView from .anndata import AnnData from .sparse_dataset import BaseCompressedSparseDataset # TODO: Implement views for Raw class Raw: + is_view: ClassVar = False + def __init__( self, adata: AnnData, @@ -44,7 +48,7 @@ def __init__( self._var = _gen_dataframe( var, ["var_names"], source="X", attr="var", length=n_var ) - self._varm = AxisArrays(self, 1, varm) + self.varm = varm elif X is None: # construct from adata # Move from GPU to CPU since it's large and not always used if isinstance(adata.X, (CupyArray, CupySparseMatrix)): @@ -52,7 +56,7 @@ def __init__( else: self._X = adata.X.copy() self._var = adata.var.copy() - self._varm = AxisArrays(self, 1, adata.varm.copy()) + self.varm = adata.varm.copy() elif adata.isbacked: raise ValueError("Cannot specify X if adata is backed") @@ -89,31 +93,31 @@ def X(self) -> BaseCompressedSparseDataset | np.ndarray | sparse.spmatrix: return X @property - def shape(self): + def shape(self) -> tuple[int, int]: return self.n_obs, self.n_vars @property - def var(self): + def var(self) -> pd.DataFrame: return self._var @property - def n_vars(self): + def n_vars(self) -> int: return self._var.shape[0] @property - def n_obs(self): + def n_obs(self) -> int: return self._n_obs - @property - def varm(self): - return self._varm + varm: AlignedMappingProperty[AxisArrays | AxisArraysView] = AlignedMappingProperty( + "varm", AxisArrays, 1 + ) @property - def var_names(self): + def var_names(self) -> pd.Index[str]: return self.var.index @property - def obs_names(self): + def obs_names(self) -> pd.Index[str]: return self._adata.obs_names def __getitem__(self, index): @@ -132,12 +136,12 @@ def __getitem__(self, index): var = self._var.iloc[vidx] new = Raw(self._adata, X=X, var=var) - if self._varm is not None: + if self.varm is not None: # Since there is no view of raws - new._varm = self._varm._view(_RawViewHack(self, vidx), (vidx,)).copy() + new.varm = self.varm._view(_RawViewHack(self, vidx), (vidx,)).copy() return new - def __str__(self): + def __str__(self) -> str: descr = f"Raw AnnData with n_obs × n_vars = {self.n_obs} × {self.n_vars}" for attr in ["var", "varm"]: keys = getattr(self, attr).keys() @@ -145,7 +149,7 @@ def __str__(self): descr += f"\n {attr}: {str(list(keys))[1:-1]}" return descr - def copy(self): + def copy(self) -> Raw: return Raw( self._adata, X=self.X.copy(), @@ -153,7 +157,7 @@ def copy(self): varm=None if self._varm is None else self._varm.copy(), ) - def to_adata(self): + def to_adata(self) -> AnnData: """Create full AnnData object.""" from anndata import AnnData diff --git a/src/anndata/_io/write.py b/src/anndata/_io/write.py index a92dac56e..fb5dd6699 100644 --- a/src/anndata/_io/write.py +++ b/src/anndata/_io/write.py @@ -37,8 +37,8 @@ def write_csvs( d = dict( obs=adata._obs, var=adata._var, - obsm=adata._obsm.to_df(), - varm=adata._varm.to_df(), + obsm=adata.obsm.to_df(), + varm=adata.varm.to_df(), ) if not skip_data: d["X"] = pd.DataFrame(adata.X.toarray() if issparse(adata.X) else adata.X) diff --git a/src/anndata/experimental/multi_files/_anncollection.py b/src/anndata/experimental/multi_files/_anncollection.py index ba3d7895f..31b27c879 100644 --- a/src/anndata/experimental/multi_files/_anncollection.py +++ b/src/anndata/experimental/multi_files/_anncollection.py @@ -764,7 +764,9 @@ def __init__( self._obsm = inner_concat_aligned_mapping( [a.obsm for a in adatas], index=self.obs_names ) - self._obsm = AxisArrays(self, axis=0) if self._obsm == {} else self._obsm + self._obsm = ( + AxisArrays(self, axis=0, store={}) if self._obsm == {} else self._obsm + ) # process inner join of views self._view_attrs_keys = {} diff --git a/src/anndata/tests/helpers.py b/src/anndata/tests/helpers.py index f7ec7ad14..6a30f4c32 100644 --- a/src/anndata/tests/helpers.py +++ b/src/anndata/tests/helpers.py @@ -17,7 +17,7 @@ from scipy import sparse from anndata import AnnData, ExperimentalFeatureWarning, Raw -from anndata._core.aligned_mapping import AlignedMapping +from anndata._core.aligned_mapping import AlignedMappingBase from anndata._core.sparse_dataset import BaseCompressedSparseDataset from anndata._core.views import ArrayView from anndata.compat import ( @@ -585,7 +585,7 @@ def assert_equal_mapping(a, b, exact=False, elem_name=None): assert_equal(a[k], b[k], exact, f"{elem_name}/{k}") -@assert_equal.register(AlignedMapping) +@assert_equal.register(AlignedMappingBase) def assert_equal_aligned_mapping(a, b, exact=False, elem_name=None): a_indices = (a.parent.obs_names, a.parent.var_names) b_indices = (b.parent.obs_names, b.parent.var_names) diff --git a/tests/test_base.py b/tests/test_base.py index 53d315a52..02e4eabe1 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -703,6 +703,9 @@ def assert_eq_not_id(a, b): map_sprs = getattr(adata_sparse, attr) map_copy = getattr(adata_copy, attr) assert map_sprs is not map_copy + if attr not in {"obs", "var"}: + # check that we don’t create too many references + assert getattr(adata_copy, f"_{attr}") is map_copy._data assert_eq_not_id(map_sprs.keys(), map_copy.keys()) for key in map_sprs.keys(): assert_eq_not_id(map_sprs[key], map_copy[key]) diff --git a/tests/test_layers.py b/tests/test_layers.py index f2a92eb73..ba1f96e49 100644 --- a/tests/test_layers.py +++ b/tests/test_layers.py @@ -8,7 +8,7 @@ import pytest from numba.core.errors import NumbaDeprecationWarning -from anndata import AnnData, read_h5ad, read_loom +from anndata import AnnData, ImplicitModificationWarning, read_h5ad, read_loom from anndata.tests.helpers import gen_typed_df_t2_size from testing.anndata._helpers import xfail_if_numpy2_loompy @@ -41,7 +41,8 @@ def test_views(): assert adata_view.layers.keys() == adata.layers.keys() assert (adata_view.layers["S"] == adata.layers["S"][1:, 1:]).all() - adata_view.layers["T"] = X[1:, 1:] + with pytest.warns(ImplicitModificationWarning): + adata_view.layers["T"] = X[1:, 1:] assert not adata_view.layers.is_view assert not adata_view.is_view @@ -106,6 +107,9 @@ def test_backed(): def test_copy(): adata = AnnData(X=X, layers=dict(L=L.copy())) bdata = adata.copy() + # check that we don’t create too many references + assert bdata._layers is bdata.layers._data + # check that we have a copy adata.layers["L"] += 10 assert np.all(adata.layers["L"] != bdata.layers["L"]) # 201