From f2af154260282e9d197bba5d5320fc427b0d7979 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 14 Nov 2024 15:34:53 +0100 Subject: [PATCH] (chore): migrate to only checking `cs{r,c}_matrix` instead of `spmatrix` --- src/anndata/_core/aligned_mapping.py | 5 ++--- src/anndata/_core/anndata.py | 8 ++++---- src/anndata/_core/index.py | 12 ++++++------ src/anndata/_core/merge.py | 25 +++++++++++-------------- src/anndata/_core/raw.py | 7 +++---- src/anndata/_core/sparse_dataset.py | 6 +++--- src/anndata/_core/storage.py | 4 ++-- src/anndata/_io/h5ad.py | 13 +++++++------ src/anndata/_io/specs/methods.py | 15 ++++++++------- src/anndata/compat/__init__.py | 6 +++--- src/anndata/tests/helpers.py | 11 ++++++----- src/anndata/typing.py | 6 ++---- tests/test_base.py | 2 +- tests/test_concatenate.py | 4 +++- tests/test_x.py | 6 +++--- 15 files changed, 64 insertions(+), 66 deletions(-) diff --git a/src/anndata/_core/aligned_mapping.py b/src/anndata/_core/aligned_mapping.py index 9df5ac977..dbe5dbebf 100644 --- a/src/anndata/_core/aligned_mapping.py +++ b/src/anndata/_core/aligned_mapping.py @@ -9,10 +9,9 @@ import numpy as np import pandas as pd -from scipy.sparse import spmatrix from .._warnings import ExperimentalFeatureWarning, ImplicitModificationWarning -from ..compat import AwkArray +from ..compat import AwkArray, SpMatrix from ..utils import ( axis_len, convert_to_dict, @@ -36,7 +35,7 @@ OneDIdx = Sequence[int] | Sequence[bool] | slice TwoDIdx = tuple[OneDIdx, OneDIdx] # TODO: pd.DataFrame only allowed in AxisArrays? -Value = pd.DataFrame | spmatrix | np.ndarray +Value = pd.DataFrame | SpMatrix | np.ndarray P = TypeVar("P", bound="AlignedMappingBase") """Parent mapping an AlignedView is based on.""" diff --git a/src/anndata/_core/anndata.py b/src/anndata/_core/anndata.py index 8a8eaf949..92fd7a2b2 100644 --- a/src/anndata/_core/anndata.py +++ b/src/anndata/_core/anndata.py @@ -221,13 +221,13 @@ class AnnData(metaclass=utils.DeprecationMixinMeta): def __init__( self, - X: np.ndarray | sparse.spmatrix | pd.DataFrame | None = None, + X: ArrayDataStructureType | pd.DataFrame | None = None, obs: pd.DataFrame | Mapping[str, Iterable[Any]] | None = None, var: pd.DataFrame | Mapping[str, Iterable[Any]] | None = None, uns: Mapping[str, Any] | None = None, obsm: np.ndarray | Mapping[str, Sequence[Any]] | None = None, varm: np.ndarray | Mapping[str, Sequence[Any]] | None = None, - layers: Mapping[str, np.ndarray | sparse.spmatrix] | None = None, + layers: Mapping[str, ArrayDataStructureType] | None = None, raw: Mapping[str, Any] | None = None, dtype: np.dtype | type | str | None = None, shape: tuple[int, int] | None = None, @@ -573,7 +573,7 @@ def X(self) -> ArrayDataStructureType | None: # return X @X.setter - def X(self, value: np.ndarray | sparse.spmatrix | SpArray | None): + def X(self, value: ArrayDataStructureType | None): if value is None: if self.isbacked: raise NotImplementedError( @@ -1169,7 +1169,7 @@ def _inplace_subset_obs(self, index: Index1D): self._init_as_actual(adata_subset) # TODO: Update, possibly remove - def __setitem__(self, index: Index, val: float | np.ndarray | sparse.spmatrix): + def __setitem__(self, index: Index, val: ArrayDataStructureType): if self.is_view: raise ValueError("Object is view and cannot be accessed with `[]`.") obs, var = self._normalize_indices(index) diff --git a/src/anndata/_core/index.py b/src/anndata/_core/index.py index f1d72ce0d..44ab83666 100644 --- a/src/anndata/_core/index.py +++ b/src/anndata/_core/index.py @@ -8,9 +8,9 @@ import h5py import numpy as np import pandas as pd -from scipy.sparse import issparse, spmatrix +from scipy.sparse import issparse -from ..compat import AwkArray, DaskArray, SpArray +from ..compat import AwkArray, DaskArray, SpArray, SpMatrix if TYPE_CHECKING: from ..compat import Index, Index1D @@ -69,13 +69,13 @@ def name_idx(i): elif isinstance(indexer, str): return index.get_loc(indexer) # int elif isinstance( - indexer, Sequence | np.ndarray | pd.Index | spmatrix | np.matrix | SpArray + indexer, Sequence | np.ndarray | pd.Index | SpMatrix | np.matrix | SpArray ): if hasattr(indexer, "shape") and ( (indexer.shape == (index.shape[0], 1)) or (indexer.shape == (1, index.shape[0])) ): - if isinstance(indexer, spmatrix | SpArray): + if isinstance(indexer, SpMatrix | SpArray): indexer = indexer.toarray() indexer = np.ravel(indexer) if not isinstance(indexer, np.ndarray | pd.Index): @@ -167,9 +167,9 @@ def _subset_dask(a: DaskArray, subset_idx: Index): return a[subset_idx] -@_subset.register(spmatrix) +@_subset.register(SpMatrix) @_subset.register(SpArray) -def _subset_sparse(a: spmatrix | SpArray, subset_idx: Index): +def _subset_sparse(a: SpMatrix | SpArray, subset_idx: Index): # Correcting for indexing behaviour of sparse.spmatrix if len(subset_idx) > 1 and all(isinstance(x, Iterable) for x in subset_idx): first_idx = subset_idx[0] diff --git a/src/anndata/_core/merge.py b/src/anndata/_core/merge.py index 0dfa5dab2..858c346bc 100644 --- a/src/anndata/_core/merge.py +++ b/src/anndata/_core/merge.py @@ -17,7 +17,6 @@ import pandas as pd from natsort import natsorted from scipy import sparse -from scipy.sparse import spmatrix from anndata._warnings import ExperimentalFeatureWarning @@ -29,6 +28,7 @@ CupySparseMatrix, DaskArray, SpArray, + SpMatrix, _map_cat_to_str, ) from ..utils import asarray, axis_len, warn_once @@ -135,7 +135,7 @@ def equal_dask_array(a, b) -> bool: if isinstance(b, DaskArray): if tokenize(a) == tokenize(b): return True - if isinstance(a._meta, spmatrix): + if isinstance(a._meta, SpMatrix): # TODO: Maybe also do this in the other case? return da.map_blocks(equal, a, b, drop_axis=(0, 1)).all() else: @@ -165,7 +165,7 @@ def equal_series(a, b) -> bool: return a.equals(b) -@equal.register(sparse.spmatrix) +@equal.register(SpMatrix) @equal.register(SpArray) @equal.register(CupySparseMatrix) def equal_sparse(a, b) -> bool: @@ -174,7 +174,7 @@ def equal_sparse(a, b) -> bool: xp = array_api_compat.array_namespace(a.data) - if isinstance(b, CupySparseMatrix | sparse.spmatrix | SpArray): + if isinstance(b, CupySparseMatrix | SpMatrix | SpArray): if isinstance(a, CupySparseMatrix): # Comparison broken for CSC matrices # https://github.com/cupy/cupy/issues/7757 @@ -206,7 +206,7 @@ def equal_awkward(a, b) -> bool: def as_sparse(x, use_sparse_array=False): - if not isinstance(x, sparse.spmatrix | SpArray): + if not isinstance(x, SpMatrix | SpArray): if CAN_USE_SPARSE_ARRAY and use_sparse_array: return sparse.csr_array(x) return sparse.csr_matrix(x) @@ -536,7 +536,7 @@ def apply(self, el, *, axis, fill_value=None): return el if isinstance(el, pd.DataFrame): return self._apply_to_df(el, axis=axis, fill_value=fill_value) - elif isinstance(el, sparse.spmatrix | SpArray | CupySparseMatrix): + elif isinstance(el, SpMatrix | SpArray | CupySparseMatrix): return self._apply_to_sparse(el, axis=axis, fill_value=fill_value) elif isinstance(el, AwkArray): return self._apply_to_awkward(el, axis=axis, fill_value=fill_value) @@ -614,8 +614,8 @@ def _apply_to_array(self, el, *, axis, fill_value=None): ) def _apply_to_sparse( - self, el: sparse.spmatrix | SpArray, *, axis, fill_value=None - ) -> spmatrix: + self, el: SpMatrix | SpArray, *, axis, fill_value=None + ) -> SpMatrix: if isinstance(el, CupySparseMatrix): from cupyx.scipy import sparse else: @@ -724,11 +724,8 @@ def default_fill_value(els): This is largely due to backwards compat, and might not be the ideal solution. """ if any( - isinstance(el, sparse.spmatrix | SpArray) - or ( - isinstance(el, DaskArray) - and isinstance(el._meta, sparse.spmatrix | SpArray) - ) + isinstance(el, SpMatrix | SpArray) + or (isinstance(el, DaskArray) and isinstance(el._meta, SpMatrix | SpArray)) for el in els ): return 0 @@ -828,7 +825,7 @@ def concat_arrays(arrays, reindexers, axis=0, index=None, fill_value=None): ], axis=axis, ) - elif any(isinstance(a, sparse.spmatrix | SpArray) for a in arrays): + elif any(isinstance(a, SpMatrix | SpArray) for a in arrays): sparse_stack = (sparse.vstack, sparse.hstack)[axis] use_sparse_array = any(issubclass(type(a), SpArray) for a in arrays) return sparse_stack( diff --git a/src/anndata/_core/raw.py b/src/anndata/_core/raw.py index d138440b5..7b96cd562 100644 --- a/src/anndata/_core/raw.py +++ b/src/anndata/_core/raw.py @@ -17,8 +17,7 @@ from collections.abc import Mapping, Sequence from typing import ClassVar - from scipy import sparse - + from ..compat import SpMatrix from .aligned_mapping import AxisArraysView from .anndata import AnnData from .sparse_dataset import BaseCompressedSparseDataset @@ -31,7 +30,7 @@ class Raw: def __init__( self, adata: AnnData, - X: np.ndarray | sparse.spmatrix | None = None, + X: np.ndarray | SpMatrix | None = None, var: pd.DataFrame | Mapping[str, Sequence] | None = None, varm: AxisArrays | Mapping[str, np.ndarray] | None = None, ): @@ -66,7 +65,7 @@ def _get_X(self, layer=None): return self.X @property - def X(self) -> BaseCompressedSparseDataset | np.ndarray | sparse.spmatrix: + def X(self) -> BaseCompressedSparseDataset | np.ndarray | SpMatrix: # TODO: Handle unsorted array of integer indices for h5py.Datasets if not self._adata.isbacked: return self._X diff --git a/src/anndata/_core/sparse_dataset.py b/src/anndata/_core/sparse_dataset.py index ae6b47c7f..f25bf5f5e 100644 --- a/src/anndata/_core/sparse_dataset.py +++ b/src/anndata/_core/sparse_dataset.py @@ -28,7 +28,7 @@ from .. import abc from .._settings import settings -from ..compat import H5Group, SpArray, ZarrArray, ZarrGroup, _read_attr +from ..compat import H5Group, SpArray, SpMatrix, ZarrArray, ZarrGroup, _read_attr from .index import _fix_slice_bounds, _subset, unpack_index if TYPE_CHECKING: @@ -312,7 +312,7 @@ def get_memory_class( if format == fmt: if use_sparray_in_io and issubclass(memory_class, SpArray): return memory_class - elif not use_sparray_in_io and issubclass(memory_class, ss.spmatrix): + elif not use_sparray_in_io and issubclass(memory_class, SpMatrix): return memory_class raise ValueError(f"Format string {format} is not supported.") @@ -324,7 +324,7 @@ def get_backed_class( if format == fmt: if use_sparray_in_io and issubclass(backed_class, SpArray): return backed_class - elif not use_sparray_in_io and issubclass(backed_class, ss.spmatrix): + elif not use_sparray_in_io and issubclass(backed_class, SpMatrix): return backed_class raise ValueError(f"Format string {format} is not supported.") diff --git a/src/anndata/_core/storage.py b/src/anndata/_core/storage.py index 9e036ba44..30b7a8b6d 100644 --- a/src/anndata/_core/storage.py +++ b/src/anndata/_core/storage.py @@ -5,9 +5,9 @@ import numpy as np import pandas as pd -from scipy import sparse from .._warnings import ImplicitModificationWarning +from ..compat import SpMatrix from ..utils import ( ensure_df_homogeneous, join_english, @@ -39,7 +39,7 @@ def coerce_array( warnings.warn(msg, ImplicitModificationWarning) value = value.A return value - elif isinstance(value, sparse.spmatrix): + elif isinstance(value, SpMatrix): msg = ( f"AnnData previously had undefined behavior around matrices of type {type(value)}." "In 0.12, passing in this type will throw an error. Please convert to a supported type." diff --git a/src/anndata/_io/h5ad.py b/src/anndata/_io/h5ad.py index edf4977cc..93d9623aa 100644 --- a/src/anndata/_io/h5ad.py +++ b/src/anndata/_io/h5ad.py @@ -18,6 +18,7 @@ from .._core.file_backing import filename from .._core.sparse_dataset import BaseCompressedSparseDataset from ..compat import ( + SpMatrix, _clean_uns, _decode_structured_array, _from_fixed_length_strings, @@ -82,14 +83,14 @@ def write_h5ad( f.attrs.setdefault("encoding-version", "0.1.0") if "X" in as_dense and isinstance( - adata.X, sparse.spmatrix | BaseCompressedSparseDataset + adata.X, SpMatrix | BaseCompressedSparseDataset ): write_sparse_as_dense(f, "X", adata.X, dataset_kwargs=dataset_kwargs) elif not (adata.isbacked and Path(adata.filename) == Path(filepath)): # If adata.isbacked, X should already be up to date write_elem(f, "X", adata.X, dataset_kwargs=dataset_kwargs) if "raw/X" in as_dense and isinstance( - adata.raw.X, sparse.spmatrix | BaseCompressedSparseDataset + adata.raw.X, SpMatrix | BaseCompressedSparseDataset ): write_sparse_as_dense( f, "raw/X", adata.raw.X, dataset_kwargs=dataset_kwargs @@ -115,7 +116,7 @@ def write_h5ad( def write_sparse_as_dense( f: h5py.Group, key: str, - value: sparse.spmatrix | BaseCompressedSparseDataset, + value: SpMatrix | BaseCompressedSparseDataset, *, dataset_kwargs: Mapping[str, Any] = MappingProxyType({}), ): @@ -172,7 +173,7 @@ def read_h5ad( backed: Literal["r", "r+"] | bool | None = None, *, as_sparse: Sequence[str] = (), - as_sparse_fmt: type[sparse.spmatrix] = sparse.csr_matrix, + as_sparse_fmt: type[SpMatrix] = sparse.csr_matrix, chunk_size: int = 6000, # TODO, probably make this 2d chunks ) -> AnnData: """\ @@ -275,7 +276,7 @@ def callback(func, elem_name: str, elem, iospec): def _read_raw( f: h5py.File | AnnDataFileManager, as_sparse: Collection[str] = (), - rdasp: Callable[[h5py.Dataset], sparse.spmatrix] | None = None, + rdasp: Callable[[h5py.Dataset], SpMatrix] | None = None, *, attrs: Collection[str] = ("X", "var", "varm"), ) -> dict: @@ -348,7 +349,7 @@ def read_dataset(dataset: h5py.Dataset): @report_read_key_on_error def read_dense_as_sparse( - dataset: h5py.Dataset, sparse_format: sparse.spmatrix, axis_chunk: int + dataset: h5py.Dataset, sparse_format: SpMatrix, axis_chunk: int ): if sparse_format == sparse.csr_matrix: return read_dense_as_csr(dataset, axis_chunk) diff --git a/src/anndata/_io/specs/methods.py b/src/anndata/_io/specs/methods.py index 582245310..52fd82427 100644 --- a/src/anndata/_io/specs/methods.py +++ b/src/anndata/_io/specs/methods.py @@ -52,7 +52,10 @@ from numpy.typing import NDArray from anndata._types import ArrayStorageType, GroupStorageType - from anndata.compat import SpArray + from anndata.compat import ( + SpArray, + SpMatrix, + ) from anndata.typing import AxisStorable, InMemoryArrayOrScalarType from .registry import Reader, Writer @@ -127,7 +130,7 @@ def wrapper( @_REGISTRY.register_read(H5Array, IOSpec("", "")) def read_basic( elem: H5File | H5Group | H5Array, *, _reader: Reader -) -> dict[str, InMemoryArrayOrScalarType] | npt.NDArray | sparse.spmatrix | SpArray: +) -> dict[str, InMemoryArrayOrScalarType] | npt.NDArray | SpMatrix | SpArray: from anndata._io import h5ad warn( @@ -149,7 +152,7 @@ def read_basic( @_REGISTRY.register_read(ZarrArray, IOSpec("", "")) def read_basic_zarr( elem: ZarrGroup | ZarrArray, *, _reader: Reader -) -> dict[str, InMemoryArrayOrScalarType] | npt.NDArray | sparse.spmatrix | SpArray: +) -> dict[str, InMemoryArrayOrScalarType] | npt.NDArray | SpMatrix | SpArray: from anndata._io import zarr warn( @@ -588,7 +591,7 @@ def write_recarray_zarr( def write_sparse_compressed( f: GroupStorageType, key: str, - value: sparse.spmatrix | SpArray, + value: SpMatrix | SpArray, *, _writer: Writer, fmt: Literal["csr", "csc"], @@ -755,9 +758,7 @@ def chunk_slice(start: int, stop: int) -> tuple[slice | None, slice | None]: @_REGISTRY.register_read(H5Group, IOSpec("csr_matrix", "0.1.0")) @_REGISTRY.register_read(ZarrGroup, IOSpec("csc_matrix", "0.1.0")) @_REGISTRY.register_read(ZarrGroup, IOSpec("csr_matrix", "0.1.0")) -def read_sparse( - elem: GroupStorageType, *, _reader: Reader -) -> sparse.spmatrix | SpArray: +def read_sparse(elem: GroupStorageType, *, _reader: Reader) -> SpMatrix | SpArray: return sparse_dataset(elem).to_memory() diff --git a/src/anndata/compat/__init__.py b/src/anndata/compat/__init__.py index 255ffa548..d6acc90ec 100644 --- a/src/anndata/compat/__init__.py +++ b/src/anndata/compat/__init__.py @@ -18,7 +18,6 @@ import numpy as np import pandas as pd import scipy -import scipy.sparse from packaging.version import Version from .exceptiongroups import add_note # noqa: F401 @@ -32,6 +31,7 @@ CAN_USE_SPARSE_ARRAY = Version(scipy.__version__) >= Version("1.11") +SpMatrix = scipy.sparse.csr_matrix | scipy.sparse.csc_matrix if not CAN_USE_SPARSE_ARRAY: @@ -40,7 +40,7 @@ class SpArray: def __repr__(): return "mock scipy.sparse.sparray" else: - SpArray = scipy.sparse.sparray + SpArray = scipy.sparse.csr_array | scipy.sparse.csc_array class Empty: @@ -56,7 +56,7 @@ class Empty: | tuple[Index1D, Index1D, EllipsisType] | tuple[EllipsisType, Index1D, Index1D] | tuple[Index1D, EllipsisType, Index1D] - | scipy.sparse.spmatrix + | SpMatrix | SpArray ) H5Group = h5py.Group diff --git a/src/anndata/tests/helpers.py b/src/anndata/tests/helpers.py index 6ed637ed8..979a2e27b 100644 --- a/src/anndata/tests/helpers.py +++ b/src/anndata/tests/helpers.py @@ -32,6 +32,7 @@ CupySparseMatrix, DaskArray, SpArray, + SpMatrix, ZarrArray, ) from anndata.utils import asarray @@ -598,7 +599,7 @@ def assert_equal_arrayview(a, b, exact=False, elem_name=None): @assert_equal.register(BaseCompressedSparseDataset) -@assert_equal.register(sparse.spmatrix) +@assert_equal.register(SpMatrix) def assert_equal_sparse(a, b, exact=False, elem_name=None): a = asarray(a) assert_equal(b, a, exact, elem_name=elem_name) @@ -785,7 +786,7 @@ def as_dense_dask_array(a): return da.asarray(a, chunks=_half_chunk_size(a.shape)) -@as_dense_dask_array.register(sparse.spmatrix) +@as_dense_dask_array.register(SpMatrix) def _(a): return as_dense_dask_array(a.toarray()) @@ -802,7 +803,7 @@ def as_sparse_dask_array(a) -> DaskArray: return da.from_array(sparse.csr_matrix(a), chunks=_half_chunk_size(a.shape)) -@as_sparse_dask_array.register(sparse.spmatrix) +@as_sparse_dask_array.register(SpMatrix) def _(a): import dask.array as da @@ -952,7 +953,7 @@ def as_cupy(val, typ=None): if issubclass(typ, CupyArray): import cupy as cp - if isinstance(val, sparse.spmatrix): + if isinstance(val, SpMatrix): val = val.toarray() return cp.array(val) elif issubclass(typ, CupyCSRMatrix): @@ -990,7 +991,7 @@ def shares_memory(x, y) -> bool: return np.shares_memory(x, y) -@shares_memory.register(sparse.spmatrix) +@shares_memory.register(SpMatrix) def shares_memory_sparse(x, y): return ( np.shares_memory(x.data, y.data) diff --git a/src/anndata/typing.py b/src/anndata/typing.py index d13927bad..8012a162d 100644 --- a/src/anndata/typing.py +++ b/src/anndata/typing.py @@ -5,7 +5,6 @@ import numpy as np import pandas as pd from numpy import ma -from scipy import sparse from . import abc from ._core.anndata import AnnData @@ -16,6 +15,7 @@ DaskArray, H5Array, SpArray, + SpMatrix, ZappyArray, ZarrArray, ) @@ -31,12 +31,10 @@ Index = _Index """1D or 2D index an :class:`~anndata.AnnData` object can be sliced with.""" - ArrayDataStructureType: TypeAlias = ( np.ndarray | ma.MaskedArray - | sparse.csr_matrix - | sparse.csc_matrix + | SpMatrix | SpArray | AwkArray | H5Array diff --git a/tests/test_base.py b/tests/test_base.py index e1401ed74..a9f1092a9 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -32,7 +32,7 @@ def test_creation(): AnnData(ma.array([[1, 2], [3, 4]]), uns=dict(mask=[0, 1, 1, 0])) AnnData(sp.eye(2, format="csr")) if CAN_USE_SPARSE_ARRAY: - AnnData(sp.eye_array(2)) + AnnData(sp.eye_array(2, format="csr")) X = np.array([[1, 2, 3], [4, 5, 6]]) adata = AnnData( X=X, diff --git a/tests/test_concatenate.py b/tests/test_concatenate.py index e034debd2..d9f399dd6 100644 --- a/tests/test_concatenate.py +++ b/tests/test_concatenate.py @@ -1044,7 +1044,9 @@ def gen_list(n): def gen_sparse(n): - return sparse.random(np.random.randint(1, 100), np.random.randint(1, 100)) + return sparse.random( + np.random.randint(1, 100), np.random.randint(1, 100), format="csr" + ) def gen_something(n): diff --git a/tests/test_x.py b/tests/test_x.py index 64b1bb87d..42de50b23 100644 --- a/tests/test_x.py +++ b/tests/test_x.py @@ -186,8 +186,8 @@ def test_set_dense_x_view_from_sparse(): def test_warn_on_non_csr_csc_matrix(): X = sparse.eye(100) - with pytest.warns( - FutureWarning, - match=rf"AnnData previously had undefined behavior around matrices of type {type(X)}.*", + with pytest.raises( + ValueError, + match=r"X needs to be of one of", ): ad.AnnData(X=X)