Skip to content

Commit

Permalink
Basic concatenation support + bedtime
Browse files Browse the repository at this point in the history
  • Loading branch information
ivirshup committed Jul 25, 2023
1 parent 55f2c6d commit 1b7c1ed
Show file tree
Hide file tree
Showing 5 changed files with 217 additions and 43 deletions.
159 changes: 148 additions & 11 deletions anndata/_core/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from scipy.sparse import spmatrix

from .anndata import AnnData
from ..compat import AwkArray, DaskArray
from ..compat import AwkArray, DaskArray, CupySparseMatrix, CupyArray
from ..utils import asarray, dim_len
from .index import _subset, make_slice
from anndata._warnings import ExperimentalFeatureWarning
Expand Down Expand Up @@ -132,24 +132,39 @@ def equal_array(a, b) -> bool:
return equal(pd.DataFrame(a), pd.DataFrame(asarray(b)))


@equal.register(CupyArray)
def equal_cupyarray(a, b) -> bool:
import cupy as cp

return bool(cp.array_equal(a, b, equal_nan=True))


@equal.register(pd.Series)
def equal_series(a, b) -> bool:
return a.equals(b)


@equal.register(sparse.spmatrix)
@equal.register(CupySparseMatrix)
def equal_sparse(a, b) -> bool:
# It's a weird api, don't blame me
if isinstance(b, sparse.spmatrix):
import array_api_compat

xp = array_api_compat.array_namespace(a.data)

if isinstance(b, (CupySparseMatrix, sparse.spmatrix)):
comp = a != b
if isinstance(comp, bool):
return not comp
if isinstance(comp, CupySparseMatrix):
# https://github.com/cupy/cupy/issues/7751
comp = comp.get()
# fmt: off
return (
(len(comp.data) == 0)
or (
np.isnan(a[comp]).all()
and np.isnan(b[comp]).all()
xp.isnan(a[comp]).all()
and xp.isnan(b[comp]).all()
)
)
# fmt: on
Expand All @@ -171,6 +186,17 @@ def as_sparse(x):
return x


def as_cp_sparse(x) -> CupySparseMatrix:
import cupyx.scipy.sparse as cpsparse

if isinstance(x, cpsparse.spmatrix):
return x
elif isinstance(x, np.ndarray):
return cpsparse.csr_matrix(as_sparse(x))
else:
return cpsparse.csr_matrix(x)


def unify_dtypes(dfs: Iterable[pd.DataFrame]) -> list[pd.DataFrame]:
"""
Attempts to unify datatypes from multiple dataframes.
Expand Down Expand Up @@ -268,6 +294,43 @@ def check_combinable_cols(cols: list[pd.Index], join: Literal["inner", "outer"])
)


# TODO: open PR or feature request to cupy
def _cpblock_diag(mats, format=None, dtype=None):
"""
Modified version of scipy.sparse.block_diag for cupy sparse.
"""
import cupy as cp
from cupyx.scipy import sparse as cpsparse

row = []
col = []
data = []
r_idx = 0
c_idx = 0
for a in mats:
# if isinstance(a, (list, numbers.Number)):
# a = cpsparse.coo_matrix(a)
nrows, ncols = a.shape
if cpsparse.issparse(a):
a = a.tocoo()
row.append(a.row + r_idx)
col.append(a.col + c_idx)
data.append(a.data)
else:
a_row, a_col = cp.divmod(cp.arange(nrows * ncols), ncols)
row.append(a_row + r_idx)
col.append(a_col + c_idx)
data.append(a.reshape(-1))
r_idx += nrows
c_idx += ncols
row = cp.concatenate(row)
col = cp.concatenate(col)
data = cp.concatenate(data)
return cpsparse.coo_matrix(
(data, (row, col)), shape=(r_idx, c_idx), dtype=dtype
).asformat(format)


###################
# Per element logic
###################
Expand Down Expand Up @@ -430,6 +493,10 @@ def apply(self, el, *, axis, fill_value=None):
return self._apply_to_awkward(el, axis=axis, fill_value=fill_value)
elif isinstance(el, DaskArray):
return self._apply_to_dask_array(el, axis=axis, fill_value=fill_value)
elif isinstance(el, CupyArray):
return self._apply_to_cupy_array(el, axis=axis, fill_value=fill_value)
elif isinstance(el, CupySparseMatrix):
return self._apply_to_sparse(el, axis=axis, fill_value=fill_value)
else:
return self._apply_to_array(el, axis=axis, fill_value=fill_value)

Expand Down Expand Up @@ -457,6 +524,32 @@ def _apply_to_dask_array(self, el: DaskArray, *, axis, fill_value=None):

return sub_el

def _apply_to_cupy_array(self, el, *, axis, fill_value=None):
import cupy as cp

if fill_value is None:
fill_value = default_fill_value([el])
if el.shape[axis] == 0:
# Presumably faster since it won't allocate the full array
shape = list(el.shape)
shape[axis] = len(self.new_idx)
return cp.broadcast_to(cp.asarray(fill_value), tuple(shape))

old_idx_tuple = [slice(None)] * len(el.shape)
old_idx_tuple[axis] = self.old_pos
old_idx_tuple = tuple(old_idx_tuple)
new_idx_tuple = [slice(None)] * len(el.shape)
new_idx_tuple[axis] = self.new_pos
new_idx_tuple = tuple(new_idx_tuple)

out_shape = list(el.shape)
out_shape[axis] = len(self.new_idx)

out = cp.full(tuple(out_shape), fill_value)
out[new_idx_tuple] = el[old_idx_tuple]

return out

def _apply_to_array(self, el, *, axis, fill_value=None):
if fill_value is None:
fill_value = default_fill_value([el])
Expand All @@ -474,12 +567,20 @@ def _apply_to_array(self, el, *, axis, fill_value=None):
)

def _apply_to_sparse(self, el: spmatrix, *, axis, fill_value=None) -> spmatrix:
if isinstance(el, CupySparseMatrix):
from cupyx.scipy import sparse
else:
from scipy import sparse
import array_api_compat

xp = array_api_compat.array_namespace(el.data)

if fill_value is None:
fill_value = default_fill_value([el])
if fill_value != 0:
to_fill = self.new_idx.get_indexer(self.new_idx.difference(self.old_idx))
else:
to_fill = np.array([])
to_fill = xp.array([])

# Fixing outer indexing for missing values
if el.shape[axis] == 0:
Expand All @@ -489,18 +590,21 @@ def _apply_to_sparse(self, el: spmatrix, *, axis, fill_value=None) -> spmatrix:
if fill_value == 0:
return sparse.csr_matrix(shape)
else:
return np.broadcast_to(fill_value, shape)
return type(el)(xp.broadcast_to(xp.asarray(fill_value), shape))

fill_idxer = None

if len(to_fill) > 0:
idxmtx_dtype = np.promote_types(el.dtype, np.array(fill_value).dtype)
if len(to_fill) > 0 or isinstance(el, CupySparseMatrix):
idxmtx_dtype = xp.promote_types(el.dtype, xp.array(fill_value).dtype)
else:
idxmtx_dtype = bool

if axis == 1:
idxmtx = sparse.coo_matrix(
(np.ones(len(self.new_pos), dtype=bool), (self.old_pos, self.new_pos)),
(
xp.ones(len(self.new_pos), dtype=idxmtx_dtype),
(xp.asarray(self.old_pos), xp.asarray(self.new_pos)),
),
shape=(len(self.old_idx), len(self.new_idx)),
dtype=idxmtx_dtype,
)
Expand All @@ -511,7 +615,10 @@ def _apply_to_sparse(self, el: spmatrix, *, axis, fill_value=None) -> spmatrix:
fill_idxer = (slice(None), to_fill)
elif axis == 0:
idxmtx = sparse.coo_matrix(
(np.ones(len(self.new_pos), dtype=bool), (self.new_pos, self.old_pos)),
(
xp.ones(len(self.new_pos), dtype=idxmtx_dtype),
(xp.asarray(self.new_pos), xp.asarray(self.old_pos)),
),
shape=(len(self.new_idx), len(self.old_idx)),
dtype=idxmtx_dtype,
)
Expand Down Expand Up @@ -626,6 +733,31 @@ def concat_arrays(arrays, reindexers, axis=0, index=None, fill_value=None):
)

return ak.concatenate([f(a) for f, a in zip(reindexers, arrays)], axis=axis)
elif any(isinstance(a, CupySparseMatrix) for a in arrays):
import cupyx.scipy.sparse as cpsparse

sparse_stack = (cpsparse.vstack, cpsparse.hstack)[axis]
return sparse_stack(
[
f(as_cp_sparse(a), axis=1 - axis, fill_value=fill_value)
for f, a in zip(reindexers, arrays)
],
format="csr",
)
elif any(isinstance(a, CupyArray) for a in arrays):
import cupy as cp

if not all(isinstance(a, CupyArray) or 0 in a.shape for a in arrays):
raise NotImplementedError(
"Cannot concatenate a cupy array with other array types."
)
return cp.concatenate(
[
f(cp.asarray(x), fill_value=fill_value, axis=1 - axis)
for f, x in zip(reindexers, arrays)
],
axis=axis,
)
elif any(isinstance(a, sparse.spmatrix) for a in arrays):
sparse_stack = (sparse.vstack, sparse.hstack)[axis]
return sparse_stack(
Expand Down Expand Up @@ -774,7 +906,12 @@ def concat_pairwise_mapping(
m.get(k, sparse.csr_matrix((s, s), dtype=bool))
for m, s in zip(mappings, shapes)
]
result[k] = sparse.block_diag(els, format="csr")
if all(isinstance(el, (CupySparseMatrix, CupyArray)) for el in els):
from cupyx.scipy import sparse as cpsparse

result[k] = _cpblock_diag(els, format="csr")
else:
result[k] = sparse.block_diag(els, format="csr")
return result


Expand Down
41 changes: 40 additions & 1 deletion anndata/tests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,14 @@
from anndata._core.sparse_dataset import SparseDataset
from anndata._core.aligned_mapping import AlignedMapping
from anndata.utils import asarray
from anndata.compat import AwkArray, DaskArray, CupySparseMatrix, CupyArray
from anndata.compat import (
AwkArray,
DaskArray,
CupySparseMatrix,
CupyArray,
CupyCSCMatrix,
CupyCSRMatrix,
)

# Give this to gen_adata when dask array support is expected.
GEN_ADATA_DASK_ARGS = dict(
Expand Down Expand Up @@ -592,3 +599,35 @@ def as_dense_dask_array(a):
@as_dense_dask_array.register(sparse.spmatrix)
def _(a):
return as_dense_dask_array(a.toarray())


def as_cupy_type(val, typ):
"""
Rough conversion function
"""
if issubclass(typ, CupyArray):
import cupy as cp

if isinstance(val, sparse.spmatrix):
val = val.toarray()
return cp.array(val)
elif issubclass(typ, CupyCSRMatrix):
import cupyx.scipy.sparse as cpsparse
import cupy as cp

if isinstance(val, np.ndarray):
return cpsparse.csr_matrix(cp.array(val))
else:
return cpsparse.csr_matrix(val)
elif issubclass(typ, CupyCSCMatrix):
import cupyx.scipy.sparse as cpsparse
import cupy as cp

if isinstance(val, np.ndarray):
return cpsparse.csr_matrix(cp.array(val))
else:
return cpsparse.csr_matrix(val)
else:
raise NotImplementedError(
f"Conversion from {type(val)} to {typ} not implemented"
)
30 changes: 27 additions & 3 deletions anndata/tests/test_concatenate.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,12 @@
from anndata.tests.helpers import (
assert_equal,
as_dense_dask_array,
as_cupy_type,
gen_adata,
GEN_ADATA_DASK_ARGS,
)
from anndata.utils import asarray
from anndata.compat import DaskArray, AwkArray
from anndata.compat import DaskArray, AwkArray, CupyArray, CupyCSRMatrix, CupyCSCMatrix


@singledispatch
Expand Down Expand Up @@ -72,11 +73,34 @@ def make_idx_tuple(idx, axis):
return tuple(tup)


# TODO: copied from test_views, consider deduplication
_matrix_casting_params = [
pytest.param(asarray, id="np_array"),
pytest.param(sparse.csr_matrix, id="scipy_csr"),
pytest.param(sparse.csc_matrix, id="scipy_csc"),
pytest.param(as_dense_dask_array, id="dask_array"),
]
_cupy_casting_params = [
pytest.param(
partial(as_cupy_type, typ=CupyArray), id="cupy_array", marks=pytest.mark.gpu
),
pytest.param(
partial(as_cupy_type, typ=CupyCSRMatrix),
id="cupy_csr",
marks=pytest.mark.gpu,
),
pytest.param(
partial(as_cupy_type, typ=CupyCSCMatrix),
id="cupy_csc",
marks=pytest.mark.gpu,
),
]


# Will call func(sparse_matrix) so these types should be sparse compatible
# See array_type if only dense arrays are expected as input.
@pytest.fixture(
params=[asarray, sparse.csr_matrix, sparse.csc_matrix, as_dense_dask_array],
ids=["np_array", "scipy_csr", "scipy_csc", "dask_array"],
params=_matrix_casting_params + _cupy_casting_params,
)
def array_type(request):
return request.param
Expand Down
Loading

0 comments on commit 1b7c1ed

Please sign in to comment.