diff --git a/.azure-pipelines.yml b/.azure-pipelines.yml
index 4642fe4435..cbe87aadb9 100644
--- a/.azure-pipelines.yml
+++ b/.azure-pipelines.yml
@@ -6,10 +6,9 @@ variables:
   python.version: '3.11'
   PIP_CACHE_DIR: $(Pipeline.Workspace)/.pip
   PYTEST_ADDOPTS: '-v --color=yes --durations=0 --nunit-xml=test-data/test-results.xml'
-  ANNDATA_DEV: no
-  RUN_COVERAGE: no
   TEST_EXTRA: 'test-full'
-  PRERELEASE_DEPENDENCIES: no
+  DEPENDENCIES_VERSION: "latest" # | "pre-release" | "minimum-version"
+  TEST_TYPE: "standard" # | "coverage"
 
 jobs:
 - job: PyTest
@@ -20,12 +19,16 @@
       Python3.9:
         python.version: '3.9'
       Python3.11: {}
-      minimal_tests:
+      minimal_dependencies:
         TEST_EXTRA: 'test-min'
       anndata_dev:
-        ANNDATA_DEV: yes
-        RUN_COVERAGE: yes
-        PRERELEASE_DEPENDENCIES: yes
+        DEPENDENCIES_VERSION: "pre-release"
+        TEST_TYPE: "coverage"
+      minimum_versions:
+        python.version: '3.9'
+        DEPENDENCIES_VERSION: "minimum-version"
+        TEST_TYPE: "coverage"
+
   steps:
   - task: UsePythonVersion@0
@@ -52,19 +55,22 @@ jobs:
       pip install wheel coverage
       pip install .[dev,$(TEST_EXTRA)]
     displayName: 'Install dependencies'
-    condition: eq(variables['PRERELEASE_DEPENDENCIES'], 'no')
+    condition: eq(variables['DEPENDENCIES_VERSION'], 'latest')
 
   - script: |
       python -m pip install --pre --upgrade pip
      pip install --pre wheel coverage
       pip install --pre .[dev,$(TEST_EXTRA)]
+      pip install -v "anndata[dev,test] @ git+https://github.com/scverse/anndata"
    displayName: 'Install dependencies release candidates'
-    condition: eq(variables['PRERELEASE_DEPENDENCIES'], 'yes')
+    condition: eq(variables['DEPENDENCIES_VERSION'], 'pre-release')
 
   - script: |
-      pip install -v "anndata[dev,test] @ git+https://github.com/scverse/anndata"
-    displayName: 'Install development anndata'
-    condition: eq(variables['ANNDATA_DEV'], 'yes')
+      python -m pip install pip wheel tomli packaging pytest-cov
+      pip install `python3 ci/scripts/min-deps.py pyproject.toml --extra dev test`
+      pip install --no-deps .
+    displayName: 'Install dependencies minimum version'
+    condition: eq(variables['DEPENDENCIES_VERSION'], 'minimum-version')
 
   - script: |
       pip list
@@ -72,31 +78,31 @@ jobs:
 
   - script: pytest
     displayName: 'PyTest'
-    condition: eq(variables['RUN_COVERAGE'], 'no')
+    condition: eq(variables['TEST_TYPE'], 'standard')
 
   - script: |
       coverage run -m pytest
       coverage xml
     displayName: 'PyTest (coverage)'
-    condition: eq(variables['RUN_COVERAGE'], 'yes')
+    condition: eq(variables['TEST_TYPE'], 'coverage')
 
   - task: PublishCodeCoverageResults@1
     inputs:
       codeCoverageTool: Cobertura
       summaryFileLocation: 'test-data/coverage.xml'
       failIfCoverageEmpty: true
-    condition: eq(variables['RUN_COVERAGE'], 'yes')
+    condition: eq(variables['TEST_TYPE'], 'coverage')
 
   - task: PublishTestResults@2
     condition: succeededOrFailed()
     inputs:
       testResultsFiles: 'test-data/test-results.xml'
       testResultsFormat: NUnit
-      testRunTitle: 'Publish test results for Python $(python.version)'
+      testRunTitle: 'Publish test results for $(Agent.JobName)'
 
   - script: bash <(curl -s https://codecov.io/bash)
     displayName: 'Upload to codecov.io'
-    condition: eq(variables['RUN_COVERAGE'], 'yes')
+    condition: eq(variables['TEST_TYPE'], 'coverage')
 
 - job: CheckBuild
   pool:
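> Editor's note: the minimum-version install step relies on shell command substitution, the backticks run `min-deps.py` and splice its stdout into the `pip install` command line (the YAML's `--extra` works because argparse accepts unambiguous prefixes of the script's `--extras` flag). A rough Python equivalent of that step, as a sketch only; the paths and flags are taken from the YAML above, not part of the PR:

```python
# Rough equivalent of the "Install dependencies minimum version" CI step.
# Assumes min-deps.py prints space-separated pins like "numpy==1.23.*" to stdout.
import subprocess
import sys

pins = subprocess.run(
    [sys.executable, "ci/scripts/min-deps.py", "pyproject.toml", "--extras", "dev", "test"],
    capture_output=True,
    text=True,
    check=True,
).stdout.split()
# Install the pinned minimum versions, then the package itself without deps,
# so pip cannot silently upgrade anything past the minimums.
subprocess.run([sys.executable, "-m", "pip", "install", *pins], check=True)
subprocess.run([sys.executable, "-m", "pip", "install", "--no-deps", "."], check=True)
```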
diff --git a/ci/scripts/min-deps.py b/ci/scripts/min-deps.py
new file mode 100755
index 0000000000..b3f393ea57
--- /dev/null
+++ b/ci/scripts/min-deps.py
@@ -0,0 +1,99 @@
+#!python3
+from __future__ import annotations
+
+import argparse
+import sys
+from collections import deque
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+if sys.version_info >= (3, 11):
+    import tomllib
+else:
+    import tomli as tomllib
+
+from packaging.requirements import Requirement
+from packaging.version import Version
+
+if TYPE_CHECKING:
+    from collections.abc import Generator, Iterable
+
+
+def min_dep(req: Requirement) -> Requirement:
+    """
+    Given a requirement, return the minimum version specifier.
+
+    Example
+    -------
+
+    >>> min_dep(Requirement("numpy>=1.0"))
+    <Requirement('numpy==1.0.*')>
+    """
+    req_name = req.name
+    if req.extras:
+        req_name = f"{req_name}[{','.join(req.extras)}]"
+
+    if not req.specifier:
+        return Requirement(req_name)
+
+    min_version = Version("0.0.0.a1")
+    for spec in req.specifier:
+        if spec.operator in [">", ">=", "~="]:
+            min_version = max(min_version, Version(spec.version))
+        elif spec.operator == "==":
+            min_version = Version(spec.version)
+
+    return Requirement(f"{req_name}=={min_version}.*")
+
+
+def extract_min_deps(
+    dependencies: Iterable[Requirement], *, pyproject
+) -> Generator[Requirement, None, None]:
+    dependencies = deque(dependencies)  # We'll be mutating this
+    project_name = pyproject["project"]["name"]
+
+    while len(dependencies) > 0:
+        req = dependencies.pop()
+
+        # If we are referring to other optional dependency lists, resolve them
+        if req.name == project_name:
+            assert req.extras, f"Project included itself as dependency, without specifying extras: {req}"
+            for extra in req.extras:
+                extra_deps = pyproject["project"]["optional-dependencies"][extra]
+                dependencies += map(Requirement, extra_deps)
+        else:
+            yield min_dep(req)
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        prog="min-deps",
+        description="""Parse a pyproject.toml file and output a list of minimum dependencies.
+
+        Output is directly passable to `pip install`.""",
+        usage="pip install `python min-deps.py pyproject.toml`",
+    )
+    parser.add_argument(
+        "path", type=Path, help="pyproject.toml to parse minimum dependencies from"
+    )
+    parser.add_argument(
+        "--extras", type=str, nargs="*", default=(), help="extras to install"
+    )
+
+    args = parser.parse_args()
+
+    pyproject = tomllib.loads(args.path.read_text())
+
+    project_name = pyproject["project"]["name"]
+    deps = [
+        *map(Requirement, pyproject["project"]["dependencies"]),
+        *(Requirement(f"{project_name}[{extra}]") for extra in args.extras),
+    ]
+
+    min_deps = extract_min_deps(deps, pyproject=pyproject)
+
+    print(" ".join(map(str, min_deps)))
+
+
+if __name__ == "__main__":
+    main()
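> Editor's note: for illustration, here is how `min_dep` treats the specifier operators it recognizes. This assumes the script above is importable as a module (`min_deps` is a hypothetical import name; the functions themselves are from the script):

```python
from packaging.requirements import Requirement
from min_deps import min_dep  # hypothetical: the script saved as an importable module

print(min_dep(Requirement("numpy>=1.23")))        # numpy==1.23.*
print(min_dep(Requirement("anndata[dev]>=0.8")))  # anndata[dev]==0.8.*  (extras preserved)
print(min_dep(Requirement("packaging~=21.3")))    # packaging==21.3.*   (~= treated as a lower bound)
print(min_dep(Requirement("tqdm")))               # tqdm                (no specifier, so no pin)
```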
diff --git a/docs/release-notes/1.10.0.md b/docs/release-notes/1.10.0.md
index 37a6adf97f..7d48d4fd9d 100644
--- a/docs/release-notes/1.10.0.md
+++ b/docs/release-notes/1.10.0.md
@@ -35,6 +35,7 @@
 * Fix setting `sc.settings.verbosity` in some cases {pr}`2605` {smaller}`P Angerer`
 * Fix all remaining pandas warnings {pr}`2789` {smaller}`P Angerer`
 * Fix some annoying plotting warnings around violin plots {pr}`2844` {smaller}`P Angerer`
+* Scanpy now has a test job which tests against the minimum versions of the dependencies. In the process of implementing this, many bugs associated with using older versions of `pandas`, `anndata`, `numpy`, and `matplotlib` were fixed. {pr}`2816` {smaller}`I Virshup`
 
 ```{rubric} Ecosystem
 ```
diff --git a/pyproject.toml b/pyproject.toml
index bd5e2b5215..fda07ea18b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,24 +46,25 @@ classifiers = [
     "Topic :: Scientific/Engineering :: Visualization",
 ]
 dependencies = [
-    "anndata>=0.7.4",
+    "anndata>=0.8",
     # numpy needs a version due to #1320
-    "numpy>=1.17.0",
+    "numpy>=1.23",
     "matplotlib>=3.6",
-    "pandas >=2.1.3",
-    "scipy>=1.4",
-    "seaborn>=0.13.0",
-    "h5py>=3",
+    "pandas >=1.5",
+    "scipy>=1.8",
+    "seaborn>=0.13",
+    "h5py>=3.1",
     "tqdm",
     "scikit-learn>=0.24",
-    "statsmodels>=0.10.0rc2",
+    "statsmodels>=0.13",
     "patsy",
-    "networkx>=2.3",
+    "networkx>=2.7",
     "natsort",
     "joblib",
-    "numba>=0.41.0",
+    "numba>=0.56",
     "umap-learn>=0.3.10",
-    "packaging",
+    "pynndescent>=0.5",
+    "packaging>=21.3",
     "session-info",
     "legacy-api-wrap>=1.4", # for positional API deprecations
     "get-annotations; python_version < '3.10'",
@@ -132,8 +133,8 @@ dev = [
 ]
 # Algorithms
 paga = ["igraph"]
-louvain = ["igraph", "louvain>=0.6,!=0.6.2"] # Louvain community detection
-leiden = ["igraph>=0.10", "leidenalg>=0.9"] # Leiden community detection
+louvain = ["igraph", "louvain>=0.6.0,!=0.6.2"] # Louvain community detection
+leiden = ["igraph>=0.10", "leidenalg>=0.9.0"] # Leiden community detection
 bbknn = ["bbknn"] # Batch balanced KNN (batch correction)
 magic = ["magic-impute>=2.0"] # MAGIC imputation method
 skmisc = ["scikit-misc>=0.1.3"] # highly_variable_genes method 'seurat_v3'
@@ -142,7 +143,7 @@ scanorama = ["scanorama"] # Scanorama dataset integration
 scrublet = ["scikit-image"] # Doublet detection with automatic thresholds
 # Acceleration
 rapids = ["cudf>=0.9", "cuml>=0.9", "cugraph>=0.9"] # GPU accelerated calculation of neighbors
-dask = ["dask[array]!=2.17.0"] # Use the Dask parallelization engine
+dask = ["dask[array]>=2022.09.2"] # Use the Dask parallelization engine
 dask-ml = ["dask-ml", "scanpy[dask]"] # Dask-ML for sklearn-like API
 
 [tool.hatch.build]
@@ -166,6 +167,7 @@ nunit_attach_on = "fail"
 markers = [
     "internet: tests which rely on internet resources (enable with `--internet-tests`)",
     "gpu: tests that use a GPU (currently unused, but needs to be specified here as we import anndata.tests.helpers, which uses it)",
+    "anndata_dask_support: tests that require dask support in anndata",
 ]
 filterwarnings = [
     # legacy-api-wrap: internal use of positional API
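> Editor's note: the `==X.Y.*` pins that `min-deps.py` derives from these `>=` bounds still admit patch releases, which is usually what a minimum-version CI job wants. A small check of that semantics with `packaging` (illustrative only):

```python
from packaging.specifiers import SpecifierSet

spec = SpecifierSet("==1.23.*")
print("1.23.5" in spec)  # True: any 1.23.x patch release satisfies the pin
print("1.24.0" in spec)  # False: newer minor versions are excluded
```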
diff --git a/scanpy/get/get.py b/scanpy/get/get.py
index b51dc30a26..8ef58f4b51 100644
--- a/scanpy/get/get.py
+++ b/scanpy/get/get.py
@@ -260,7 +260,7 @@ def obs_df(
     ... )
     >>> plotdf.columns
     Index(['CD8B', 'n_genes', 'X_umap-0', 'X_umap-1'], dtype='object')
-    >>> plotdf.plot.scatter("X_umap-0", "X_umap-1", c="CD8B")
+    >>> plotdf.plot.scatter("X_umap-0", "X_umap-1", c="CD8B")  # doctest: +SKIP
 
     Calculating mean expression for marker genes by cluster:
diff --git a/scanpy/neighbors/_backends/rapids.py b/scanpy/neighbors/_backends/rapids.py
index 78a6bb7359..ef6b9b23f1 100644
--- a/scanpy/neighbors/_backends/rapids.py
+++ b/scanpy/neighbors/_backends/rapids.py
@@ -3,8 +3,9 @@
 from typing import TYPE_CHECKING, Any, Literal
 
 import numpy as np
-from sklearn.base import BaseEstimator, TransformerMixin, check_is_fitted
+from sklearn.base import BaseEstimator, TransformerMixin
 from sklearn.exceptions import NotFittedError
+from sklearn.utils.validation import check_is_fitted
 
 from ..._settings import settings
 from ._common import TransformerChecksMixin
diff --git a/scanpy/plotting/_baseplot_class.py b/scanpy/plotting/_baseplot_class.py
index 22ff04913e..10329f16e4 100644
--- a/scanpy/plotting/_baseplot_class.py
+++ b/scanpy/plotting/_baseplot_class.py
@@ -347,7 +347,7 @@ def add_totals(
     >>> adata = sc.datasets.pbmc68k_reduced()
     >>> markers = {'T-cell': 'CD3D', 'B-cell': 'CD79A', 'myeloid': 'CST3'}
     >>> plot = sc.pl._baseplot_class.BasePlot(adata, markers, groupby='bulk_labels').add_totals()
-    >>> plot.plot_group_extra['counts_df']
+    >>> plot.plot_group_extra['counts_df']  # doctest: +SKIP
     bulk_labels
     CD4+/CD25 T Reg               68
     CD4+/CD45RA+/CD25- Naive T     8
diff --git a/scanpy/plotting/_matrixplot.py b/scanpy/plotting/_matrixplot.py
index d811a12a8a..a65332be65 100644
--- a/scanpy/plotting/_matrixplot.py
+++ b/scanpy/plotting/_matrixplot.py
@@ -168,7 +168,15 @@ def __init__(
 
         if values_df is None:
             # compute mean value
-            values_df = self.obs_tidy.groupby(level=0, observed=True).mean()
+            values_df = (
+                self.obs_tidy.groupby(level=0, observed=True)
+                .mean()
+                .loc[
+                    self.categories_order
+                    if self.categories_order is not None
+                    else self.categories
+                ]
+            )
 
             if standard_scale == "group":
                 values_df = values_df.sub(values_df.min(1), axis=0)
diff --git a/scanpy/plotting/_stacked_violin.py b/scanpy/plotting/_stacked_violin.py
index fed4893cde..6b9abfd49c 100644
--- a/scanpy/plotting/_stacked_violin.py
+++ b/scanpy/plotting/_stacked_violin.py
@@ -383,14 +383,17 @@ def _mainplot(self, ax):
         if self.var_names_idx_order is not None:
             _matrix = _matrix.iloc[:, self.var_names_idx_order]
 
-        if self.categories_order is not None:
-            _matrix.index = _matrix.index.reorder_categories(
-                self.categories_order, ordered=True
-            )
-
         # get mean values for color and transform to color values
         # using colormap
-        _color_df = _matrix.groupby(level=0, observed=True).median()
+        _color_df = (
+            _matrix.groupby(level=0, observed=True)
+            .median()
+            .loc[
+                self.categories_order
+                if self.categories_order is not None
+                else self.categories
+            ]
+        )
 
         if self.are_axes_swapped:
             _color_df = _color_df.T
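> Editor's note: the `_matrixplot.py` and `_stacked_violin.py` changes stop relying on the row order that `groupby` happens to emit and instead reindex explicitly with `.loc`, since older pandas versions do not reliably return groups in the intended categorical order. A minimal sketch of the pattern on toy data (not scanpy's own objects):

```python
import pandas as pd

df = pd.DataFrame(
    {"value": [1.0, 2.0, 3.0]},
    index=pd.CategoricalIndex(["b", "a", "b"], categories=["b", "a"]),
)
# groupby may emit groups in an order that differs across pandas versions;
# .loc with the desired category order makes the row order explicit.
means = df.groupby(level=0, observed=True).mean().loc[["b", "a"]]
print(means)
```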
diff --git a/scanpy/plotting/_tools/scatterplots.py b/scanpy/plotting/_tools/scatterplots.py
index b1b8937ff5..59c828d517 100644
--- a/scanpy/plotting/_tools/scatterplots.py
+++ b/scanpy/plotting/_tools/scatterplots.py
@@ -20,6 +20,7 @@
 from matplotlib.colors import Colormap, Normalize
 from matplotlib.figure import Figure  # noqa: TCH002
 from numpy.typing import NDArray  # noqa: TCH002
+from packaging.version import Version
 
 from ... import logging as logg
 from ..._settings import settings
@@ -1247,8 +1248,10 @@ def _color_vector(
     }
     # If color_map does not have unique values, this can be slow as the
     # result is not categorical
-    color_vector = pd.Categorical(values.map(color_map, na_action="ignore"))
-
+    if Version(pd.__version__) < Version("2.1.0"):
+        color_vector = pd.Categorical(values.map(color_map))
+    else:
+        color_vector = pd.Categorical(values.map(color_map, na_action="ignore"))
     # Set color to 'missing color' for all missing values
     if color_vector.isna().any():
         color_vector = color_vector.add_categories([to_hex(na_color)])
diff --git a/scanpy/preprocessing/_highly_variable_genes.py b/scanpy/preprocessing/_highly_variable_genes.py
index e6f925a168..0dd2a33c51 100644
--- a/scanpy/preprocessing/_highly_variable_genes.py
+++ b/scanpy/preprocessing/_highly_variable_genes.py
@@ -252,6 +252,11 @@ def _highly_variable_genes_single_batch(
     `highly_variable`, `means`, `dispersions`, and `dispersions_norm`.
     """
     X = _get_obs_rep(adata, layer=layer)
+
+    if hasattr(X, "_view_args"):  # AnnData array view
+        # For compatibility with anndata<0.9
+        X = X.copy()  # Doesn't actually copy memory, just removes View class wrapper
+
     if flavor == "seurat":
         X = X.copy()
         if "log1p" in adata.uns_keys() and adata.uns["log1p"].get("base") is not None:
diff --git a/scanpy/preprocessing/_pca.py b/scanpy/preprocessing/_pca.py
index 53377f1321..df88b57b19 100644
--- a/scanpy/preprocessing/_pca.py
+++ b/scanpy/preprocessing/_pca.py
@@ -3,6 +3,7 @@
 import warnings
 from warnings import warn
 
+import anndata as ad
 import numpy as np
 from anndata import AnnData
 from packaging import version
@@ -188,6 +189,19 @@ def pca(
 
     X = _get_obs_rep(adata_comp, layer=layer)
 
+    # See: https://github.com/scverse/scanpy/pull/2816#issuecomment-1932650529
+    if (
+        version.parse(ad.__version__) < version.parse("0.9")
+        and mask is not None
+        and isinstance(X, np.ndarray)
+    ):
+        warnings.warn(
+            "When using a mask parameter with anndata<0.9 on a dense array, the PCA "
+            "can have slightly different results due to the array being column major "
+            "instead of row major.",
+            UserWarning,
+        )
+
     is_dask = isinstance(X, DaskArray)
 
     # check_random_state returns a numpy RandomState when passed an int but
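> Editor's note: several of these changes gate behavior on the installed library version. The comparison uses `packaging.version.Version` (or `packaging.version.parse`) rather than comparing raw version strings, which would be ordered lexicographically:

```python
from packaging.version import Version

assert Version("0.10.0") > Version("0.9.2")  # parsed comparison: correct
assert "0.10.0" < "0.9.2"                    # string comparison gets this backwards
```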
+ """ + import anndata as ad + from packaging.version import Version + + if Version(ad.__version__) < Version("0.9"): + return ad.AnnData(X=X, *args, **kwargs, dtype=X.dtype) + else: + return ad.AnnData(X=X, *args, **kwargs) + + def check_rep_mutation(func, X, *, fields=("layer", "obsm"), **kwargs): """Check that only the array meant to be modified is modified.""" - adata = sc.AnnData(X=X.copy()) + adata = anndata_v0_8_constructor_compat(X.copy()) + for field in fields: sc.get._set_obs_rep(adata, X, **{field: field}) X_array = asarray(X) @@ -105,3 +120,16 @@ def _check_check_values_warnings(function, adata, expected_warning, kwargs={}): function(adata.copy(), **kwargs, check_values=True) warning_msgs = [w.message.args[0] for w in record] assert expected_warning in warning_msgs + + +# Delayed imports for case where we aren't using dask +def as_dense_dask_array(*args, **kwargs): + from anndata.tests.helpers import as_dense_dask_array + + return as_dense_dask_array(*args, **kwargs) + + +def as_sparse_dask_array(*args, **kwargs): + from anndata.tests.helpers import as_sparse_dask_array + + return as_sparse_dask_array(*args, **kwargs) diff --git a/scanpy/testing/_pytest/__init__.py b/scanpy/testing/_pytest/__init__.py index 1dfc3f78d2..ad42b8ec74 100644 --- a/scanpy/testing/_pytest/__init__.py +++ b/scanpy/testing/_pytest/__init__.py @@ -3,6 +3,7 @@ import os import sys +import warnings from typing import TYPE_CHECKING import pytest @@ -71,10 +72,12 @@ def _fix_dask_df_warning(): import dask # noqa: F401 except ImportError: return - with pytest.warns( - DeprecationWarning, - match=r"The current Dask DataFrame implementation is deprecated", - ): + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + category=DeprecationWarning, + message=r"The current Dask DataFrame implementation is deprecated", + ) import dask.dataframe # noqa: F401 @@ -118,3 +121,18 @@ def _modify_doctests(request: pytest.FixtureRequest) -> None: skip_reason: str | None if skip_reason := getattr(func, "_doctest_skip_reason", None): pytest.skip(reason=skip_reason) + + +def pytest_itemcollected(item: pytest.Item) -> None: + # Dask AnnData tests require anndata > 0.10 + import anndata + from packaging.version import Version + + requires_anndata_dask_support = ( + len([mark for mark in item.iter_markers(name="anndata_dask_support")]) > 0 + ) + + if requires_anndata_dask_support and Version(anndata.__version__) < Version("0.10"): + item.add_marker( + pytest.mark.skip(reason="dask support requires anndata version > 0.10") + ) diff --git a/scanpy/testing/_pytest/params.py b/scanpy/testing/_pytest/params.py index 3ff543bfa7..655daf6c7a 100644 --- a/scanpy/testing/_pytest/params.py +++ b/scanpy/testing/_pytest/params.py @@ -5,9 +5,10 @@ from typing import TYPE_CHECKING, Literal import pytest -from anndata.tests.helpers import as_dense_dask_array, as_sparse_dask_array, asarray +from anndata.tests.helpers import asarray from scipy import sparse +from .._helpers import as_dense_dask_array, as_sparse_dask_array from .._pytest.marks import needs if TYPE_CHECKING: @@ -35,10 +36,18 @@ def param_with( pytest.param(sparse.csc_matrix, id="scipy_csc"), ), ("dask", "dense"): ( - pytest.param(as_dense_dask_array, marks=[needs.dask], id="dask_array_dense"), + pytest.param( + as_dense_dask_array, + marks=[needs.dask, pytest.mark.anndata_dask_support], + id="dask_array_dense", + ), ), ("dask", "sparse"): ( - pytest.param(as_sparse_dask_array, marks=[needs.dask], id="dask_array_sparse"), + pytest.param( + as_sparse_dask_array, + 
diff --git a/scanpy/testing/_pytest/params.py b/scanpy/testing/_pytest/params.py
index 3ff543bfa7..655daf6c7a 100644
--- a/scanpy/testing/_pytest/params.py
+++ b/scanpy/testing/_pytest/params.py
@@ -5,9 +5,10 @@
 from typing import TYPE_CHECKING, Literal
 
 import pytest
-from anndata.tests.helpers import as_dense_dask_array, as_sparse_dask_array, asarray
+from anndata.tests.helpers import asarray
 from scipy import sparse
 
+from .._helpers import as_dense_dask_array, as_sparse_dask_array
 from .._pytest.marks import needs
 
 if TYPE_CHECKING:
@@ -35,10 +36,18 @@ def param_with(
         pytest.param(sparse.csc_matrix, id="scipy_csc"),
     ),
     ("dask", "dense"): (
-        pytest.param(as_dense_dask_array, marks=[needs.dask], id="dask_array_dense"),
+        pytest.param(
+            as_dense_dask_array,
+            marks=[needs.dask, pytest.mark.anndata_dask_support],
+            id="dask_array_dense",
+        ),
     ),
     ("dask", "sparse"): (
-        pytest.param(as_sparse_dask_array, marks=[needs.dask], id="dask_array_sparse"),
+        pytest.param(
+            as_sparse_dask_array,
+            marks=[needs.dask, pytest.mark.anndata_dask_support],
+            id="dask_array_sparse",
+        ),
         # probably not necessary to also do csc
     ),
 }
diff --git a/scanpy/tests/test_get.py b/scanpy/tests/test_get.py
index bbdea8ce3d..b55064088a 100644
--- a/scanpy/tests/test_get.py
+++ b/scanpy/tests/test_get.py
@@ -11,6 +11,7 @@
 
 import scanpy as sc
 from scanpy.datasets._utils import filter_oldformatwarning
+from scanpy.testing._helpers import anndata_v0_8_constructor_compat
 from scanpy.testing._helpers.data import pbmc68k_reduced
 
 
@@ -38,7 +39,7 @@ def adata():
     adata.X is np.ones((2, 2))
     adata.layers['double'] is sparse np.ones((2,2)) * 2 to also test sparse matrices
     """
-    return AnnData(
+    return anndata_v0_8_constructor_compat(
         X=np.ones((2, 2), dtype=int),
         obs=pd.DataFrame(
             {"obs1": [0, 1], "obs2": ["a", "b"]}, index=["cell1", "cell2"]
@@ -60,7 +61,7 @@ def test_obs_df(adata):
     adata.obsm["sparse"] = sparse.csr_matrix(np.eye(2), dtype="float64")
 
     # make raw with different genes than adata
-    adata.raw = AnnData(
+    adata.raw = anndata_v0_8_constructor_compat(
         X=np.array([[1, 2, 3], [2, 4, 6]], dtype=np.float64),
         var=pd.DataFrame(
             {"gene_symbols": ["raw1", "raw2", "raw3"]},
diff --git a/scanpy/tests/test_metrics.py b/scanpy/tests/test_metrics.py
index 8d0acf3c33..226aecd7f6 100644
--- a/scanpy/tests/test_metrics.py
+++ b/scanpy/tests/test_metrics.py
@@ -5,10 +5,12 @@
 from operator import eq
 from string import ascii_letters
 
+import numba
 import numpy as np
 import pandas as pd
 import pytest
 import threadpoolctl
+from packaging.version import Version
 from scipy import sparse
 
 import scanpy as sc
@@ -77,7 +79,10 @@ def test_consistency(metric, threading):
     all_genes = metric(pbmc, layer="raw")
     first_gene = metric(pbmc, vals=pbmc.obs_vector(pbmc.var_names[0], layer="raw"))
 
-    np.testing.assert_allclose(all_genes[0], first_gene, rtol=1e-9)
+    if Version(numba.__version__) < Version("0.57"):
+        np.testing.assert_allclose(all_genes[0], first_gene, rtol=1e-5)
+    else:
+        np.testing.assert_allclose(all_genes[0], first_gene, rtol=1e-9)
 
     # Test that results are similar for sparse and dense reps of same data
     equality_check(
diff --git a/scanpy/tests/test_neighbors.py b/scanpy/tests/test_neighbors.py
index dbefe5a006..d623682ff3 100644
--- a/scanpy/tests/test_neighbors.py
+++ b/scanpy/tests/test_neighbors.py
@@ -11,6 +11,7 @@
 
 import scanpy as sc
 from scanpy import Neighbors
+from scanpy.testing._helpers import anndata_v0_8_constructor_compat
 
 if TYPE_CHECKING:
     from pytest_mock import MockerFixture
@@ -113,7 +114,7 @@ def get_neighbors() -> Neighbors:
-    return Neighbors(AnnData(np.array(X)))
+    return Neighbors(anndata_v0_8_constructor_compat(np.array(X)))
 
 
 @pytest.fixture
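> Editor's note: the loosened tolerance in `test_metrics.py` reflects how `rtol` works in `np.testing.assert_allclose`: the allowed absolute error scales with the magnitude of the desired value. A quick illustration:

```python
import numpy as np

a = np.float32(1.0) + np.float32(1e-6)  # a small float32 rounding-scale offset
np.testing.assert_allclose(a, 1.0, rtol=1e-5)  # passes: |a - 1| <= 1e-5 * |1|
# With rtol=1e-9 the same comparison would raise an AssertionError.
```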
diff --git a/scanpy/tests/test_pca.py b/scanpy/tests/test_pca.py
index 6e529b4320..c9b6d913c2 100644
--- a/scanpy/tests/test_pca.py
+++ b/scanpy/tests/test_pca.py
@@ -3,19 +3,20 @@
 import warnings
 from typing import Literal
 
+import anndata as ad
 import numpy as np
 import pytest
 from anndata import AnnData
 from anndata.tests.helpers import (
-    as_dense_dask_array,
-    as_sparse_dask_array,
     asarray,
     assert_equal,
 )
+from packaging.version import Version
 from scipy import sparse
 from sklearn.utils import issparse
 
 import scanpy as sc
+from scanpy.testing._helpers import as_dense_dask_array, as_sparse_dask_array
 from scanpy.testing._helpers.data import pbmc3k_normalized
 from scanpy.testing._pytest.marks import needs
 from scanpy.testing._pytest.params import ARRAY_TYPES, ARRAY_TYPES_SUPPORTED, param_with
@@ -337,12 +338,20 @@ def test_mask_argument_equivalence(float_dtype, array_type):
     )
 
 
-def test_mask(array_type):
+def test_mask(array_type, request):
     if array_type is as_dense_dask_array:
         pytest.xfail("TODO: Dask arrays are not supported")
 
     adata = sc.datasets.blobs(n_variables=10, n_centers=3, n_observations=100)
     adata.X = array_type(adata.X)
 
+    if isinstance(adata.X, np.ndarray) and Version(ad.__version__) < Version("0.9"):
+        request.node.add_marker(
+            pytest.mark.xfail(
+                reason="TODO: Previous versions of anndata would return an F-ordered array for one"
+                " case here, which surprisingly changes the results of PCA considerably."
+            )
+        )
+
     mask = np.random.choice([True, False], adata.shape[1])
 
     adata_masked = adata[:, mask].copy()
@@ -359,6 +368,24 @@ def test_mask(array_type):
     )
 
 
+def test_mask_order_warning(request):
+    if Version(ad.__version__) >= Version("0.9"):
+        request.node.add_marker(
+            pytest.mark.xfail(
+                reason="Not expected to warn in later versions of anndata"
+            )
+        )
+
+    adata = ad.AnnData(X=np.random.randn(50, 5))
+    mask = np.array([True, False, True, False, True])
+
+    with pytest.warns(
+        UserWarning,
+        match="When using a mask parameter with anndata<0.9 on a dense array",
+    ):
+        sc.pp.pca(adata, mask=mask)
+
+
 def test_mask_defaults(array_type, float_dtype):
     """
     Test if pca result is equal without highly variable and with-but mask is None
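> Editor's note: `test_mask` above uses `request.node.add_marker` to apply an xfail at run time, once the array type and installed anndata version are known, which a static decorator cannot express. The pattern in isolation (a hypothetical test, runnable under pytest):

```python
import pytest

def test_version_dependent(request):
    # Decide at run time whether this case is expected to fail.
    request.node.add_marker(
        pytest.mark.xfail(reason="known difference on old dependencies")
    )
    assert 1 + 1 == 3  # reported as xfail rather than as a failure
```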
""" X = count_matrix_format(sp.random(100, 200, density=0.3).toarray()) - adata = sc.AnnData(X=X.copy().astype(np.float64)) + adata = anndata_v0_8_constructor_compat(X=X.copy()) sc.pp.scale(adata, zero_center=zero_center) scaled_X = sc.pp.scale(X, zero_center=zero_center, copy=True) - assert np.array_equal(asarray(scaled_X), asarray(adata.X)) + np.testing.assert_equal(asarray(scaled_X), asarray(adata.X)) def test_recipe_plotting(): @@ -314,7 +318,7 @@ def test_downsample_counts_per_cell(count_matrix_format, replace, dtype): TARGET = 1000 X = np.random.randint(0, 100, (1000, 100)) * np.random.binomial(1, 0.3, (1000, 100)) X = X.astype(dtype) - adata = AnnData(X=count_matrix_format(X).astype(dtype)) + adata = anndata_v0_8_constructor_compat(X=count_matrix_format(X).astype(dtype)) with pytest.raises(ValueError): sc.pp.downsample_counts( adata, counts_per_cell=TARGET, total_counts=TARGET, replace=replace @@ -346,7 +350,7 @@ def test_downsample_counts_per_cell_multiple_targets( TARGETS = np.random.randint(500, 1500, 1000) X = np.random.randint(0, 100, (1000, 100)) * np.random.binomial(1, 0.3, (1000, 100)) X = X.astype(dtype) - adata = AnnData(X=count_matrix_format(X).astype(dtype)) + adata = anndata_v0_8_constructor_compat(X=count_matrix_format(X).astype(dtype)) initial_totals = np.ravel(adata.X.sum(axis=1)) with pytest.raises(ValueError): sc.pp.downsample_counts(adata, counts_per_cell=[40, 10], replace=replace) @@ -372,7 +376,7 @@ def test_downsample_counts_per_cell_multiple_targets( def test_downsample_total_counts(count_matrix_format, replace, dtype): X = np.random.randint(0, 100, (1000, 100)) * np.random.binomial(1, 0.3, (1000, 100)) X = X.astype(dtype) - adata_orig = AnnData(X=count_matrix_format(X)) + adata_orig = anndata_v0_8_constructor_compat(X=count_matrix_format(X)) total = X.sum() target = np.floor_divide(total, 10) initial_totals = np.ravel(adata_orig.X.sum(axis=1)) diff --git a/scanpy/tests/test_preprocessing_distributed.py b/scanpy/tests/test_preprocessing_distributed.py index 6dfa78459d..5f7a7bf443 100644 --- a/scanpy/tests/test_preprocessing_distributed.py +++ b/scanpy/tests/test_preprocessing_distributed.py @@ -4,9 +4,10 @@ import numpy.testing as npt import pytest -from anndata import AnnData, OldFormatWarning, read_zarr +from anndata import AnnData, read_zarr from scanpy._compat import DaskArray, ZappyArray +from scanpy.datasets._utils import filter_oldformatwarning from scanpy.preprocessing import ( filter_cells, filter_genes, @@ -27,24 +28,24 @@ @pytest.fixture() +@filter_oldformatwarning def adata() -> AnnData: - with pytest.warns(OldFormatWarning): - a = read_zarr(input_file) # regular anndata + a = read_zarr(input_file) a.var_names_make_unique() a.X = a.X[:] # convert to numpy array return a +@filter_oldformatwarning @pytest.fixture( params=[ pytest.param("direct", marks=[needs.zappy]), - pytest.param("dask", marks=[needs.dask]), + pytest.param("dask", marks=[needs.dask, pytest.mark.anndata_dask_support]), ] ) def adata_dist(request: pytest.FixtureRequest) -> AnnData: # regular anndata except for X, which we replace on the next line - with pytest.warns(OldFormatWarning): - a = read_zarr(input_file) + a = read_zarr(input_file) a.var_names_make_unique() a.uns["dist-mode"] = request.param input_file_X = f"{input_file}/X" @@ -133,6 +134,7 @@ def test_filter_genes(adata: AnnData, adata_dist: AnnData): npt.assert_allclose(result, adata.X) +@filter_oldformatwarning def test_write_zarr(adata: AnnData, adata_dist: AnnData): import zarr @@ -153,7 +155,7 @@ def 
diff --git a/scanpy/tools/_dendrogram.py b/scanpy/tools/_dendrogram.py
index 31fe6564fc..68902a6632 100644
--- a/scanpy/tools/_dendrogram.py
+++ b/scanpy/tools/_dendrogram.py
@@ -111,7 +111,7 @@ def dendrogram(
     >>> import scanpy as sc
     >>> adata = sc.datasets.pbmc68k_reduced()
     >>> sc.tl.dendrogram(adata, groupby='bulk_labels')
-    >>> sc.pl.dendrogram(adata, groupby='bulk_labels')
+    >>> sc.pl.dendrogram(adata, groupby='bulk_labels')  # doctest: +SKIP
     >>> markers = ['C1QA', 'PSAP', 'CD79A', 'CD79B', 'CST3', 'LYZ']
     >>> sc.pl.dotplot(adata, markers, groupby='bulk_labels', dendrogram=True)
@@ -155,7 +155,11 @@ def dendrogram(
     )
 
     # aggregate values within categories using 'mean'
-    mean_df = rep_df.groupby(level=0, observed=True).mean()
+    mean_df = (
+        rep_df.groupby(level=0, observed=True)
+        .mean()
+        .loc[categories]  # Fixed ordering for pandas < 2
+    )
 
     import scipy.cluster.hierarchy as sch
     from scipy.spatial import distance