scverse · ivirshup · Feb 15, 2024 · Jan 17, 2024 · Jan 17, 2024 · Jan 17, 2024
diff --git a/.azure-pipelines.yml b/.azure-pipelines.yml
@@ -6,10 +6,9 @@ variables:
   python.version: '3.11'
   PIP_CACHE_DIR: $(Pipeline.Workspace)/.pip
   PYTEST_ADDOPTS: '-v --color=yes --durations=0 --nunit-xml=test-data/test-results.xml'
-  ANNDATA_DEV: no
-  RUN_COVERAGE: no
   TEST_EXTRA: 'test-full'
-  PRERELEASE_DEPENDENCIES: no
+  DEPENDENCIES_VERSION: "latest"  # | "pre-release" | "minimum-version"
+  TEST_TYPE: "standard"  # | "coverage"
 
 jobs:
 - job: PyTest
@@ -20,12 +19,16 @@ jobs:
       Python3.9:
         python.version: '3.9'
       Python3.11: {}
-      minimal_tests:
+      minimal_dependencies:
         TEST_EXTRA: 'test-min'
       anndata_dev:
-        ANNDATA_DEV: yes
-        RUN_COVERAGE: yes
-        PRERELEASE_DEPENDENCIES: yes
+        DEPENDENCIES_VERSION: "pre-release"
+        TEST_TYPE: "coverage"
+      minimum_versions:
+        python.version: '3.9'
+        DEPENDENCIES_VERSION: "minimum-version"
+        TEST_TYPE: "coverage"
+
 
   steps:
   - task: UsePythonVersion@0
@@ -52,51 +55,54 @@ jobs:
       pip install wheel coverage
       pip install .[dev,$(TEST_EXTRA)]
     displayName: 'Install dependencies'
-    condition: eq(variables['PRERELEASE_DEPENDENCIES'], 'no')
+    condition: eq(variables['DEPENDENCIES_VERSION'], 'latest')
 
   - script: |
       python -m pip install --pre --upgrade pip
       pip install --pre wheel coverage
       pip install --pre .[dev,$(TEST_EXTRA)]
+      pip install -v "anndata[dev,test] @ git+https://github.com/scverse/anndata"
     displayName: 'Install dependencies release candidates'
-    condition: eq(variables['PRERELEASE_DEPENDENCIES'], 'yes')
+    condition: eq(variables['DEPENDENCIES_VERSION'], 'pre-release')
 
   - script: |
-      pip install -v "anndata[dev,test] @ git+https://github.com/scverse/anndata"
-    displayName: 'Install development anndata'
-    condition: eq(variables['ANNDATA_DEV'], 'yes')
+      python -m pip install pip wheel tomli packaging
+      pip install `python3 ci/scripts/min-deps.py pyproject.toml --extras dev test`
+      pip install --no-deps .
+    # min-deps.py prints pins for the lowest versions pyproject.toml allows;
+    # the project itself is then installed with --no-deps.
+    displayName: 'Install dependencies minimum version'
+    condition: eq(variables['DEPENDENCIES_VERSION'], 'minimum-version')
 
   - script: |
       pip list
     displayName: 'Display installed versions'
 
   - script: pytest
     displayName: 'PyTest'
-    condition: eq(variables['RUN_COVERAGE'], 'no')
+    condition: eq(variables['TEST_TYPE'], 'standard')
 
   - script: |
       coverage run -m pytest
       coverage xml
     displayName: 'PyTest (coverage)'
-    condition: eq(variables['RUN_COVERAGE'], 'yes')
+    condition: eq(variables['TEST_TYPE'], 'coverage')
 
   - task: PublishCodeCoverageResults@1
     inputs:
       codeCoverageTool: Cobertura
       summaryFileLocation: 'test-data/coverage.xml'
       failIfCoverageEmpty: true
-    condition: eq(variables['RUN_COVERAGE'], 'yes')
+    condition: eq(variables['TEST_TYPE'], 'coverage')
 
   - task: PublishTestResults@2
     condition: succeededOrFailed()
     inputs:
       testResultsFiles: 'test-data/test-results.xml'
       testResultsFormat: NUnit
-      testRunTitle: 'Publish test results for Python $(python.version)'
+      testRunTitle: 'Publish test results for $(Agent.JobName)'
 
   - script: bash <(curl -s https://codecov.io/bash)
     displayName: 'Upload to codecov.io'
-    condition: eq(variables['RUN_COVERAGE'], 'yes')
+    condition: eq(variables['TEST_TYPE'], 'coverage')
 
 - job: CheckBuild
   pool:

diff --git a/ci/scripts/min-deps.py b/ci/scripts/min-deps.py
@@ -0,0 +1,99 @@
+#!python3
+from __future__ import annotations
+
+import argparse
+import sys
+from collections import deque
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+if sys.version_info >= (3, 11):
+    import tomllib
+else:
+    import tomli as tomllib
+
+from packaging.requirements import Requirement
+from packaging.version import Version
+
+if TYPE_CHECKING:
+    from collections.abc import Generator, Iterable
+
+
+def min_dep(req: Requirement) -> Requirement:
+    """
+    Given a requirement, return the minimum version specifier.
+
+    The lower bound is the largest version named by a ``>``, ``>=`` or ``~=``
+    clause (``>`` is treated like ``>=``); an ``==`` clause replaces whatever
+    bound was found before it. Requirements without any such clause (no
+    specifier at all, or only clauses such as ``<`` or ``!=``) are returned
+    unpinned. Extras are kept; environment markers are not carried over.
+
+    Example
+    -------
+
+    >>> str(min_dep(Requirement("numpy>=1.0")))
+    'numpy==1.0.*'
+    >>> str(min_dep(Requirement("dask[array]!=2.17.0")))
+    'dask[array]'
+    """
+    req_name = req.name
+    if req.extras:
+        req_name = f"{req_name}[{','.join(sorted(req.extras))}]"
+
+    # Only these operators say anything about the lowest acceptable version.
+    # Without one of them there is nothing to pin to.
+    lower_bounds = [
+        spec for spec in req.specifier if spec.operator in {">", ">=", "~=", "=="}
+    ]
+    if not lower_bounds:
+        return Requirement(req_name)
+
+    min_version = Version("0.0.0.a1")
+    for spec in lower_bounds:
+        # `==` may carry a trailing wildcard (`==1.2.*`), which `Version` rejects
+        raw = spec.version[:-2] if spec.version.endswith(".*") else spec.version
+        if spec.operator == "==":
+            min_version = Version(raw)
+        else:
+            min_version = max(min_version, Version(raw))
+
+    # PEP 440 does not allow the `.*` wildcard together with a pre-, post-,
+    # dev- or local-version segment, so such versions are pinned exactly.
+    if min_version.is_prerelease or min_version.is_postrelease or min_version.local:
+        return Requirement(f"{req_name}=={min_version}")
+    return Requirement(f"{req_name}=={min_version}.*")
+
+
+def extract_min_deps(
+    dependencies: Iterable[Requirement], *, pyproject
+) -> Generator[Requirement, None, None]:
+    """
+    Yield the minimum-version pin of every requirement in `dependencies`.
+
+    Requirements that name the project itself (``project[extra]``) are not
+    yielded. They are replaced by the entries of the matching
+    ``project.optional-dependencies`` list, which are then processed the same
+    way. Each extra is expanded at most once, so extras that refer to each
+    other in a cycle cannot keep the loop running forever.
+
+    Parameters
+    ----------
+    dependencies
+        Requirements to resolve.
+    pyproject
+        Parsed ``pyproject.toml``. ``project.name`` is always read;
+        ``project.optional-dependencies`` is read when an extra is referenced.
+
+    Raises
+    ------
+    AssertionError
+        If the project is listed as its own dependency without extras.
+    KeyError
+        If a referenced extra is missing from ``project.optional-dependencies``.
+    """
+    dependencies = deque(dependencies)  # We'll be mutating this
+    project_name = pyproject["project"]["name"]
+    expanded_extras = set()  # extras whose dependency lists were already queued
+
+    while len(dependencies) > 0:
+        req = dependencies.pop()
+
+        # If we are referring to other optional dependency lists, resolve them
+        if req.name == project_name:
+            assert req.extras, f"Project included itself as dependency, without specifying extras: {req}"
+            for extra in req.extras:
+                if extra in expanded_extras:
+                    continue
+                expanded_extras.add(extra)
+                extra_deps = pyproject["project"]["optional-dependencies"][extra]
+                dependencies += map(Requirement, extra_deps)
+        else:
+            yield min_dep(req)
+
+
+def main():
+    """
+    Command-line entry point.
+
+    Parses the given ``pyproject.toml``, collects ``project.dependencies`` plus
+    the optional-dependency lists named with ``--extras``, and prints their
+    minimum-version pins on one line, separated by spaces.
+    """
+    parser = argparse.ArgumentParser(
+        prog="min-deps",
+        description="""Parse a pyproject.toml file and output a list of minimum dependencies.
+
+        Output is directly passable to `pip install`.""",
+        usage="pip install `python min-deps.py pyproject.toml`",
+    )
+    parser.add_argument(
+        "path", type=Path, help="pyproject.toml to parse minimum dependencies from"
+    )
+    parser.add_argument(
+        "--extras", type=str, nargs="*", default=(), help="extras to install"
+    )
+
+    args = parser.parse_args()
+
+    # TOML is UTF-8 by specification: parse from a binary handle instead of
+    # decoding the text with the platform's default encoding.
+    with args.path.open("rb") as f:
+        pyproject = tomllib.load(f)
+
+    project_name = pyproject["project"]["name"]
+    # Extras are expressed as self-references (`project[extra]`), which
+    # `extract_min_deps` expands into the matching optional-dependency lists.
+    deps = [
+        *map(Requirement, pyproject["project"]["dependencies"]),
+        *(Requirement(f"{project_name}[{extra}]") for extra in args.extras),
+    ]
+
+    min_deps = extract_min_deps(deps, pyproject=pyproject)
+
+    print(" ".join(map(str, min_deps)))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/docs/release-notes/1.10.0.md b/docs/release-notes/1.10.0.md
@@ -35,6 +35,7 @@
 * Fix setting `sc.settings.verbosity` in some cases {pr}`2605` {smaller}`P Angerer`
 * Fix all remaining pandas warnings {pr}`2789` {smaller}`P Angerer`
 * Fix some annoying plotting warnings around violin plots {pr}`2844` {smaller}`P Angerer`
+* Scanpy now has a test job which tests against the minimum versions of the dependencies. In the process of implementing this, many bugs associated with using older versions of `pandas`, `anndata`, `numpy`, and `matplotlib` were fixed. {pr}`2816` {smaller}`I Virshup`
 
 ```{rubric} Ecosystem
 ```

diff --git a/pyproject.toml b/pyproject.toml
@@ -46,24 +46,25 @@ classifiers = [
     "Topic :: Scientific/Engineering :: Visualization",
 ]
 dependencies = [
-    "anndata>=0.7.4",
+    "anndata>=0.8",
     # numpy needs a version due to #1320
-    "numpy>=1.17.0",
+    "numpy>=1.23",
     "matplotlib>=3.6",
-    "pandas >=2.1.3",
-    "scipy>=1.4",
-    "seaborn>=0.13.0",
-    "h5py>=3",
+    "pandas >=1.5",
+    "scipy>=1.8",
+    "seaborn>=0.13",
+    "h5py>=3.1",
     "tqdm",
     "scikit-learn>=0.24",
-    "statsmodels>=0.10.0rc2",
+    "statsmodels>=0.13",
     "patsy",
-    "networkx>=2.3",
+    "networkx>=2.7",
     "natsort",
     "joblib",
-    "numba>=0.41.0",
+    "numba>=0.56",
     "umap-learn>=0.3.10",
-    "packaging",
+    "pynndescent>=0.5",
+    "packaging>=21.3",
     "session-info",
     "legacy-api-wrap>=1.4",  # for positional API deprecations
     "get-annotations; python_version < '3.10'",
@@ -132,8 +133,8 @@ dev = [
 ]
 # Algorithms
 paga = ["igraph"]
-louvain = ["igraph", "louvain>=0.6,!=0.6.2"]  # Louvain community detection
-leiden = ["igraph>=0.10", "leidenalg>=0.9"]  # Leiden community detection
+louvain = ["igraph", "louvain>=0.6.0,!=0.6.2"]  # Louvain community detection
+leiden = ["igraph>=0.10", "leidenalg>=0.9.0"]  # Leiden community detection
 bbknn = ["bbknn"]  # Batch balanced KNN (batch correction)
 magic = ["magic-impute>=2.0"]  # MAGIC imputation method
 skmisc = ["scikit-misc>=0.1.3"]  # highly_variable_genes method 'seurat_v3'
@@ -142,7 +143,7 @@ scanorama = ["scanorama"]  # Scanorama dataset integration
 scrublet = ["scikit-image"]  # Doublet detection with automatic thresholds
 # Acceleration
 rapids = ["cudf>=0.9", "cuml>=0.9", "cugraph>=0.9"]  # GPU accelerated calculation of neighbors
-dask = ["dask[array]!=2.17.0"]  # Use the Dask parallelization engine
+dask = ["dask[array]>=2022.09.2"]  # Use the Dask parallelization engine
 dask-ml = ["dask-ml", "scanpy[dask]"]  # Dask-ML for sklearn-like API
 
 [tool.hatch.build]
@@ -166,6 +167,7 @@ nunit_attach_on = "fail"
 markers = [
     "internet: tests which rely on internet resources (enable with `--internet-tests`)",
     "gpu: tests that use a GPU (currently unused, but needs to be specified here as we import anndata.tests.helpers, which uses it)",
+    "anndata_dask_support: tests that require dask support in anndata",
 ]
 filterwarnings = [
     # legacy-api-wrap: internal use of positional API

diff --git a/scanpy/get/get.py b/scanpy/get/get.py
@@ -260,7 +260,7 @@ def obs_df(
     ... )
     >>> plotdf.columns
     Index(['CD8B', 'n_genes', 'X_umap-0', 'X_umap-1'], dtype='object')
-    >>> plotdf.plot.scatter("X_umap-0", "X_umap-1", c="CD8B")
+    >>> plotdf.plot.scatter("X_umap-0", "X_umap-1", c="CD8B")  # doctest: +SKIP
     <Axes: xlabel='X_umap-0', ylabel='X_umap-1'>
 
     Calculating mean expression for marker genes by cluster:

diff --git a/scanpy/neighbors/_backends/rapids.py b/scanpy/neighbors/_backends/rapids.py
@@ -3,8 +3,9 @@
 from typing import TYPE_CHECKING, Any, Literal
 
 import numpy as np
-from sklearn.base import BaseEstimator, TransformerMixin, check_is_fitted
+from sklearn.base import BaseEstimator, TransformerMixin
 from sklearn.exceptions import NotFittedError
+from sklearn.utils.validation import check_is_fitted
 
 from ..._settings import settings
 from ._common import TransformerChecksMixin

diff --git a/scanpy/plotting/_baseplot_class.py b/scanpy/plotting/_baseplot_class.py
@@ -347,7 +347,7 @@ def add_totals(
         >>> adata = sc.datasets.pbmc68k_reduced()
         >>> markers = {'T-cell': 'CD3D', 'B-cell': 'CD79A', 'myeloid': 'CST3'}
         >>> plot = sc.pl._baseplot_class.BasePlot(adata, markers, groupby='bulk_labels').add_totals()
-        >>> plot.plot_group_extra['counts_df']
+        >>> plot.plot_group_extra['counts_df']  # doctest: +SKIP
         bulk_labels
         CD4+/CD25 T Reg                  68
         CD4+/CD45RA+/CD25- Naive T        8

diff --git a/scanpy/plotting/_matrixplot.py b/scanpy/plotting/_matrixplot.py
@@ -168,7 +168,15 @@ def __init__(
 
         if values_df is None:
             # compute mean value
-            values_df = self.obs_tidy.groupby(level=0, observed=True).mean()
+            values_df = (
+                self.obs_tidy.groupby(level=0, observed=True)
+                .mean()
+                .loc[
+                    self.categories_order
+                    if self.categories_order is not None
+                    else self.categories
+                ]
+            )
 
             if standard_scale == "group":
                 values_df = values_df.sub(values_df.min(1), axis=0)

diff --git a/scanpy/plotting/_stacked_violin.py b/scanpy/plotting/_stacked_violin.py
@@ -383,14 +383,17 @@ def _mainplot(self, ax):
         if self.var_names_idx_order is not None:
             _matrix = _matrix.iloc[:, self.var_names_idx_order]
 
-        if self.categories_order is not None:
-            _matrix.index = _matrix.index.reorder_categories(
-                self.categories_order, ordered=True
-            )
-
         # get mean values for color and transform to color values
         # using colormap
-        _color_df = _matrix.groupby(level=0, observed=True).median()
+        _color_df = (
+            _matrix.groupby(level=0, observed=True)
+            .median()
+            .loc[
+                self.categories_order
+                if self.categories_order is not None
+                else self.categories
+            ]
+        )
         if self.are_axes_swapped:
             _color_df = _color_df.T
 

diff --git a/scanpy/plotting/_tools/scatterplots.py b/scanpy/plotting/_tools/scatterplots.py
@@ -20,6 +20,7 @@
 from matplotlib.colors import Colormap, Normalize
 from matplotlib.figure import Figure  # noqa: TCH002
 from numpy.typing import NDArray  # noqa: TCH002
+from packaging.version import Version
 
 from ... import logging as logg
 from ..._settings import settings
@@ -1247,8 +1248,10 @@
     }
     # If color_map does not have unique values, this can be slow as the
     # result is not categorical
-    color_vector = pd.Categorical(values.map(color_map, na_action="ignore"))
-
+    if Version(pd.__version__) < Version("2.1.0"):
+        color_vector = pd.Categorical(values.map(color_map))
+    else:
+        color_vector = pd.Categorical(values.map(color_map, na_action="ignore"))
     # Set color to 'missing color' for all missing values
     if color_vector.isna().any():
         color_vector = color_vector.add_categories([to_hex(na_color)])

diff --git a/scanpy/preprocessing/_highly_variable_genes.py b/scanpy/preprocessing/_highly_variable_genes.py
@@ -252,6 +252,11 @@ def _highly_variable_genes_single_batch(
     `highly_variable`, `means`, `dispersions`, and `dispersions_norm`.
     """
     X = _get_obs_rep(adata, layer=layer)
+
+    if hasattr(X, "_view_args"):  # AnnData array view
+        # For compatibility with anndata<0.9
+        X = X.copy()  # Doesn't actually copy memory, just removes View class wrapper
+
     if flavor == "seurat":
         X = X.copy()
         if "log1p" in adata.uns_keys() and adata.uns["log1p"].get("base") is not None: