Merge pull request #91 from nf-core/fix-legacy-anndata

Use spatialdata_io.experimental.to_legacy_anndata
nf-core · Oct 26, 2024 · 0b1c35a · 0b1c35a
2 parents aa3472a + a510eec
commit 0b1c35a
Show file tree

Hide file tree

Showing 9 changed files with 65 additions and 102 deletions.
diff --git a/bin/clustering.qmd b/bin/clustering.qmd
@@ -26,6 +26,7 @@ import os
 import scanpy as sc
 import numpy as np
 import pandas as pd
+from spatialdata_io.experimental import to_legacy_anndata
 from anndata import AnnData
 from umap import UMAP
 from matplotlib import pyplot as plt
@@ -35,32 +36,13 @@ from IPython.display import display, Markdown
 ```
 
 ```{python}
-# Make sure we can use scanpy plots with the AnnData object exported from
-# `sdata.tables`. This code is taken from the early version of https://github.com/scverse/spatialdata-io/pull/102/
-# Once that PR is merged into spatialdata-io, we should instead use
-# `spatialdata_io.to_legacy_anndata(sdata)`.
-def to_legacy_anndata(sdata: spatialdata.SpatialData) -> AnnData:
-    adata = sdata.tables["table"]
-    for dataset_id in adata.uns["spatial"]:
-        adata.uns["spatial"][dataset_id]["images"] = {
-            "hires": np.array(sdata.images[f"{dataset_id}_hires_image"]).transpose([1, 2, 0]),
-            "lowres": np.array(sdata.images[f"{dataset_id}_lowres_image"]).transpose([1, 2, 0]),
-        }
-        adata.uns["spatial"][dataset_id]["scalefactors"] = {
-            "tissue_hires_scalef": spatialdata.transformations.get_transformation(
-                sdata.shapes[dataset_id], to_coordinate_system="downscaled_hires"
-            ).scale[0],
-            "tissue_lowres_scalef": spatialdata.transformations.get_transformation(
-                sdata.shapes[dataset_id], to_coordinate_system="downscaled_lowres"
-            ).scale[0],
-            "spot_diameter_fullres": sdata.shapes[dataset_id]["radius"][0] * 2,
-        }
-    return adata
-```
+# Keep only alphanumeric characters, underscores, and hyphens in the sample ID
+sample_id = "".join(
+    filter(lambda x: x.isalnum() or x in ["_", "-"], meta["id"])
+)
 
-```{python}
 sdata = spatialdata.read_zarr(input_sdata, ["images", "tables", "shapes"])
-adata = to_legacy_anndata(sdata)
+adata = to_legacy_anndata(sdata, coordinate_system="downscaled_hires", table_name="table", include_images=True)
 
 print("Content of the SpatialData table object:")
 print(adata)
@@ -151,8 +133,8 @@ spatial coordinates by overlaying the spots on the tissue image itself.
 ```{python}
 #| layout-nrow: 2
 plt.rcParams["figure.figsize"] = (8, 8)
-sc.pl.spatial(adata, img_key="hires", color="total_counts", size=1.25)
-sc.pl.spatial(adata, img_key="hires", color="n_genes_by_counts", size=1.25)
+sc.pl.spatial(adata, img_key="hires", library_id=f"{sample_id}_hires_image", color="total_counts", size=1.25)
+sc.pl.spatial(adata, img_key="hires", library_id=f"{sample_id}_hires_image", color="n_genes_by_counts", size=1.25)
 ```
 
 To gain insights into tissue organization and potential inter-cellular
@@ -164,7 +146,7 @@ organization of cells.
 ```{python}
 # TODO: Can the colour bar on this figure be fit to the figure?
 plt.rcParams["figure.figsize"] = (7, 7)
-sc.pl.spatial(adata, img_key="hires", color="clusters", size=1.25)
+sc.pl.spatial(adata, img_key="hires", library_id=f"{sample_id}_hires_image", color="clusters", size=1.25)
 ```
 
 ```{python}

diff --git a/bin/quality_controls.qmd b/bin/quality_controls.qmd
@@ -46,39 +46,22 @@ import scanpy as sc
 import scipy
 import seaborn as sns
 import spatialdata
+from spatialdata_io.experimental import to_legacy_anndata
 from anndata import AnnData
 from IPython.display import display, Markdown
 from textwrap import dedent
 plt.rcParams["figure.figsize"] = (6, 6)
 ```
 
 ```{python}
-# Make sure we can use scanpy plots with the AnnData object exported from sdata.tables
-# This code is taken from the early version of https://github.com/scverse/spatialdata-io/pull/102/
-# Once the PR will be merged in spatialdata-io, we should use spatialdata_io.to_legacy_anndata(sdata).
-def to_legacy_anndata(sdata: spatialdata.SpatialData) -> AnnData:
-    adata = sdata.tables["table"]
-    for dataset_id in adata.uns["spatial"]:
-        adata.uns["spatial"][dataset_id]["images"] = {
-            "hires": np.array(sdata.images[f"{dataset_id}_hires_image"]).transpose([1, 2, 0]),
-            "lowres": np.array(sdata.images[f"{dataset_id}_lowres_image"]).transpose([1, 2, 0]),
-        }
-        adata.uns["spatial"][dataset_id]["scalefactors"] = {
-            "tissue_hires_scalef": spatialdata.transformations.get_transformation(
-                sdata.shapes[dataset_id], to_coordinate_system="downscaled_hires"
-            ).scale[0],
-            "tissue_lowres_scalef": spatialdata.transformations.get_transformation(
-                sdata.shapes[dataset_id], to_coordinate_system="downscaled_lowres"
-            ).scale[0],
-            "spot_diameter_fullres": sdata.shapes[dataset_id]["radius"][0] * 2,
-        }
-    return adata
-```
+# Keep only alphanumeric characters, underscores, and hyphens in the sample ID
+sample_id = "".join(
+    filter(lambda x: x.isalnum() or x in ["_", "-"], meta["id"])
+)
 
-```{python}
 # Read the data
 sdata = spatialdata.read_zarr(input_sdata, ["images", "tables", "shapes"])
-adata = to_legacy_anndata(sdata)
+adata = to_legacy_anndata(sdata, coordinate_system="downscaled_hires", table_name="table", include_images=True)
 
 # Convert X matrix from CSR to CSC dense matrix for output compatibility
 adata.X = scipy.sparse.csc_matrix(adata.X)
@@ -132,8 +115,8 @@ spatial patterns may be discerned:
 
 ```{python}
 #| layout-nrow: 2
-sc.pl.spatial(adata, color = ["total_counts", "n_genes_by_counts"], size=1.25)
-sc.pl.spatial(adata, color = ["pct_counts_mt", "pct_counts_ribo", "pct_counts_hb"], size=1.25)
+sc.pl.spatial(adata, img_key="hires", library_id=f"{sample_id}_hires_image", color = ["total_counts", "n_genes_by_counts"], size=1.25)
+sc.pl.spatial(adata, img_key="hires", library_id=f"{sample_id}_hires_image", color = ["pct_counts_mt", "pct_counts_ribo", "pct_counts_hb"], size=1.25)
 ```
 
 ## Scatter plots
@@ -169,7 +152,7 @@ are uninformative and are thus removed.
 adata.obs["in_tissue_str"] = ["In tissue" if x == 1 else "Outside tissue" for x in adata.obs["in_tissue"]]
 
 # Plot spots inside tissue
-sc.pl.spatial(adata, color=["in_tissue_str"], title="Spots in tissue", size=1.25)
+sc.pl.spatial(adata, img_key="hires", library_id=f"{sample_id}_hires_image", color=["in_tissue_str"], title="Spots in tissue", size=1.25)
 del adata.obs["in_tissue_str"]
 
 # Remove spots outside tissue and print results
@@ -297,6 +280,9 @@ sc.pl.violin(adata, ['pct_counts_mt', 'pct_counts_ribo', 'pct_counts_hb'],
 ```{python}
 del sdata.tables["table"]
 sdata.tables["table"] = adata
+# Filtering spatialdata elements based on the anndata elements:
+matched_element, _ = spatialdata.match_element_to_table(sdata, sample_id, "table")
+sdata[sample_id] = matched_element[sample_id]
 sdata.write(os.path.join(artifact_dir, output_sdata))
 ```
 

diff --git a/bin/read_data.py b/bin/read_data.py
@@ -24,12 +24,26 @@
         default=None,
         help="Output spatialdata zarr path.",
     )
+    parser.add_argument(
+        "--sampleID",
+        metavar="sampleID",
+        type=str,
+        default=None,
+        help="Sample ID.",
+    )
 
     args = parser.parse_args()
 
+    # Keep only alphanumeric characters, underscores, and hyphens in the sample ID
+    args.sampleID = "".join(
+        filter(lambda x: x.isalnum() or x in ["_", "-"], args.sampleID)
+    )
+
     # Read Visium data
     spatialdata = spatialdata_io.visium(
-        args.SRCountDir, counts_file="raw_feature_bc_matrix.h5", dataset_id="visium"
+        args.SRCountDir,
+        counts_file="raw_feature_bc_matrix.h5",
+        dataset_id=args.sampleID,
     )
 
     # Write raw spatialdata to file

diff --git a/bin/spatially_variable_genes.qmd b/bin/spatially_variable_genes.qmd
@@ -24,38 +24,16 @@ import pandas as pd
 import scanpy as sc
 import squidpy as sq
 import spatialdata
+from spatialdata_io.experimental import to_legacy_anndata
 from anndata import AnnData
 from matplotlib import pyplot as plt
 ```
 
-```{python}
-# Make sure we can use scanpy plots with the AnnData object exported from sdata.tables
-# This code is taken from the early version of https://github.com/scverse/spatialdata-io/pull/102/
-# Once the PR will be merged in spatialdata-io, we should use spatialdata_io.to_legacy_anndata(sdata).
-def to_legacy_anndata(sdata: spatialdata.SpatialData) -> AnnData:
-    adata = sdata.tables["table"]
-    for dataset_id in adata.uns["spatial"]:
-        adata.uns["spatial"][dataset_id]["images"] = {
-            "hires": np.array(sdata.images[f"{dataset_id}_hires_image"]).transpose([1, 2, 0]),
-            "lowres": np.array(sdata.images[f"{dataset_id}_lowres_image"]).transpose([1, 2, 0]),
-        }
-        adata.uns["spatial"][dataset_id]["scalefactors"] = {
-            "tissue_hires_scalef": spatialdata.transformations.get_transformation(
-                sdata.shapes[dataset_id], to_coordinate_system="downscaled_hires"
-            ).scale[0],
-            "tissue_lowres_scalef": spatialdata.transformations.get_transformation(
-                sdata.shapes[dataset_id], to_coordinate_system="downscaled_lowres"
-            ).scale[0],
-            "spot_diameter_fullres": sdata.shapes[dataset_id]["radius"][0] * 2,
-        }
-    return adata
-```
-
 ```{python}
 # Read data
 sdata = spatialdata.read_zarr(input_sdata, ["images", "tables", "shapes"])
+adata = to_legacy_anndata(sdata, coordinate_system="downscaled_hires", table_name="table", include_images=True)
 
-adata = to_legacy_anndata(sdata)
 print("Content of the AnnData object:")
 print(adata)
 

diff --git a/env/environment.yml b/env/environment.yml
@@ -2,19 +2,21 @@ channels:
   - conda-forge
   - bioconda
 dependencies:
-  - python=3.10
+  - gcc=13.2.0
+  - gxx=13.2.0
+  - imagecodecs=2024.1.1
   - jupyter=1.0.0
   - leidenalg=0.9.1
+  - libgdal=3.8.3
   - papermill=2.3.4
   - pip=23.0.1
-  - gcc=13.2.0
-  - libgdal=3.8.3
-  - gxx=13.2.0
-  - imagecodecs=2024.1.1
+  - python=3.10
   - pip:
+      - harmonypy==0.0.10
+      - scanorama==1.7.4
       - scanpy==1.10.0
-      - scipy==1.12.0
       - squidpy==1.4.1
-      - spatialdata==0.1.2
-      - spatialdata-io==0.1.2
-      - spatialdata-plot==0.2.1
+      - scipy==1.12.0
+      - spatialdata-io==0.1.5
+      - spatialdata-plot==0.2.6
+      - spatialdata==0.2.3
diff --git a/modules/local/read_data.nf b/modules/local/read_data.nf
@@ -38,6 +38,7 @@ process READ_DATA {
     # Execute read data script
     read_data.py \\
         --SRCountDir "${meta.id}" \\
+        --sampleID "${meta.id}" \\
         --output_sdata sdata_raw.zarr
 
     cat <<-END_VERSIONS > versions.yml

diff --git a/tests/pipeline/test_downstream.nf.test.snap b/tests/pipeline/test_downstream.nf.test.snap
diff --git a/tests/pipeline/test_spaceranger_ffpe_v1.nf.test.snap b/tests/pipeline/test_spaceranger_ffpe_v1.nf.test.snap
diff --git a/tests/pipeline/test_spaceranger_ffpe_v2_cytassist.nf.test.snap b/tests/pipeline/test_spaceranger_ffpe_v2_cytassist.nf.test.snap