diff --git a/bin/clustering.qmd b/bin/clustering.qmd index 8c62585..552c50c 100644 --- a/bin/clustering.qmd +++ b/bin/clustering.qmd @@ -26,6 +26,7 @@ import os import scanpy as sc import numpy as np import pandas as pd +from spatialdata_io.experimental import to_legacy_anndata from anndata import AnnData from umap import UMAP from matplotlib import pyplot as plt @@ -35,32 +36,13 @@ from IPython.display import display, Markdown ``` ```{python} -# Make sure we can use scanpy plots with the AnnData object exported from -# `sdata.tables`. This code is taken from the early version of https://github.com/scverse/spatialdata-io/pull/102/ -# Once that PR is merged into spatialdata-io, we should instead use -# `spatialdata_io.to_legacy_anndata(sdata)`. -def to_legacy_anndata(sdata: spatialdata.SpatialData) -> AnnData: - adata = sdata.tables["table"] - for dataset_id in adata.uns["spatial"]: - adata.uns["spatial"][dataset_id]["images"] = { - "hires": np.array(sdata.images[f"{dataset_id}_hires_image"]).transpose([1, 2, 0]), - "lowres": np.array(sdata.images[f"{dataset_id}_lowres_image"]).transpose([1, 2, 0]), - } - adata.uns["spatial"][dataset_id]["scalefactors"] = { - "tissue_hires_scalef": spatialdata.transformations.get_transformation( - sdata.shapes[dataset_id], to_coordinate_system="downscaled_hires" - ).scale[0], - "tissue_lowres_scalef": spatialdata.transformations.get_transformation( - sdata.shapes[dataset_id], to_coordinate_system="downscaled_lowres" - ).scale[0], - "spot_diameter_fullres": sdata.shapes[dataset_id]["radius"][0] * 2, - } - return adata -``` +# Keep only alphanumeric characters, underscores, and hyphens in the sample ID +sample_id = "".join( + filter(lambda x: x.isalnum() or x in ["_", "-"], meta["id"]) +) -```{python} sdata = spatialdata.read_zarr(input_sdata, ["images", "tables", "shapes"]) -adata = to_legacy_anndata(sdata) +adata = to_legacy_anndata(sdata, coordinate_system="downscaled_hires", table_name="table", include_images=True) print("Content of the SpatialData table object:") print(adata) @@ -151,8 +133,8 @@ spatial coordinates by overlaying the spots on the tissue image itself. ```{python} #| layout-nrow: 2 plt.rcParams["figure.figsize"] = (8, 8) -sc.pl.spatial(adata, img_key="hires", color="total_counts", size=1.25) -sc.pl.spatial(adata, img_key="hires", color="n_genes_by_counts", size=1.25) +sc.pl.spatial(adata, img_key="hires", library_id=f"{sample_id}_hires_image", color="total_counts", size=1.25) +sc.pl.spatial(adata, img_key="hires", library_id=f"{sample_id}_hires_image", color="n_genes_by_counts", size=1.25) ``` To gain insights into tissue organization and potential inter-cellular @@ -164,7 +146,7 @@ organization of cells. ```{python} # TODO: Can the colour bar on this figure be fit to the figure? plt.rcParams["figure.figsize"] = (7, 7) -sc.pl.spatial(adata, img_key="hires", color="clusters", size=1.25) +sc.pl.spatial(adata, img_key="hires", library_id=f"{sample_id}_hires_image", color="clusters", size=1.25) ``` ```{python} diff --git a/bin/quality_controls.qmd b/bin/quality_controls.qmd index c8c9109..e8ce955 100644 --- a/bin/quality_controls.qmd +++ b/bin/quality_controls.qmd @@ -46,6 +46,7 @@ import scanpy as sc import scipy import seaborn as sns import spatialdata +from spatialdata_io.experimental import to_legacy_anndata from anndata import AnnData from IPython.display import display, Markdown from textwrap import dedent @@ -53,32 +54,14 @@ plt.rcParams["figure.figsize"] = (6, 6) ``` ```{python} -# Make sure we can use scanpy plots with the AnnData object exported from sdata.tables -# This code is taken from the early version of https://github.com/scverse/spatialdata-io/pull/102/ -# Once the PR will be merged in spatialdata-io, we should use spatialdata_io.to_legacy_anndata(sdata). -def to_legacy_anndata(sdata: spatialdata.SpatialData) -> AnnData: - adata = sdata.tables["table"] - for dataset_id in adata.uns["spatial"]: - adata.uns["spatial"][dataset_id]["images"] = { - "hires": np.array(sdata.images[f"{dataset_id}_hires_image"]).transpose([1, 2, 0]), - "lowres": np.array(sdata.images[f"{dataset_id}_lowres_image"]).transpose([1, 2, 0]), - } - adata.uns["spatial"][dataset_id]["scalefactors"] = { - "tissue_hires_scalef": spatialdata.transformations.get_transformation( - sdata.shapes[dataset_id], to_coordinate_system="downscaled_hires" - ).scale[0], - "tissue_lowres_scalef": spatialdata.transformations.get_transformation( - sdata.shapes[dataset_id], to_coordinate_system="downscaled_lowres" - ).scale[0], - "spot_diameter_fullres": sdata.shapes[dataset_id]["radius"][0] * 2, - } - return adata -``` +# Keep only alphanumeric characters, underscores, and hyphens in the sample ID +sample_id = "".join( + filter(lambda x: x.isalnum() or x in ["_", "-"], meta["id"]) +) -```{python} # Read the data sdata = spatialdata.read_zarr(input_sdata, ["images", "tables", "shapes"]) -adata = to_legacy_anndata(sdata) +adata = to_legacy_anndata(sdata, coordinate_system="downscaled_hires", table_name="table", include_images=True) # Convert X matrix from CSR to CSC dense matrix for output compatibility adata.X = scipy.sparse.csc_matrix(adata.X) @@ -132,8 +115,8 @@ spatial patterns may be discerned: ```{python} #| layout-nrow: 2 -sc.pl.spatial(adata, color = ["total_counts", "n_genes_by_counts"], size=1.25) -sc.pl.spatial(adata, color = ["pct_counts_mt", "pct_counts_ribo", "pct_counts_hb"], size=1.25) +sc.pl.spatial(adata, img_key="hires", library_id=f"{sample_id}_hires_image", color = ["total_counts", "n_genes_by_counts"], size=1.25) +sc.pl.spatial(adata, img_key="hires", library_id=f"{sample_id}_hires_image", color = ["pct_counts_mt", "pct_counts_ribo", "pct_counts_hb"], size=1.25) ``` ## Scatter plots @@ -169,7 +152,7 @@ are uninformative and are thus removed. adata.obs["in_tissue_str"] = ["In tissue" if x == 1 else "Outside tissue" for x in adata.obs["in_tissue"]] # Plot spots inside tissue -sc.pl.spatial(adata, color=["in_tissue_str"], title="Spots in tissue", size=1.25) +sc.pl.spatial(adata, img_key="hires", library_id=f"{sample_id}_hires_image", color=["in_tissue_str"], title="Spots in tissue", size=1.25) del adata.obs["in_tissue_str"] # Remove spots outside tissue and print results @@ -297,6 +280,9 @@ sc.pl.violin(adata, ['pct_counts_mt', 'pct_counts_ribo', 'pct_counts_hb'], ```{python} del sdata.tables["table"] sdata.tables["table"] = adata +# Filtering spatialdata elements based on the anndata elements: +matched_element, _ = spatialdata.match_element_to_table(sdata, sample_id, "table") +sdata[sample_id] = matched_element[sample_id] sdata.write(os.path.join(artifact_dir, output_sdata)) ``` diff --git a/bin/read_data.py b/bin/read_data.py index c0ed0f4..50ac98e 100755 --- a/bin/read_data.py +++ b/bin/read_data.py @@ -24,12 +24,26 @@ default=None, help="Output spatialdata zarr path.", ) + parser.add_argument( + "--sampleID", + metavar="sampleID", + type=str, + default=None, + help="Sample ID.", + ) args = parser.parse_args() + # Keep only alphanumeric characters, underscores, and hyphens in the sample ID + args.sampleID = "".join( + filter(lambda x: x.isalnum() or x in ["_", "-"], args.sampleID) + ) + # Read Visium data spatialdata = spatialdata_io.visium( - args.SRCountDir, counts_file="raw_feature_bc_matrix.h5", dataset_id="visium" + args.SRCountDir, + counts_file="raw_feature_bc_matrix.h5", + dataset_id=args.sampleID, ) # Write raw spatialdata to file diff --git a/bin/spatially_variable_genes.qmd b/bin/spatially_variable_genes.qmd index 5adc45e..60e4686 100644 --- a/bin/spatially_variable_genes.qmd +++ b/bin/spatially_variable_genes.qmd @@ -24,38 +24,16 @@ import pandas as pd import scanpy as sc import squidpy as sq import spatialdata +from spatialdata_io.experimental import to_legacy_anndata from anndata import AnnData from matplotlib import pyplot as plt ``` -```{python} -# Make sure we can use scanpy plots with the AnnData object exported from sdata.tables -# This code is taken from the early version of https://github.com/scverse/spatialdata-io/pull/102/ -# Once the PR will be merged in spatialdata-io, we should use spatialdata_io.to_legacy_anndata(sdata). -def to_legacy_anndata(sdata: spatialdata.SpatialData) -> AnnData: - adata = sdata.tables["table"] - for dataset_id in adata.uns["spatial"]: - adata.uns["spatial"][dataset_id]["images"] = { - "hires": np.array(sdata.images[f"{dataset_id}_hires_image"]).transpose([1, 2, 0]), - "lowres": np.array(sdata.images[f"{dataset_id}_lowres_image"]).transpose([1, 2, 0]), - } - adata.uns["spatial"][dataset_id]["scalefactors"] = { - "tissue_hires_scalef": spatialdata.transformations.get_transformation( - sdata.shapes[dataset_id], to_coordinate_system="downscaled_hires" - ).scale[0], - "tissue_lowres_scalef": spatialdata.transformations.get_transformation( - sdata.shapes[dataset_id], to_coordinate_system="downscaled_lowres" - ).scale[0], - "spot_diameter_fullres": sdata.shapes[dataset_id]["radius"][0] * 2, - } - return adata -``` - ```{python} # Read data sdata = spatialdata.read_zarr(input_sdata, ["images", "tables", "shapes"]) +adata = to_legacy_anndata(sdata, coordinate_system="downscaled_hires", table_name="table", include_images=True) -adata = to_legacy_anndata(sdata) print("Content of the AnnData object:") print(adata) diff --git a/env/environment.yml b/env/environment.yml index ef9221c..61da354 100644 --- a/env/environment.yml +++ b/env/environment.yml @@ -2,19 +2,21 @@ channels: - conda-forge - bioconda dependencies: - - python=3.10 + - gcc=13.2.0 + - gxx=13.2.0 + - imagecodecs=2024.1.1 - jupyter=1.0.0 - leidenalg=0.9.1 + - libgdal=3.8.3 - papermill=2.3.4 - pip=23.0.1 - - gcc=13.2.0 - - libgdal=3.8.3 - - gxx=13.2.0 - - imagecodecs=2024.1.1 + - python=3.10 - pip: + - harmonypy==0.0.10 + - scanorama==1.7.4 - scanpy==1.10.0 - - scipy==1.12.0 - squidpy==1.4.1 - - spatialdata==0.1.2 - - spatialdata-io==0.1.2 - - spatialdata-plot==0.2.1 + - scipy==1.12.0 + - spatialdata-io==0.1.5 + - spatialdata-plot==0.2.6 + - spatialdata==0.2.3 diff --git a/modules/local/read_data.nf b/modules/local/read_data.nf index d0f3d30..e61443a 100644 --- a/modules/local/read_data.nf +++ b/modules/local/read_data.nf @@ -38,6 +38,7 @@ process READ_DATA { # Execute read data script read_data.py \\ --SRCountDir "${meta.id}" \\ + --sampleID "${meta.id}" \\ --output_sdata sdata_raw.zarr cat <<-END_VERSIONS > versions.yml diff --git a/tests/pipeline/test_downstream.nf.test.snap b/tests/pipeline/test_downstream.nf.test.snap index b99dead..0ff4c8d 100644 --- a/tests/pipeline/test_downstream.nf.test.snap +++ b/tests/pipeline/test_downstream.nf.test.snap @@ -1,12 +1,12 @@ { "nf_core_pipeline_software_mqc_versions.yml": { "content": [ - "{CLUSTERING={quarto=1.3.450, papermill=null}, QUALITY_CONTROLS={quarto=1.3.450, papermill=null}, READ_DATA={spatialdata_io=0.1.2}, SPACERANGER_UNTAR_REFERENCE={untar=1.34}, SPATIALLY_VARIABLE_GENES={quarto=1.3.450, papermill=null}, UNTAR_DOWNSTREAM_INPUT={untar=1.34}, Workflow={nf-core/spatialvi=v1.0dev}}" + "{CLUSTERING={quarto=1.3.450, papermill=null}, QUALITY_CONTROLS={quarto=1.3.450, papermill=null}, READ_DATA={spatialdata_io=0.1.5}, SPACERANGER_UNTAR_REFERENCE={untar=1.34}, SPATIALLY_VARIABLE_GENES={quarto=1.3.450, papermill=null}, UNTAR_DOWNSTREAM_INPUT={untar=1.34}, Workflow={nf-core/spatialvi=v1.0dev}}" ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-03-19T18:46:59.035976" + "timestamp": "2024-10-26T11:57:17.726652" } -} +} \ No newline at end of file diff --git a/tests/pipeline/test_spaceranger_ffpe_v1.nf.test.snap b/tests/pipeline/test_spaceranger_ffpe_v1.nf.test.snap index c08cf37..fbd26d1 100644 --- a/tests/pipeline/test_spaceranger_ffpe_v1.nf.test.snap +++ b/tests/pipeline/test_spaceranger_ffpe_v1.nf.test.snap @@ -1,12 +1,12 @@ { "nf_core_pipeline_software_mqc_versions.yml": { "content": [ - "{CLUSTERING={quarto=1.3.450, papermill=null}, FASTQC={fastqc=0.12.1}, QUALITY_CONTROLS={quarto=1.3.450, papermill=null}, READ_DATA={spatialdata_io=0.1.2}, SPACERANGER_COUNT={spaceranger=3.0.0}, SPACERANGER_UNTAR_REFERENCE={untar=1.34}, SPATIALLY_VARIABLE_GENES={quarto=1.3.450, papermill=null}, UNTAR_SPACERANGER_INPUT={untar=1.34}, Workflow={nf-core/spatialvi=v1.0dev}}" + "{CLUSTERING={quarto=1.3.450, papermill=null}, FASTQC={fastqc=0.12.1}, QUALITY_CONTROLS={quarto=1.3.450, papermill=null}, READ_DATA={spatialdata_io=0.1.5}, SPACERANGER_COUNT={spaceranger=3.0.0}, SPACERANGER_UNTAR_REFERENCE={untar=1.34}, SPATIALLY_VARIABLE_GENES={quarto=1.3.450, papermill=null}, UNTAR_SPACERANGER_INPUT={untar=1.34}, Workflow={nf-core/spatialvi=v1.0dev}}" ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.02.0" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-04-04T10:08:16.975105" + "timestamp": "2024-10-26T12:14:20.797669" } -} +} \ No newline at end of file diff --git a/tests/pipeline/test_spaceranger_ffpe_v2_cytassist.nf.test.snap b/tests/pipeline/test_spaceranger_ffpe_v2_cytassist.nf.test.snap index a245c7f..fa8dc89 100644 --- a/tests/pipeline/test_spaceranger_ffpe_v2_cytassist.nf.test.snap +++ b/tests/pipeline/test_spaceranger_ffpe_v2_cytassist.nf.test.snap @@ -1,12 +1,12 @@ { "nf_core_pipeline_software_mqc_versions.yml": { "content": [ - "{CLUSTERING={quarto=1.3.450, papermill=null}, FASTQC={fastqc=0.12.1}, QUALITY_CONTROLS={quarto=1.3.450, papermill=null}, READ_DATA={spatialdata_io=0.1.2}, SPACERANGER_COUNT={spaceranger=3.0.0}, SPACERANGER_UNTAR_REFERENCE={untar=1.34}, SPATIALLY_VARIABLE_GENES={quarto=1.3.450, papermill=null}, UNTAR_SPACERANGER_INPUT={untar=1.34}, Workflow={nf-core/spatialvi=v1.0dev}}" + "{CLUSTERING={quarto=1.3.450, papermill=null}, FASTQC={fastqc=0.12.1}, QUALITY_CONTROLS={quarto=1.3.450, papermill=null}, READ_DATA={spatialdata_io=0.1.5}, SPACERANGER_COUNT={spaceranger=3.0.0}, SPACERANGER_UNTAR_REFERENCE={untar=1.34}, SPATIALLY_VARIABLE_GENES={quarto=1.3.450, papermill=null}, UNTAR_SPACERANGER_INPUT={untar=1.34}, Workflow={nf-core/spatialvi=v1.0dev}}" ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.02.0" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-04-04T10:42:54.76102" + "timestamp": "2024-10-26T12:39:52.146218" } -} +} \ No newline at end of file