diff --git a/.gitattributes b/.gitattributes
index 7a2dabc..31ba574 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -2,3 +2,4 @@
*.nf.test linguist-language=nextflow
modules/nf-core/** linguist-generated
subworkflows/nf-core/** linguist-generated
+tests/**/*nf.test.snap linguist-generated
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a5e811f..6972735 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -5,11 +5,16 @@ on:
branches:
- dev
pull_request:
+ branches:
+ - dev
+ - master
release:
types: [published]
env:
NXF_ANSI_LOG: false
+ CAPSULE_LOG: none
+ NFTEST_VER: "0.7.3"
concurrency:
group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}"
@@ -22,25 +27,38 @@ jobs:
if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/spatialvi') }}"
runs-on: ubuntu-latest
strategy:
+ fail-fast: false
matrix:
NXF_VER:
- "23.04.0"
- "latest-everything"
+ test:
+ - tests/pipeline/test_spaceranger_ffpe_v1.nf.test
+ - tests/pipeline/test_spaceranger_ffpe_v2_cytassist.nf.test
+ - tests/pipeline/test_downstream.nf.test
steps:
- name: Check out pipeline code
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4
+ # Install Nextflow
- name: Install Nextflow
uses: nf-core/setup-nextflow@v2
with:
version: "${{ matrix.NXF_VER }}"
- - name: Disk space cleanup
- uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
+ # Install nf-test
+ - name: Install nf-test
+ run: |
+ wget -qO- https://code.askimed.com/install/nf-test | bash -s $NFTEST_VER
+ sudo mv nf-test /usr/local/bin/
+
+ # Run nf-test
+ - name: Run nf-test
+ run: nf-test test --profile=test,docker --tap=test.tap ${{ matrix.test }}
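+ # The same command can also be run locally on a single test file, for example:
+ # nf-test test --profile=test,docker tests/pipeline/test_downstream.nf.test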
- - name: Run pipeline with test data
- # TODO nf-core: You can customise CI pipeline run tests as required
- # For example: adding multiple test runs with different parameters
- # Remember that you can parallelise this by using strategy.matrix
+ # If the test fails, output the software_versions.yml using the 'batcat' utility
+ - name: Output log on failure
+ if: failure()
run: |
- nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results
+ sudo apt install bat > /dev/null
+ batcat --decorations=always --color=always .nf-test/tests/*/output/pipeline_info/software_versions.yml
diff --git a/.gitignore b/.gitignore
index 5124c9a..0699257 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,9 @@ results/
testing/
testing*
*.pyc
+log
+reports
+.nf-test/
+nf-test
+.nf-test*
+test-datasets
diff --git a/.nf-core.yml b/.nf-core.yml
index e0b85a7..0697501 100644
--- a/.nf-core.yml
+++ b/.nf-core.yml
@@ -1,2 +1,11 @@
repository_type: pipeline
nf_core_version: "2.14.1"
+lint:
+ actions_ci: False
+ files_exist:
+ - conf/igenomes.config
+ files_unchanged:
+ - .gitattributes
+ - assets/nf-core-spatialvi_logo_light.png
+ - docs/images/nf-core-spatialvi_logo_light.png
+ - docs/images/nf-core-spatialvi_logo_dark.png
\ No newline at end of file
diff --git a/.prettierignore b/.prettierignore
index 437d763..dd856fc 100644
--- a/.prettierignore
+++ b/.prettierignore
@@ -10,3 +10,6 @@ testing/
testing*
*.pyc
bin/
+test-datasets/
+.nf-test/
+*.scss
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7af7cd7..84cc97d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,14 +3,67 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-## v1.0dev - [date]
+## [Unreleased]
-Initial release of nf-core/spatialvi, created with the [nf-core](https://nf-co.re/) template.
+Initial release of nf-core/spatialvi, created with the
+[nf-core](https://nf-co.re/) template. This marks the point at which the
+pipeline development was moved to nf-core and NBIS. The pipeline has undergone
+several iterations regarding its functionality and content; there are a
+significant number of changes, of which not all are listed here. In summary, the
+pipeline contains best-practice processing and analyses of pre- and post-Space
+Ranger-processed data, including quality controls, normalisation, dimensionality
+reduction, clustering, differential expression testing as well as output files
+compatible with further downstream analyses and/or exploration in _e.g._
+[TissUUmaps](https://tissuumaps.github.io/) or bespoke user code.
### `Added`
+- Add MultiQC support for Space Ranger outputs [[#70](https://github.com/nf-core/spatialvi/pull/70)]
+- Use the QUARTONOTEBOOK nf-core module instead of local Quarto-based modules [[#68](https://github.com/nf-core/spatialvi/pull/68)]
+- Add support for SpatialData [[#67](https://github.com/nf-core/spatialvi/pull/67)]
+- Add a custom nf-core Quarto template for the downstream analysis reports [[#64](https://github.com/nf-core/spatialvi/pull/64)]
+- Allow input directories `fastq_dir` and `spaceranger_dir` to be specified as tar archives (`.tar.gz`)
+- Add a check to make sure that there are spots left after filtering [[#46](https://github.com/nf-core/spatialvi/issues/46)]
+- Implement tests with nf-test [[#42](https://github.com/nf-core/spatialvi/pull/42)]
+- Replace custom code to download reference with `untar` module [[#44](https://github.com/nf-core/spatialvi/pull/44)]
+- Embed resources in quarto reports [[#43](https://github.com/nf-core/spatialvi/pull/43)]
+- Use a samplesheet for input specification [[#30](https://github.com/nf-core/spatialvi/pull/30), [#31](https://github.com/nf-core/spatialvi/pull/31) and [#45](https://github.com/nf-core/spatialvi/pull/45)]
+- Add Space Ranger pre-processing as an optional pipeline step using the `spaceranger` nf-core module [[#17](https://github.com/nf-core/spatialvi/pull/17) and [#45](https://github.com/nf-core/spatialvi/pull/45)]
+- Add `env/` directory with pipeline-specific container and Conda environment specifications [[#17](https://github.com/nf-core/spatialvi/pull/17) and [#28](https://github.com/nf-core/spatialvi/pull/28)]
+- Use a more standardised practice to find mitochondrial genes [[#30](https://github.com/nf-core/spatialvi/pull/30)]
+- Make pipeline output compatible with TissUUmaps [[#31](https://github.com/nf-core/spatialvi/pull/31)]
+- Add custom Quarto-based reports for all downstream processing [[#31](https://github.com/nf-core/spatialvi/pull/31)]
+
### `Fixed`
+- [#51](https://github.com/nf-core/spatialvi/issues/51): Fix version export of `leidenalg` and `SpatialDE` Python modules
+- [#38](https://github.com/nf-core/spatialvi/issues/38): Specify manual alignment files in samplesheet
+- [#20](https://github.com/nf-core/spatialvi/issues/20) and [#22](https://github.com/nf-core/spatialvi/issues/22): Add missing Groovy module
+- [#53](https://github.com/nf-core/spatialvi/pull/53): Use Ensembl IDs as index in adata.var and fix related
+ issue with SpatialDE
+
### `Dependencies`
-### `Deprecated`
+Note, since the pipeline is using Nextflow DSL2, each process will be run
+with its own [Biocontainer](https://biocontainers.pro/#/registry). This means
+that on occasion it is entirely possible for the pipeline to be using different
+versions of the same tool.
+
+| Dependency | Version |
+| ----------- | ------- |
+| `SpatialDE` | 1.1.3 |
+| `leidenalg` | 0.9.1 |
+| `python` | 3.12.0 |
+| `quarto` | 1.3.302 |
+| `scanpy` | 1.9.3 |
+
+### `Removed`
+
+- Streamline pipeline for basic ST data processing; remove SC processing and deconvolution (for now) [[#31](https://github.com/nf-core/spatialvi/pull/31)]
+
+## v0.1.0 - 2023-03-31
+
+Initial release of nf-core/spatialvi, created with the
+[nf-core](https://nf-co.re/) template by the Jackson Laboratory contributors
+(see `README.md` for details).
diff --git a/CITATIONS.md b/CITATIONS.md
index b15a7c6..5f048a5 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -10,6 +10,10 @@
## Pipeline tools
+- [AnnData](https://github.com/scverse/anndata)
+
+ > Virshup I, Rybakov S, Theis FJ, Angerer P, Wolf FA. bioRxiv 2021.12.16.473007; doi: https://doi.org/10.1101/2021.12.16.473007
+
- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
> Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online].
@@ -18,6 +22,26 @@
> Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
+- [Quarto](https://quarto.org/)
+
+ > Allaire J, Teague C, Scheidegger C, Xie Y, Dervieux C. Quarto (2022). doi: 10.5281/zenodo.5960048
+
+- [Scanpy](https://github.com/theislab/scanpy)
+
+ > Wolf F, Angerer P, Theis F. SCANPY: large-scale single-cell gene expression data analysis. Genome Biol 19, 15 (2018). doi: https://doi.org/10.1186/s13059-017-1382-0
+
+- [Space Ranger](https://www.10xgenomics.com/support/software/space-ranger)
+
+ > 10x Genomics Space Ranger 2.1.0 [Online]
+
+- [SpatialData](https://www.biorxiv.org/content/10.1101/2023.05.05.539647v1)
+
+ > Marconato L, Palla G, Yamauchi K, Virshup I, Heidari E, Treis T, Toth M, Shrestha R, Vöhringer H, Huber W, Gerstung M, Moore J, Theis F, Stegle O. SpatialData: an open and universal data framework for spatial omics. bioRxiv 2023.05.05.539647; doi: https://doi.org/10.1101/2023.05.05.539647
+
+- [Squidpy](https://www.nature.com/articles/s41592-021-01358-2)
+
+ > Palla G, Spitzer H, Klein M et al. Squidpy: a scalable framework for spatial omics analysis. Nat Methods 19, 171–178 (2022). doi: https://doi.org/10.1038/s41592-021-01358-2
+
## Software packaging/containerisation tools
- [Anaconda](https://anaconda.com)
diff --git a/README.md b/README.md
index b7a0472..637d261 100644
--- a/README.md
+++ b/README.md
@@ -19,45 +19,45 @@
## Introduction
-**nf-core/spatialvi** is a bioinformatics pipeline that ...
-
-
-
-
-
-
-1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))
-2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))
+**nf-core/spatialvi** is a bioinformatics analysis pipeline for Visium spatial
+transcriptomics data from 10x Genomics. It can process and analyse spatial data
+either directly from raw data by running [Space Ranger](https://support.10xgenomics.com/spatial-gene-expression/software/pipelines/latest/what-is-space-ranger)
+or from data already processed by Space Ranger. The pipeline currently consists of
+the following steps:
+
+
+
+
+
+0. Raw data processing with Space Ranger (optional)
+1. Quality controls and filtering
+2. Normalisation
+3. Dimensionality reduction and clustering
+4. Differential gene expression testing
+
+The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool
+to run tasks across multiple compute infrastructures in a very portable manner.
+It uses Docker/Singularity containers making installation trivial and results
+highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html)
+implementation of this pipeline uses one container per process which makes it
+much easier to maintain and update software dependencies. Where possible, these
+processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules)
+in order to make them available to all nf-core pipelines, and to everyone within
+the Nextflow community!
+
+On release, automated continuous integration tests run the pipeline on a
+full-sized dataset on the AWS cloud infrastructure. This ensures that the
+pipeline runs on AWS, has sensible resource allocation defaults set to run on
+real-world datasets, and permits the persistent storage of results to benchmark
+between pipeline releases and other analysis sources. The results obtained from
+the full-sized test can be viewed on the [nf-core website](https://nf-co.re/spatialvi/results).
## Usage
> [!NOTE]
> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.
-
-
-Now, you can run the pipeline using:
-
-
+You can run the pipeline using:
```bash
nextflow run nf-core/spatialvi \
@@ -67,8 +67,7 @@ nextflow run nf-core/spatialvi \
```
> [!WARNING]
-> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;
-> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).
+> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).
For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/spatialvi/usage) and the [parameter documentation](https://nf-co.re/spatialvi/parameters).
@@ -80,11 +79,22 @@ For more details about the output files and reports, please refer to the
## Credits
-nf-core/spatialvi was originally written by Erik Fasterius, Christophe Avenel, Sergii Domanskyi, Jeffrey Chuang, Anuj Srivastava.
+nf-core/spatialvi was originally developed by the Jackson
+Laboratory<sup>1</sup>, up to the [0.1.0](https://github.com/nf-core/spatialvi/releases/tag/0.1.0)
+tag. It was further developed in a collaboration between the [National
+Bioinformatics Infrastructure Sweden](https://nbis.se/) and [National Genomics
+Infrastructure](https://ngisweden.scilifelab.se/) within [SciLifeLab](https://scilifelab.se/);
+it is currently developed and maintained by [Erik Fasterius](https://github.com/fasterius)
+and [Christophe Avenel](https://github.com/cavenel).
-We thank the following people for their extensive assistance in the development of this pipeline:
+Many thanks to others who have helped out along the way too, especially [Gregor
+Sturm](https://github.com/grst)!
-
+_<sup>1</sup> Supported by grants from the US National Institutes of Health
+[U24CA224067](https://reporter.nih.gov/project-details/10261367) and
+[U54AG075941](https://reporter.nih.gov/project-details/10376627). Original
+authors [Dr. Sergii Domanskyi](https://github.com/sdomanskyi), Prof. Jeffrey
+Chuang and Dr. Anuj Srivastava._
## Contributions and Support
@@ -97,8 +107,6 @@ For further information or help, don't hesitate to get in touch on the [Slack `#
-
-
An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.
You can cite the `nf-core` publication as follows:
diff --git a/assets/_extensions/nf-core/_extension.yml b/assets/_extensions/nf-core/_extension.yml
new file mode 100644
index 0000000..95759fc
--- /dev/null
+++ b/assets/_extensions/nf-core/_extension.yml
@@ -0,0 +1,25 @@
+title: nf-core Quarto Extension
+author: Erik Fasterius
+version: 1.0.0
+quarto-required: ">=1.2.0"
+contributes:
+ formats:
+ html:
+ code-fold: true
+ df-print: paged
+ embed-resources: true
+ highlight-style: nf-core.theme
+ smooth-scroll: true
+ theme: [default, nf-core.scss]
+ toc: true
+ toc-image: nf-core-spatialvi_logo_light.png
+ toc-location: left
+ template-partials:
+ - toc.html
+ revealjs:
+ code-line-numbers: false
+ embed-resources: true
+ logo: nf-core-spatialvi_logo_light.png
+ slide-level: 2
+ slide-number: false
+ theme: [default, nf-core.scss]
diff --git a/assets/_extensions/nf-core/nf-core-spatialvi_logo_light.png b/assets/_extensions/nf-core/nf-core-spatialvi_logo_light.png
new file mode 120000
index 0000000..b20f5fe
--- /dev/null
+++ b/assets/_extensions/nf-core/nf-core-spatialvi_logo_light.png
@@ -0,0 +1 @@
+../../nf-core-spatialvi_logo_light.png
\ No newline at end of file
diff --git a/assets/_extensions/nf-core/nf-core.scss b/assets/_extensions/nf-core/nf-core.scss
new file mode 100644
index 0000000..788a029
--- /dev/null
+++ b/assets/_extensions/nf-core/nf-core.scss
@@ -0,0 +1,194 @@
+/*-- scss:defaults --*/
+
+$theme: "nf-core" !default;
+
+// Colours
+$green: #24B064 !default;
+$blue: #3073AF !default;
+$red: #E0191A !default;
+$yellow: #DABC25 !default;
+
+// Greyscale
+$white: #FFFFFF !default;
+$grey-100: #F5F5F5 !default;
+$grey-90: #E5E5E5 !default;
+$grey-80: #CCCCCC !default;
+$grey-70: #B2B2B2 !default;
+$grey-60: #999999 !default;
+$grey-50: #7F7F7F !default;
+$grey-40: #666666 !default;
+$grey-30: #4C4C4C !default;
+$grey-20: #333333 !default;
+$grey-10: #191919 !default;
+$black: #000000 !default;
+
+// Theme
+$primary: $green !default;
+$secondary: $blue !default;
+$tertiary: $red !default;
+$light: $grey-100 !default;
+$dark: $grey-30 !default;
+$success: $green !default;
+$info: $blue !default;
+$warning: $yellow !default;
+$danger: $red !default;
+
+// Code
+$code-color: $primary !default;
+$code-bg: $light !default;
+$code-block-bg: $light !default;
+
+// Links
+$link-color: $primary !default;
+
+// Popover
+$popover: $light !default;
+
+// Dropdowns
+$dropdown-link-color: $grey-30 !default;
+$dropdown-link-hover-color: $white !default;
+$dropdown-link-hover-bg: $primary !default;
+
+// Font
+@import "https://fonts.googleapis.com/css2?family=Maven+Pro:wght@300;400;500;600";
+$font-family-sans-serif: "Maven Pro";
+
+// Font size for headers
+$h1-font-size: 1.75rem !default;
+$h2-font-size: 1.50rem !default;
+$h3-font-size: 1.25rem !default;
+
+// Font size base for reveal.js presentations
+$presentation-font-size-root: 35px !default;
+
+// Tables
+$table-bg-scale: 0 !default;
+
+// Navs
+$nav-link-padding-y: .5rem !default;
+$nav-link-padding-x: 2rem !default;
+$nav-link-disabled-color: $grey-40 !default;
+$nav-tabs-border-color: $grey-80 !default;
+
+// Navbar
+$navbar-padding-y: 1rem !default;
+$navbar-light-bg: $primary !default;
+$navbar-light-color: $white !default;
+$navbar-light-hover-color: $success !default;
+$navbar-light-active-color: $success !default;
+$navbar-light-brand-color: $white !default;
+$navbar-light-brand-hover-color: $navbar-light-brand-color !default;
+$navbar-dark-color: $white !default;
+$navbar-dark-hover-color: $primary !default;
+$navbar-dark-active-color: $primary !default;
+$navbar-dark-brand-color: $white !default;
+$navbar-dark-brand-hover-color: $navbar-dark-brand-color !default;
+
+// Pagination
+$pagination-color: $white !default;
+$pagination-bg: $success !default;
+$pagination-border-width: 0 !default;
+$pagination-border-color: transparent !default;
+$pagination-hover-color: $white !default;
+$pagination-hover-bg: darken($success, 15%) !default;
+$pagination-hover-border-color: transparent !default;
+$pagination-active-bg: $pagination-hover-bg !default;
+$pagination-active-border-color: transparent !default;
+$pagination-disabled-color: $grey-80 !default;
+$pagination-disabled-bg: lighten($success, 15%) !default;
+$pagination-disabled-border-color: transparent !default;
+
+// List group
+$list-group-hover-bg: $grey-80 !default;
+$list-group-disabled-bg: $grey-80 !default;
+
+// Close
+$btn-close-color: $white !default;
+$btn-close-opacity: .4 !default;
+$btn-close-hover-opacity: 1 !default;
+
+/*-- scss:rules --*/
+
+// Variables
+$web-font-path: "https://fonts.googleapis.com/css2?family=Lato:ital,wght@0,400;0,700;1,400&display=swap" !default;
+@if $web-font-path {
+ @import url($web-font-path);
+}
+
+// Navbar
+.bg-primary {
+ .navbar-nav .show > .nav-link,
+ .navbar-nav .nav-link.active,
+ .navbar-nav .nav-link:hover,
+ .navbar-nav .nav-link:focus {
+ color: $success !important;
+ }
+}
+
+// Navs
+.nav-tabs {
+ .nav-link.active,
+ .nav-link.active:focus,
+ .nav-link.active:hover,
+ .nav-item.open .nav-link,
+ .nav-item.open .nav-link:focus,
+ .nav-item.open .nav-link:hover {
+ color: $primary;
+ }
+}
+
+// Pagination
+.pagination {
+ a:hover {
+ text-decoration: none;
+ }
+}
+
+// Blockquotes
+.blockquote {
+ color: $primary;
+ border-left-color: $primary;
+}
+
+// Cell Output
+.cell-output-error > pre > code {
+ color: $red;
+}
+.cell-output-stderr > pre > code {
+ color: $yellow;
+}
+
+// Horizontally center level 1 headers
+.center h1 {
+ text-align: center
+}
+
+// Text justification
+.justify-right {
+ text-align: right
+}
+.justify-center {
+ text-align: center
+}
+
+// Custom colours
+.green {
+ color: $green;
+ font-weight: bold;
+}
+.blue {
+ color: $blue;
+ font-weight: bold;
+}
+.red {
+ color: $red;
+ font-weight: bold;
+}
+.yellow {
+ color: $yellow;
+ font-weight: bold;
+}
+.grey {
+ color: $grey-70;
+ font-weight: bold;
+}
diff --git a/assets/_extensions/nf-core/nf-core.theme b/assets/_extensions/nf-core/nf-core.theme
new file mode 100644
index 0000000..1039cfc
--- /dev/null
+++ b/assets/_extensions/nf-core/nf-core.theme
@@ -0,0 +1,211 @@
+{
+ "text-color": null,
+ "background-color": null,
+ "line-number-color": "#aaaaaa",
+ "line-number-background-color": null,
+ "text-styles": {
+ "Other": {
+ "text-color": "#9e9370",
+ "background-color": null,
+ "bold": false,
+ "italic": false,
+ "underline": false
+ },
+ "Attribute": {
+ "text-color": "#000000",
+ "background-color": null,
+ "bold": false,
+ "italic": false,
+ "underline": false
+ },
+ "SpecialString": {
+ "text-color": "#bb6688",
+ "background-color": null,
+ "bold": false,
+ "italic": false,
+ "underline": false
+ },
+ "Annotation": {
+ "text-color": "#005c86",
+ "background-color": null,
+ "bold": true,
+ "italic": true,
+ "underline": false
+ },
+ "Function": {
+ "text-color": "#3073AF",
+ "background-color": null,
+ "bold": false,
+ "italic": false,
+ "underline": false
+ },
+ "String": {
+ "text-color": "#24B064 ",
+ "background-color": null,
+ "bold": false,
+ "italic": false,
+ "underline": false
+ },
+ "ControlFlow": {
+ "text-color": "#9e9370",
+ "background-color": null,
+ "bold": true,
+ "italic": false,
+ "underline": false
+ },
+ "Operator": {
+ "text-color": "#666666",
+ "background-color": null,
+ "bold": false,
+ "italic": false,
+ "underline": false
+ },
+ "Error": {
+ "text-color": "#BB5454 ",
+ "background-color": null,
+ "bold": true,
+ "italic": false,
+ "underline": false
+ },
+ "BaseN": {
+ "text-color": "#DABC25",
+ "background-color": null,
+ "bold": false,
+ "italic": false,
+ "underline": false
+ },
+ "Alert": {
+ "text-color": "#BB5454 ",
+ "background-color": null,
+ "bold": true,
+ "italic": false,
+ "underline": false
+ },
+ "Variable": {
+ "text-color": "#005c86",
+ "background-color": null,
+ "bold": false,
+ "italic": false,
+ "underline": false
+ },
+ "BuiltIn": {
+ "text-color": "#005c86",
+ "background-color": null,
+ "bold": false,
+ "italic": false,
+ "underline": false
+ },
+ "Extension": {
+ "text-color": null,
+ "background-color": null,
+ "bold": false,
+ "italic": false,
+ "underline": false
+ },
+ "Preprocessor": {
+ "text-color": "#BBBB54 ",
+ "background-color": null,
+ "bold": false,
+ "italic": false,
+ "underline": false
+ },
+ "Information": {
+ "text-color": "#005c86",
+ "background-color": null,
+ "bold": true,
+ "italic": true,
+ "underline": false
+ },
+ "VerbatimString": {
+ "text-color": "#005c86",
+ "background-color": null,
+ "bold": false,
+ "italic": false,
+ "underline": false
+ },
+ "Warning": {
+ "text-color": "#BBBB54 ",
+ "background-color": null,
+ "bold": true,
+ "italic": true,
+ "underline": false
+ },
+ "Documentation": {
+ "text-color": "#BB5454 ",
+ "background-color": null,
+ "bold": false,
+ "italic": true,
+ "underline": false
+ },
+ "Import": {
+ "text-color": "#BB5454 ",
+ "background-color": null,
+ "bold": true,
+ "italic": false,
+ "underline": false
+ },
+ "Char": {
+ "text-color": "#af75a7 ",
+ "background-color": null,
+ "bold": false,
+ "italic": false,
+ "underline": false
+ },
+ "DataType": {
+ "text-color": "#9e9370",
+ "background-color": null,
+ "bold": false,
+ "italic": false,
+ "underline": false
+ },
+ "Float": {
+ "text-color": "#DABC25 ",
+ "background-color": null,
+ "bold": false,
+ "italic": false,
+ "underline": false
+ },
+ "Comment": {
+ "text-color": "#B2B2B2 ",
+ "background-color": null,
+ "bold": false,
+ "italic": true,
+ "underline": false
+ },
+ "CommentVar": {
+ "text-color": "#B2B2B2 ",
+ "background-color": null,
+ "bold": true,
+ "italic": true,
+ "underline": false
+ },
+ "Constant": {
+ "text-color": "#B2B2B2",
+ "background-color": null,
+ "bold": false,
+ "italic": false,
+ "underline": false
+ },
+ "SpecialChar": {
+ "text-color": "#B2B2B2",
+ "background-color": null,
+ "bold": false,
+ "italic": false,
+ "underline": false
+ },
+ "DecVal": {
+ "text-color": "#DABC25 ",
+ "background-color": null,
+ "bold": false,
+ "italic": false,
+ "underline": false
+ },
+ "Keyword": {
+ "text-color": "#BB5454 ",
+ "background-color": null,
+ "bold": true,
+ "italic": false,
+ "underline": false
+ }
+ }
+}
diff --git a/assets/_extensions/nf-core/toc.html b/assets/_extensions/nf-core/toc.html
new file mode 100644
index 0000000..b402642
--- /dev/null
+++ b/assets/_extensions/nf-core/toc.html
@@ -0,0 +1,7 @@
+
diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml
index b4c018f..11102b9 100644
--- a/assets/methods_description_template.yml
+++ b/assets/methods_description_template.yml
@@ -3,7 +3,6 @@ description: "Suggested text and references to use when describing pipeline usag
section_name: "nf-core/spatialvi Methods Description"
section_href: "https://github.com/nf-core/spatialvi"
plot_type: "html"
-## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline
## You inject any metadata in the Nextflow '${workflow}' object
data: |
Methods
@@ -12,13 +11,7 @@ data: |
${workflow.commandLine}
${tool_citations}
References
-
-
Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: 10.1038/nbt.3820
-
Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: 10.1038/s41587-020-0439-x
-
Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: 10.1038/s41592-018-0046-7
-
da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: 10.1093/bioinformatics/btx192
- ${tool_bibliography}
-
+
${tool_bibliography}
Notes:
diff --git a/assets/nf-core-spatialvi_logo_light.png b/assets/nf-core-spatialvi_logo_light.png
index 6a3e713..c3bb5a5 100644
Binary files a/assets/nf-core-spatialvi_logo_light.png and b/assets/nf-core-spatialvi_logo_light.png differ
diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv
deleted file mode 100644
index 5f653ab..0000000
--- a/assets/samplesheet.csv
+++ /dev/null
@@ -1,3 +0,0 @@
-sample,fastq_1,fastq_2
-SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz
-SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,
diff --git a/assets/schema_input.json b/assets/schema_input.json
index d3c899e..cdefe2d 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -12,22 +12,8 @@
"pattern": "^\\S+$",
"errorMessage": "Sample name must be provided and cannot contain spaces",
"meta": ["id"]
- },
- "fastq_1": {
- "type": "string",
- "format": "file-path",
- "exists": true,
- "pattern": "^\\S+\\.f(ast)?q\\.gz$",
- "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
- },
- "fastq_2": {
- "type": "string",
- "format": "file-path",
- "exists": true,
- "pattern": "^\\S+\\.f(ast)?q\\.gz$",
- "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
}
},
- "required": ["sample", "fastq_1"]
+ "required": ["sample"]
}
}
diff --git a/bin/clustering.qmd b/bin/clustering.qmd
new file mode 100644
index 0000000..671471c
--- /dev/null
+++ b/bin/clustering.qmd
@@ -0,0 +1,177 @@
+---
+title: "nf-core/spatialvi"
+subtitle: "Dimensionality reduction and clustering"
+format:
+ nf-core-html: default
+jupyter: python3
+---
+
+```{python}
+#| tags: [parameters]
+#| echo: false
+input_sdata = "sdata_filtered.zarr" # Input: SpatialData file
+cluster_resolution = 1 # Resolution for Leiden clustering
+n_hvgs = 2000 # Number of HVGs to use for analyses
+artifact_dir = "artifacts" # Output directory
+output_adata = "adata_processed.h5ad" # Output: AnnData file
+output_sdata = "sdata_processed.zarr" # Output: SpatialData file
+```
+
+The data has already been filtered in the _quality controls_ reports and is
+saved in the SpatialData format:
+
+```{python}
+#| warning: false
+import spatialdata
+import os
+import scanpy as sc
+import numpy as np
+import pandas as pd
+from anndata import AnnData
+from umap import UMAP
+from matplotlib import pyplot as plt
+import seaborn as sns
+import leidenalg
+from IPython.display import display, Markdown
+```
+
+```{python}
+# Make sure we can use scanpy plots with the AnnData object exported from
+# `sdata.tables`. This code is taken from the early version of https://github.com/scverse/spatialdata-io/pull/102/
+# Once that PR is merged into spatialdata-io, we should instead use
+# `spatialdata_io.to_legacy_anndata(sdata)`.
+def to_legacy_anndata(sdata: spatialdata.SpatialData) -> AnnData:
+ adata = sdata.tables["table"]
+ for dataset_id in adata.uns["spatial"]:
+ adata.uns["spatial"][dataset_id]["images"] = {
+ "hires": np.array(sdata.images[f"{dataset_id}_hires_image"]).transpose([1, 2, 0]),
+ "lowres": np.array(sdata.images[f"{dataset_id}_lowres_image"]).transpose([1, 2, 0]),
+ }
+ adata.uns["spatial"][dataset_id]["scalefactors"] = {
+ "tissue_hires_scalef": spatialdata.transformations.get_transformation(
+ sdata.shapes[dataset_id], to_coordinate_system="downscaled_hires"
+ ).scale[0],
+ "tissue_lowres_scalef": spatialdata.transformations.get_transformation(
+ sdata.shapes[dataset_id], to_coordinate_system="downscaled_lowres"
+ ).scale[0],
+ "spot_diameter_fullres": sdata.shapes[dataset_id]["radius"][0] * 2,
+ }
+ return adata
+```
+
+```{python}
+sdata = spatialdata.read_zarr(input_sdata, ["images", "tables", "shapes"])
+adata = to_legacy_anndata(sdata)
+
+print("Content of the SpatialData table object:")
+print(adata)
+```
+
+# Normalization
+
+Before we can continue working on the data it needs to be normalized. Here we
+use the built-in `normalize_total` method from [Scanpy](https://scanpy.readthedocs.io/en/stable/)
+followed by a log-transformation.
+
+```{python}
+sc.pp.normalize_total(adata, inplace=True)
+sc.pp.log1p(adata)
+```
+
+# Feature selection
+
+Not all features (genes, in this case) are informative, and selecting for a
+subset of the total features is commonly done prior to clustering. By selecting
+the most variable genes in a dataset we can capture those that matter most for
+obtaining a good separation of clusters.
+
+```{python}
+#| layout-nrow: 1
+# Find top HVGs and print results
+sc.pp.highly_variable_genes(adata, flavor="seurat", n_top_genes=n_hvgs)
+var_genes_all = adata.var.highly_variable
+print("Extracted highly variable genes: %d"%sum(var_genes_all))
+
+# Plot the HVGs
+plt.rcParams["figure.figsize"] = (4.5, 4.5)
+sc.pl.highly_variable_genes(adata)
+```
+
+# Clustering
+
+To uncover the underlying structure of the transcriptional landscape, we perform
+manifold embedding and clustering based on transcriptional similarity. Principal
+Component Analysis (PCA) is applied to reduce dimensionality, and UMAP (Uniform
+Manifold Approximation and Projection) is used for visualization. The Leiden
+algorithm is employed for clustering with a given resolution.
+
+```{python}
+sc.pp.pca(adata)
+sc.pp.neighbors(adata)
+sc.tl.umap(adata)
+sc.tl.leiden(adata, key_added="clusters", resolution=cluster_resolution)
+Markdown(f"Resolution for Leiden clustering: `{cluster_resolution}`")
+```
+
+## All clusters
+
+We then generate UMAP plots to visualize the distribution of clusters:
+
+```{python}
+#| warning: false
+plt.rcParams["figure.figsize"] = (7, 7)
+sc.pl.umap(adata, color="clusters")
+```
+
+## Counts and genes
+
+We can also visualise the total counts and the number of genes with at least 1
+count in the UMAP:
+
+```{python}
+# Plot total counts and number of detected genes in the UMAP
+plt.rcParams["figure.figsize"] = (3.5, 3.5)
+sc.pl.umap(adata, color=["total_counts", "n_genes_by_counts"])
+```
+
+## Individual clusters
+
+An additional visualisation is to show where the various spots are in each
+individual cluster while ignoring all other clusters:
+
+```{python}
+sc.tl.embedding_density(adata, basis="umap", groupby="clusters")
+sc.pl.embedding_density(adata, groupby="clusters", ncols=2)
+```
+
+# Spatial visualisation
+
+Next, we examine how total counts and the number of detected genes behave in
+spatial coordinates by overlaying the spots on the tissue image itself.
+
+```{python}
+#| layout-nrow: 2
+plt.rcParams["figure.figsize"] = (8, 8)
+sc.pl.spatial(adata, img_key="hires", color="total_counts", size=1.25)
+sc.pl.spatial(adata, img_key="hires", color="n_genes_by_counts", size=1.25)
+```
+
+To gain insights into tissue organization and potential inter-cellular
+communication, we visualize the spatial distribution of clusters on the image.
+Spots belonging to the same cluster in gene expression space often co-occur in
+spatial dimensions, providing valuable information about the spatial
+organization of cells.
+
+```{python}
+# TODO: Can the colour bar on this figure be fit to the figure?
+plt.rcParams["figure.figsize"] = (7, 7)
+sc.pl.spatial(adata, img_key="hires", color="clusters", size=1.25)
+```
+
+```{python}
+#| echo: false
+del sdata.tables["table"]
+sdata.tables["table"] = adata
+adata.write(os.path.join(artifact_dir, output_adata))
+sdata.write(os.path.join(artifact_dir, output_sdata))
+```
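+
+The processed data is exported both as an `AnnData` file and as a zarr-based
+SpatialData store. As a minimal sketch (assuming Scanpy is installed and the
+default output file name defined at the top of this report), the `AnnData`
+object can be loaded again for further exploration outside of the pipeline:
+
+```{python}
+#| eval: false
+import scanpy as sc
+
+# Load the processed AnnData written by this report (default file name)
+adata = sc.read_h5ad("adata_processed.h5ad")
+
+# Re-plot the Leiden clusters computed above
+sc.pl.umap(adata, color="clusters")
+```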
diff --git a/bin/quality_controls.qmd b/bin/quality_controls.qmd
new file mode 100644
index 0000000..6bfe9c3
--- /dev/null
+++ b/bin/quality_controls.qmd
@@ -0,0 +1,289 @@
+---
+title: "nf-core/spatialvi"
+subtitle: "Pre-processing and quality controls"
+format:
+ nf-core-html: default
+jupyter: python3
+---
+
+# Introduction
+
+Spatial Transcriptomics data analysis involves several steps, including quality
+controls (QC) and pre-processing, to ensure the reliability of downstream
+analyses. These steps are essential for identifying and filtering out spots and
+genes that may introduce noise and/or bias into the
+analysis.
+
+This report outlines the QC and pre-processing steps for Visium Spatial
+Transcriptomics data using the [AnnData format](https://anndata.readthedocs.io/en/latest/tutorials/notebooks/getting-started.html)
+and the [`scanpy` Python package](https://scanpy.readthedocs.io/en/stable/).
+The AnnData format is utilized to organize and store the Spatial Transcriptomics
+data. It includes information about counts, features, observations, and
+additional metadata. The AnnData format ensures compatibility with various
+analysis tools and facilitates seamless integration into existing workflows.
+The AnnData object is saved in the `Tables` element of a zarr [SpatialData object](https://spatialdata.scverse.org/en/latest/design_doc.html#table-table-of-annotations-for-regions).
+
+```{python}
+#| tags: [parameters]
+#| echo: false
+input_sdata = "sdata_raw.zarr" # Input: SpatialData file
+min_counts = 500 # Min counts per spot
+min_genes = 250 # Min genes per spot
+min_spots = 1 # Min spots per gene
+mito_threshold = 20 # Mitochondrial content threshold (%)
+ribo_threshold = 0 # Ribosomal content threshold (%)
+hb_threshold = 100 # Haemoglobin content threshold (%)
+artifact_dir = "artifacts"
+output_adata = "adata_filtered.h5ad" # Output: AnnData file
+output_sdata = "sdata_filtered.zarr" # Output: SpatialData file
+```
+
+```{python}
+import pandas as pd
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import scanpy as sc
+import scipy
+import seaborn as sns
+import spatialdata
+from anndata import AnnData
+from IPython.display import display, Markdown
+from textwrap import dedent
+plt.rcParams["figure.figsize"] = (6, 6)
+```
+
+```{python}
+# Make sure we can use scanpy plots with the AnnData object exported from sdata.tables
+# This code is taken from the early version of https://github.com/scverse/spatialdata-io/pull/102/
+# Once that PR is merged into spatialdata-io, we should use spatialdata_io.to_legacy_anndata(sdata) instead.
+def to_legacy_anndata(sdata: spatialdata.SpatialData) -> AnnData:
+ adata = sdata.tables["table"]
+ for dataset_id in adata.uns["spatial"]:
+ adata.uns["spatial"][dataset_id]["images"] = {
+ "hires": np.array(sdata.images[f"{dataset_id}_hires_image"]).transpose([1, 2, 0]),
+ "lowres": np.array(sdata.images[f"{dataset_id}_lowres_image"]).transpose([1, 2, 0]),
+ }
+ adata.uns["spatial"][dataset_id]["scalefactors"] = {
+ "tissue_hires_scalef": spatialdata.transformations.get_transformation(
+ sdata.shapes[dataset_id], to_coordinate_system="downscaled_hires"
+ ).scale[0],
+ "tissue_lowres_scalef": spatialdata.transformations.get_transformation(
+ sdata.shapes[dataset_id], to_coordinate_system="downscaled_lowres"
+ ).scale[0],
+ "spot_diameter_fullres": sdata.shapes[dataset_id]["radius"][0] * 2,
+ }
+ return adata
+```
+
+```{python}
+# Read the data
+sdata = spatialdata.read_zarr(input_sdata, ["images", "tables", "shapes"])
+adata = to_legacy_anndata(sdata)
+
+# Convert the X matrix from CSR to CSC sparse format for output compatibility
+adata.X = scipy.sparse.csc_matrix(adata.X)
+
+# Store the raw data so that it can be used for analyses from scratch if desired
+adata.layers['raw'] = adata.X.copy()
+
+# Print the anndata object for inspection
+print("Content of the AnnData object:")
+print(adata)
+```
+
+# Quality controls
+
+There are several different quality metrics that are normally computed for
+spatial data. Common metrics include the number of genes with at least 1 count
+(`n_genes_by_counts`), counts per spot (`total_counts`) as well as the
+percentage of counts from mitochondrial, ribosomal and haemoglobin genes
+(`pct_counts_[mt/ribo/hb]`).
+
+```{python}
+# Calculate mitochondrial, ribosomal and haemoglobin percentages
+adata.var['mt'] = adata.var_names.str.startswith('MT-')
+adata.var['ribo'] = adata.var_names.str.contains(("^RP[LS]"))
+adata.var['hb'] = adata.var_names.str.contains(("^HB[AB]"))
+sc.pp.calculate_qc_metrics(adata, qc_vars=["mt", "ribo", "hb"],
+ inplace=True, log1p=False)
+
+# Save a copy of data as a restore-point if filtering results in 0 spots left
+adata_before_filtering = adata.copy()
+```
+
+## Violin plots
+
+The following violin plots show the distribution of the number of genes per
+spot and counts per spot, as well as the percentage of counts from
+mitochondrial, ribosomal and haemoglobin genes:
+
+```{python}
+#| layout-nrow: 2
+sc.pl.violin(adata, ['n_genes_by_counts', 'total_counts'],
+ multi_panel=True, jitter=0.4, rotation= 45)
+sc.pl.violin(adata, ['pct_counts_mt', 'pct_counts_ribo', 'pct_counts_hb'],
+ multi_panel=True, jitter=0.4, rotation= 45)
+```
+
+## Spatial distributions
+
+The same quality metrics can also be plotted on top of the tissue so that
+spatial patterns may be discerned:
+
+```{python}
+#| layout-nrow: 2
+sc.pl.spatial(adata, color = ["total_counts", "n_genes_by_counts"], size=1.25)
+sc.pl.spatial(adata, color = ["pct_counts_mt", "pct_counts_ribo", "pct_counts_hb"], size=1.25)
+```
+
+## Scatter plots
+
+It is also useful to compare some of these quality metrics against each other in
+scatter plots, such as mitochondrial versus ribosomal content and the total
+counts versus the number of genes:
+
+```{python}
+#| layout-ncol: 2
+sc.pl.scatter(adata, x='pct_counts_ribo', y='pct_counts_mt')
+sc.pl.scatter(adata, x='total_counts', y='n_genes_by_counts')
+```
+
+## Top expressed genes
+
+It can also be informative to see which genes are the most expressed in the
+dataset; the following figure shows the top 20 most expressed genes.
+
+```{python}
+sc.pl.highest_expr_genes(adata, n_top=20)
+```
+
+# Filtering
+
+## Non-tissue spots
+
+The following plot indicates which spots are outside of the tissue. These spots
+are uninformative and are thus removed.
+
+```{python}
+# Create a string observation "obs/in_tissue_str" with "In tissue" and "Outside tissue":
+adata.obs["in_tissue_str"] = ["In tissue" if x == 1 else "Outside tissue" for x in adata.obs["in_tissue"]]
+
+# Plot spots inside tissue
+sc.pl.spatial(adata, color=["in_tissue_str"], title="Spots in tissue", size=1.25)
+del adata.obs["in_tissue_str"]
+
+# Remove spots outside tissue and print results
+n_spots = adata.shape[0]
+adata = adata[adata.obs["in_tissue"] == 1]
+n_spots_in_tissue = adata.shape[0]
+Markdown(f"""A total of `{n_spots_in_tissue}` spots are situated inside the
+tissue, out of `{n_spots}` spots in total.""")
+```
+
+## Counts, genes and spots
+
+We filter spots based on minimum counts and genes, but also filter genes based
+on minimum spots; exactly which filtering criteria are reasonable is up to you and
+your knowledge of the specific tissue at hand.
+
+```{python}
+#| warning: false
+# Filter spots based on counts
+n_spots = adata.shape[0]
+n_genes = adata.shape[1]
+sc.pp.filter_cells(adata, min_counts=min_counts)
+n_spots_filtered_min_counts = adata.shape[0]
+
+# Filter spots based on genes
+sc.pp.filter_cells(adata, min_genes=min_genes)
+n_spots_filtered_min_genes = adata.shape[0]
+
+# Filter genes based on spots
+sc.pp.filter_genes(adata, min_cells=min_spots)
+n_genes_filtered_min_spots = adata.shape[1]
+
+# Print results
+Markdown(f"""
+- Removed `{n_spots - n_spots_filtered_min_counts}` spots with less than `{min_counts}` total counts.
+- Removed `{n_spots_filtered_min_counts - n_spots_filtered_min_genes}` spots with less than `{min_genes}` genes expressed.
+- Removed `{n_genes - n_genes_filtered_min_spots}` genes expressed in less than `{min_spots}` spots.
+""")
+```
+
+## Mito, ribo and Hb
+
+We can also filter for mitochondrial, ribosomal and haemoglobin content of the
+cells; exactly which filtering threshold should be used are, again, up to you
+and your biological knowledge of the sample at hand. Please note that neither
+ribosomal nor haemoglobin content is filtered by default.
+
+```{python}
+# Filter spots
+adata = adata[adata.obs["pct_counts_mt"] <= mito_threshold]
+n_spots_filtered_mito = adata.shape[0]
+adata = adata[adata.obs["pct_counts_ribo"] >= ribo_threshold]
+n_spots_filtered_ribo = adata.shape[0]
+adata = adata[adata.obs["pct_counts_hb"] <= hb_threshold]
+n_spots_filtered_hb = adata.shape[0]
+
+# Print results
+Markdown(f"""
+- Removed `{n_spots_filtered_min_genes - n_spots_filtered_mito}` spots with more than `{mito_threshold}%` mitochondrial content.
+- Removed `{n_spots_filtered_mito - n_spots_filtered_ribo}` spots with less than `{ribo_threshold}%` ribosomal content.
+- Removed `{n_spots_filtered_ribo - n_spots_filtered_hb}` spots with more than `{hb_threshold}%` haemoglobin content.
+""")
+```
+
+```{python}
+#| echo: false
+# Restore non-filtered data if filtering results in 0 spots left
+if (adata.shape[0] == 0 or adata.shape[1] == 0):
+ adata = adata_before_filtering
+ display(
+ Markdown(dedent(
+ """
+ ::: {.callout-important .content-visible when-format="html"}
+ ## Issue: no spots remain after filtering
+
+ An anomaly has been detected in the data: following the filtering
+ process, all spots have been excluded. It is imperative to assess
+ the data quality and carefully review the values of the filtering
+ parameters.
+
+ To ensure the smooth progression of downstream analysis, the
+ exported AnnData will, for the time being, remain unfiltered. This
+ precautionary measure is implemented to facilitate continued
+ analysis while investigating and resolving the cause of the
+ unexpected removal of all spots during filtering.
+ :::
+ """
+ ))
+ )
+```
+
+## Filtering results
+
+```{python}
+# Print filtering results
+Markdown(f"""
+The final results of all the filtering are as follows:
+
+- A total of `{adata.shape[0]}` spots out of `{n_spots}` remain after filtering.
+- A total of `{adata.shape[1]}` genes out of `{n_genes}` remain after filtering.
+""")
+```
+
+```{python}
+#| layout-nrow: 2
+sc.pl.violin(adata, ['n_genes_by_counts', 'total_counts'],
+ multi_panel=True, jitter=0.4, rotation= 45)
+sc.pl.violin(adata, ['pct_counts_mt', 'pct_counts_ribo', 'pct_counts_hb'],
+ multi_panel=True, jitter=0.4, rotation= 45)
+```
+
+```{python}
+del sdata.tables["table"]
+sdata.tables["table"] = adata
+sdata.write(os.path.join(artifact_dir, output_sdata))
+```
diff --git a/bin/read_data.py b/bin/read_data.py
new file mode 100755
index 0000000..c0ed0f4
--- /dev/null
+++ b/bin/read_data.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
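+#
+# Read a Space Ranger count directory and write a raw SpatialData zarr store.
+# Example invocation (paths below are illustrative):
+# ./read_data.py --SRCountDir SAMPLE/outs --output_sdata sdata_raw.zarr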
+
+# Load packages
+import argparse
+
+import spatialdata_io
+
+if __name__ == "__main__":
+ # Parse command-line arguments
+ parser = argparse.ArgumentParser(
+ description="Load spatial transcriptomics data from MTX matrices and aligned images."
+ )
+ parser.add_argument(
+ "--SRCountDir",
+ metavar="SRCountDir",
+ type=str,
+ default=None,
+ help="Input directory with Spaceranger data.",
+ )
+ parser.add_argument(
+ "--output_sdata",
+ metavar="output_sdata",
+ type=str,
+ default=None,
+ help="Output spatialdata zarr path.",
+ )
+
+ args = parser.parse_args()
+
+ # Read Visium data
+ spatialdata = spatialdata_io.visium(
+ args.SRCountDir, counts_file="raw_feature_bc_matrix.h5", dataset_id="visium"
+ )
+
+ # Write raw spatialdata to file
+ spatialdata.write(args.output_sdata, overwrite=True)
diff --git a/bin/spatially_variable_genes.qmd b/bin/spatially_variable_genes.qmd
new file mode 100644
index 0000000..6d52c92
--- /dev/null
+++ b/bin/spatially_variable_genes.qmd
@@ -0,0 +1,142 @@
+---
+title: "nf-core/spatialvi"
+subtitle: "Neighborhood enrichment analysis and Spatially variable genes"
+format:
+ nf-core-html: default
+jupyter: python3
+---
+
+```{python}
+#| tags: [parameters]
+#| echo: false
+input_sdata = "sdata_processed.zarr" # Input: SpatialData file
+svg_autocorr_method = "moran" # Parameter: SVG autocorrelation method
+n_top_svgs = 14 # Number of top spatially variable genes to show
+artifact_dir = "artifacts" # Output directory
+output_csv = "spatially_variable_genes.csv" # Output: gene list
+output_adata = "adata_spatially_variable_genes.h5ad" # Output: AnnData file
+output_sdata = "sdata.zarr" # Output: SpatialData file
+```
+
+```{python}
+import numpy as np
+import os
+import pandas as pd
+import scanpy as sc
+import squidpy as sq
+import spatialdata
+from anndata import AnnData
+from matplotlib import pyplot as plt
+```
+
+```{python}
+# Make sure we can use scanpy plots with the AnnData object exported from sdata.tables
+# This code is taken from the early version of https://github.com/scverse/spatialdata-io/pull/102/
+# Once that PR is merged into spatialdata-io, we should use spatialdata_io.to_legacy_anndata(sdata) instead.
+def to_legacy_anndata(sdata: spatialdata.SpatialData) -> AnnData:
+ adata = sdata.tables["table"]
+ for dataset_id in adata.uns["spatial"]:
+ adata.uns["spatial"][dataset_id]["images"] = {
+ "hires": np.array(sdata.images[f"{dataset_id}_hires_image"]).transpose([1, 2, 0]),
+ "lowres": np.array(sdata.images[f"{dataset_id}_lowres_image"]).transpose([1, 2, 0]),
+ }
+ adata.uns["spatial"][dataset_id]["scalefactors"] = {
+ "tissue_hires_scalef": spatialdata.transformations.get_transformation(
+ sdata.shapes[dataset_id], to_coordinate_system="downscaled_hires"
+ ).scale[0],
+ "tissue_lowres_scalef": spatialdata.transformations.get_transformation(
+ sdata.shapes[dataset_id], to_coordinate_system="downscaled_lowres"
+ ).scale[0],
+ "spot_diameter_fullres": sdata.shapes[dataset_id]["radius"][0] * 2,
+ }
+ return adata
+```
+
+```{python}
+# Read data
+sdata = spatialdata.read_zarr(input_sdata, ["images", "tables", "shapes"])
+
+adata = to_legacy_anndata(sdata)
+print("Content of the AnnData object:")
+print(adata)
+
+# Suppress scanpy-specific warnings
+sc.settings.verbosity = 0
+```
+
+# Differential gene expression
+
+Before we look for spatially variable genes we first find differentially
+expressed genes (DEGs) across the different clusters found in the data. We can
+visualize the top DEGs in a heatmap:
+
+```{python}
+#| warning: false
+sc.tl.rank_genes_groups(adata, 'clusters', method='t-test')
+sc.pl.rank_genes_groups_heatmap(adata, n_genes=5, groupby="clusters")
+```
+
+A different but similar visualization of the DEGs is the dot plot, where we can
+also include the gene names:
+
+```{python}
+#| warning: false
+sc.pl.rank_genes_groups_dotplot(adata, n_genes=5, groupby="clusters")
+```
+
+::: {.callout-note}
+Please note that you may need to scroll sideways in these figures, as their
+height and width depends on the number of clusters as well as the number and
+intersection of the DEGs that are being plotted.
+:::
+
+# Neighborhood enrichment analysis
+
+We can perform a neighborhood enrichment analysis to find out which clusters
+are enriched in the spatial neighborhood of each cluster:
+
+```{python}
+#| warning: false
+sq.gr.spatial_neighbors(adata, coord_type="generic")
+sq.gr.nhood_enrichment(adata, cluster_key="clusters")
+sq.pl.nhood_enrichment(adata, cluster_key="clusters", method="ward", vmin=-100, vmax=100)
+```
+
+We visualize the interaction matrix between the different clusters:
+
+```{python}
+#| warning: false
+sq.gr.interaction_matrix(adata, cluster_key="clusters")
+sq.pl.interaction_matrix(adata, cluster_key="clusters", method="ward")
+```
+
+# Spatially variable genes with spatial autocorrelation statistics
+
+Spatial transcriptomics data can give insight into how genes are expressed in
+different areas in a tissue, allowing identification of spatial gene expression
+patterns. Here we use [Moran's I](https://en.wikipedia.org/wiki/Moran%27s_I) autocorrelation score to identify such patterns.
+
+```{python}
+adata.var_names_make_unique()
+sq.gr.spatial_autocorr(adata, mode=svg_autocorr_method)
+if svg_autocorr_method == "moran":
+ svg_autocorr_method_string = "moranI"
+else:
+ svg_autocorr_method_string = "gearyC"
+adata.uns[svg_autocorr_method_string].head(n_top_svgs)
+```
+
+```{python}
+#| echo: false
+# Save the spatially variable genes to a CSV file:
+adata.uns[svg_autocorr_method_string].to_csv(os.path.join(artifact_dir, output_csv))
+```
+
+```{python}
+#| echo: false
+#| info: false
+adata.write(output_adata)
+del sdata.tables["table"]
+sdata.tables["table"] = adata
+sdata.write("./" + output_sdata)
+```
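+
+The table of spatially variable genes is also exported as a CSV file. As a
+minimal sketch (assuming pandas and the default output file name defined at the
+top of this report), it can be loaded directly for custom follow-up analyses:
+
+```{python}
+#| eval: false
+import pandas as pd
+
+# Load the exported autocorrelation results and show the top-ranked genes
+svgs = pd.read_csv("spatially_variable_genes.csv", index_col=0)
+print(svgs.head(10))
+```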
diff --git a/conf/base.config b/conf/base.config
index e3cd6a0..804d57a 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -10,7 +10,7 @@
process {
- // TODO nf-core: Check the defaults for all processes
+ // Default process resource requirements
cpus = { check_max( 1 * task.attempt, 'cpus' ) }
memory = { check_max( 6.GB * task.attempt, 'memory' ) }
time = { check_max( 4.h * task.attempt, 'time' ) }
@@ -20,12 +20,6 @@ process {
maxErrors = '-1'
// Process-specific resource requirements
- // NOTE - Please try and re-use the labels below as much as possible.
- // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules.
- // If possible, it would be nice to keep the same label naming convention when
- // adding in your local modules too.
- // TODO nf-core: Customise requirements for specific processes.
- // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
withLabel:process_single {
cpus = { check_max( 1 , 'cpus' ) }
memory = { check_max( 6.GB * task.attempt, 'memory' ) }
@@ -52,6 +46,11 @@ process {
withLabel:process_high_memory {
memory = { check_max( 200.GB * task.attempt, 'memory' ) }
}
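+ // Resources for the optional Space Ranger step; processes opt in by declaring 'label process_spaceranger'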
+ withLabel:process_spaceranger {
+ cpus = { check_max( 8 * task.attempt, 'cpus' ) }
+ memory = { check_max( 64.GB * task.attempt, 'memory' ) }
+ time = { check_max( 20.h * task.attempt, 'time' ) }
+ }
withLabel:error_ignore {
errorStrategy = 'ignore'
}
diff --git a/conf/igenomes.config b/conf/igenomes.config
deleted file mode 100644
index 3f11437..0000000
--- a/conf/igenomes.config
+++ /dev/null
@@ -1,440 +0,0 @@
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Nextflow config file for iGenomes paths
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Defines reference genomes using iGenome paths.
- Can be used by any config that customises the base path using:
- $params.igenomes_base / --igenomes_base
-----------------------------------------------------------------------------------------
-*/
-
-params {
- // illumina iGenomes reference file paths
- genomes {
- 'GRCh37' {
- fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed"
- readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt"
- mito_name = "MT"
- macs_gsize = "2.7e9"
- blacklist = "${projectDir}/assets/blacklists/GRCh37-blacklist.bed"
- }
- 'GRCh38' {
- fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed"
- mito_name = "chrM"
- macs_gsize = "2.7e9"
- blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed"
- }
- 'CHM13' {
- fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/"
- bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/"
- gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf"
- gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz"
- mito_name = "chrM"
- }
- 'GRCm38' {
- fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed"
- readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt"
- mito_name = "MT"
- macs_gsize = "1.87e9"
- blacklist = "${projectDir}/assets/blacklists/GRCm38-blacklist.bed"
- }
- 'TAIR10' {
- fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed"
- readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt"
- mito_name = "Mt"
- }
- 'EB2' {
- fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed"
- readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt"
- }
- 'UMD3.1' {
- fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed"
- readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt"
- mito_name = "MT"
- }
- 'WBcel235' {
- fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed"
- mito_name = "MtDNA"
- macs_gsize = "9e7"
- }
- 'CanFam3.1' {
- fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed"
- readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt"
- mito_name = "MT"
- }
- 'GRCz10' {
- fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed"
- mito_name = "MT"
- }
- 'BDGP6' {
- fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed"
- mito_name = "M"
- macs_gsize = "1.2e8"
- }
- 'EquCab2' {
- fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed"
- readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt"
- mito_name = "MT"
- }
- 'EB1' {
- fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed"
- readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt"
- }
- 'Galgal4' {
- fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed"
- mito_name = "MT"
- }
- 'Gm01' {
- fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed"
- readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt"
- }
- 'Mmul_1' {
- fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed"
- readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt"
- mito_name = "MT"
- }
- 'IRGSP-1.0' {
- fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed"
- mito_name = "Mt"
- }
- 'CHIMP2.1.4' {
- fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed"
- readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt"
- mito_name = "MT"
- }
- 'Rnor_5.0' {
- fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.bed"
- mito_name = "MT"
- }
- 'Rnor_6.0' {
- fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed"
- mito_name = "MT"
- }
- 'R64-1-1' {
- fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed"
- mito_name = "MT"
- macs_gsize = "1.2e7"
- }
- 'EF2' {
- fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed"
- readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt"
- mito_name = "MT"
- macs_gsize = "1.21e7"
- }
- 'Sbi1' {
- fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed"
- readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt"
- }
- 'Sscrofa10.2' {
- fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed"
- readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt"
- mito_name = "MT"
- }
- 'AGPv3' {
- fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed"
- mito_name = "Mt"
- }
- 'hg38' {
- fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed"
- mito_name = "chrM"
- macs_gsize = "2.7e9"
- blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed"
- }
- 'hg19' {
- fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed"
- readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt"
- mito_name = "chrM"
- macs_gsize = "2.7e9"
- blacklist = "${projectDir}/assets/blacklists/hg19-blacklist.bed"
- }
- 'mm10' {
- fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed"
- readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt"
- mito_name = "chrM"
- macs_gsize = "1.87e9"
- blacklist = "${projectDir}/assets/blacklists/mm10-blacklist.bed"
- }
- 'bosTau8' {
- fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed"
- mito_name = "chrM"
- }
- 'ce10' {
- fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed"
- readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt"
- mito_name = "chrM"
- macs_gsize = "9e7"
- }
- 'canFam3' {
- fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed"
- readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt"
- mito_name = "chrM"
- }
- 'danRer10' {
- fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed"
- mito_name = "chrM"
- macs_gsize = "1.37e9"
- }
- 'dm6' {
- fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed"
- mito_name = "chrM"
- macs_gsize = "1.2e8"
- }
- 'equCab2' {
- fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed"
- readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt"
- mito_name = "chrM"
- }
- 'galGal4' {
- fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed"
- readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt"
- mito_name = "chrM"
- }
- 'panTro4' {
- fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed"
- readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt"
- mito_name = "chrM"
- }
- 'rn6' {
- fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed"
- mito_name = "chrM"
- }
- 'sacCer3' {
- fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/"
- readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt"
- mito_name = "chrM"
- macs_gsize = "1.2e7"
- }
- 'susScr3' {
- fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa"
- bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/"
- bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/"
- star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/"
- bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/"
- gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf"
- bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed"
- readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt"
- mito_name = "chrM"
- }
- }
-}
diff --git a/conf/modules.config b/conf/modules.config
index d203d2b..6740b30 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -18,17 +18,76 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
+ // Optionally save extracted Space Ranger reference archive
+ withName: 'SPACERANGER_UNTAR_REFERENCE' {
+ publishDir = [
+ enabled: params.spaceranger_save_reference,
+ path: { "${params.outdir}/reference" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+
+ // Store sample-specific results in per-sample subdirectories
+ withName: 'UNTAR_SPACERANGER_INPUT|UNTAR_DOWNSTREAM_INPUT' {
+ publishDir = [
+ enabled: params.save_untar_output,
+ path: { "${params.outdir}/${meta.id}/data/untar" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+
withName: FASTQC {
- ext.args = '--quiet'
+ publishDir = [
+ path: { "${params.outdir}/${meta.id}/fastqc" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
}
- withName: 'MULTIQC' {
- ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
+ withName: SPACERANGER_COUNT {
+ ext.args = '--create-bam false'
publishDir = [
- path: { "${params.outdir}/multiqc" },
+ path: { "${params.outdir}/${meta.id}/spaceranger" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
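+
+ // Publish Quarto reports, their parameter files and data artifacts per sample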
+ withName: 'READ_DATA|QUALITY_CONTROLS|CLUSTERING|SPATIALLY_VARIABLE_GENES' {
+ ext.prefix = { "${notebook.baseName}" }
+ publishDir = [
+ [
+ path: { "${params.outdir}/${meta.id}/reports" },
+ mode: params.publish_dir_mode,
+ pattern: "*{.html,.qmd,_extensions}"
+ ],
+ [
+ path: { "${params.outdir}/${meta.id}/reports" },
+ mode: params.publish_dir_mode,
+ pattern: "params.yml",
+ saveAs: { "${notebook.baseName}.yml" }
+ ],
+ [
+ path: { "${params.outdir}/${meta.id}/data" },
+ mode: params.publish_dir_mode,
+ pattern: "artifacts/sdata_processed.zarr",
+ saveAs: { "sdata_processed.zarr" }
+ ],
+ [
+ path: { "${params.outdir}/${meta.id}/data" },
+ mode: params.publish_dir_mode,
+ pattern: "artifacts/adata_processed.h5ad",
+ saveAs: { "adata_processed.h5ad" }
+ ],
+ [
+ path: { "${params.outdir}/${meta.id}/data" },
+ mode: params.publish_dir_mode,
+ pattern: "artifacts/spatially_variable_genes.csv",
+ saveAs: { "spatially_variable_genes.csv" }
+ ]
+ ]
+ }
+
}
diff --git a/conf/test.config b/conf/test.config
index 0549c99..ffebede 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -12,18 +12,20 @@
params {
config_profile_name = 'Test profile'
- config_profile_description = 'Minimal test dataset to check pipeline function'
+ config_profile_description = 'Test pipeline functionality, including Space Ranger v2'
// Limit resources so that this can run on GitHub Actions
max_cpus = 2
- max_memory = '6.GB'
- max_time = '6.h'
+ max_memory = '3.GB'
+ max_time = '2.h'
- // Input data
- // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
- // TODO nf-core: Give any required params for the test so that command line flags are not needed
- input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv'
+ // Input and output
+ input = "https://raw.githubusercontent.com/nf-core/test-datasets/spatialvi/testdata/human-brain-cancer-11-mm-capture-area-ffpe-2-standard_v2_ffpe_cytassist/samplesheet_spaceranger.csv"
+ spaceranger_probeset = "https://raw.githubusercontent.com/nf-core/test-datasets/spatialvi/testdata/human-brain-cancer-11-mm-capture-area-ffpe-2-standard_v2_ffpe_cytassist/outs/probe_set.csv"
+ spaceranger_reference = "https://raw.githubusercontent.com/nf-core/test-datasets/spatialvi/testdata/homo_sapiens_chr22_reference.tar.gz"
- // Genome references
- genome = 'R64-1-1'
+ // Parameters
+ qc_min_counts = 5
+ qc_min_genes = 3
+ outdir = 'results'
}
diff --git a/conf/test_downstream.config b/conf/test_downstream.config
new file mode 100644
index 0000000..263b0b5
--- /dev/null
+++ b/conf/test_downstream.config
@@ -0,0 +1,31 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Defines input files and everything required to run a fast and simple pipeline test.
+
+ Use as follows:
+ nextflow run nf-core/spatialvi -profile test_downstream,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+ config_profile_name = 'Downstream test profile'
+ config_profile_description = 'Test pipeline for downstream (post-Space Ranger) functionality'
+
+ // Limit resources so that this can run on GitHub Actions
+ max_cpus = 2
+ max_memory = '3.GB'
+ max_time = '2.h'
+
+ // Input and output
+ input = 'https://raw.githubusercontent.com/nf-core/test-datasets/spatialvi/testdata/human-brain-cancer-11-mm-capture-area-ffpe-2-standard_v2_ffpe_cytassist/samplesheet_downstream.csv'
+ spaceranger_probeset = "https://raw.githubusercontent.com/nf-core/test-datasets/spatialvi/testdata/human-brain-cancer-11-mm-capture-area-ffpe-2-standard_v2_ffpe_cytassist/outs/probe_set.csv"
+ spaceranger_reference = "https://raw.githubusercontent.com/nf-core/test-datasets/spatialvi/testdata/homo_sapiens_chr22_reference.tar.gz"
+
+ // Parameters
+ qc_min_counts = 5
+ qc_min_genes = 3
+ outdir = 'results'
+}
diff --git a/conf/test_full.config b/conf/test_full.config
index d2c9c61..770a747 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -15,10 +15,5 @@ params {
config_profile_description = 'Full test dataset to check pipeline function'
// Input data for full size test
- // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA)
- // TODO nf-core: Give any required params for the test so that command line flags are not needed
- input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv'
-
- // Genome references
- genome = 'R64-1-1'
+ input = 'https://raw.githubusercontent.com/nf-core/test-datasets/spatialvi/testdata/test-dataset/samplesheet.csv'
}
diff --git a/conf/test_spaceranger_v1.config b/conf/test_spaceranger_v1.config
new file mode 100644
index 0000000..5b36146
--- /dev/null
+++ b/conf/test_spaceranger_v1.config
@@ -0,0 +1,31 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Defines input files and everything required to run a fast and simple pipeline test.
+
+ Use as follows:
+ nextflow run nf-core/spatialvi -profile test_spaceranger_v1,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+ config_profile_name = 'Space Ranger v1 test profile'
+ config_profile_description = 'Test pipeline functionality, including Space Ranger v1'
+
+ // Limit resources so that this can run on GitHub Actions
+ max_cpus = 2
+ max_memory = '3.GB'
+ max_time = '2.h'
+
+ // Input and output
+ input = 'https://raw.githubusercontent.com/nf-core/test-datasets/spatialvi/testdata/human-ovarian-cancer-1-standard_v1_ffpe/samplesheet_spaceranger.csv'
+ spaceranger_probeset = 'https://raw.githubusercontent.com/nf-core/test-datasets/spatialvi/testdata/human-ovarian-cancer-1-standard_v1_ffpe/Visium_Human_Transcriptome_Probe_Set_v1.0_GRCh38-2020-A.csv'
+ spaceranger_reference = "https://raw.githubusercontent.com/nf-core/test-datasets/spatialvi/testdata/homo_sapiens_chr22_reference.tar.gz"
+
+ // Parameters
+ qc_min_counts = 5
+ qc_min_genes = 3
+ outdir = 'results'
+}
diff --git a/docs/images/mqc_fastqc_adapter.png b/docs/images/mqc_fastqc_adapter.png
deleted file mode 100755
index 361d0e4..0000000
Binary files a/docs/images/mqc_fastqc_adapter.png and /dev/null differ
diff --git a/docs/images/mqc_fastqc_counts.png b/docs/images/mqc_fastqc_counts.png
deleted file mode 100755
index cb39ebb..0000000
Binary files a/docs/images/mqc_fastqc_counts.png and /dev/null differ
diff --git a/docs/images/mqc_fastqc_quality.png b/docs/images/mqc_fastqc_quality.png
deleted file mode 100755
index a4b89bf..0000000
Binary files a/docs/images/mqc_fastqc_quality.png and /dev/null differ
diff --git a/docs/images/nf-core-spatialvi_logo_light.png b/docs/images/nf-core-spatialvi_logo_light.png
index 2467c33..533c426 100644
Binary files a/docs/images/nf-core-spatialvi_logo_light.png and b/docs/images/nf-core-spatialvi_logo_light.png differ
diff --git a/docs/images/sm-Clusters_scanpy_spatial.png b/docs/images/sm-Clusters_scanpy_spatial.png
new file mode 100644
index 0000000..de0daae
Binary files /dev/null and b/docs/images/sm-Clusters_scanpy_spatial.png differ
diff --git a/docs/images/sm-STdeconvolve_sc_clusters.png b/docs/images/sm-STdeconvolve_sc_clusters.png
new file mode 100644
index 0000000..98645b4
Binary files /dev/null and b/docs/images/sm-STdeconvolve_sc_clusters.png differ
diff --git a/docs/images/sm-STdeconvolve_st_scatterpies.png b/docs/images/sm-STdeconvolve_st_scatterpies.png
new file mode 100644
index 0000000..70fce20
Binary files /dev/null and b/docs/images/sm-STdeconvolve_st_scatterpies.png differ
diff --git a/docs/images/sm-Topics_LDA_spatial.png b/docs/images/sm-Topics_LDA_spatial.png
new file mode 100644
index 0000000..d2033ad
Binary files /dev/null and b/docs/images/sm-Topics_LDA_spatial.png differ
diff --git a/docs/images/sm-UMAP_LDA_topics.png b/docs/images/sm-UMAP_LDA_topics.png
new file mode 100644
index 0000000..1976c5b
Binary files /dev/null and b/docs/images/sm-UMAP_LDA_topics.png differ
diff --git a/docs/images/sm-UMAP_clusters_embedding_density.png b/docs/images/sm-UMAP_clusters_embedding_density.png
new file mode 100644
index 0000000..5c9380c
Binary files /dev/null and b/docs/images/sm-UMAP_clusters_embedding_density.png differ
diff --git a/docs/images/sm-st_QC_out.png b/docs/images/sm-st_QC_out.png
new file mode 100644
index 0000000..6a96556
Binary files /dev/null and b/docs/images/sm-st_QC_out.png differ
diff --git a/docs/images/sm-st_bayes_clusters.png b/docs/images/sm-st_bayes_clusters.png
new file mode 100644
index 0000000..6fb606d
Binary files /dev/null and b/docs/images/sm-st_bayes_clusters.png differ
diff --git a/docs/images/sm-st_bayes_clusters_enhanced.png b/docs/images/sm-st_bayes_clusters_enhanced.png
new file mode 100644
index 0000000..e4fedd7
Binary files /dev/null and b/docs/images/sm-st_bayes_clusters_enhanced.png differ
diff --git a/docs/images/sm-violin_topics_LDA.png b/docs/images/sm-violin_topics_LDA.png
new file mode 100644
index 0000000..c2191c5
Binary files /dev/null and b/docs/images/sm-violin_topics_LDA.png differ
diff --git a/docs/images/spatialvi_subway.png b/docs/images/spatialvi_subway.png
new file mode 100644
index 0000000..978148f
Binary files /dev/null and b/docs/images/spatialvi_subway.png differ
diff --git a/docs/output.md b/docs/output.md
index 115da0e..ee2eed5 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -2,58 +2,130 @@
## Introduction
-This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline.
+This document describes the output produced by the pipeline. Most of the output
+is contained within HTML reports created with [Quarto](https://quarto.org/), but
+there are also other files that you can either analyse further yourself or
+explore interactively with _e.g._ [TissUUmaps](https://tissuumaps.github.io/).
+
+The directories listed below will be created in the results directory after the
+pipeline has finished. Results for individual samples will be created in
+subdirectories named after each sample ID (`<sample_id>/`). All paths are
+relative to the top-level results directory.
+
+The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes
+data using the following steps:
+
+- [Space Ranger](#space-ranger)
+- [Data](#data)
+- [Reports](#reports)
+ - [Quality controls and filtering](#quality-controls-and-filtering)
+ - [Clustering](#clustering)
+ - [Spatially variable genes](#spatially-variable-genes)
+- [Workflow reporting](#workflow-reporting)
+ - [Pipeline information](#pipeline-information) - Report metrics generated
+ during the workflow execution
+
+## Space Ranger
-The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory.
+
+Output files
+
+- `<sample_id>/spaceranger/`
+ - `outs/spatial/tissue_[hi/low]res_image.png`: High and low resolution images.
+ - `outs/spatial/tissue_positions_list.csv`: Spot barcodes and their array
+ positions.
+ - `outs/spatial/scalefactors_json.json`: Scale conversion factors for the
+ spots.
+ - `outs/filtered_feature_bc_matrix/barcodes.tsv.gz`: List of barcode IDs.
+ - `outs/filtered_feature_bc_matrix/features.tsv.gz`: List of feature IDs.
+ - `outs/filtered_feature_bc_matrix/matrix.mtx.gz`: Matrix of UMIs, barcodes
+ and features.
+
+
+
+All files produced by Space Ranger are currently published as output of this
+pipeline, regardless of whether they are used downstream or not; you can find more
+information about these files at the [10X website](https://support.10xgenomics.com/spatial-gene-expression/software/pipelines/latest/output/overview).
-
+## Data
-## Pipeline overview
+
+Output files
+
+- `<sample_id>/data/`
+ - `sdata_processed.zarr`: Processed data in SpatialData format.
+ - `adata_processed.h5ad`: Processed data in AnnData format.
+ - `spatially_variable_genes.csv`: List of spatially variable genes.
-The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps:
+
-- [FastQC](#fastqc) - Raw read QC
-- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline
-- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution
+These files contain the data processed by the pipeline in `.zarr` and `.h5ad`
+formats, which can be used for further downstream analyses if desired; the
+unprocessed data is also present in these files. The data can also be loaded
+into the [TissUUmaps](https://tissuumaps.github.io/) browser-based tool for
+interactive visualisation and exploration. The list of spatially variable genes
+is added as a convenience if you want to explore it in _e.g._ Excel.
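+
+As a minimal sketch (the sample ID and output directory below are hypothetical,
+and the `anndata` Python package is assumed to be available in your
+environment), the processed AnnData file can be inspected from the command
+line:
+
+```bash
+# Print a summary of the spots (observations) and genes (variables)
+python -c "import anndata; print(anndata.read_h5ad('results/SAMPLE_1/data/adata_processed.h5ad'))"
+```
+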
-### FastQC
+## Reports
Output files
-- `fastqc/`
- - `*_fastqc.html`: FastQC report containing quality metrics.
- - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images.
+- `<sample_id>/reports/`
+ - `_extensions/`: Quarto nf-core extension, common to all reports.
-[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/).
+### Quality controls and filtering
-![MultiQC - FastQC sequence counts plot](images/mqc_fastqc_counts.png)
+
+Output files
-![MultiQC - FastQC mean quality scores plot](images/mqc_fastqc_quality.png)
+- `<sample_id>/reports/`
+ - `quality_controls.html`: Rendered HTML report.
+ - `quality_controls.yml`: YAML file containing parameters used in the report.
+ - `quality_controls.qmd`: Quarto document used for rendering the report.
-![MultiQC - FastQC adapter content plot](images/mqc_fastqc_adapter.png)
+
-:::note
-The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality.
-:::
+Report containing analyses related to quality controls and filtering of spatial
+data. Spots are filtered based on their total counts, the number of expressed
+genes and their presence in the tissue; you can find more details in the report
+itself.
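+
+If too many or too few spots are removed for your tissue, the count and gene
+thresholds can be tuned with the `--qc_min_counts` and `--qc_min_genes`
+parameters; the values and paths in this sketch are hypothetical:
+
+```bash
+nextflow run nf-core/spatialvi \
+    --input samplesheet.csv \
+    --outdir results \
+    --qc_min_counts 500 \
+    --qc_min_genes 250 \
+    -profile docker
+```
+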
-### MultiQC
+### Clustering
Output files
-- `multiqc/`
- - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser.
- - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline.
- - `multiqc_plots/`: directory containing static images from the report in various formats.
+- `<sample_id>/reports/`
+ - `clustering.html`: Rendered HTML report.
+ - `clustering.yml`: YAML file containing parameters used in the report.
+ - `clustering.qmd`: Quarto document used for rendering the report.
-[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory.
+Report containing analyses related to normalisation, dimensionality reduction,
+clustering and spatial visualisation. Leiden clustering is currently the only
+option; you can find more details in the report itself.
+
+### Spatially variable genes
+
+
+Output files
-Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQC. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see .
+- `<sample_id>/reports/`
+ - `spatially_variable_genes.html`: Rendered HTML report.
+ - `spatially_variable_genes.yml`: YAML file containing parameters used in the report.
+ - `spatially_variable_genes.qmd`: Quarto document used for rendering the report.
+
+
+
+Report containing analyses related to differential expression testing and
+spatially variable genes. The [Moran's I](https://en.wikipedia.org/wiki/Moran%27s_I)
+score is currently the only option for spatial testing; you can find more
+details in the report itself.
+
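+For reference, Moran's I for a gene with expression values $x_i$ measured
+across $N$ spots, mean expression $\bar{x}$ and spatial weights $w_{ij}$ is
+defined as:
+
+$$
+I = \frac{N}{\sum_i \sum_j w_{ij}} \cdot \frac{\sum_i \sum_j w_{ij} (x_i - \bar{x})(x_j - \bar{x})}{\sum_i (x_i - \bar{x})^2}
+$$
+
+Values close to 1 indicate spatially clustered expression, values around 0
+indicate no spatial structure and negative values indicate dispersion.
+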
+## Workflow reporting
### Pipeline information
@@ -65,7 +137,14 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ
- Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameters are used when running the pipeline.
- Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`.
- Parameters used by the pipeline run: `params.json`.
+- `multiqc/`
+ - Report generated by MultiQC: `multiqc_report.html`.
+ - Data and plots generated by MultiQC: `multiqc_data/` and `multiqc_plots/`.
-[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage.
+[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent
+functionality for generating various reports relevant to the running and
+execution of the pipeline. This will allow you to troubleshoot errors with the
+running of the pipeline, and also provide you with other information such as
+launch commands, run times and resource usage.
diff --git a/docs/usage.md b/docs/usage.md
index fced1c9..af86b41 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -4,60 +4,142 @@
> _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._
-## Introduction
-
-
-
## Samplesheet input
-You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below.
+You will need to create a samplesheet with information about the samples you
+would like to analyse before running the pipeline. It has to be a comma-separated
+file whose format depends on the input data type, as described in the examples
+below. Use the `--input` parameter to specify its location.
```bash
--input '[path to samplesheet file]'
```
-### Multiple runs of the same sample
+There are two types of samplesheets that the pipeline can handle: those
+specifying _raw data_ (to be analysed by Space Ranger) and _processed data_
+(_i.e._ already analysed by Space Ranger). The workflow will automatically
+detect the samplesheet type and run the appropriate analysis steps. The two
+types of samplesheet are described in the following sections.
+
+### Raw spatial data
+
+This section describes samplesheets for processing _raw spatial data_ yet to be analysed with Space Ranger.
-The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes:
+Here is an example of a typical samplesheet for analysing FFPE or fresh frozen (FF) data with bright field microscopy
+imagery:
-```csv title="samplesheet.csv"
-sample,fastq_1,fastq_2
-CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
-CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz
-CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz
+```no-highlight
+sample,fastq_dir,image,slide,area
+SAMPLE_1,fastqs_1/,hires_1.png,V11J26,B1
+SAMPLE_2,fastqs_2/,hires_2.png,V11J26,B1
```
-### Full samplesheet
+You may also supply a compressed tarball containing the FASTQ files in lieu of a
+directory path:
-The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below.
+```no-highlight
+sample,fastq_dir,image,slide,area
+SAMPLE_1,fastqs_1.tar.gz,hires_1.png,V11J26,B1
+SAMPLE_2,fastqs_2.tar.gz,hires_2.png,V11J26,B1
+```
+
+For CytAssist samples, the `image` column is replaced with the `cytaimage` column:
-A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice.
+```no-highlight
+sample,fastq_dir,cytaimage,slide,area
+SAMPLE_1,fastqs_1/,cytassist_1.tif,V11J26,B1
+SAMPLE_2,fastqs_2/,cytassist_2.tif,V11J26,B1
+```
-```csv title="samplesheet.csv"
-sample,fastq_1,fastq_2
-CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
-CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz
-CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz
-TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz,
-TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz,
-TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz,
-TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz,
+Depending on the experimental setup, (additional) colour composite fluorescence images or dark background
+fluorescence images can be supplied using the `colorizedimage` or `darkimage` columns, respectively.
+
+Please refer to the following table for an overview of all supported columns:
+
+| Column | Description |
+| ------------------ | --------------------------------------------------------------------------------------------------------------------- |
+| `sample` | Unique sample identifier. MUST match the prefix of the fastq files |
+| `fastq_dir` | Path to directory where the sample FASTQ files are stored. May be a `.tar.gz` file instead of a directory. |
+| `image` | Brightfield microscopy image |
+| `cytaimage` | Brightfield tissue image captured with the CytAssist device |
+| `colorizedimage` | A colour composite of one or more fluorescence image channels saved as a single-page, single-file colour TIFF or JPEG |
+| `darkimage` | Dark background fluorescence microscopy image |
+| `slide` | The Visium slide ID used for the sequencing. |
+| `area` | Which slide area contains the tissue sample. |
+| `manual_alignment` | Path to the manual alignment file (optional) |
+| `slidefile` | Slide specification as JSON. Overrides `slide` and `area` if specified. (optional) |
+
+> [!NOTE]
+>
+> - You need to specify _at least one_ of `image`, `cytaimage`, `darkimage`,
+> `colorizedimage`. Most commonly, you'll specify `image` for bright field
+> microscopy data, or `cytaimage` for tissue scans generated with the 10x
+> CytAssist device. Please refer to the [Space Ranger documentation](https://support.10xgenomics.com/spatial-gene-expression/software/pipelines/latest/what-is-space-ranger)
+> for details on how multiple image types can be combined.
+> - The `manual_alignment` column is only required for samples for which a
+> manual alignment file is needed and can be ignored if you're using automatic
+> alignment.
+
+If you are unsure, please see the Visium documentation for details regarding the
+different variants of [FASTQ directory structures](https://support.10xgenomics.com/spatial-gene-expression/software/pipelines/latest/using/fastq-input)
+and [slide parameters](https://support.10xgenomics.com/spatial-gene-expression/software/pipelines/latest/using/slide-info)
+appropriate for your samples.
+
+### Processed data
+
+If your data has already been processed by Space Ranger and you are only
+interested in running downstream steps, the samplesheet looks as follows:
+
+```no-highlight
+sample,spaceranger_dir
+SAMPLE_1,results/SAMPLE_1/outs
+SAMPLE_2,results/SAMPLE_2/outs
```
-| Column | Description |
-| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). |
-| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". |
-| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". |
+You may alternatively supply a compressed tarball containing the Space Ranger output:
-An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.
+```no-highlight
+sample,spaceranger_dir
+SAMPLE_1,outs.tar.gz
+SAMPLE_2,outs.tar.gz
+```
+
+| Column | Description |
+| ----------------- | ----------------------------------------------------------------------------------------- |
+| `sample` | Unique sample identifier. |
+| `spaceranger_dir` | Output directory generated by Space Ranger. May be a `.tar.gz` file instead of a directory. |
+
+The Space Ranger output directory is typically called `outs` and contains both
+gene expression matrices and spatial information.
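+
+As a rough sketch (the sample ID and paths are hypothetical), such a directory
+typically contains at least the following:
+
+```bash
+ls SAMPLE_1/outs
+# filtered_feature_bc_matrix/   spatial/   ...
+ls SAMPLE_1/outs/spatial
+# scalefactors_json.json   tissue_hires_image.png   tissue_lowres_image.png
+# tissue_positions_list.csv   ...
+```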
+
+## Space Ranger
+
+The pipeline exposes several of Space Ranger's parameters when executing with
+raw spatial data. Space Ranger is resource-intensive, requiring a lot of memory
+(64 GB) and several threads (8) to run. You can find the Space Ranger documentation at
+the [10X website](https://support.10xgenomics.com/spatial-gene-expression/software/pipelines/latest/what-is-space-ranger).
+
+You are only able to run Space Ranger on the [officially supported organisms](https://support.10xgenomics.com/spatial-gene-expression/software/downloads/latest):
+human and mouse. If you have already downloaded a reference, you may supply the
+path to its directory (or a download link from the 10X website above) using the
+`--spaceranger_reference` parameter; otherwise, the pipeline will download the
+default human reference for you automatically.
+
+> [!NOTE]
+> For FFPE and CytAssist experiments, you need to manually supply the
+> appropriate probeset using the `--spaceranger_probeset` parameter. Please refer
+> to the [Space Ranger Downloads page](https://support.10xgenomics.com/spatial-gene-expression/software/downloads/latest)
+> to obtain the correct probeset.
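+
+As a minimal sketch (all paths below are placeholders), a run that supplies
+both a local reference and a probeset might look like this:
+
+```bash
+nextflow run \
+    nf-core/spatialvi \
+    --input samplesheet.csv \
+    --outdir results \
+    --spaceranger_reference /path/to/spaceranger-reference \
+    --spaceranger_probeset /path/to/probe_set.csv \
+    -profile docker
+```
+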
## Running the pipeline
The typical command for running the pipeline is as follows:
```bash
-nextflow run nf-core/spatialvi --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker
+nextflow run \
+ nf-core/spatialvi \
+ --input <SAMPLESHEET> \
+ --outdir <OUTDIR> \
+ -profile docker
```
This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles.
@@ -88,9 +170,8 @@ nextflow run nf-core/spatialvi -profile docker -params-file params.yaml
with `params.yaml` containing:
```yaml
-input: './samplesheet.csv'
-outdir: './results/'
-genome: 'GRCh37'
+input: '<SAMPLESHEET>'
+outdir: '<OUTDIR>'
<...>
```
@@ -110,9 +191,9 @@ It is a good idea to specify a pipeline version when running the pipeline on you
First, go to the [nf-core/spatialvi releases page](https://github.com/nf-core/spatialvi/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag.
-This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports.
+This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future.
-To further assist in reproducbility, you can use share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter.
+To further assist in reproducibility, you can share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter.
:::tip
If you wish to share such a profile (such as uploading it as supplementary material for academic publications), make sure to NOT include cluster-specific paths to files, nor institution-specific profiles.
@@ -130,16 +211,19 @@ Use this parameter to choose a configuration profile. Profiles can give configur
Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below.
-:::info
-We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported.
-:::
+> [!NOTE]
+> We highly recommend the use of Docker or Singularity containers for full
+> pipeline reproducibility, however when this is not possible, Conda is
+> partially supported. Please note that Conda is not at all supported for Space
+> Ranger processing, and only supported on non-ARM64 architectures for analyses
+> downstream of Space Ranger.
The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation).
Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important!
They are loaded in sequence, so later profiles can overwrite earlier profiles.
-If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer enviroment.
+If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer environment.
- `test`
- A profile with a complete configuration for automated testing
diff --git a/env/.dockerignore b/env/.dockerignore
new file mode 100644
index 0000000..ad4a18b
--- /dev/null
+++ b/env/.dockerignore
@@ -0,0 +1,2 @@
+*
+!environment.yml
diff --git a/env/Dockerfile b/env/Dockerfile
new file mode 100644
index 0000000..f6b50ae
--- /dev/null
+++ b/env/Dockerfile
@@ -0,0 +1,45 @@
+#
+# First stage: Quarto installation
+#
+FROM ubuntu:20.04 as quarto
+ARG QUARTO_VERSION=1.3.450
+ARG TARGETARCH
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends \
+ ca-certificates \
+ curl \
+ && apt-get clean
+
+RUN mkdir -p /opt/quarto \
+ && curl -o quarto.tar.gz -L "https://github.com/quarto-dev/quarto-cli/releases/download/v${QUARTO_VERSION}/quarto-${QUARTO_VERSION}-linux-${TARGETARCH}.tar.gz" \
+ && tar -zxvf quarto.tar.gz -C /opt/quarto/ --strip-components=1 \
+ && rm quarto.tar.gz
+
+#
+# Second stage: Conda environment
+#
+FROM condaforge/mambaforge:23.11.0-0
+COPY --from=quarto /opt/quarto /opt/quarto
+ENV PATH="${PATH}:/opt/quarto/bin"
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends \
+ pkg-config \
+ && apt-get clean
+
+# Install packages using Mamba; also remove static libraries, python bytecode
+# files and javascript source maps that are not required for execution
+COPY environment.yml ./
+RUN mamba env update --name base --file environment.yml \
+ && mamba clean --all --force-pkgs-dirs --yes \
+ && find /opt/conda -follow -type f -name '*.a' -delete \
+ && find /opt/conda -follow -type f -name '*.pyc' -delete \
+ && find /opt/conda -follow -type f -name '*.js.map' -delete
+
+# Set environment variable for leidenalg-related ARM64 issue
+ENV LD_PRELOAD=/opt/conda/lib/libgomp.so.1
+
+CMD /bin/bash
+
+LABEL \
+ authors = "Erik Fasterius, Christophe Avenel" \
+ description = "Dockerfile for nf-core/spatialvi report modules"
diff --git a/env/environment.yml b/env/environment.yml
new file mode 100644
index 0000000..c376fb2
--- /dev/null
+++ b/env/environment.yml
@@ -0,0 +1,19 @@
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ - python=3.10
+ - jupyter=1.0.0
+ - leidenalg=0.9.1
+ - papermill=2.3.4
+ - pip=23.0.1
+ - gcc=13.2.0
+ - libgdal=3.8.3
+ - gxx=13.2.0
+ - imagecodecs=2024.1.1
+ - pip:
+ - scanpy==1.10.0
+ - squidpy==1.4.1
+ - spatialdata==0.1.2
+ - spatialdata-io==0.1.2
+ - spatialdata-plot==0.2.1
diff --git a/main.nf b/main.nf
index 02fad16..d750244 100644
--- a/main.nf
+++ b/main.nf
@@ -17,23 +17,10 @@ nextflow.enable.dsl = 2
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
-include { SPATIALVI } from './workflows/spatialvi'
+include { SPATIALVI } from './workflows/spatialvi'
include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_spatialvi_pipeline'
include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_spatialvi_pipeline'
-include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_spatialvi_pipeline'
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- GENOME PARAMETER VALUES
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-// TODO nf-core: Remove this line if you don't need a FASTA file
-// This is an example of how to use getGenomeAttribute() to fetch parameters
-// from igenomes.config using `--genome`
-params.fasta = getGenomeAttribute('fasta')
-
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
NAMED WORKFLOWS FOR PIPELINE
@@ -46,7 +33,7 @@ params.fasta = getGenomeAttribute('fasta')
workflow NFCORE_SPATIALVI {
take:
- samplesheet // channel: samplesheet read in from --input
+ samplesheet // file: samplesheet read in from --input
main:
@@ -88,7 +75,7 @@ workflow {
// WORKFLOW: Run main workflow
//
NFCORE_SPATIALVI (
- PIPELINE_INITIALISATION.out.samplesheet
+ params.input
)
//
diff --git a/modules.json b/modules.json
index 5940c72..d9db588 100644
--- a/modules.json
+++ b/modules.json
@@ -14,6 +14,22 @@
"branch": "master",
"git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a",
"installed_by": ["modules"]
+ },
+ "quartonotebook": {
+ "branch": "master",
+ "git_sha": "93b7e1bf63944488fe77ad490a9de62a73959bed",
+ "installed_by": ["modules"],
+ "patch": "modules/nf-core/quartonotebook/quartonotebook.diff"
+ },
+ "spaceranger/count": {
+ "branch": "master",
+ "git_sha": "2f0ef0cd414ea43e33625023c72b6af936dce63d",
+ "installed_by": ["modules"]
+ },
+ "untar": {
+ "branch": "master",
+ "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
+ "installed_by": ["modules"]
}
}
},
diff --git a/modules/local/read_data.nf b/modules/local/read_data.nf
new file mode 100644
index 0000000..d0f3d30
--- /dev/null
+++ b/modules/local/read_data.nf
@@ -0,0 +1,48 @@
+//
+// Read ST 10x visium and SC 10x data with spatialdata_io and save to `SpatialData` file
+//
+process READ_DATA {
+
+ tag "${meta.id}"
+ label 'process_low'
+
+ container "docker.io/erikfas/spatialvi"
+
+ input:
+ tuple val (meta), path("${meta.id}/*")
+
+ output:
+ tuple val(meta), path("sdata_raw.zarr"), emit: sdata_raw
+ path("versions.yml") , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ exit 1, "The READ_DATA module does not support Conda/Mamba, please use Docker / Singularity / Podman instead."
+ }
+ """
+ # Fix required directory structure
+ mkdir "${meta.id}/spatial"
+ mv "${meta.id}/scalefactors_json.json" \\
+ "${meta.id}/tissue_hires_image.png" \\
+ "${meta.id}/tissue_lowres_image.png" \\
+ "${meta.id}/tissue_positions.csv" \\
+ "${meta.id}/spatial/"
+
+ # Set environment variables
+ export XDG_CACHE_HOME="./.xdg_cache_home"
+ export XDG_DATA_HOME="./.xdg_data_home"
+
+ # Execute read data script
+ read_data.py \\
+ --SRCountDir "${meta.id}" \\
+ --output_sdata sdata_raw.zarr
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ spatialdata_io: \$(python -c "import spatialdata_io; print(spatialdata_io.__version__)")
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap
index bfebd80..7c3ff58 100644
--- a/modules/nf-core/multiqc/tests/main.nf.test.snap
+++ b/modules/nf-core/multiqc/tests/main.nf.test.snap
@@ -38,4 +38,4 @@
},
"timestamp": "2024-02-29T08:49:25.457567"
}
-}
\ No newline at end of file
+}
diff --git a/modules/nf-core/quartonotebook/Dockerfile b/modules/nf-core/quartonotebook/Dockerfile
new file mode 100644
index 0000000..78d2ab2
--- /dev/null
+++ b/modules/nf-core/quartonotebook/Dockerfile
@@ -0,0 +1,38 @@
+#
+# First stage: Quarto installation
+#
+FROM ubuntu:20.04 as quarto
+ARG QUARTO_VERSION=1.3.433
+ARG TARGETARCH
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends \
+ ca-certificates \
+ curl \
+ && apt-get clean
+
+RUN mkdir -p /opt/quarto \
+ && curl -o quarto.tar.gz -L "https://github.com/quarto-dev/quarto-cli/releases/download/v${QUARTO_VERSION}/quarto-${QUARTO_VERSION}-linux-${TARGETARCH}.tar.gz" \
+ && tar -zxvf quarto.tar.gz -C /opt/quarto/ --strip-components=1 \
+ && rm quarto.tar.gz
+
+#
+# Second stage: Conda environment
+#
+FROM condaforge/mambaforge:24.1.2-0@sha256:64c45c1a743737f61cf201f54cae974b5c853be94f9c1a84f5e82e0e854f0407
+COPY --from=quarto /opt/quarto /opt/quarto
+ENV PATH="${PATH}:/opt/quarto/bin"
+
+# Install packages using Mamba; also remove static libraries, python bytecode
+# files and javascript source maps that are not required for execution
+COPY environment.yml ./
+RUN mamba env update --name base --file environment.yml \
+ && mamba clean --all --force-pkgs-dirs --yes \
+ && find /opt/conda -follow -type f -name '*.a' -delete \
+ && find /opt/conda -follow -type f -name '*.pyc' -delete \
+ && find /opt/conda -follow -type f -name '*.js.map' -delete
+
+CMD /bin/bash
+
+LABEL \
+ authors = "Erik Fasterius" \
+ description = "Dockerfile for the quartonotebook nf-core module"
diff --git a/modules/nf-core/quartonotebook/environment.yml b/modules/nf-core/quartonotebook/environment.yml
new file mode 100644
index 0000000..1084ec0
--- /dev/null
+++ b/modules/nf-core/quartonotebook/environment.yml
@@ -0,0 +1,12 @@
+name: quartonotebook
+
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+
+dependencies:
+ - conda-forge::jupyter=1.0.0
+ - conda-forge::matplotlib=3.4.3
+ - conda-forge::papermill=2.4.0
+ - conda-forge::r-rmarkdown=2.25
diff --git a/modules/nf-core/quartonotebook/main.nf b/modules/nf-core/quartonotebook/main.nf
new file mode 100644
index 0000000..c21abf7
--- /dev/null
+++ b/modules/nf-core/quartonotebook/main.nf
@@ -0,0 +1,103 @@
+include { dumpParamsYaml; indentCodeBlock } from "./parametrize"
+
+process QUARTONOTEBOOK {
+ tag "$meta.id"
+ label 'process_low'
+
+ container "docker.io/erikfas/spatialvi"
+
+ input:
+ tuple val(meta), path(notebook)
+ val parameters
+ path input_files
+ path extensions
+
+ output:
+ tuple val(meta), path("*.html") , emit: html
+ tuple val(meta), path("${notebook}"), emit: notebook
+ tuple val(meta), path("artifacts/*"), emit: artifacts, optional: true
+ tuple val(meta), path("params.yml") , emit: params_yaml, optional: true
+ tuple val(meta), path("_extensions"), emit: extensions, optional: true
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ // Exit if running this module with -profile conda / -profile mamba
+ // This is because of issues with getting a homogenous environment across
+ // both AMD64 and ARM64 architectures; please find more information at
+ // https://github.com/nf-core/modules/pull/4876#discussion_r1483541037.
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ exit 1, "The QUARTONOTEBOOK module does not support Conda/Mamba, please use Docker / Singularity / Podman instead."
+ }
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def parametrize = (task.ext.parametrize == null) ? true : task.ext.parametrize
+ def implicit_params = (task.ext.implicit_params == null) ? true : task.ext.implicit_params
+ def meta_params = (task.ext.meta_params == null) ? true : task.ext.meta_params
+
+ // Dump parameters to yaml file.
+ // Using a YAML file over using the CLI params because
+ // - No issue with escaping
+ // - Allows passing nested maps instead of just single values
+ // - Allows running with the language-agnostic `--execute-params`
+ def params_cmd = ""
+ def render_args = ""
+ if (parametrize) {
+ nb_params = [:]
+ if (implicit_params) {
+ nb_params["cpus"] = task.cpus
+ nb_params["artifact_dir"] = "artifacts"
+ nb_params["input_dir"] = "./"
+ }
+ if (meta_params) {
+ nb_params["meta"] = meta
+ }
+ nb_params += parameters
+ params_cmd = dumpParamsYaml(nb_params)
+ render_args = "--execute-params params.yml"
+ }
+ """
+ # Dump .params.yml heredoc (section will be empty if parametrization is disabled)
+ ${indentCodeBlock(params_cmd, 4)}
+
+ # Create output directory
+ mkdir artifacts
+
+ # Set environment variables needed for Quarto rendering
+ export XDG_CACHE_HOME="./.xdg_cache_home"
+ export XDG_DATA_HOME="./.xdg_data_home"
+
+ # Set parallelism for BLAS/MKL etc. to avoid over-booking of resources
+ export MKL_NUM_THREADS="$task.cpus"
+ export OPENBLAS_NUM_THREADS="$task.cpus"
+ export OMP_NUM_THREADS="$task.cpus"
+ export NUMBA_NUM_THREADS="$task.cpus"
+
+ # Render notebook
+ quarto render \\
+ ${notebook} \\
+ ${render_args} \\
+ ${args} \\
+ --output ${prefix}.html
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ quarto: \$(quarto -v)
+ papermill: \$(papermill --version | cut -f1 -d' ')
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ touch ${prefix}.html
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ quarto: \$(quarto -v)
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/quartonotebook/meta.yml b/modules/nf-core/quartonotebook/meta.yml
new file mode 100644
index 0000000..5d95e8b
--- /dev/null
+++ b/modules/nf-core/quartonotebook/meta.yml
@@ -0,0 +1,83 @@
+name: "quartonotebook"
+description: Render a Quarto notebook, including parametrization.
+keywords:
+ - quarto
+ - notebook
+ - reports
+ - python
+ - r
+tools:
+ - quartonotebook:
+ description: An open-source scientific and technical publishing system.
+ homepage: https://quarto.org/
+ documentation: https://quarto.org/docs/reference/
+ tool_dev_url: https://github.com/quarto-dev/quarto-cli
+ licence: ["MIT"]
+ - papermill:
+ description: Parameterize, execute, and analyze notebooks
+ homepage: https://github.com/nteract/papermill
+ documentation: http://papermill.readthedocs.io/en/latest/
+ tool_dev_url: https://github.com/nteract/papermill
+ licence: ["BSD 3-clause"]
+
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1', single_end:false ]`.
+ - notebook:
+ type: file
+ description: The Quarto notebook to be rendered.
+ pattern: "*.{qmd}"
+ - parameters:
+ type: map
+ description: |
+ Groovy map with notebook parameters which will be passed to Quarto to
+ generate parametrized reports.
+ - input_files:
+ type: file
+ description: One or multiple files serving as input data for the notebook.
+ pattern: "*"
+ - extensions:
+ type: file
+ description: |
+ A quarto `_extensions` directory with custom template(s) to be
+ available for rendering.
+ pattern: "*"
+
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1', single_end:false ]`.
+ - html:
+ type: file
+ description: HTML report generated by Quarto.
+ pattern: "*.html"
+ - notebook:
+ type: file
+ description: The original, un-rendered notebook.
+ pattern: "*.[qmd,ipynb,rmd]"
+ - artifacts:
+ type: file
+ description: Artifacts generated during report rendering.
+ pattern: "*"
+ - params_yaml:
+ type: file
+ description: Parameters used during report rendering.
+ pattern: "*"
+ - extensions:
+ type: file
+ description: Quarto extensions used during report rendering.
+ pattern: "*"
+ - versions:
+ type: file
+ description: File containing software versions.
+ pattern: "versions.yml"
+
+authors:
+ - "@fasterius"
+maintainers:
+ - "@fasterius"
diff --git a/modules/nf-core/quartonotebook/parametrize.nf b/modules/nf-core/quartonotebook/parametrize.nf
new file mode 100644
index 0000000..b3d8cea
--- /dev/null
+++ b/modules/nf-core/quartonotebook/parametrize.nf
@@ -0,0 +1,36 @@
+import org.yaml.snakeyaml.Yaml
+import org.yaml.snakeyaml.DumperOptions
+
+
+/**
+ * Multiline code blocks need to have the same indentation level
+ * as the `script:` section. This function re-indents code to the specified level.
+ */
+def indentCodeBlock(code, n_spaces) {
+ def indent_str = " ".multiply(n_spaces)
+ return code.stripIndent().split("\n").join("\n" + indent_str)
+}
+
+/**
+ * Create a config YAML file from a groovy map
+ *
+ * @params task The process' `task` variable
+ * @returns a line to be inserted in the bash script.
+ */
+def dumpParamsYaml(params) {
+ DumperOptions options = new DumperOptions();
+ options.setDefaultFlowStyle(DumperOptions.FlowStyle.BLOCK);
+ def yaml = new Yaml(options)
+ def yaml_str = yaml.dump(params)
+
+ // Writing the params.yml file directly as follows does not work.
+ // It only works in 'exec:', but not if there is a `script:` section:
+ // task.workDir.resolve('params.yml').text = yaml_str
+
+ // Therefore, we inject it into the bash script:
+ return """\
+ cat <<"END_PARAMS_SECTION" > ./params.yml
+ ${indentCodeBlock(yaml_str, 8)}
+ END_PARAMS_SECTION
+ """
+}
diff --git a/modules/nf-core/quartonotebook/quartonotebook.diff b/modules/nf-core/quartonotebook/quartonotebook.diff
new file mode 100644
index 0000000..61538bd
--- /dev/null
+++ b/modules/nf-core/quartonotebook/quartonotebook.diff
@@ -0,0 +1,18 @@
+Changes in module 'nf-core/quartonotebook'
+--- modules/nf-core/quartonotebook/main.nf
++++ modules/nf-core/quartonotebook/main.nf
+@@ -4,11 +4,7 @@
+ tag "$meta.id"
+ label 'process_low'
+
+- // NB: You'll likely want to override this with a container containing all
+- // required dependencies for your analyses. You'll at least need Quarto
+- // itself, Papermill and whatever language you are running your analyses on;
+- // you can see an example in this module's Dockerfile.
+- container "docker.io/erikfas/quartonotebook"
++ container "docker.io/erikfas/spatialvi"
+
+ input:
+ tuple val(meta), path(notebook)
+
+************************************************************
diff --git a/modules/nf-core/quartonotebook/tests/main.nf.test b/modules/nf-core/quartonotebook/tests/main.nf.test
new file mode 100644
index 0000000..aeec8b1
--- /dev/null
+++ b/modules/nf-core/quartonotebook/tests/main.nf.test
@@ -0,0 +1,212 @@
+nextflow_process {
+
+ name "Test Process QUARTONOTEBOOK"
+ script "../main.nf"
+ process "QUARTONOTEBOOK"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "quartonotebook"
+
+ test("test notebook - [qmd:r]") {
+
+ config "./no-parametrization.config"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.test_data['generic']['notebooks']['quarto_r'], checkIfExists: true) // Notebook
+ ]
+ input[1] = [:] // Parameters
+ input[2] = [] // Input files
+ input[3] = [] // Extensions
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ )
+ }
+
+ }
+
+ test("test notebook - [qmd:python]") {
+
+ config "./no-parametrization.config"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.test_data['generic']['notebooks']['quarto_python'], checkIfExists: true) // Notebook
+ ]
+ input[1] = [] // Parameters
+ input[2] = [] // Input files
+ input[3] = [] // Extensions
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ process.out.versions,
+ process.out.artifacts,
+ process.out.params_yaml,
+ ).match() },
+ { assert path(process.out.html[0][1]).readLines().any { it.contains('Hello world') } }
+ )
+ }
+
+ }
+
+ test("test notebook - parametrized - [qmd:r]") {
+
+ config "./with-parametrization.config"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.test_data['generic']['notebooks']['quarto_r'], checkIfExists: true) // Notebook
+ ]
+ input[1] = [input_filename: "hello.txt", n_iter: 12] // parameters
+ input[2] = file(params.test_data['generic']['txt']['hello'], checkIfExists: true) // Input files
+ input[3] = [] // Extensions
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ )
+ }
+
+ }
+
+ test("test notebook - parametrized - [qmd:python]") {
+
+ config "./with-parametrization.config"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.test_data['generic']['notebooks']['quarto_python'], checkIfExists: true) // Notebook
+ ]
+ input[1] = [input_filename: "hello.txt", n_iter: 12] // parameters
+ input[2] = file(params.test_data['generic']['txt']['hello'], checkIfExists: true) // Input files
+ input[3] = [] // Extensions
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ process.out.versions,
+ process.out.artifacts,
+ process.out.params_yaml,
+ ).match() },
+ { assert path(process.out.html[0][1]).readLines().any { it.contains('Hello world') } }
+ )
+ }
+
+ }
+
+ test("test notebook - parametrized - [rmd]") {
+
+ config "./with-parametrization.config"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.test_data['generic']['notebooks']['rmarkdown'], checkIfExists: true) // notebook
+ ]
+ input[1] = [input_filename: "hello.txt", n_iter: 12] // parameters
+ input[2] = file(params.test_data['generic']['txt']['hello'], checkIfExists: true) // Input files
+ input[3] = [] // Extensions
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ )
+ }
+
+ }
+
+ test("test notebook - parametrized - [ipynb]") {
+
+ config "./with-parametrization.config"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.test_data['generic']['notebooks']['ipython_ipynb'], checkIfExists: true) // notebook
+ ]
+ input[1] = [input_filename: "hello.txt", n_iter: 12] // parameters
+ input[2] = file(params.test_data['generic']['txt']['hello'], checkIfExists: true) // Input files
+ input[3] = [] // Extensions
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ )
+ }
+
+ }
+
+ test("test notebook - stub - [qmd:r]") {
+
+ config "./no-parametrization.config"
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.test_data['generic']['notebooks']['quarto_r'], checkIfExists: true) // Notebook
+ ]
+ input[1] = [:] // Parameters
+ input[2] = [] // Input files
+ input[3] = [] // Extensions
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ )
+ }
+
+ }
+
+}
\ No newline at end of file
diff --git a/modules/nf-core/quartonotebook/tests/main.nf.test.snap b/modules/nf-core/quartonotebook/tests/main.nf.test.snap
new file mode 100644
index 0000000..f0f04cb
--- /dev/null
+++ b/modules/nf-core/quartonotebook/tests/main.nf.test.snap
@@ -0,0 +1,433 @@
+{
+ "test notebook - stub - [qmd:r]": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "quarto_r.qmd:md5,b3fa8b456efae62495c0b278a4f7694c"
+ ]
+ ],
+ "2": [
+
+ ],
+ "3": [
+
+ ],
+ "4": [
+
+ ],
+ "5": [
+ "versions.yml:md5,93481281b24bb1b44ecc4387e0957a0e"
+ ],
+ "artifacts": [
+
+ ],
+ "extensions": [
+
+ ],
+ "html": [
+ [
+ {
+ "id": "test"
+ },
+ "test.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "notebook": [
+ [
+ {
+ "id": "test"
+ },
+ "quarto_r.qmd:md5,b3fa8b456efae62495c0b278a4f7694c"
+ ]
+ ],
+ "params_yaml": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,93481281b24bb1b44ecc4387e0957a0e"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-09T11:06:33.408525"
+ },
+ "test notebook - [qmd:r]": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.html:md5,f09282296a5eee0154665975d842c07e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "quarto_r.qmd:md5,b3fa8b456efae62495c0b278a4f7694c"
+ ]
+ ],
+ "2": [
+
+ ],
+ "3": [
+
+ ],
+ "4": [
+
+ ],
+ "5": [
+ "versions.yml:md5,55e1f767fbd72aae14cbbfb638e38a90"
+ ],
+ "artifacts": [
+
+ ],
+ "extensions": [
+
+ ],
+ "html": [
+ [
+ {
+ "id": "test"
+ },
+ "test.html:md5,f09282296a5eee0154665975d842c07e"
+ ]
+ ],
+ "notebook": [
+ [
+ {
+ "id": "test"
+ },
+ "quarto_r.qmd:md5,b3fa8b456efae62495c0b278a4f7694c"
+ ]
+ ],
+ "params_yaml": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,55e1f767fbd72aae14cbbfb638e38a90"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-09T11:05:50.985424"
+ },
+ "test notebook - parametrized - [qmd:python]": {
+ "content": [
+ [
+ "versions.yml:md5,55e1f767fbd72aae14cbbfb638e38a90"
+ ],
+ [
+ [
+ {
+ "id": "test"
+ },
+ "artifact.txt:md5,8ddd8be4b179a529afa5f2ffae4b9858"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test"
+ },
+ "params.yml:md5,efd62bc975f429e8749ba787a93042dd"
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-09T09:21:18.194591"
+ },
+ "test notebook - parametrized - [rmd]": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.html:md5,2b2026646ed8b59d49fdcbd54cb3a463"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "rmarkdown_notebook.Rmd:md5,1f5e4efbb41fd499b23c5bea2fc32e68"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test"
+ },
+ "artifact.txt:md5,b10a8db164e0754105b7a99be72e3fe5"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test"
+ },
+ "params.yml:md5,efd62bc975f429e8749ba787a93042dd"
+ ]
+ ],
+ "4": [
+
+ ],
+ "5": [
+ "versions.yml:md5,55e1f767fbd72aae14cbbfb638e38a90"
+ ],
+ "artifacts": [
+ [
+ {
+ "id": "test"
+ },
+ "artifact.txt:md5,b10a8db164e0754105b7a99be72e3fe5"
+ ]
+ ],
+ "extensions": [
+
+ ],
+ "html": [
+ [
+ {
+ "id": "test"
+ },
+ "test.html:md5,2b2026646ed8b59d49fdcbd54cb3a463"
+ ]
+ ],
+ "notebook": [
+ [
+ {
+ "id": "test"
+ },
+ "rmarkdown_notebook.Rmd:md5,1f5e4efbb41fd499b23c5bea2fc32e68"
+ ]
+ ],
+ "params_yaml": [
+ [
+ {
+ "id": "test"
+ },
+ "params.yml:md5,efd62bc975f429e8749ba787a93042dd"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,55e1f767fbd72aae14cbbfb638e38a90"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-09T11:06:25.046249"
+ },
+ "test notebook - parametrized - [ipynb]": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.html:md5,d7378ec0d1fd83b44424a68bf03a8fc3"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "ipython_notebook.ipynb:md5,02a206bf6c66396827dd310e7443926d"
+ ]
+ ],
+ "2": [
+
+ ],
+ "3": [
+ [
+ {
+ "id": "test"
+ },
+ "params.yml:md5,efd62bc975f429e8749ba787a93042dd"
+ ]
+ ],
+ "4": [
+
+ ],
+ "5": [
+ "versions.yml:md5,55e1f767fbd72aae14cbbfb638e38a90"
+ ],
+ "artifacts": [
+
+ ],
+ "extensions": [
+
+ ],
+ "html": [
+ [
+ {
+ "id": "test"
+ },
+ "test.html:md5,d7378ec0d1fd83b44424a68bf03a8fc3"
+ ]
+ ],
+ "notebook": [
+ [
+ {
+ "id": "test"
+ },
+ "ipython_notebook.ipynb:md5,02a206bf6c66396827dd310e7443926d"
+ ]
+ ],
+ "params_yaml": [
+ [
+ {
+ "id": "test"
+ },
+ "params.yml:md5,efd62bc975f429e8749ba787a93042dd"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,55e1f767fbd72aae14cbbfb638e38a90"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-09T11:06:30.278412"
+ },
+ "test notebook - [qmd:python]": {
+ "content": [
+ [
+ "versions.yml:md5,55e1f767fbd72aae14cbbfb638e38a90"
+ ],
+ [
+
+ ],
+ [
+
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-09T09:21:00.324109"
+ },
+ "test notebook - parametrized - [qmd:r]": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.html:md5,a25cdff28851a163d28669d4e62655af"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "quarto_r.qmd:md5,b3fa8b456efae62495c0b278a4f7694c"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test"
+ },
+ "artifact.txt:md5,b10a8db164e0754105b7a99be72e3fe5"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test"
+ },
+ "params.yml:md5,efd62bc975f429e8749ba787a93042dd"
+ ]
+ ],
+ "4": [
+
+ ],
+ "5": [
+ "versions.yml:md5,55e1f767fbd72aae14cbbfb638e38a90"
+ ],
+ "artifacts": [
+ [
+ {
+ "id": "test"
+ },
+ "artifact.txt:md5,b10a8db164e0754105b7a99be72e3fe5"
+ ]
+ ],
+ "extensions": [
+
+ ],
+ "html": [
+ [
+ {
+ "id": "test"
+ },
+ "test.html:md5,a25cdff28851a163d28669d4e62655af"
+ ]
+ ],
+ "notebook": [
+ [
+ {
+ "id": "test"
+ },
+ "quarto_r.qmd:md5,b3fa8b456efae62495c0b278a4f7694c"
+ ]
+ ],
+ "params_yaml": [
+ [
+ {
+ "id": "test"
+ },
+ "params.yml:md5,efd62bc975f429e8749ba787a93042dd"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,55e1f767fbd72aae14cbbfb638e38a90"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-09T11:06:08.013103"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/quartonotebook/tests/no-parametrization.config b/modules/nf-core/quartonotebook/tests/no-parametrization.config
new file mode 100644
index 0000000..f686514
--- /dev/null
+++ b/modules/nf-core/quartonotebook/tests/no-parametrization.config
@@ -0,0 +1,9 @@
+profiles {
+ docker {
+ docker.runOptions = '-u $(id -u):$(id -g)'
+ }
+}
+
+process {
+ ext.parametrize = false
+}
diff --git a/modules/nf-core/quartonotebook/tests/tags.yml b/modules/nf-core/quartonotebook/tests/tags.yml
new file mode 100644
index 0000000..638b0ce
--- /dev/null
+++ b/modules/nf-core/quartonotebook/tests/tags.yml
@@ -0,0 +1,2 @@
+quartonotebook:
+ - "modules/nf-core/quartonotebook/**"
diff --git a/modules/nf-core/quartonotebook/tests/with-parametrization.config b/modules/nf-core/quartonotebook/tests/with-parametrization.config
new file mode 100644
index 0000000..ab7df66
--- /dev/null
+++ b/modules/nf-core/quartonotebook/tests/with-parametrization.config
@@ -0,0 +1,5 @@
+profiles {
+ docker {
+ docker.runOptions = '-u $(id -u):$(id -g)'
+ }
+}
diff --git a/modules/nf-core/spaceranger/count/main.nf b/modules/nf-core/spaceranger/count/main.nf
new file mode 100644
index 0000000..4f766cb
--- /dev/null
+++ b/modules/nf-core/spaceranger/count/main.nf
@@ -0,0 +1,71 @@
+process SPACERANGER_COUNT {
+ tag "$meta.id"
+ label 'process_high'
+
+ container "nf-core/spaceranger:3.0.0"
+
+ input:
+ tuple val(meta), path(reads), path(image), path(cytaimage), path(darkimage), path(colorizedimage), path(alignment), path(slidefile)
+ path(reference)
+ path(probeset)
+
+ output:
+ tuple val(meta), path("outs/**"), emit: outs
+ path "versions.yml", emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ error "SPACERANGER_COUNT module does not support Conda. Please use Docker / Singularity / Podman instead."
+ }
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ // Add flags for optional inputs on demand.
+ def probeset = probeset ? "--probe-set=\"${probeset}\"" : ""
+ def alignment = alignment ? "--loupe-alignment=\"${alignment}\"" : ""
+ def slidefile = slidefile ? "--slidefile=\"${slidefile}\"" : ""
+ def image = image ? "--image=\"${image}\"" : ""
+ def cytaimage = cytaimage ? "--cytaimage=\"${cytaimage}\"" : ""
+ def darkimage = darkimage ? "--darkimage=\"${darkimage}\"" : ""
+ def colorizedimage = colorizedimage ? "--colorizedimage=\"${colorizedimage}\"" : ""
+ """
+ spaceranger count \\
+ --id="${prefix}" \\
+ --sample="${meta.id}" \\
+ --fastqs=. \\
+ --slide="${meta.slide}" \\
+ --area="${meta.area}" \\
+ --transcriptome="${reference}" \\
+ --localcores=${task.cpus} \\
+ --localmem=${task.memory.toGiga()} \\
+ $image $cytaimage $darkimage $colorizedimage \\
+ $probeset \\
+ $alignment \\
+ $slidefile \\
+ $args
+ mv ${prefix}/outs outs
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ spaceranger: \$(spaceranger -V | sed -e "s/spaceranger spaceranger-//g")
+ END_VERSIONS
+ """
+
+ stub:
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ error "SPACERANGER_COUNT module does not support Conda. Please use Docker / Singularity / Podman instead."
+ }
+ """
+ mkdir -p outs/
+ touch outs/fake_file.txt
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ spaceranger: \$(spaceranger -V | sed -e "s/spaceranger spaceranger-//g")
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/spaceranger/count/meta.yml b/modules/nf-core/spaceranger/count/meta.yml
new file mode 100644
index 0000000..167ac8c
--- /dev/null
+++ b/modules/nf-core/spaceranger/count/meta.yml
@@ -0,0 +1,95 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
+name: "spaceranger_count"
+description: Module to use the 10x Space Ranger pipeline to process 10x spatial transcriptomics data
+keywords:
+ - align
+ - count
+ - spatial
+ - spaceranger
+ - imaging
+tools:
+ - "spaceranger":
+ description: |
+ Visium Spatial Gene Expression is a next-generation molecular profiling solution for classifying tissue
+ based on total mRNA. Space Ranger is a set of analysis pipelines that process Visium Spatial Gene Expression
+ data with brightfield and fluorescence microscope images. Space Ranger allows users to map the whole
+ transcriptome in formalin fixed paraffin embedded (FFPE) and fresh frozen tissues to discover novel
+ insights into normal development, disease pathology, and clinical translational research. Space Ranger provides
+ pipelines for end to end analysis of Visium Spatial Gene Expression experiments.
+ homepage: "https://support.10xgenomics.com/spatial-gene-expression/software/pipelines/latest/what-is-space-ranger"
+ documentation: "https://support.10xgenomics.com/spatial-gene-expression/software/pipelines/latest/what-is-space-ranger"
+ tool_dev_url: "https://support.10xgenomics.com/spatial-gene-expression/software/pipelines/latest/what-is-space-ranger"
+ licence:
+ - "10x Genomics EULA"
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', slide:'10L13-020', area: 'B1']
+
+ `id`, `slide` and `area` are mandatory information!
+ - reads:
+ type: file
+ description: |
+ List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+ respectively.
+ pattern: "${Sample_Name}_S1_L00${Lane_Number}_${I1,I2,R1,R2}_001.fastq.gz"
+ - image:
+ type: file
+ description: Brightfield tissue H&E image in JPEG or TIFF format.
+ pattern: "*.{tif,tiff,jpg,jpeg}"
+ - cytaimage:
+ type: file
+ description: |
+ CytAssist instrument captured eosin stained Brightfield tissue image with fiducial
+ frame in TIFF format. The size of this image is set at 3k in both dimensions and this image should
+ not be modified any way before passing it as input to either Space Ranger or Loupe Browser.
+ pattern: "*.{tif,tiff}"
+ - darkimage:
+ type: file
+ description: |
+ Optional for dark background fluorescence microscope image input. Multi-channel, dark-background fluorescence
+ image as either a single, multi-layer TIFF file or as multiple TIFF or JPEG files.
+ pattern: "*.{tif,tiff,jpg,jpeg}"
+ - colorizedimage:
+ type: file
+ description: |
+ Required for color composite fluorescence microscope image input.
+ A color composite of one or more fluorescence image channels saved as a single-page,
+ single-file color TIFF or JPEG.
+ pattern: "*.{tif,tiff,jpg,jpeg}"
+ - alignment:
+ type: file
+ description: OPTIONAL - Path to manual image alignment.
+ pattern: "*.json"
+ - slidefile:
+ type: file
+ description: OPTIONAL - Path to slide specifications.
+ pattern: "*.json"
+ - reference:
+ type: directory
+ description: Folder containing all the reference indices needed by Space Ranger
+ - probeset:
+ type: file
+ description: OPTIONAL - Probe set specification.
+ pattern: "*.csv"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - outs:
+ type: file
+ description: Files containing the outputs of Space Ranger, see official 10X Genomics documentation for a complete list
+ pattern: "outs/*"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@grst"
+maintainers:
+ - "@grst"
diff --git a/modules/nf-core/spaceranger/count/tests/main.nf.test b/modules/nf-core/spaceranger/count/tests/main.nf.test
new file mode 100644
index 0000000..7631d85
--- /dev/null
+++ b/modules/nf-core/spaceranger/count/tests/main.nf.test
@@ -0,0 +1,228 @@
+nextflow_process {
+
+ name "Test Process SPACERANGER_COUNT"
+ script "../main.nf"
+ config "./nextflow.config"
+ process "SPACERANGER_COUNT"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "spaceranger"
+ tag "spaceranger/count"
+ tag "spaceranger/mkgtf"
+ tag "spaceranger/mkref"
+
+ test("spaceranger v1 - homo_sapiens - fasta - gtf - fastq - tif - csv") {
+
+ setup {
+ run("SPACERANGER_MKGTF") {
+ script "../../mkgtf/main.nf"
+ process {
+ """
+ input[0] = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)
+ """
+ }
+ }
+ }
+
+ setup {
+ run("SPACERANGER_MKREF") {
+ script "../../mkref/main.nf"
+ process {
+ """
+ input[0] = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+ input[1] = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)
+ input[2] = 'homo_sapiens_chr22_reference'
+ """
+ }
+ }
+ }
+
+ when {
+ process {
+ """
+ input[0] = [
+ [
+ id: 'Visium_FFPE_Human_Ovarian_Cancer',
+ slide: 'V10L13-020',
+ area: 'D1'
+ ], // Meta map
+ [
+ file(params.test_data['homo_sapiens']['10xgenomics']['spaceranger']['test_10x_ffpe_v1_fastq_1_gz']),
+ file(params.test_data['homo_sapiens']['10xgenomics']['spaceranger']['test_10x_ffpe_v1_fastq_2_gz'])
+ ], // Reads
+ file(params.test_data['homo_sapiens']['10xgenomics']['spaceranger']['test_10x_ffpe_v1_image']), // Image
+ [], // Cytaimage
+ [], // Darkimage
+ [], // Colorizedimage
+ [], // Manual alignment (default: automatic alignment)
+ [], // Slide specification (default: automatic download)
+ ]
+ input[1] = SPACERANGER_MKREF.out.reference // Reference
+ input[2] = [] // Probeset (default: use the one included with Space Ranger)
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ process.out.versions,
+ process.out.outs.get(0).get(1).findAll { file(it).name !in [
+ 'web_summary.html',
+ 'scalefactors_json.json',
+ 'barcodes.tsv.gz',
+ 'features.tsv.gz',
+ 'matrix.mtx.gz'
+ ]}
+ ).match()
+ },
+ { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'web_summary.html' }).exists() },
+ { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'scalefactors_json.json' }).exists() },
+ { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'barcodes.tsv.gz' }).exists() },
+ { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'features.tsv.gz' }).exists() },
+ { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'matrix.mtx.gz' }).exists() }
+ )
+ }
+ }
+
+ test("spaceranger v1 (stub) - homo_sapiens - fasta - gtf - fastq - tif - csv") {
+
+ setup {
+ run("SPACERANGER_MKGTF") {
+ script "../../mkgtf/main.nf"
+ process {
+ """
+ input[0] = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)
+ """
+ }
+ }
+ }
+
+ setup {
+ run("SPACERANGER_MKREF") {
+ script "../../mkref/main.nf"
+ process {
+ """
+ input[0] = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+ input[1] = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)
+ input[2] = 'homo_sapiens_chr22_reference'
+ """
+ }
+ }
+ }
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [
+ id: 'Visium_FFPE_Human_Ovarian_Cancer',
+ slide: 'V10L13-020',
+ area: 'D1'
+ ], // Meta map
+ [
+ file(params.test_data['homo_sapiens']['10xgenomics']['spaceranger']['test_10x_ffpe_v1_fastq_1_gz']),
+ file(params.test_data['homo_sapiens']['10xgenomics']['spaceranger']['test_10x_ffpe_v1_fastq_2_gz'])
+ ], // Reads
+ file(params.test_data['homo_sapiens']['10xgenomics']['spaceranger']['test_10x_ffpe_v1_image']), // Image
+ [], // Cytaimage
+ [], // Darkimage
+ [], // Colorizedimage
+ [], // Manual alignment (default: automatic alignment)
+ [], // Slide specification (default: automatic download)
+ ]
+ input[1] = SPACERANGER_MKREF.out.reference // Reference
+ input[2] = [] // Probeset (default: use the one included with Space Ranger)
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.versions).match() }
+ )
+ }
+ }
+
+ test("spaceranger v2 - homo_sapiens - fasta - gtf - fastq - tif - csv") {
+ setup {
+ run("SPACERANGER_MKGTF") {
+ script "../../mkgtf/main.nf"
+ process {
+ """
+ input[0] = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)
+ """
+ }
+ }
+ }
+
+ setup {
+ run("SPACERANGER_MKREF") {
+ script "../../mkref/main.nf"
+ process {
+ """
+ input[0] = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+ input[1] = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)
+ input[2] = 'homo_sapiens_chr22_reference'
+ """
+ }
+ }
+ }
+
+ when {
+ process {
+ """
+ input[0] = [
+ [
+ id: 'CytAssist_11mm_FFPE_Human_Glioblastoma_2',
+ slide: 'V52Y10-317',
+ area: 'B1'
+ ], // Meta map
+ [
+ file(params.test_data['homo_sapiens']['10xgenomics']['spaceranger']['test_10x_ffpe_cytassist_fastq_1_gz']),
+ file(params.test_data['homo_sapiens']['10xgenomics']['spaceranger']['test_10x_ffpe_cytassist_fastq_2_gz'])
+ ], // Reads
+ [], // Image
+ file(params.test_data['homo_sapiens']['10xgenomics']['spaceranger']['test_10x_ffpe_cytassist_image']), // Cytaimage
+ [], // Darkimage
+ [], // Colorizedimage
+ [], // Manual alignment (default: automatic alignment)
+ file('https://s3.us-west-2.amazonaws.com/10x.spatial-slides/gpr/V52Y10/V52Y10-317.gpr') // Slide specification (default: automatic download)
+ ]
+ input[1] = SPACERANGER_MKREF.out.reference // Reference
+ input[2] = file(params.test_data['homo_sapiens']['10xgenomics']['spaceranger']['test_10x_ffpe_cytassist_probeset']) // Probeset (default: use the one included with Space Ranger)
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ process.out.versions,
+ process.out.outs.get(0).get(1).findAll { file(it).name !in [
+ 'web_summary.html',
+ 'scalefactors_json.json',
+ 'molecule_info.h5',
+ 'barcodes.tsv.gz',
+ 'features.tsv.gz',
+ 'matrix.mtx.gz',
+ 'cloupe.cloupe'
+ ]}
+ ).match()
+ },
+ { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'web_summary.html' }).exists() },
+ { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'scalefactors_json.json' }).exists() },
+ { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'molecule_info.h5' }).exists() },
+ { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'barcodes.tsv.gz' }).exists() },
+ { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'features.tsv.gz' }).exists() },
+ { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'matrix.mtx.gz' }).exists() }
+ )
+ }
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/spaceranger/count/tests/main.nf.test.snap b/modules/nf-core/spaceranger/count/tests/main.nf.test.snap
new file mode 100644
index 0000000..c13496e
--- /dev/null
+++ b/modules/nf-core/spaceranger/count/tests/main.nf.test.snap
@@ -0,0 +1,90 @@
+{
+ "spaceranger v1 (stub) - homo_sapiens - fasta - gtf - fastq - tif - csv": {
+ "content": [
+ [
+ "versions.yml:md5,1539e8a9a3d63ce3653920721d1af509"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-02T09:29:02.205153668"
+ },
+ "spaceranger v2 - homo_sapiens - fasta - gtf - fastq - tif - csv": {
+ "content": [
+ [
+ "versions.yml:md5,1539e8a9a3d63ce3653920721d1af509"
+ ],
+ [
+ "clusters.csv:md5,2cc2d0c94ec0af69f03db235f9ea6932",
+ "clusters.csv:md5,46c12f3845e28f27f2cd580cb004c0ea",
+ "clusters.csv:md5,4e5f082240b9c9903168842d1f9dbe34",
+ "clusters.csv:md5,e626eb7049baf591ea49f5d8c305621c",
+ "clusters.csv:md5,65cfb24fc937e4df903a742c1adf8b08",
+ "clusters.csv:md5,819a71787618945dacfa2d5301b953b1",
+ "clusters.csv:md5,5ae17ed02cdb9f61d7ceb0cd6922c9d4",
+ "clusters.csv:md5,641550bec22e02fff3611087f7fd6e07",
+ "clusters.csv:md5,9fbe5c79035175bc1899e9a7fc80f7ac",
+ "clusters.csv:md5,ed0c2dcca15c14a9983407ff9af0daaf",
+ "differential_expression.csv:md5,d37a8ef21699372ec4a4bdf0c43d71b7",
+ "differential_expression.csv:md5,ac3181524385c88d38a0fc17d3bdd526",
+ "differential_expression.csv:md5,557d6dfec7421c392aa6443725608cd1",
+ "differential_expression.csv:md5,1437fad68d701c97a4a46318aee45575",
+ "differential_expression.csv:md5,7a2f3d0e90782055580f4903617a7d27",
+ "differential_expression.csv:md5,41756e9570d07aee6aed710e6a965846",
+ "differential_expression.csv:md5,62ea7651c3f195d3c960c6c688dca477",
+ "differential_expression.csv:md5,b630542266c4abb71f4205922340498d",
+ "differential_expression.csv:md5,0deb97f0be7e72ad73e456092db31e6d",
+ "differential_expression.csv:md5,3bba8490f753507e7e2e29be759f218b",
+ "components.csv:md5,568bb9bcb6ee913356fcb4be3fea1911",
+ "dispersion.csv:md5,e2037b1db404f6e5d8b3144629f2500d",
+ "features_selected.csv:md5,3ba6d1315ae594963b306d94ba1180e7",
+ "projection.csv:md5,aef5d71381678d5245e471f3d5a8ab67",
+ "variance.csv:md5,475a95e51ce66e639ae21d801c455e2b",
+ "projection.csv:md5,928c0f68a9c773fba590941d3d5af7ca",
+ "projection.csv:md5,216dcc5589a083fcc27d981aa90fa2ab",
+ "filtered_feature_bc_matrix.h5:md5,f1a8f225c113974b47efffe08e70f367",
+ "metrics_summary.csv:md5,faa17487b479eab361050d3266da2efb",
+ "probe_set.csv:md5,5bfb8f12319be1b2b6c14142537c3804",
+ "raw_feature_bc_matrix.h5:md5,6e40ae93a116c6fc0adbe707b0eb415f",
+ "raw_probe_bc_matrix.h5:md5,3d5e711d0891ca2caaf301a2c1fbda91",
+ "aligned_fiducials.jpg:md5,51dcc3a32d3d5ca4704f664c8ede81ef",
+ "cytassist_image.tiff:md5,0fb04a55e5658f4d158d986a334b034d",
+ "detected_tissue_image.jpg:md5,1d3ccc1e12c4fee091b006e48b9cc16a",
+ "spatial_enrichment.csv:md5,1117792553e82feb2b4b3934907a0136",
+ "tissue_hires_image.png:md5,834706fff299024fab48e6366afc9cb9",
+ "tissue_lowres_image.png:md5,8c1fcb378f7f886301f49ffc4f84360a",
+ "tissue_positions.csv:md5,425601ef21661ec0126000f905ef044f"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-02T10:13:00.787792273"
+ },
+ "spaceranger v1 - homo_sapiens - fasta - gtf - fastq - tif - csv": {
+ "content": [
+ [
+ "versions.yml:md5,1539e8a9a3d63ce3653920721d1af509"
+ ],
+ [
+ "filtered_feature_bc_matrix.h5:md5,7e09d1cd2e1f497a698c5efde9e4af84",
+ "metrics_summary.csv:md5,07a6fcc2e20f854f8d3fcde2457a2f9a",
+ "molecule_info.h5:md5,1f2e0fd31d15509e7916e84f22632c9c",
+ "raw_feature_bc_matrix.h5:md5,5a4184a3bfaf722eec8d1a763a45906e",
+ "aligned_fiducials.jpg:md5,f6217ddd707bb189e665f56b130c3da8",
+ "detected_tissue_image.jpg:md5,c1c7e8741701a576c1ec103c1aaf98ea",
+ "tissue_hires_image.png:md5,d91f8f176ae35ab824ede87117ac0889",
+ "tissue_lowres_image.png:md5,475a04208d193191c84d7a3b5d4eb287",
+ "tissue_positions.csv:md5,748bf590c445db409d7dbdf5a08e72e8"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-02T09:37:13.128424153"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/spaceranger/count/tests/nextflow.config b/modules/nf-core/spaceranger/count/tests/nextflow.config
new file mode 100644
index 0000000..fe9d61a
--- /dev/null
+++ b/modules/nf-core/spaceranger/count/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+ withName: SPACERANGER_COUNT {
+ ext.args = '--create-bam false'
+ }
+}
diff --git a/modules/nf-core/spaceranger/count/tests/tags.yml b/modules/nf-core/spaceranger/count/tests/tags.yml
new file mode 100644
index 0000000..ad51f67
--- /dev/null
+++ b/modules/nf-core/spaceranger/count/tests/tags.yml
@@ -0,0 +1,2 @@
+spaceranger/count:
+ - "modules/nf-core/spaceranger/count/**"
diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml
new file mode 100644
index 0000000..0c9cbb1
--- /dev/null
+++ b/modules/nf-core/untar/environment.yml
@@ -0,0 +1,11 @@
+name: untar
+
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+
+dependencies:
+ - conda-forge::grep=3.11
+ - conda-forge::sed=4.7
+ - conda-forge::tar=1.34
diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf
new file mode 100644
index 0000000..8a75bb9
--- /dev/null
+++ b/modules/nf-core/untar/main.nf
@@ -0,0 +1,63 @@
+process UNTAR {
+ tag "$archive"
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
+ 'nf-core/ubuntu:20.04' }"
+
+ input:
+ tuple val(meta), path(archive)
+
+ output:
+ tuple val(meta), path("$prefix"), emit: untar
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def args2 = task.ext.args2 ?: ''
+ prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, ""))
+
+ """
+ mkdir $prefix
+
+ ## Ensures --strip-components only applied when top level of tar contents is a directory
+ ## If just files or multiple directories, place all in prefix
+ if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then
+ tar \\
+ -C $prefix --strip-components 1 \\
+ -xavf \\
+ $args \\
+ $archive \\
+ $args2
+ else
+ tar \\
+ -C $prefix \\
+ -xavf \\
+ $args \\
+ $archive \\
+ $args2
+ fi
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//')
+ END_VERSIONS
+ """
+
+ stub:
+ prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, ""))
+ """
+ mkdir $prefix
+ touch ${prefix}/file.txt
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//')
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml
new file mode 100644
index 0000000..a9a2110
--- /dev/null
+++ b/modules/nf-core/untar/meta.yml
@@ -0,0 +1,46 @@
+name: untar
+description: Extract files.
+keywords:
+ - untar
+ - uncompress
+ - extract
+tools:
+ - untar:
+ description: |
+ Extract tar.gz files.
+ documentation: https://www.gnu.org/software/tar/manual/
+ licence: ["GPL-3.0-or-later"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - archive:
+ type: file
+ description: File to be untar
+ pattern: "*.{tar}.{gz}"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - untar:
+ type: directory
+ description: Directory containing contents of archive
+ pattern: "*/"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@joseespinosa"
+ - "@drpatelh"
+ - "@matthdsm"
+ - "@jfy133"
+maintainers:
+ - "@joseespinosa"
+ - "@drpatelh"
+ - "@matthdsm"
+ - "@jfy133"
diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test
new file mode 100644
index 0000000..2a7c97b
--- /dev/null
+++ b/modules/nf-core/untar/tests/main.nf.test
@@ -0,0 +1,47 @@
+nextflow_process {
+
+ name "Test Process UNTAR"
+ script "../main.nf"
+ process "UNTAR"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "untar"
+ test("test_untar") {
+
+ when {
+ process {
+ """
+ input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ]
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out.untar).match("test_untar") },
+ )
+ }
+
+ }
+
+ test("test_untar_onlyfiles") {
+
+ when {
+ process {
+ """
+ input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ]
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out.untar).match("test_untar_onlyfiles") },
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/untar/tests/main.nf.test.snap b/modules/nf-core/untar/tests/main.nf.test.snap
new file mode 100644
index 0000000..6455029
--- /dev/null
+++ b/modules/nf-core/untar/tests/main.nf.test.snap
@@ -0,0 +1,42 @@
+{
+ "test_untar_onlyfiles": {
+ "content": [
+ [
+ [
+ [
+
+ ],
+ [
+ "hello.txt:md5,e59ff97941044f85df5297e1c302d260"
+ ]
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-28T11:49:41.320643"
+ },
+ "test_untar": {
+ "content": [
+ [
+ [
+ [
+
+ ],
+ [
+ "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9",
+ "opts.k2d:md5,a033d00cf6759407010b21700938f543",
+ "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c"
+ ]
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-28T11:49:33.795172"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/untar/tests/tags.yml b/modules/nf-core/untar/tests/tags.yml
new file mode 100644
index 0000000..feb6f15
--- /dev/null
+++ b/modules/nf-core/untar/tests/tags.yml
@@ -0,0 +1,2 @@
+untar:
+ - modules/nf-core/untar/**
diff --git a/nextflow.config b/nextflow.config
index dfece07..d18fe47 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -9,20 +9,39 @@
// Global default params, used in configs
params {
- // TODO nf-core: Specify your pipeline's command line flags
// Input options
- input = null
- // References
- genome = null
- igenomes_base = 's3://ngi-igenomes/igenomes/'
- igenomes_ignore = false
+ input = null
+
+ // Spaceranger options
+ spaceranger_reference = "https://cf.10xgenomics.com/supp/spatial-exp/refdata-gex-GRCh38-2020-A.tar.gz"
+ spaceranger_probeset = null
+ spaceranger_save_reference = false
+
+ // Quality controls and filtering
+ qc_min_counts = 500
+ qc_min_genes = 250
+ qc_min_spots = 1
+ qc_mito_threshold = 20.0
+ qc_ribo_threshold = 0.0
+ qc_hb_threshold = 100.0
+
+ // Clustering
+ cluster_n_hvgs = 2000
+ cluster_resolution = 1.0
+
+ // Spatially variable genes
+ svg_autocorr_method = "moran"
+ n_top_svgs = 14
// MultiQC options
- multiqc_config = null
- multiqc_title = null
- multiqc_logo = null
- max_multiqc_email_size = '25.MB'
- multiqc_methods_description = null
+ multiqc_config = null
+ multiqc_title = null
+ multiqc_logo = null
+ max_multiqc_email_size = '25.MB'
+ multiqc_methods_description = null
+
+ // Untar options
+ save_untar_output = false
// Boilerplate options
outdir = null
@@ -46,9 +65,9 @@ params {
// Max resource options
// Defaults only, expecting to be overwritten
- max_memory = '128.GB'
- max_cpus = 16
- max_time = '240.h'
+ max_memory = '128.GB'
+ max_cpus = 64
+ max_time = '240.h'
// Schema validation default options
validationFailUnrecognisedParams = false
@@ -174,8 +193,10 @@ profiles {
executor.cpus = 4
executor.memory = 8.GB
}
- test { includeConfig 'conf/test.config' }
- test_full { includeConfig 'conf/test_full.config' }
+ test { includeConfig 'conf/test.config' }
+ test_spaceranger_v1 { includeConfig 'conf/test_spaceranger_v1.config' }
+ test_downstream { includeConfig 'conf/test_downstream.config' }
+ test_full { includeConfig 'conf/test_full.config' }
}
// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile
@@ -191,16 +212,9 @@ plugins {
id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet
}
-// Load igenomes.config if required
-if (!params.igenomes_ignore) {
- includeConfig 'conf/igenomes.config'
-} else {
- params.genomes = [:]
-}
// Export these variables to prevent local Python/R libraries from conflicting with those in the container
// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container.
// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable.
-
env {
PYTHONNOUSERSITE = 1
R_PROFILE_USER = "/.Rprofile"
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 8673db7..84ce88c 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -20,7 +20,7 @@
"mimetype": "text/csv",
"pattern": "^\\S+\\.csv$",
"description": "Path to comma-separated file containing information about the samples in the experiment.",
- "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/spatialvi/usage#samplesheet-input).",
+        "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with either 2 or 5 columns, plus a header row. See [usage docs](https://nf-co.re/spatialvi/usage#samplesheet-input).",
"fa_icon": "fas fa-file-csv"
},
"outdir": {
@@ -43,37 +43,127 @@
}
}
},
- "reference_genome_options": {
- "title": "Reference genome options",
+
+ "spaceranger_options": {
+ "title": "Space Ranger options",
"type": "object",
- "fa_icon": "fas fa-dna",
- "description": "Reference genome related files and options required for the workflow.",
+ "fa_icon": "fas fa-rocket",
+ "description": "Options related to Space Ranger execution and raw spatial data processing",
"properties": {
- "genome": {
+ "spaceranger_probeset": {
"type": "string",
- "description": "Name of iGenomes reference.",
- "fa_icon": "fas fa-book",
- "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
+ "format": "file-path",
+ "mimetype": "text/csv",
+ "pattern": "^\\S+\\.csv$",
+ "description": "Location of Space Ranger probeset file.",
+ "fa_icon": "fas fa-file-csv",
+ "exists": true
},
- "fasta": {
+ "spaceranger_reference": {
"type": "string",
- "format": "file-path",
- "exists": true,
- "mimetype": "text/plain",
- "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$",
- "description": "Path to FASTA genome file.",
- "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.",
- "fa_icon": "far fa-file-code"
+ "format": "path",
+        "description": "Location of the Space Ranger reference directory. May be provided as a packed `tar.gz` archive.",
+        "help_text": "Please see the [10x website](https://support.10xgenomics.com/spatial-gene-expression/software/downloads/latest) to download either of the supported human or mouse references. If not specified, the GRCh38 human reference is automatically downloaded and used.",
+ "fa_icon": "fas fa-folder-open",
+ "default": "https://cf.10xgenomics.com/supp/spatial-exp/refdata-gex-GRCh38-2020-A.tar.gz",
+ "exists": true
+ }
+ }
+ },
+
+ "optional_outputs": {
+ "title": "Optional outputs",
+ "type": "object",
+ "fa_icon": "fas fa-floppy-disk",
+ "description": "Additional intermediate output files that can be optionally saved.",
+ "properties": {
+ "spaceranger_save_reference": {
+ "type": "boolean",
+ "description": "Save the extracted tar archive of the Space Ranger reference.",
+ "help_text": "By default, extracted versions of archived Space Ranger reference data will not be saved to the results directory. Specify this flag (or set to true in your config file) to copy these files to the results directory when complete.",
+ "fa_icon": "fas fa-floppy-disk"
},
- "igenomes_ignore": {
+ "save_untar_output": {
"type": "boolean",
- "description": "Do not load the iGenomes reference config.",
- "fa_icon": "fas fa-ban",
- "hidden": true,
- "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`."
+ "description": "Save extracted tar archives of input data.",
+ "help_text": "By default, extracted versions of archived input data will not be saved to the results directory. Specify this flag (or set to true in your config file) to copy these files to the results directory when complete.",
+ "fa_icon": "fas fa-floppy-disk"
+ }
+ }
+ },
+
+ "analysis_options": {
+ "title": "Analysis options",
+ "type": "object",
+ "fa_icon": "fas fa-magnifying-glass-chart",
+ "description": "Options related to the downstream analyses performed by the pipeline.",
+ "properties": {
+ "qc_min_counts": {
+ "type": "integer",
+ "default": 500,
+ "description": "The minimum number of UMIs needed in a spot for that spot to pass the filtering.",
+ "fa_icon": "fas fa-hashtag"
+ },
+ "qc_min_genes": {
+ "type": "integer",
+ "default": 250,
+ "description": "The minimum number of expressed genes in a spot needed for that spot to pass the filtering.",
+ "fa_icon": "fas fa-hashtag"
+ },
+ "qc_min_spots": {
+ "type": "integer",
+ "default": 1,
+ "description": "The minimum number of spots in which a gene is expressed for that gene to pass the filtering.",
+ "fa_icon": "fas fa-hashtag"
+ },
+ "qc_mito_threshold": {
+ "type": "number",
+ "default": 20,
+ "description": "The maximum proportion of mitochondrial content that a spot is allowed to have to pass the filtering.",
+ "help_text": "If you do not wish to filter based on mitochondrial content, set this parameter to `100`.",
+ "fa_icon": "fas fa-hashtag"
+ },
+ "qc_ribo_threshold": {
+ "type": "number",
+ "default": 0,
+        "description": "The minimum proportion of ribosomal content that a spot needs to have to pass the filtering (no filtering is done by default).",
+ "fa_icon": "fas fa-hashtag"
+ },
+ "qc_hb_threshold": {
+ "type": "number",
+ "default": 100,
+ "description": "The maximum proportion of haemoglobin content that a spot is allowed to have to pass the filtering (no filtering is done by default).",
+ "fa_icon": "fas fa-hashtag"
+ },
+ "cluster_n_hvgs": {
+ "type": "integer",
+ "default": 2000,
+ "description": "The number of top highly variable genes to use for the analyses.",
+ "fa_icon": "fas fa-hashtag"
+ },
+ "cluster_resolution": {
+ "type": "number",
+ "default": 1,
+ "description": "The resolution for the clustering of the spots.",
+ "help_text": "The resolution controls the coarseness of the clustering, where a higher resolution leads to more clusters.",
+ "fa_icon": "fas fa-circle-nodes"
+ },
+ "svg_autocorr_method": {
+ "type": "string",
+ "default": "moran",
+ "description": "The method to use for spatially variable gene autocorrelation.",
+ "enum": ["moran", "geary"],
+ "fa_icon": "fas fa-circle-nodes"
+ },
+ "n_top_svgs": {
+ "type": "integer",
+ "default": 14,
+ "description": "The number of top spatially variable genes to plot.",
+ "fa_icon": "fas fa-hashtag"
}
}
},
+
"institutional_config_options": {
"title": "Institutional config options",
"type": "object",
@@ -122,6 +212,7 @@
}
}
},
+
"max_job_request_options": {
"title": "Max job request options",
"type": "object",
@@ -132,7 +223,7 @@
"max_cpus": {
"type": "integer",
"description": "Maximum number of CPUs that can be requested for any single job.",
- "default": 16,
+ "default": 64,
"fa_icon": "fas fa-microchip",
"hidden": true,
"help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`"
@@ -157,6 +248,7 @@
}
}
},
+
"generic_options": {
"title": "Generic options",
"type": "object",
@@ -281,7 +373,13 @@
"$ref": "#/definitions/input_output_options"
},
{
- "$ref": "#/definitions/reference_genome_options"
+ "$ref": "#/definitions/spaceranger_options"
+ },
+ {
+ "$ref": "#/definitions/optional_outputs"
+ },
+ {
+ "$ref": "#/definitions/analysis_options"
},
{
"$ref": "#/definitions/institutional_config_options"
diff --git a/nf-test.config b/nf-test.config
new file mode 100644
index 0000000..b57ac7d
--- /dev/null
+++ b/nf-test.config
@@ -0,0 +1,16 @@
+config {
+ // location for all nf-tests
+ testsDir "tests"
+
+ // nf-test directory including temporary files for each test
+ workDir ".nf-test"
+
+ // location of library folder that is added automatically to the classpath
+ libDir "tests/pipeline/lib/"
+
+ // location of an optional nextflow.config file specific for executing tests
+ configFile "nextflow.config"
+
+    // The test profile is overridden via the nf-test CLI
+ profile ""
+}
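
For orientation, the tests referenced by `testsDir "tests"` are standard nf-test files; a minimal, hypothetical pipeline-level test (not one of the pipeline's actual test cases) would look roughly like the sketch below, with the profile supplied on the nf-test command line as noted in the config comment:

```groovy
// tests/pipeline/example.nf.test -- hypothetical minimal nf-test case (sketch only).
nextflow_pipeline {

    name "Example pipeline-level test"
    script "main.nf"

    test("completes successfully") {
        when {
            params {
                outdir = "$outputDir"  // outputDir is provided by nf-test at runtime
            }
        }
        then {
            assert workflow.success
        }
    }
}
```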
diff --git a/subworkflows/local/downstream.nf b/subworkflows/local/downstream.nf
new file mode 100644
index 0000000..a477bbc
--- /dev/null
+++ b/subworkflows/local/downstream.nf
@@ -0,0 +1,117 @@
+//
+// Subworkflow for downstream analyses of ST data
+//
+
+include { QUARTONOTEBOOK as QUALITY_CONTROLS } from '../../modules/nf-core/quartonotebook/main'
+include { QUARTONOTEBOOK as SPATIALLY_VARIABLE_GENES } from '../../modules/nf-core/quartonotebook/main'
+include { QUARTONOTEBOOK as CLUSTERING } from '../../modules/nf-core/quartonotebook/main'
+
+workflow DOWNSTREAM {
+
+ take:
+ sdata_raw
+
+ main:
+
+ ch_versions = Channel.empty()
+
+ //
+ // Quarto reports and extension files
+ //
+ quality_controls_notebook = file("${projectDir}/bin/quality_controls.qmd", checkIfExists: true)
+ clustering_notebook = file("${projectDir}/bin/clustering.qmd", checkIfExists: true)
+ spatially_variable_genes_notebook = file("${projectDir}/bin/spatially_variable_genes.qmd", checkIfExists: true)
+ extensions = Channel.fromPath("${projectDir}/assets/_extensions").collect()
+
+ //
+ // Quality controls and filtering
+ //
+ ch_quality_controls_input_data = sdata_raw
+ .map { it -> it[1] }
+ ch_quality_controls_notebook = sdata_raw
+ .map { tuple(it[0], quality_controls_notebook) }
+ quality_controls_params = [
+ input_sdata: "sdata_raw.zarr",
+ min_counts: params.qc_min_counts,
+ min_genes: params.qc_min_genes,
+ min_spots: params.qc_min_spots,
+ mito_threshold: params.qc_mito_threshold,
+ ribo_threshold: params.qc_ribo_threshold,
+ hb_threshold: params.qc_hb_threshold,
+ artifact_dir: "artifacts",
+ output_adata: "adata_filtered.h5ad",
+ output_sdata: "sdata_filtered.zarr",
+ ]
+ QUALITY_CONTROLS (
+ ch_quality_controls_notebook,
+ quality_controls_params,
+ ch_quality_controls_input_data,
+ extensions
+ )
+ ch_versions = ch_versions.mix(QUALITY_CONTROLS.out.versions)
+
+ //
+ // Normalisation, dimensionality reduction and clustering
+ //
+ ch_clustering_input_data = QUALITY_CONTROLS.out.artifacts
+ .map { it -> it[1] }
+ ch_clustering_notebook = QUALITY_CONTROLS.out.artifacts
+ .map { tuple(it[0], clustering_notebook) }
+ clustering_params = [
+ input_sdata: "sdata_filtered.zarr",
+ cluster_resolution: params.cluster_resolution,
+ n_hvgs: params.cluster_n_hvgs,
+ artifact_dir: "artifacts",
+ output_adata: "adata_processed.h5ad",
+ output_sdata: "sdata_processed.zarr",
+ ]
+ CLUSTERING (
+ ch_clustering_notebook,
+ clustering_params,
+ ch_clustering_input_data,
+ extensions
+ )
+ ch_versions = ch_versions.mix(CLUSTERING.out.versions)
+
+ //
+ // Spatially variable genes
+ //
+ ch_spatially_variable_genes_input_data = CLUSTERING.out.artifacts
+ .map { it -> it[1] }
+ ch_spatially_variable_genes_notebook = CLUSTERING.out.artifacts
+ .map { tuple(it[0], spatially_variable_genes_notebook) }
+ spatially_variable_genes_params = [
+ input_sdata: "sdata_processed.zarr",
+ svg_autocorr_method: params.svg_autocorr_method,
+ n_top_svgs: params.n_top_svgs,
+ artifact_dir: "artifacts",
+ output_csv: "spatially_variable_genes.csv",
+ output_adata: "adata_spatially_variable_genes.h5ad",
+ output_sdata: "sdata.zarr",
+ ]
+ SPATIALLY_VARIABLE_GENES (
+ ch_spatially_variable_genes_notebook,
+ spatially_variable_genes_params,
+ ch_spatially_variable_genes_input_data,
+ extensions
+ )
+ ch_versions = ch_versions.mix(SPATIALLY_VARIABLE_GENES.out.versions)
+
+ emit:
+ qc_html = QUALITY_CONTROLS.out.html // channel: [ meta, html ]
+ qc_sdata = QUALITY_CONTROLS.out.artifacts // channel: [ meta, h5ad ]
+ qc_nb = QUALITY_CONTROLS.out.notebook // channel: [ meta, qmd ]
+ qc_params = QUALITY_CONTROLS.out.params_yaml // channel: [ meta, yml ]
+
+ clustering_html = CLUSTERING.out.html // channel: [ html ]
+ clustering_sdata = CLUSTERING.out.artifacts // channel: [ meta, h5ad]
+ clustering_nb = CLUSTERING.out.notebook // channel: [ meta, qmd ]
+ clustering_params = CLUSTERING.out.params_yaml // channel: [ meta, yml ]
+
+ svg_html = SPATIALLY_VARIABLE_GENES.out.html // channel: [ meta, html ]
+ svg_csv = SPATIALLY_VARIABLE_GENES.out.artifacts // channel: [ meta, csv ]
+ svg_nb = SPATIALLY_VARIABLE_GENES.out.notebook // channel: [ meta, qmd ]
+ svg_params = SPATIALLY_VARIABLE_GENES.out.params_yaml // channel: [ meta, yml ]
+
+ versions = ch_versions // channel: [ versions.yml ]
+}
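
Since every `.map { it -> it[1] }` above assumes `[ meta, data ]` tuples, the subworkflow expects its input channel in that shape. A minimal, hypothetical caller (the sample id and zarr path are placeholders) might look like:

```groovy
include { DOWNSTREAM } from './subworkflows/local/downstream'

workflow {
    // Hypothetical input: one sample with its raw SpatialData zarr store.
    ch_sdata_raw = Channel.of(
        [ [ id: 'sample_A' ], file('results/sample_A/sdata_raw.zarr') ]
    )

    DOWNSTREAM ( ch_sdata_raw )

    // Named emits are available on the .out object, e.g. the QC reports:
    DOWNSTREAM.out.qc_html.view()
}
```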
diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf
new file mode 100644
index 0000000..06a9721
--- /dev/null
+++ b/subworkflows/local/input_check.nf
@@ -0,0 +1,138 @@
+//
+// Check input samplesheet and get read channels
+//
+
+include { UNTAR as UNTAR_SPACERANGER_INPUT } from "../../modules/nf-core/untar"
+include { UNTAR as UNTAR_DOWNSTREAM_INPUT } from "../../modules/nf-core/untar"
+
+workflow INPUT_CHECK {
+
+ take:
+ samplesheet // file: samplesheet read in from --input
+
+ main:
+
+ ch_versions = Channel.empty()
+
+ ch_st = Channel.fromPath(samplesheet)
+ .splitCsv ( header: true, sep: ',')
+ .branch {
+ spaceranger: !it.containsKey("spaceranger_dir")
+ downstream: it.containsKey("spaceranger_dir")
+ }
+
+ // Space Ranger analysis: --------------------------------------------------
+
+ // Split channel into tarballed and directory inputs
+ ch_spaceranger = ch_st.spaceranger
+ .map { it -> [it, it.fastq_dir]}
+ .branch {
+ tar: it[1].contains(".tar.gz")
+ dir: !it[1].contains(".tar.gz")
+ }
+
+ // Extract tarballed inputs
+ UNTAR_SPACERANGER_INPUT ( ch_spaceranger.tar )
+ ch_versions = ch_versions.mix(UNTAR_SPACERANGER_INPUT.out.versions)
+
+ // Combine extracted and directory inputs into one channel
+ ch_spaceranger_combined = UNTAR_SPACERANGER_INPUT.out.untar
+ .mix ( ch_spaceranger.dir )
+ .map { meta, dir -> meta + [fastq_dir: dir] }
+
+    // Create final meta map and check that inputs exist
+ ch_spaceranger_input = ch_spaceranger_combined.map { create_channel_spaceranger(it) }
+
+ // Downstream analysis: ----------------------------------------------------
+
+ // Split channel into tarballed and directory inputs
+ ch_downstream = ch_st.downstream
+ .map { create_channel_downstream_tar(it) }
+ .branch {
+ tar: it[1].contains(".tar.gz")
+ dir: !it[1].contains(".tar.gz")
+ }
+
+ // Extract tarballed inputs
+ UNTAR_DOWNSTREAM_INPUT ( ch_downstream.tar )
+ ch_versions = ch_versions.mix(UNTAR_DOWNSTREAM_INPUT.out.versions)
+
+ // Combine extracted and directory inputs into one channel
+ ch_downstream_combined = UNTAR_DOWNSTREAM_INPUT.out.untar
+ .mix ( ch_downstream.dir )
+ .map { meta, dir -> [sample: meta.id, spaceranger_dir: dir] }
+
+    // Create final meta map and check that input files exist
+ ch_downstream_input = ch_downstream_combined.map { create_channel_downstream(it) }
+
+ emit:
+ ch_spaceranger_input // channel: [ val(meta), [ st data ] ]
+ ch_downstream_input // channel: [ val(meta), [ st data ] ]
+ versions = ch_versions // channel: [ versions.yml ]
+}
+
+// Function to get list of [ meta, [ spaceranger_dir ]]
+def create_channel_downstream_tar(LinkedHashMap meta) {
+ meta['id'] = meta.remove('sample')
+ spaceranger_dir = meta.remove('spaceranger_dir')
+ return [meta, spaceranger_dir]
+}
+
+
+// Function to get list of [ meta, [ raw_feature_bc_matrix, tissue_positions,
+// scalefactors, hires_image, lowres_image ]]
+def create_channel_downstream(LinkedHashMap meta) {
+ meta["id"] = meta.remove("sample")
+ spaceranger_dir = file("${meta.remove('spaceranger_dir')}/**")
+ DOWNSTREAM_REQUIRED_SPACERANGER_FILES = [
+ "raw_feature_bc_matrix.h5",
+ "tissue_positions.csv",
+ "scalefactors_json.json",
+ "tissue_hires_image.png",
+ "tissue_lowres_image.png"
+ ]
+ for (f in DOWNSTREAM_REQUIRED_SPACERANGER_FILES) {
+ if(!spaceranger_dir*.name.contains(f)) {
+ error "The specified spaceranger output directory doesn't contain the required file `${f}` for sample `${meta.id}`"
+ }
+ }
+ return [meta, spaceranger_dir]
+}
+
+// Function to get list of [ meta, [ fastq_dir, tissue_hires_image, slide, area ]]
+def create_channel_spaceranger(LinkedHashMap meta) {
+ meta["id"] = meta.remove("sample")
+
+ // Convert a path in `meta` to a file object and return it. If `key` is not contained in `meta`
+ // return an empty list which is recognized as 'no file' by nextflow.
+ def get_file_from_meta = {key ->
+ v = meta.remove(key);
+ return v ? file(v) : []
+ }
+
+ fastq_dir = meta.remove("fastq_dir")
+ fastq_files = file("${fastq_dir}/${meta['id']}*.fastq.gz")
+ manual_alignment = get_file_from_meta("manual_alignment")
+ slidefile = get_file_from_meta("slidefile")
+ image = get_file_from_meta("image")
+ cytaimage = get_file_from_meta("cytaimage")
+ colorizedimage = get_file_from_meta("colorizedimage")
+ darkimage = get_file_from_meta("darkimage")
+
+ if(!fastq_files.size()) {
+ error "No `fastq_dir` specified or no samples found in folder."
+ }
+
+ check_optional_files = ["manual_alignment", "slidefile", "image", "cytaimage", "colorizedimage", "darkimage"]
+ for(k in check_optional_files) {
+ if(this.binding[k] && !this.binding[k].exists()) {
+ error "File for `${k}` is specified, but does not exist: ${this.binding[k]}."
+ }
+ }
+ if(!(image || cytaimage || colorizedimage || darkimage)) {
+ error "Need to specify at least one of 'image', 'cytaimage', 'colorizedimage', or 'darkimage' in the samplesheet"
+ }
+
+ return [meta, fastq_files, image, cytaimage, darkimage, colorizedimage, manual_alignment, slidefile]
+}
+
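
The branch above keys off the samplesheet header: rows with a `spaceranger_dir` column take the downstream-only path, while all other rows are treated as raw Space Ranger input (requiring `fastq_dir` plus at least one image column). A minimal downstream-only samplesheet, using only column names that appear in the code above and placeholder paths, could look like the following; `spaceranger_dir` may also point to a `.tar.gz` archive, which is then extracted by UNTAR_DOWNSTREAM_INPUT:

```csv
sample,spaceranger_dir
sample_A,/data/spaceranger/sample_A/outs
sample_B,/data/spaceranger/sample_B_outs.tar.gz
```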
diff --git a/subworkflows/local/spaceranger.nf b/subworkflows/local/spaceranger.nf
new file mode 100644
index 0000000..3dab2bf
--- /dev/null
+++ b/subworkflows/local/spaceranger.nf
@@ -0,0 +1,56 @@
+//
+// Raw data processing with Space Ranger
+//
+
+include { UNTAR as SPACERANGER_UNTAR_REFERENCE } from "../../modules/nf-core/untar"
+include { SPACERANGER_COUNT } from '../../modules/nf-core/spaceranger/count'
+
+workflow SPACERANGER {
+
+ take:
+ ch_data // channel: [ val(meta), [ raw st data ] ]
+
+ main:
+
+ ch_versions = Channel.empty()
+
+ //
+ // Reference files
+ //
+ ch_reference = Channel.empty()
+ if (params.spaceranger_reference ==~ /.*\.tar\.gz$/) {
+ ref_file = file(params.spaceranger_reference)
+ SPACERANGER_UNTAR_REFERENCE ([
+ [id: "reference"],
+ ref_file
+ ])
+ ch_reference = SPACERANGER_UNTAR_REFERENCE.out.untar.map({meta, ref -> ref})
+ ch_versions = ch_versions.mix(SPACERANGER_UNTAR_REFERENCE.out.versions)
+ } else {
+ ch_reference = file ( params.spaceranger_reference, type: "dir", checkIfExists: true )
+ }
+
+ //
+ // Optional: probe set
+ //
+ ch_probeset = Channel.empty()
+ if (params.spaceranger_probeset) {
+ ch_probeset = file ( params.spaceranger_probeset, checkIfExists: true )
+ } else {
+ ch_probeset = []
+ }
+
+ //
+ // Run Space Ranger count
+ //
+ SPACERANGER_COUNT (
+ ch_data,
+ ch_reference,
+ ch_probeset
+ )
+ ch_versions = ch_versions.mix(SPACERANGER_COUNT.out.versions.first())
+
+ emit:
+ sr_dir = SPACERANGER_COUNT.out.outs
+ versions = ch_versions // channel: [ versions.yml ]
+}
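
Because the reference is only untarred when it matches `*.tar.gz` and is otherwise used as a directory, both forms can be supplied. A hedged example of pointing the pipeline at local reference data via parameter overrides (all paths are illustrative):

```groovy
// Illustrative Space Ranger input overrides; paths are placeholders.
params {
    // A pre-extracted reference directory is used as-is (no UNTAR step),
    // whereas a *.tar.gz path would first be extracted by SPACERANGER_UNTAR_REFERENCE.
    spaceranger_reference = '/refs/refdata-gex-GRCh38-2020-A'

    // The probe set is optional; when unset, an empty list is passed to SPACERANGER_COUNT.
    spaceranger_probeset  = '/refs/visium_probe_set.csv'
}
```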
diff --git a/subworkflows/local/utils_nfcore_spatialvi_pipeline/main.nf b/subworkflows/local/utils_nfcore_spatialvi_pipeline/main.nf
index 2c67711..03bf27f 100644
--- a/subworkflows/local/utils_nfcore_spatialvi_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_spatialvi_pipeline/main.nf
@@ -72,36 +72,8 @@ workflow PIPELINE_INITIALISATION {
UTILS_NFCORE_PIPELINE (
nextflow_cli_args
)
- //
- // Custom validation for pipeline parameters
- //
- validateInputParameters()
-
- //
- // Create channel from input file provided through params.input
- //
- Channel
- .fromSamplesheet("input")
- .map {
- meta, fastq_1, fastq_2 ->
- if (!fastq_2) {
- return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ]
- } else {
- return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ]
- }
- }
- .groupTuple()
- .map {
- validateInputSamplesheet(it)
- }
- .map {
- meta, fastqs ->
- return [ meta, fastqs.flatten() ]
- }
- .set { ch_samplesheet }
emit:
- samplesheet = ch_samplesheet
versions = ch_versions
}
@@ -151,77 +123,42 @@ workflow PIPELINE_COMPLETION {
FUNCTIONS
========================================================================================
*/
-//
-// Check and validate pipeline parameters
-//
-def validateInputParameters() {
- genomeExistsError()
-}
-
-//
-// Validate channels from input samplesheet
-//
-def validateInputSamplesheet(input) {
- def (metas, fastqs) = input[1..2]
-
- // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end
- def endedness_ok = metas.collect{ it.single_end }.unique().size == 1
- if (!endedness_ok) {
- error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}")
- }
-
- return [ metas[0], fastqs ]
-}
-//
-// Get attribute from genome config file e.g. fasta
-//
-def getGenomeAttribute(attribute) {
- if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) {
- if (params.genomes[ params.genome ].containsKey(attribute)) {
- return params.genomes[ params.genome ][ attribute ]
- }
- }
- return null
-}
-
-//
-// Exit pipeline if incorrect --genome key provided
-//
-def genomeExistsError() {
- if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) {
- def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
- " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" +
- " Currently, the available genome keys are:\n" +
- " ${params.genomes.keySet().join(", ")}\n" +
- "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
- error(error_string)
- }
-}
//
// Generate methods description for MultiQC
//
def toolCitationText() {
- // TODO nf-core: Optionally add in-text citation tools to this list.
- // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "",
- // Uncomment function in methodsDescriptionText to render in MultiQC report
+
def citation_text = [
"Tools used in the workflow included:",
+ "AnnData (Virshup et al. 2021),",
"FastQC (Andrews 2010),",
- "MultiQC (Ewels et al. 2016)",
- "."
+ "MultiQC (Ewels et al. 2016),",
+ "Quarto (Allaire et al. 2022),",
+ "Scanpy (Wolf et al. 2018),",
+ "Space Ranger (10x Genomics)",
+ "SpatialData (Marconato et al. 2023) and",
+ "Squidpy (Palla et al. 2022)"
].join(' ').trim()
return citation_text
}
def toolBibliographyText() {
- // TODO nf-core: Optionally add bibliographic entries to this list.
-    // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "<li>Author (2023) Pub name, Journal, DOI</li>" : "",
- // Uncomment function in methodsDescriptionText to render in MultiQC report
+
def reference_text = [
-        "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354</li>"
+        '<li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: 10.1038/nbt.3820</li>',
+        '<li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: 10.1038/s41587-020-0439-x</li>',
+        '<li>Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: 10.1038/s41592-018-0046-7</li>',
+        '<li>da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: 10.1093/bioinformatics/btx192</li>',
+        '<li>Virshup I, Rybakov S, Theis FJ, Angerer P, Wolf FA. bioRxiv 2021.12.16.473007. doi: 10.1101/2021.12.16.473007</li> <li>Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. doi: 10.1093/bioinformatics/btw354</li>',
+        '<li>Allaire J, Teague C, Scheidegger C, Xie Y, Dervieux C. Quarto (2022). doi: 10.5281/zenodo.5960048</li>',
+        '<li>Wolf F, Angerer P, Theis F. SCANPY: large-scale single-cell gene expression data analysis. Genome Biol 19, 15 (2018). doi: 10.1186/s13059-017-1382-0</li> <li>Marconato L, Palla G, Yamauchi K, Virshup I, Heidari E, Treis T, Toth M, Shrestha R, Vöhringer H, Huber W, Gerstung M, Moore J, Theis F, Stegle O. SpatialData: an open and universal data framework for spatial omics. bioRxiv 2023.05.05.539647; doi: 10.1101/2023.05.05.539647</li>',
+        '<li>Palla G, Spitzer H, Klein M et al. Squidpy: a scalable framework for spatial omics analysis. Nat Methods 19, 171–178 (2022). doi: 10.1038/s41592-021-01358-2</li>