From da040753bf94b7a29ac8fe6b4e2207e0a3165c0d Mon Sep 17 00:00:00 2001 From: Erik Fasterius Date: Thu, 4 Jan 2024 12:04:52 +0100 Subject: [PATCH 01/14] Clean-up of intro doc --- README.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 71689af..f8736dd 100644 --- a/README.md +++ b/README.md @@ -57,8 +57,7 @@ nextflow run nf-core/spatialtranscriptomics \ ``` > [!WARNING] -> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; -> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/spatialtranscriptomics/usage) and the [parameter documentation](https://nf-co.re/spatialtranscriptomics/parameters). @@ -74,18 +73,18 @@ nf-core/spatialtranscriptomics was originally developed by the Jackson Laboratory1, up to the [0.1.0](https://github.com/nf-core/spatialtranscriptomics/releases/tag/0.1.0) tag. It was further developed in a collaboration between the [National Bioinformatics Infrastructure Sweden](https://nbis.se/) and [National Genomics -Infastructure](https://ngisweden.scilifelab.se/) within [SciLifeLab](https://scilifelab.se/); +Infrastructure](https://ngisweden.scilifelab.se/) within [SciLifeLab](https://scilifelab.se/); it is currently developed and maintained by [Erik Fasterius](https://github.com/fasterius) and [Christophe Avenel](https://github.com/cavenel). 
Many thanks to others who have helped out along the way too, especially [Gregor Sturm](https://github.com/grst)! -1 Supported by grants from the US National Institutes of Health +_1 Supported by grants from the US National Institutes of Health [U24CA224067](https://reporter.nih.gov/project-details/10261367) and [U54AG075941](https://reporter.nih.gov/project-details/10376627). Original authors [Dr. Sergii Domanskyi](https://github.com/sdomanskyi), Prof. Jeffrey -Chuang and Dr. Anuj Srivastava. +Chuang and Dr. Anuj Srivastava._ ## Contributions and Support From 0a075bbbf812f2948e099b76b2e0a8d4adde01c4 Mon Sep 17 00:00:00 2001 From: Erik Fasterius Date: Thu, 4 Jan 2024 12:31:30 +0100 Subject: [PATCH 02/14] Clarify and fix formatting of usage docs --- docs/usage.md | 88 +++++++++++++++++++++++++++------------------------ 1 file changed, 47 insertions(+), 41 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 8010490..694218b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -14,13 +14,17 @@ in the examples below and depends on the input data type. Use this parameter to --input '[path to samplesheet file]' ``` -The workflow will automatically detect the samplesheet type and run the appropriate analysis steps. +There are two types of samplesheets that the pipeline can handle: those +specifying _raw data_ (to be analysed by Space Ranger) and _processed data_ +(_i.e._ already analysed by Space Ranger). The workflow will automatically +detect the samplesheet type and run the appropriate analysis steps. The two +types of samplesheet are described in the following sections. ### Raw spatial data -This section describes samplesheets for processing _raw spatial data_ yet to be analyzed with Space Ranger. +This section describes samplesheets for processing _raw spatial data_ yet to be analysed with Space Ranger. 
-Here is an example of a typical samplesheet for analyzing FFPE or fresh frozen (FF) data with bright field microscopy +Here is an example of a typical samplesheet for analysing FFPE or fresh frozen (FF) data with bright field microscopy imagery: ```no-highlight @@ -46,29 +50,30 @@ SAMPLE_1,fastqs_1/,cytassist_1.tif,V11J26,B1 SAMPLE_2,fastqs_2/,cytassist_2.tif,V11J26,B1 ``` -Depending on the experimental setup, (additional) color composite fluorescence images or dark background +Depending on the experimental setup, (additional) colour composite fluorescence images or dark background fluorescence images can be supplied using the `colorizedimage` or `darkimage` columns, respectively. Please refer to the following table for an overview of all supported columns: -| Column | Description | -| ------------------ | ------------------------------------------------------------------------------------------------------------------- | -| `sample` | Unique sample identifier. MUST match the prefix of the fastq files | -| `fastq_dir` | Path to directory where the sample FASTQ files are stored. May be a `.tar.gz` file instead of a directory. | -| `image` | Brightfield microscopy image | -| `cytaimage` | Brightfield tissue image captured with Cytassist device | -| `colorizedimage` | A color composite of one or more fluorescence image channels saved as a single-page, single-file color TIFF or JPEG | -| `darkimage` | Dark background fluorescence microscopy image | -| `slide` | The Visium slide ID used for the sequencing. | -| `area` | Which slide area contains the tissue sample. | -| `manual_alignment` | Path to the manual alignment file (optional) | -| `slidefile` | Slide specification as JSON. Overrides `slide` and `area` if specified. (optional) | - -> **NB:** -> -> - You need to specify _at least one_ of `image`, `cytaimage`, `darkimage`, `colorizedimage`. 
Most commonly, you'll -> specify `image` for bright field microscopy data, or `cytaimage` for tissue scans generated with the 10x Cyatassist -> device. Please refer to the [Space Ranger documentation](https://support.10xgenomics.com/spatial-gene-expression/software/pipelines/latest/what-is-space-ranger), how multiple image types can be combined. +| Column | Description | +| ------------------ | --------------------------------------------------------------------------------------------------------------------- | +| `sample` | Unique sample identifier. MUST match the prefix of the fastq files | +| `fastq_dir` | Path to directory where the sample FASTQ files are stored. May be a `.tar.gz` file instead of a directory. | +| `image` | Brightfield microscopy image | +| `cytaimage` | Brightfield tissue image captured with Cytassist device | +| `colorizedimage` | A colour composite of one or more fluorescence image channels saved as a single-page, single-file colour TIFF or JPEG | +| `darkimage` | Dark background fluorescence microscopy image | +| `slide` | The Visium slide ID used for the sequencing. | +| `area` | Which slide area contains the tissue sample. | +| `manual_alignment` | Path to the manual alignment file (optional) | +| `slidefile` | Slide specification as JSON. Overrides `slide` and `area` if specified. (optional) | + +> [!NOTE] +> - You need to specify _at least one_ of `image`, `cytaimage`, `darkimage`, +> `colorizedimage`. Most commonly, you'll specify `image` for bright field +> microscopy data, or `cytaimage` for tissue scans generated with the 10x +> Cyatassist device. Please refer to the [Space Ranger documentation](https://support.10xgenomics.com/spatial-gene-expression/software/pipelines/latest/what-is-space-ranger), +> how multiple image types can be combined. > - The `manual_alignment` column is only required for samples for which a > manual alignment file is needed and can be ignored if you're using automatic > alignment. 
@@ -80,8 +85,8 @@ appropriate for your samples. ### Processed data -If your data has already been processed by Space Ranger and you are only interested in running downstream QC steps, -the samplesheet looks as follows: +If your data has already been processed by Space Ranger and you are only +interested in running downstream steps, the samplesheet looks as follows: ```no-highlight sample,spaceranger_dir @@ -118,15 +123,15 @@ path to its directory (or another link from the 10X website above) using the `--spaceranger_reference` parameter, otherwise the pipeline will download the default human reference for you automatically. -> **Important**: -> +> [!NOTE] > For FFPE and Cytassist experiments, you need to manually supply the appropriate probset using the `--spaceranger_probeset` parameter > Please refer to the [Spaceranger Downloads page](https://support.10xgenomics.com/spatial-gene-expression/software/downloads/latest) > to obtain the correct probeset. ## Analysis options -The pipeline is using Python and the scverse tools to do the downstream analysis (quality control, filtering, clustering, spatial differential equations). +The pipeline uses Python and the `scverse` tools to do the downstream analysis +(quality control, filtering, clustering, spatial differential expression). ### Parameters for Quality Control and Filtering: @@ -135,7 +140,7 @@ The following parameters are exposed for preprocessing: - `--st_preprocess_min_counts`: Minimum number of counts for a spot to be considered in the analysis. - `--st_preprocess_min_genes`: Minimum number of genes expressed in a spot for the spot to be considered. - `--st_preprocess_min_cells`: Minimum number of spots expressing a gene for the gene to be considered. - `--st_preprocess_fig_size`: The figure size for the plots generated during preprocessing (e.g., quality control plots). +- `--st_preprocess_fig_size`: The figure size for the plots generated during preprocessing (_e.g._, quality control plots).
- `--st_preprocess_hist_qc_max_total_counts`: Maximum total counts for the histogram plot in quality control. - `--st_preprocess_hist_qc_min_gene_counts`: Minimum gene counts for the histogram plot in quality control. - `--st_preprocess_hist_qc_bins`: Number of bins for the histogram plot in quality control. @@ -153,14 +158,14 @@ The following parameters are exposed for preprocessing: The typical command for running the pipeline is as follows: ```bash -# Run the pipeline with raw data yet to be processed by Space Ranger -nextflow run nf-core/spatialtranscriptomics --input samplesheet.csv --outdir -profile docker - -# Run pipeline with data already processed by Space Ranger -nextflow run nf-core/spatialtranscriptomics --input samplesheet.csv --outdir -profile docker +nextflow run \ + nf-core/spatialtranscriptomics \ + --input \ + --outdir \ + -profile docker ``` -This will launch the pipeline with the docker configuration profile. See below for more information about profiles. +This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. Note that the pipeline will create the following files in your working directory: @@ -188,8 +193,8 @@ nextflow run nf-core/spatialtranscriptomics -profile docker -params-file params. with `params.yaml` containing: ```yaml -input: './samplesheet.csv' -outdir: './results/' +input: '' +outdir: '' <...> ``` @@ -211,7 +216,7 @@ First, go to the [nf-core/spatialtranscriptomics releases page](https://github.c This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. -To further assist in reproducbility, you can use share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. 
+To further assist in reproducibility, you can share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. :::tip If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. @@ -229,10 +234,11 @@ Use this parameter to choose a configuration profile. Profiles can give configur Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. +> [!NOTE] > We highly recommend the use of Docker or Singularity containers for full -> pipeline reproducibility, however when this is not possible, Conda is also -> supported. Please note that Conda is not at all supported for Space Ranger -> processing, and only supported on non-ARM64 architectures for analyses +> pipeline reproducibility, however when this is not possible, Conda is +> partially supported. Please note that Conda is not at all supported for Space +> Ranger processing, and only supported on non-ARM64 architectures for analyses > downstream of Space Ranger. The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). @@ -240,7 +246,7 @@ The pipeline also dynamically loads configurations from [https://github.com/nf-c Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important!
They are loaded in sequence, so later profiles can overwrite earlier profiles. -If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer enviroment. +If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer environment. - `test` - A profile with a complete configuration for automated testing From 7528170276d83f3a70a0a8ec887dfb23f7e46b6f Mon Sep 17 00:00:00 2001 From: Erik Fasterius Date: Thu, 4 Jan 2024 12:48:58 +0100 Subject: [PATCH 03/14] Fix minor formatting of schema params --- nextflow_schema.json | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index b489a54..793553d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -40,7 +40,7 @@ "format": "file-path", "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", - "description": "Location of Space Ranger probeset file", + "description": "Location of Space Ranger probeset file.", "fa_icon": "fas fa-file-csv" }, "spaceranger_save_reference": { @@ -77,73 +77,73 @@ "st_load_min_counts": { "type": "integer", "default": 1, - "description": "Minimum genes count", + "description": "Minimum genes count.", "fa_icon": "fas fa-hashtag" }, "st_load_min_cells": { "type": "integer", "default": 1, - "description": "Minimum cells count", + "description": "Minimum cells count.", "fa_icon": "fas fa-hashtag" }, "st_preprocess_fig_size": { "type": "integer", "default": 6, - "description": "Figure size, inches", + "description": "Figure size, inches.", "fa_icon": "fas fa-up-right-and-down-left-from-center" }, "st_preprocess_min_counts": { "type": "integer", "default": 
500, - "description": "Minimum UMI count", + "description": "Minimum UMI count.", "fa_icon": "fas fa-hashtag" }, "st_preprocess_min_genes": { "type": "integer", "default": 250, - "description": "Minimum genes count", + "description": "Minimum genes count.", "fa_icon": "fas fa-hashtag" }, "st_preprocess_min_cells": { "type": "integer", "default": 1, - "description": "Minimum cells count", + "description": "Minimum cells count.", "fa_icon": "fas fa-hashtag" }, "st_preprocess_hist_qc_max_total_counts": { "type": "integer", "default": 10000, - "description": "Max total counts cutoff for histogram QC plot", + "description": "Max total counts cutoff for histogram QC plot.", "fa_icon": "fas fa-hashtag" }, "st_preprocess_hist_qc_min_gene_counts": { "type": "integer", "default": 4000, - "description": "Min total gene counts cutoff for histogram QC plot", + "description": "Min total gene counts cutoff for histogram QC plot.", "fa_icon": "fas fa-hashtag" }, "st_preprocess_hist_qc_bins": { "type": "integer", "default": 40, - "description": "Histogram QC plot number of bins", + "description": "Histogram QC plot number of bins.", "fa_icon": "fas fa-chart-simple" }, "st_cluster_resolution": { "type": "number", "default": 0.4, - "description": "Clustering resolution for ST spots", + "description": "Clustering resolution for ST spots.", "fa_icon": "fas fa-circle-nodes" }, "st_spatial_de_top_hgv": { "type": "integer", "default": 15, - "description": "Number of top highly variable genes to plot", + "description": "Number of top highly variable genes to plot.", "fa_icon": "fas fa-hashtag" }, "st_spatial_de_ncols": { "type": "integer", "default": 5, - "description": "Number of columns to group genes plots into", + "description": "Number of columns to group genes plots into.", "fa_icon": "fas fa-hashtag" } } From 819041083aa0d7ad2eacdca54a2301146cab3d6b Mon Sep 17 00:00:00 2001 From: Erik Fasterius Date: Thu, 4 Jan 2024 12:49:20 +0100 Subject: [PATCH 04/14] Add additional schema help 
texts --- nextflow_schema.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 793553d..5cab25c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -19,7 +19,7 @@ "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/spatialtranscriptomics/usage#samplesheet-input).", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 2 or 5 columns, plus a header row. See [usage docs](https://nf-co.re/spatialtranscriptomics/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" }, "outdir": { @@ -46,11 +46,13 @@ "spaceranger_save_reference": { "type": "boolean", "description": "Save the extracted tar archive of the Space Ranger reference.", + "help_text": "By default, extracted versions of archived Space Ranger reference data will not be saved to the results directory. Specify this flag (or set to true in your config file) to copy these files to the results directory when complete.", "fa_icon": "fas fa-floppy-disk" }, "save_untar_output": { "type": "boolean", "description": "Save extracted tar archives of input data.", + "help_text": "By default, extracted versions of archived input data will not be saved to the results directory.
Specify this flag (or set to true in your config file) to copy these files to the results directory when complete.", "fa_icon": "fas fa-floppy-disk" }, "email": { From cefbabb60c1e1c5abf702bc53fc677b5f75e3c24 Mon Sep 17 00:00:00 2001 From: Erik Fasterius Date: Thu, 4 Jan 2024 13:20:51 +0100 Subject: [PATCH 05/14] Add additional schema categories; add help texts --- nextflow_schema.json | 64 +++++++++++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 5cab25c..b1bdb93 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -5,6 +5,7 @@ "description": "Spatial Transcriptomics", "type": "object", "definitions": { + "input_output_options": { "title": "Input/output options", "type": "object", @@ -28,13 +29,27 @@ "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", "fa_icon": "fas fa-folder-open" }, - "spaceranger_reference": { + "email": { "type": "string", - "format": "file-path", - "description": "Location of Space Ranger reference directory. May be packed as `tar.gz` file.", - "fa_icon": "fas fa-folder-open", - "default": "https://cf.10xgenomics.com/supp/spatial-exp/refdata-gex-GRCh38-2020-A.tar.gz" + "description": "Email address for completion summary.", + "fa_icon": "fas fa-envelope", + "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" }, + "multiqc_title": { + "type": "string", + "description": "MultiQC report title. 
Printed as page header, used for filename if not otherwise specified.", + "fa_icon": "fas fa-file-signature" + } + } + }, + + "spaceranger_options": { + "title": "Space Ranger options", + "type": "object", + "fa_icon": "fas fa-rocket", + "description": "Options related to Space Ranger execution and raw spatial data processing", + "properties": { "spaceranger_probeset": { "type": "string", "format": "file-path", @@ -43,6 +58,23 @@ "description": "Location of Space Ranger probeset file.", "fa_icon": "fas fa-file-csv" }, + "spaceranger_reference": { + "type": "string", + "format": "file-path", + "description": "Location of Space Ranger reference directory. May be packed as `tar.gz` file.", + "help_text": "Please see the [10x website](https://support.10xgenomics.com/spatial-gene-expression/software/downloads/latest) to download either of the supported human or mouse references. If not specified, the GRCh38 human reference is automatically downloaded and used.", + "fa_icon": "fas fa-folder-open", + "default": "https://cf.10xgenomics.com/supp/spatial-exp/refdata-gex-GRCh38-2020-A.tar.gz" + } + } + }, + + "optional_outputs": { + "title": "Optional outputs", + "type": "object", + "fa_icon": "fas fa-floppy-disk", + "description": "Additional intermediate output files that can be optionally saved.", + "properties": { "spaceranger_save_reference": { "type": "boolean", "description": "Save the extracted tar archive of the Space Ranger reference.", @@ -54,18 +86,6 @@ "description": "Save extracted tar archives of input data.", "help_text": "By default, extracted versions of archived input data will not be saved to the results directory.
Specify this flag (or set to true in your config file) to copy these files to the results directory when complete.", "fa_icon": "fas fa-floppy-disk" - }, - "email": { - "type": "string", - "description": "Email address for completion summary.", - "fa_icon": "fas fa-envelope", - "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", - "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" - }, - "multiqc_title": { - "type": "string", - "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.", - "fa_icon": "fas fa-file-signature" } } }, @@ -73,8 +93,8 @@ "analysis_options": { "title": "Analysis option", "type": "object", + "fa_icon": "fas fa-magnifying-glass-chart", "description": "Define options for each tool in the pipeline", - "default": "", "properties": { "st_load_min_counts": { "type": "integer", @@ -199,6 +219,7 @@ } } }, + "max_job_request_options": { "title": "Max job request options", "type": "object", @@ -234,6 +255,7 @@ } } }, + "generic_options": { "title": "Generic options", "type": "object", @@ -350,6 +372,12 @@ { "$ref": "#/definitions/input_output_options" }, + { + "$ref": "#/definitions/spaceranger_options" + }, + { + "$ref": "#/definitions/optional_outputs" + }, { "$ref": "#/definitions/analysis_options" }, From 35b306d17c464c07b7eb9703dfc859e74c75e5d4 Mon Sep 17 00:00:00 2001 From: Erik Fasterius Date: Thu, 4 Jan 2024 13:24:08 +0100 Subject: [PATCH 06/14] Fix formatting with prettier --- docs/usage.md | 1 + nextflow_schema.json | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 694218b..e255a6f 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -69,6 +69,7 @@ Please refer to the following table for an overview of 
all supported columns: | `slidefile` | Slide specification as JSON. Overrides `slide` and `area` if specified. (optional) | > [!NOTE] +> > - You need to specify _at least one_ of `image`, `cytaimage`, `darkimage`, > `colorizedimage`. Most commonly, you'll specify `image` for bright field > microscopy data, or `cytaimage` for tissue scans generated with the 10x diff --git a/nextflow_schema.json b/nextflow_schema.json index b1bdb93..9e39722 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -5,7 +5,6 @@ "description": "Spatial Transcriptomics", "type": "object", "definitions": { - "input_output_options": { "title": "Input/output options", "type": "object", From 70960c3c2c2dfd845c8236ad53b94add270bf27c Mon Sep 17 00:00:00 2001 From: Erik Fasterius Date: Thu, 4 Jan 2024 16:35:29 +0100 Subject: [PATCH 07/14] Remove unused `st_load` parameters --- conf/analysis.config | 4 ---- nextflow_schema.json | 12 ------------ 2 files changed, 16 deletions(-) diff --git a/conf/analysis.config b/conf/analysis.config index 5e062eb..cc08da4 100644 --- a/conf/analysis.config +++ b/conf/analysis.config @@ -4,10 +4,6 @@ Default config options params { - // Data loading - st_load_min_counts = 1 - st_load_min_cells = 1 - // Preprocessing, QC and normalisation st_preprocess_fig_size = 6 st_preprocess_min_counts = 500 diff --git a/nextflow_schema.json b/nextflow_schema.json index 9e39722..670ad73 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -95,18 +95,6 @@ "fa_icon": "fas fa-magnifying-glass-chart", "description": "Define options for each tool in the pipeline", "properties": { - "st_load_min_counts": { - "type": "integer", - "default": 1, - "description": "Minimum genes count.", - "fa_icon": "fas fa-hashtag" - }, - "st_load_min_cells": { - "type": "integer", - "default": 1, - "description": "Minimum cells count.", - "fa_icon": "fas fa-hashtag" - }, "st_preprocess_fig_size": { "type": "integer", "default": 6, From 2eb7312cb2b5542d7e36f5c29d4f01fc6dbc5315 Mon Sep 
17 00:00:00 2001 From: Erik Fasterius Date: Thu, 4 Jan 2024 16:38:09 +0100 Subject: [PATCH 08/14] Pass `st_spatial_de_top_hvg` param to analysis --- conf/analysis.config | 2 +- modules/local/st_spatial_de.nf | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/conf/analysis.config b/conf/analysis.config index cc08da4..5fad12f 100644 --- a/conf/analysis.config +++ b/conf/analysis.config @@ -17,6 +17,6 @@ params { st_cluster_resolution = 1 // Spatial differential expression - st_spatial_de_top_hgv = 15 + st_spatial_de_top_hvg = 15 st_spatial_de_ncols = 5 } diff --git a/modules/local/st_spatial_de.nf b/modules/local/st_spatial_de.nf index 14dbc18..6f0f617 100644 --- a/modules/local/st_spatial_de.nf +++ b/modules/local/st_spatial_de.nf @@ -37,6 +37,7 @@ process ST_SPATIAL_DE { --output "st_spatial_de.html" \ -P fileNameST:${st_adata_norm} \ -P numberOfColumns:${params.st_spatial_de_ncols} \ + -P plotTopHVG:${params.st_spatial_de_top_hvg} \ -P saveDEFileName:st_gde.csv \ -P saveSpatialDEFileName:st_spatial_de.csv From a4fcad2b6613f29d8239711bc98976d6352f1de1 Mon Sep 17 00:00:00 2001 From: Erik Fasterius Date: Thu, 4 Jan 2024 16:38:56 +0100 Subject: [PATCH 09/14] Also use `st_spatial_de_top_hvg` for HVG table --- bin/st_spatial_de.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/st_spatial_de.qmd b/bin/st_spatial_de.qmd index f1f3265..d8a407a 100644 --- a/bin/st_spatial_de.qmd +++ b/bin/st_spatial_de.qmd @@ -76,7 +76,7 @@ Then we can inspect significant genes that varies in space and visualize them wi ```{python} results_tab = st_adata.var.sort_values("qval", ascending=True) results_tab.to_csv(saveSpatialDEFileName) -results_tab.head(10) +results_tab.head(plotTopHVG) ``` ```{python} From e8b627a411997d14d244bc0f609fc1d68fb9cbbf Mon Sep 17 00:00:00 2001 From: Erik Fasterius Date: Thu, 4 Jan 2024 17:41:40 +0100 Subject: [PATCH 10/14] Clarify analysis params; add help text --- nextflow_schema.json | 25 ++++++++++++++----------- 1 file 
changed, 14 insertions(+), 11 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 670ad73..19c226d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -5,6 +5,7 @@ "description": "Spatial Transcriptomics", "type": "object", "definitions": { + "input_output_options": { "title": "Input/output options", "type": "object", @@ -90,33 +91,33 @@ }, "analysis_options": { - "title": "Analysis option", + "title": "Analysis options", "type": "object", "fa_icon": "fas fa-magnifying-glass-chart", - "description": "Define options for each tool in the pipeline", + "description": "Options related to the downstream analyses performed by the pipeline.", "properties": { "st_preprocess_fig_size": { "type": "integer", "default": 6, - "description": "Figure size, inches.", + "description": "The size of the QC figures, in inches.", "fa_icon": "fas fa-up-right-and-down-left-from-center" }, "st_preprocess_min_counts": { "type": "integer", "default": 500, - "description": "Minimum UMI count.", + "description": "The minimum number of UMIs needed in a spot for that spot to pass the filtering.", "fa_icon": "fas fa-hashtag" }, "st_preprocess_min_genes": { "type": "integer", "default": 250, - "description": "Minimum genes count.", + "description": "The minimum number of expressed genes in a spot needed for that spot to pass the filtering.", "fa_icon": "fas fa-hashtag" }, "st_preprocess_min_cells": { "type": "integer", "default": 1, - "description": "Minimum cells count.", + "description": "The minimum number of spots in which a gene is expressed for that gene to pass the filtering.", "fa_icon": "fas fa-hashtag" }, "st_preprocess_hist_qc_max_total_counts": { @@ -134,25 +135,27 @@ "st_preprocess_hist_qc_bins": { "type": "integer", "default": 40, - "description": "Histogram QC plot number of bins.", + "description": "The number of bins for the QC histogram plots.", "fa_icon": "fas fa-chart-simple" }, "st_cluster_resolution": { "type": "number", "default": 0.4, -
"description": "Clustering resolution for ST spots.", + "description": "The resolution for the clustering of the spots.", + "help_text": "The resolution controls the coarseness of the clustering, where a higher resolution leads to more clusters.", "fa_icon": "fas fa-circle-nodes" }, - "st_spatial_de_top_hgv": { + "st_spatial_de_top_hvg": { "type": "integer", "default": 15, - "description": "Number of top highly variable genes to plot.", + "description": "The number of top spatially highly variable genes to plot.", "fa_icon": "fas fa-hashtag" }, "st_spatial_de_ncols": { "type": "integer", "default": 5, - "description": "Number of columns to group genes plots into.", + "description": "Number of columns to group gene plots into.", + "help_text": "The default, 5, will plot the top spatially highly variable genes into groups of 5 plots per row. This, in combination with the default number of top HVGs to plot (15), will yield three rows with 5 plots each.", "fa_icon": "fas fa-hashtag" } } From 841dcfd2c6c21fea9b547ae29f550d84e3d065cf Mon Sep 17 00:00:00 2001 From: Erik Fasterius Date: Thu, 4 Jan 2024 18:04:31 +0100 Subject: [PATCH 11/14] Move analysis params to `nextflow.config` --- conf/analysis.config | 22 ---------------------- nextflow.config | 19 ++++++++++++++++--- 2 files changed, 16 insertions(+), 25 deletions(-) delete mode 100644 conf/analysis.config diff --git a/conf/analysis.config b/conf/analysis.config deleted file mode 100644 index 5fad12f..0000000 --- a/conf/analysis.config +++ /dev/null @@ -1,22 +0,0 @@ -/* -Default config options -*/ - -params { - - // Preprocessing, QC and normalisation - st_preprocess_fig_size = 6 - st_preprocess_min_counts = 500 - st_preprocess_min_genes = 250 - st_preprocess_min_cells = 1 - st_preprocess_hist_qc_max_total_counts = 10000 - st_preprocess_hist_qc_min_gene_counts = 4000 - st_preprocess_hist_qc_bins = 40 - - // Clustering - st_cluster_resolution = 1 - - // Spatial differential expression - st_spatial_de_top_hvg = 15
- st_spatial_de_ncols = 5 -} diff --git a/nextflow.config b/nextflow.config index a801be0..e351f8b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -17,6 +17,22 @@ params { spaceranger_probeset = null spaceranger_save_reference = false + // Preprocessing, QC and normalisation + st_preprocess_fig_size = 6 + st_preprocess_min_counts = 500 + st_preprocess_min_genes = 250 + st_preprocess_min_cells = 1 + st_preprocess_hist_qc_max_total_counts = 10000 + st_preprocess_hist_qc_min_gene_counts = 4000 + st_preprocess_hist_qc_bins = 40 + + // Clustering + st_cluster_resolution = 1 + + // Spatial differential expression + st_spatial_de_top_hvg = 15 + st_spatial_de_ncols = 5 + // MultiQC options multiqc_config = null multiqc_title = null @@ -64,9 +80,6 @@ params { // Load base.config by default for all pipelines includeConfig 'conf/base.config' -// Default analysis parameters -includeConfig 'conf/analysis.config' - // Load nf-core custom profiles from different Institutions try { includeConfig "${params.custom_config_base}/nfcore_custom.config" From 06ee063749b319b6dd3112f3c479335594e6f1a5 Mon Sep 17 00:00:00 2001 From: Erik Fasterius Date: Thu, 4 Jan 2024 18:06:13 +0100 Subject: [PATCH 12/14] Fix prettier formatting --- nextflow_schema.json | 1 - 1 file changed, 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 19c226d..31648bf 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -5,7 +5,6 @@ "description": "Spatial Transcriptomics", "type": "object", "definitions": { - "input_output_options": { "title": "Input/output options", "type": "object", From 6fcdce5cb81212b4e7156a1b96a9061bd305211d Mon Sep 17 00:00:00 2001 From: Erik Fasterius Date: Thu, 4 Jan 2024 18:10:12 +0100 Subject: [PATCH 13/14] Update `fastqc` module --- modules.json | 2 +- modules/nf-core/fastqc/tests/main.nf.test | 203 ++++++++++++++++-- .../nf-core/fastqc/tests/main.nf.test.snap | 12 +- 3 files changed, 203 insertions(+), 14 deletions(-) diff --git a/modules.json 
b/modules.json index b06cceb..922f593 100644 --- a/modules.json +++ b/modules.json @@ -12,7 +12,7 @@ }, "fastqc": { "branch": "master", - "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53", + "git_sha": "617777a807a1770f73deb38c80004bac06807eef", "installed_by": ["modules"] }, "multiqc": { diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test index 6437a14..ad9bc54 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -3,23 +3,21 @@ nextflow_process { name "Test Process FASTQC" script "../main.nf" process "FASTQC" + tag "modules" tag "modules_nfcore" tag "fastqc" - test("Single-Read") { + test("sarscov2 single-end [fastq]") { when { - params { - outdir = "$outputDir" - } process { """ input[0] = [ - [ id: 'test', single_end:true ], - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) - ] + [ id: 'test', single_end:true ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] ] """ } @@ -28,14 +26,195 @@ nextflow_process { then { assertAll ( { assert process.success }, + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. // looks like this:
Mon 2 Oct 2023
test.gz
// https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 - { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" }, - { assert path(process.out.html.get(0).get(1)).getText().contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match("versions") }, - { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" } + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 paired-end [fastq]") { + + when { + process { + """ + input[0] = [ + [id: 'test', single_end: false], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 interleaved [fastq]") { + + when { + process { + """ + input[0] = [ + [id: 'test', single_end: false], // meta map + file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert 
process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 paired-end [bam]") { + + when { + process { + """ + input[0] = [ + [id: 'test', single_end: false], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("versions") } ) } } + + test("sarscov2 multiple [fastq]") { + + when { + process { + """ + input[0] = [ + [id: 'test', single_end: false], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, + { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, + { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File 
typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][2]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][3]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 custom_prefix") { + + when { + process { + """ + input[0] = [ + [ id:'mysample', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 single-end [fastq] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id: 'test', single_end:true ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.html.collect { file(it[1]).getName() } + + process.out.zip.collect { file(it[1]).getName() } + + process.out.versions ).match() } + ) + } + } + } diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap index 636a32c..5ef5afb 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test.snap +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -1,10 +1,20 @@ { + "sarscov2 single-end [fastq] - stub": { + "content": [ + [ + "test.html", + "test.zip", + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "timestamp": "2023-12-29T02:48:05.126117287" + }, "versions": { "content": [ [ 
"versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], - "timestamp": "2023-10-09T23:40:54+0000" + "timestamp": "2023-12-29T02:46:49.507942667" } } \ No newline at end of file From 0fe89c8c0ca4163827bac7b077a691cf239a3212 Mon Sep 17 00:00:00 2001 From: Erik Fasterius Date: Thu, 4 Jan 2024 18:10:40 +0100 Subject: [PATCH 14/14] Update `multiqc` module --- modules.json | 2 +- modules/nf-core/multiqc/main.nf | 2 +- modules/nf-core/multiqc/meta.yml | 1 - modules/nf-core/multiqc/tests/main.nf.test | 48 +++++++++++++------ .../nf-core/multiqc/tests/main.nf.test.snap | 21 ++++++++ 5 files changed, 57 insertions(+), 17 deletions(-) create mode 100644 modules/nf-core/multiqc/tests/main.nf.test.snap diff --git a/modules.json b/modules.json index 922f593..ad78221 100644 --- a/modules.json +++ b/modules.json @@ -17,7 +17,7 @@ }, "multiqc": { "branch": "master", - "git_sha": "4ab13872435962dadc239979554d13709e20bf29", + "git_sha": "642a0d8afe373ac45244a7947fb8a6c0a5a312d4", "installed_by": ["modules"] }, "spaceranger/count": { diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 00cc48d..70708f3 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -43,7 +43,7 @@ process MULTIQC { stub: """ - touch multiqc_data + mkdir multiqc_data touch multiqc_plots touch multiqc_report.html diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index f1aa660..45a9bc3 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,4 +1,3 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: multiqc description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test index c2dad21..d0438ed 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test +++ 
b/modules/nf-core/multiqc/tests/main.nf.test @@ -7,12 +7,9 @@ nextflow_process { tag "modules_nfcore" tag "multiqc" - test("MULTIQC: FASTQC") { + test("sarscov2 single-end [fastqc]") { when { - params { - outdir = "$outputDir" - } process { """ input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) @@ -26,20 +23,17 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert path(process.out.report.get(0)).exists() }, - { assert path(process.out.data.get(0)).exists() }, - { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("versions") } ) } } - test("MULTIQC: FASTQC and a config file") { + test("sarscov2 single-end [fastqc] [config]") { when { - params { - outdir = "$outputDir" - } process { """ input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) @@ -53,9 +47,35 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert path(process.out.report.get(0)).exists() }, - { assert path(process.out.data.get(0)).exists() }, - { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + 
process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match() } ) } diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap new file mode 100644 index 0000000..d087a9d --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -0,0 +1,21 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,f81e19ab3a8e2b6f2b5d22078117df71" + ] + ], + "timestamp": "2023-12-30T00:26:14.048089591" + }, + "sarscov2 single-end [fastqc] - stub": { + "content": [ + [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + "versions.yml:md5,f81e19ab3a8e2b6f2b5d22078117df71" + ] + ], + "timestamp": "2023-12-30T00:26:52.963964055" + } +} \ No newline at end of file