diff --git a/CHANGELOG.md b/CHANGELOG.md
index 98d5b04d..bbf061d7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#417](https://github.com/nf-core/taxprofiler/pull/417) - Added reference-free metagenome estimation with Nonpareil (added by @jfy133)
- [#466](https://github.com/nf-core/taxprofiler/pull/466) - Input database sheets now require a `db_type` column to distinguish between short- and long-read databases (added by @LilyAnderssonLee)
- [#505](https://github.com/nf-core/taxprofiler/pull/505) - Add small files to the file `tower.yml` (added by @LilyAnderssonLee)
+- [#508](https://github.com/nf-core/taxprofiler/pull/508) - Add `nanoq` as a filtering tool for nanopore reads; it is now the default for `--longread_filter_tool` (added by @LilyAnderssonLee)
### `Fixed`
diff --git a/CITATIONS.md b/CITATIONS.md
index a4788d69..36456b25 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -42,6 +42,10 @@
> Wick R (2021) Filtlong, URL: https://github.com/rrwick/Filtlong
+- [nanoq](https://github.com/esteinig/nanoq)
+
+ > Steinig, E., & Coin, L. (2022). Nanoq: ultra-fast quality control for nanopore reads. Journal of Open Source Software, 7(69). https://doi.org/10.21105/joss.02991
+
- [BBTools](http://sourceforge.net/projects/bbmap/)
> Bushnell B. (2022) BBMap, URL: http://sourceforge.net/projects/bbmap/
diff --git a/README.md b/README.md
index 2db2c603..d87c5f63 100644
--- a/README.md
+++ b/README.md
@@ -30,7 +30,7 @@
1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) or [`falco`](https://github.com/smithlabcode/falco) as an alternative option)
2. Performs optional read pre-processing
- Adapter clipping and merging (short-read: [fastp](https://github.com/OpenGene/fastp), [AdapterRemoval2](https://github.com/MikkelSchubert/adapterremoval); long-read: [porechop](https://github.com/rrwick/Porechop))
- - Low complexity and quality filtering (short-read: [bbduk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/), [PRINSEQ++](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus); long-read: [Filtlong](https://github.com/rrwick/Filtlong))
+ - Low complexity and quality filtering (short-read: [bbduk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/), [PRINSEQ++](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus); long-read: [Filtlong](https://github.com/rrwick/Filtlong), [Nanoq](https://github.com/esteinig/nanoq))
- Host-read removal (short-read: [BowTie2](http://bowtie-bio.sourceforge.net/bowtie2/); long-read: [Minimap2](https://github.com/lh3/minimap2))
- Run merging
3. Supports statistics metagenome coverage estimation ([Nonpareil](https://nonpareil.readthedocs.io/en/latest/)) and for host-read removal ([Samtools](http://www.htslib.org/))
diff --git a/conf/modules.config b/conf/modules.config
index dbd4a227..22e9fd0a 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -294,6 +294,36 @@ process {
]
}
+ withName: NANOQ { // per-process settings applied to the NANOQ long-read filtering process
+ ext.args = [ // extra command-line options, joined into a single space-separated string below
+ "-vv", // NOTE(review): presumably raises the detail level of the nanoq report - confirm against nanoq docs
+ "--min-len ${params.longread_qc_qualityfilter_minlength}", // minimum read length, taken from the pipeline parameter
+ "--min-qual ${params.longread_qc_qualityfilter_minquality}" // minimum read quality, taken from the pipeline parameter
+ ]
+ .join(' ').trim()
+ ext.prefix = { "${meta.id}_${meta.run_accession}_filtered" } // output file stem built from sample id and run accession
+ publishDir = [ // three publishing rules: filtered reads, stats report, analysis-ready reads
+ [
+ path: { "${params.outdir}/nanoq" },
+ mode: params.publish_dir_mode,
+ pattern: '*.fastq.gz',
+ enabled: params.save_preprocessed_reads // filtered reads are only published on request
+ ],
+ [
+ path: { "${params.outdir}/nanoq" },
+ mode: params.publish_dir_mode,
+ pattern: '*.stats' // no 'enabled' guard on this rule, unlike the two FASTQ rules
+ ],
+ [
+ path: { "${params.outdir}/analysis_ready_fastqs" },
+ mode: params.publish_dir_mode,
+ pattern: '*.fastq.gz',
+ enabled: params.save_analysis_ready_fastqs,
+ saveAs: { ( params.perform_runmerging == false || ( params.perform_runmerging && !meta.is_multirun ) ) && !params.perform_longread_hostremoval && !params.longread_qc_skipqualityfilter && params.perform_longread_qc && params.save_analysis_ready_fastqs ? it : null } // publish here only when no run merging applies to this sample, long-read host removal is off and long-read QC with quality filtering is on; otherwise null (nothing saved)
+ ]
+ ]
+ }
+
withName: BBMAP_BBDUK {
ext.args = [
"entropy=${params.shortread_complexityfilter_entropy}",
diff --git a/docs/output.md b/docs/output.md
index edc90bd2..6cecb839 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -20,6 +20,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
- [BBDuk](#bbduk) - Quality trimming and filtering for Illumina data
- [PRINSEQ++](#prinseq) - Quality trimming and filtering for Illunina data
- [Filtlong](#filtlong) - Quality trimming and filtering for Nanopore data
+- [Nanoq](#nanoq) - Quality trimming and filtering for Nanopore data
- [Bowtie2](#bowtie2) - Host removal for Illumina reads
- [minimap2](#minimap2) - Host removal for Nanopore reads
- [SAMtools stats](#samtools-stats) - Statistics from host removal
@@ -238,6 +239,21 @@ You will only find the `.fastq` files in the results directory if you provide `
We do _not_ recommend using Filtlong if you are performing filtering of low quality reads with ONT's basecaller Guppy.
:::
+### Nanoq
+
+[nanoq](https://github.com/esteinig/nanoq) is an ultra-fast quality filtering tool that also provides summary reports for nanopore reads.
+
+
+Output files
+
+- `nanoq/`
+ - `_filtered.fastq.gz`: Quality or long read data filtered file
+ - `_filtered.stats`: Summary statistics report
+
+
+
+You will only find the `.fastq.gz` files in the results directory if you provide `--save_preprocessed_reads`. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_fastqs`, in which case the reads will be in the folder `analysis_ready_fastqs`.
+
### Bowtie2
[Bowtie 2](https://bowtie-bio.sourceforge.net/bowtie2/index.shtml) is an ultrafast and memory-efficient tool for aligning sequencing reads to long reference sequences. It is particularly good at aligning reads of about 50 up to 100s or 1,000s of characters, and particularly good at aligning to relatively long (e.g. mammalian) genomes.
diff --git a/docs/usage.md b/docs/usage.md
index 370b8e66..4f1939e3 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -299,7 +299,7 @@ Complexity filtering is primarily a run-time optimisation step. It is not necess
There are currently three options for short-read complexity filtering: [`bbduk`](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/bbduk-guide/), [`prinseq++`](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus), and [`fastp`](https://github.com/OpenGene/fastp#low-complexity-filter).
-There is one option for long-read quality filtering: [`Filtlong`](https://github.com/rrwick/Filtlong)
+There are two options for long-read quality filtering: [`Filtlong`](https://github.com/rrwick/Filtlong) and [`nanoq`](https://github.com/esteinig/nanoq).
The tools offer different algorithms and parameters for removing low complexity reads and quality filtering. We therefore recommend reviewing the pipeline's [parameter documentation](https://nf-co.re/taxprofiler/parameters) and the documentation of the tools (see links above) to decide on optimal methods and parameters for your dataset.
diff --git a/modules.json b/modules.json
index 8c612479..d72d4e3d 100644
--- a/modules.json
+++ b/modules.json
@@ -195,6 +195,11 @@
"git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a",
"installed_by": ["modules"]
},
+ "nanoq": {
+ "branch": "master",
+ "git_sha": "cf05b61191f5df35cbbf33d47bbf8f22ca0ae0ab",
+ "installed_by": ["modules"]
+ },
"nonpareil/curve": {
"branch": "master",
"git_sha": "729335dda8ba226323edc54dec80ae959079207e",
diff --git a/modules/nf-core/nanoq/environment.yml b/modules/nf-core/nanoq/environment.yml
new file mode 100644
index 00000000..41448d5b
--- /dev/null
+++ b/modules/nf-core/nanoq/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "nanoq"
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - "bioconda::nanoq=0.10.0"
diff --git a/modules/nf-core/nanoq/main.nf b/modules/nf-core/nanoq/main.nf
new file mode 100644
index 00000000..6d35a407
--- /dev/null
+++ b/modules/nf-core/nanoq/main.nf
@@ -0,0 +1,49 @@
+process NANOQ {
+ tag "$meta.id"
+ label 'process_low'
+
+ conda "${moduleDir}/environment.yml"
+ container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/nanoq:0.10.0--h031d066_2' :
+ 'biocontainers/nanoq:0.10.0--h031d066_2'}"
+
+ input:
+ tuple val(meta), path(ontreads)
+ val(output_format) //One of the following: fastq, fastq.gz, fastq.bz2, fastq.lzma, fasta, fasta.gz, fasta.bz2, fasta.lzma.
+
+ output:
+ tuple val(meta), path("*.{stats,json}") , emit: stats
+ tuple val(meta), path("*_filtered.${output_format}") , emit: reads
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}_filtered"
+ """
+ nanoq -i $ontreads \\
+ ${args} \\
+ -r ${prefix}.stats \\
+ -o ${prefix}.$output_format
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ nanoq: \$(nanoq --version | sed -e 's/nanoq //g')
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}_filtered"
+ """
+ echo "" | gzip > ${prefix}.$output_format
+ touch ${prefix}.stats
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ nanoq: \$(nanoq --version | sed -e 's/nanoq //g')
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/nanoq/meta.yml b/modules/nf-core/nanoq/meta.yml
new file mode 100644
index 00000000..85c0c978
--- /dev/null
+++ b/modules/nf-core/nanoq/meta.yml
@@ -0,0 +1,60 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "nanoq"
+description: Nanoq implements ultra-fast read filters and summary reports for high-throughput nanopore reads.
+keywords:
+ - nanoq
+ - Read filters
+ - Read trimming
+ - Read report
+tools:
+ - "nanoq":
+ description: "Ultra-fast quality control and summary reports for nanopore reads"
+ homepage: "https://github.com/esteinig/nanoq"
+ documentation: "https://github.com/esteinig/nanoq"
+ tool_dev_url: "https://github.com/esteinig/nanoq"
+ doi: "10.21105/joss.02991"
+ licence: ["MIT"]
+
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1', single_end:false ]`
+
+ - ontreads:
+ type: file
+ description: Compressed or uncompressed nanopore reads in fasta or fastq formats.
+ pattern: "*.{fa,fna,faa,fasta,fq,fastq}{,.gz,.bz2,.xz}"
+
+ - output_format:
+ type: string
+ description: "Specifies the output format. One of these formats: fasta, fastq; fasta.gz, fastq.gz; fasta.bz2, fastq.bz2; fasta.lzma, fastq.lzma."
+
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1', single_end:false ]`
+
+ - stats:
+ type: file
+ description: Summary report of reads statistics.
+ pattern: "*.{stats,json}"
+
+ - reads:
+ type: file
+ description: Filtered reads.
+ pattern: "*.{fasta,fastq}{,.gz,.bz2,.lzma}"
+
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+
+authors:
+ - "@LilyAnderssonLee"
+maintainers:
+ - "@LilyAnderssonLee"
diff --git a/modules/nf-core/nanoq/tests/main.nf.test b/modules/nf-core/nanoq/tests/main.nf.test
new file mode 100644
index 00000000..1dfbae33
--- /dev/null
+++ b/modules/nf-core/nanoq/tests/main.nf.test
@@ -0,0 +1,122 @@
+nextflow_process {
+
+ name "Test Process NANOQ"
+ script "../main.nf"
+ process "NANOQ"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "nanoq"
+
+ test("sarscov2 - nanopore_uncompressed") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true)
+ ]
+
+ input[1] = 'fastq'
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("sarscov2 - nanopore_compressed_gz") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true)
+ ]
+ input[1] = 'fastq.gz'
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+ test("sarscov2 - nanopore_compressed_bz2") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true)
+ ]
+ input[1] = 'fastq.bz2'
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+ test("sarscov2 - nanopore_compressed_lzma") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true)
+ ]
+ input[1] = 'fastq.lzma'
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("sarscov2 - nanopore_compressed_gz - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true)
+ ]
+ input[1] = 'fastq.gz'
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+}
diff --git a/modules/nf-core/nanoq/tests/main.nf.test.snap b/modules/nf-core/nanoq/tests/main.nf.test.snap
new file mode 100644
index 00000000..b5dda2a7
--- /dev/null
+++ b/modules/nf-core/nanoq/tests/main.nf.test.snap
@@ -0,0 +1,267 @@
+{
+ "sarscov2 - nanopore_compressed_gz": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_filtered.fastq.gz:md5,7567d853ada6ac142332619d0b541d76"
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6"
+ ],
+ "reads": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_filtered.fastq.gz:md5,7567d853ada6ac142332619d0b541d76"
+ ]
+ ],
+ "stats": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.04.1"
+ },
+ "timestamp": "2024-07-11T11:39:32.117229"
+ },
+ "sarscov2 - nanopore_compressed_gz - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_filtered.stats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_filtered.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6"
+ ],
+ "reads": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_filtered.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "stats": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_filtered.stats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.04.1"
+ },
+ "timestamp": "2024-07-11T11:42:06.039307"
+ },
+ "sarscov2 - nanopore_compressed_bz2": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_filtered.fastq.bz2:md5,b53cf14fd4eb5b16c459c41f03cc8a4b"
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6"
+ ],
+ "reads": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_filtered.fastq.bz2:md5,b53cf14fd4eb5b16c459c41f03cc8a4b"
+ ]
+ ],
+ "stats": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.04.1"
+ },
+ "timestamp": "2024-07-11T11:39:36.674647"
+ },
+ "sarscov2 - nanopore_compressed_lzma": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_filtered.fastq.lzma:md5,65dda701689f913734dc245b68c89e07"
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6"
+ ],
+ "reads": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_filtered.fastq.lzma:md5,65dda701689f913734dc245b68c89e07"
+ ]
+ ],
+ "stats": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.04.1"
+ },
+ "timestamp": "2024-07-11T11:39:41.51344"
+ },
+ "sarscov2 - nanopore_uncompressed": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_filtered.fastq:md5,7567d853ada6ac142332619d0b541d76"
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6"
+ ],
+ "reads": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_filtered.fastq:md5,7567d853ada6ac142332619d0b541d76"
+ ]
+ ],
+ "stats": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.04.1"
+ },
+ "timestamp": "2024-07-11T11:39:26.868897"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/nanoq/tests/tags.yml b/modules/nf-core/nanoq/tests/tags.yml
new file mode 100644
index 00000000..37457df1
--- /dev/null
+++ b/modules/nf-core/nanoq/tests/tags.yml
@@ -0,0 +1,2 @@
+nanoq:
+ - "modules/nf-core/nanoq/**"
diff --git a/nextflow.config b/nextflow.config
index ae255fdd..3903aa4c 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -79,10 +79,13 @@ params {
shortread_qc_dedup = false
perform_longread_qc = false
+ longread_adapterremoval_tool = 'porechop'
longread_qc_skipadaptertrim = false
longread_qc_skipqualityfilter = false
+ longread_filter_tool = 'nanoq'
longread_qc_qualityfilter_minlength = 1000
longread_qc_qualityfilter_keeppercent = 90
+ longread_qc_qualityfilter_minquality = 7
longread_qc_qualityfilter_targetbases = 500000000
save_preprocessed_reads = false
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 810629f4..e2c13e7c 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -84,7 +84,7 @@
"type": "boolean",
"fa_icon": "fas fa-save",
"description": "Save reads from samples that went through the adapter clipping, pair-merging, and length filtering steps for both short and long reads",
- "help_text": "This saves the FASTQ output from the following tools:\n\n- fastp\n- AdapterRemoval\n- Porechop\n- Filtlong\n\nThese reads will be a mixture of: adapter clipped, quality trimmed, pair-merged, and length filtered, depending on the parameters you set."
+ "help_text": "This saves the FASTQ output from the following tools:\n\n- fastp\n- AdapterRemoval\n- Porechop\n- Filtlong\n- Nanoq\n\nThese reads will be a mixture of: adapter clipped, quality trimmed, pair-merged, and length filtered, depending on the parameters you set."
},
"save_analysis_ready_fastqs": {
"type": "boolean",
@@ -238,24 +238,39 @@
"description": "Turns on long read quality control steps (adapter clipping, length filtering etc.)",
"help_text": "Turns on long read quality control steps (adapter clipping, length and/or quality filtering.)\n\nRemoving adapters (if present) is recommend to reduce false-postive hits that may occur from 'dirty' or 'contaminated' reference genomes in a profiling database that contain accidentially incorporated adapter sequences.\n\nLength filtering, and quality filtering can speed up alignment by reducing the number of unspecific reads that need to be aligned."
},
+ "longread_adapterremoval_tool": {
+ "type": "string",
+ "default": "porechop",
+ "enum": ["porechop"],
+ "fa_icon": "fas fa-hammer",
+ "description": "Specify which tool to use for adapter trimming. Currently only one option."
+ },
"longread_qc_skipadaptertrim": {
"type": "boolean",
"description": "Skip long-read trimming",
"fa_icon": "fas fa-forward",
"help_text": "Skip removal of adapters by Porechop. This can be useful in some cases to speed up run time - particularly when you are running data downloading from public databases such as the ENA/SRA that should already have adapters removed. We recommend that you check your FastQC results this is indeed the case."
},
+ "longread_filter_tool": {
+ "type": "string",
+ "default": "nanoq",
+ "enum": ["filtlong", "nanoq"],
+ "fa_icon": "fas fa-hammer",
+ "description": "Specify which tool to use for long reads filtering",
+ "help_text": "Nanoq is a filtering tool only for Nanopore reads. Nanoq is faster and more memory-efficient than Filtlong. Nanoq also provides a summary of input read statistics; see [benchmarking](https://github.com/esteinig/nanoq?tab=readme-ov-file#benchmarks). \n\nFiltlong is a good option if you want to keep a certain percentage of reads after filtering, and you can also use it for non-Nanopore long reads."
+ },
"longread_qc_skipqualityfilter": {
"type": "boolean",
"description": "Skip long-read length and quality filtering",
"fa_icon": "fas fa-forward",
- "help_text": "Skip removal of quality filtering with Filtlong. This will skip length, percent reads, and target bases filtering (see other `--longread_qc_qualityfilter_*` parameters)."
+ "help_text": "Skip quality filtering with Filtlong or Nanoq. This will skip length, quality, percent reads, and target bases filtering (see other `--longread_qc_qualityfilter_*` parameters)."
},
"longread_qc_qualityfilter_minlength": {
"type": "integer",
"default": 1000,
"description": "Specify the minimum length of reads to be retained",
"fa_icon": "fas fa-ruler-horizontal",
- "help_text": "Specify the minimum of length of reads to be kept for downstream analysis.\n\n> Modifies tool parameter(s):\n> - Filtlong: `--min_length`"
+ "help_text": "Specify the minimum length of reads to be kept for downstream analysis.\n\n> Modifies tool parameter(s):\n> - Filtlong: `--min_length`\n> - Nanoq: `--min-len`"
},
"longread_qc_qualityfilter_keeppercent": {
"type": "integer",
@@ -267,9 +282,16 @@
"longread_qc_qualityfilter_targetbases": {
"type": "integer",
"default": 500000000,
- "description": "Specify the number of high-quality bases in the library to be retained",
+ "description": "Filtlong only: specify the number of high-quality bases in the library to be retained",
"fa_icon": "fas fa-bullseye",
"help_text": "Removes the worst reads until only the specified value of bases remain, useful for very large read sets. If the input read set is less than the specified value, this setting will have no effect. _Modified from [Filtlong documentation](https://github.com/rrwick/Filtlong)_\n\n> Modifies tool parameter(s):\n> - Filtlong: `--keep_percent`"
+ },
+ "longread_qc_qualityfilter_minquality": {
+ "type": "integer",
+ "default": 7,
+ "description": "Nanoq only: specify the minimum average read quality filter (Q)",
+ "fa_icon": "fas fa-bullseye",
+ "help_text": "Remove reads with an average quality score (Q) lower than the specified value.\n\n> Modifies tool parameter(s):\n> - Nanoq: `--min-qual`"
}
},
"fa_icon": "fas fa-expand-alt"
@@ -370,7 +392,7 @@
"type": "boolean",
"fa_icon": "fas fa-save",
"description": "Save reads from samples that went through the run-merging step",
- "help_text": "Save the run- and library-concatenated reads of a given sample in FASTQ format.\n\n> ⚠️ Only samples that went through the run-merging step of the pipeline will be stored in the resulting directory. \n\nIf you wish to save the files that go to the classification/profiling steps for samples that _did not_ go through run merging, you must supply the appropriate upstream `--save_` flag.\n\n"
+ "help_text": "Save the run- and library-concatenated reads of a given sample in FASTQ format.\n\n> \u26a0\ufe0f Only samples that went through the run-merging step of the pipeline will be stored in the resulting directory. \n\nIf you wish to save the files that go to the classification/profiling steps for samples that _did not_ go through run merging, you must supply the appropriate upstream `--save_` flag.\n\n"
}
},
"fa_icon": "fas fa-clipboard-check"
diff --git a/subworkflows/local/longread_adapterremoval.nf b/subworkflows/local/longread_adapterremoval.nf
new file mode 100644
index 00000000..3e2ffcc4
--- /dev/null
+++ b/subworkflows/local/longread_adapterremoval.nf
@@ -0,0 +1,29 @@
+//
+// Process long raw reads with porechop
+//
+
+include { PORECHOP_PORECHOP } from '../../modules/nf-core/porechop/porechop/main'
+
+workflow LONGREAD_ADAPTERREMOVAL { // adapter removal for long reads, using the tool named by params.longread_adapterremoval_tool
+ take:
+ reads // channel of [ meta, reads ] tuples, passed to the selected tool unchanged
+
+ main:
+ ch_versions = Channel.empty() // collects versions.yml files from the tools that ran
+ ch_multiqc_files = Channel.empty() // collects tool logs emitted as 'mqc'
+
+ if (params.longread_adapterremoval_tool == 'porechop') {
+ PORECHOP_PORECHOP ( reads )
+ ch_processed_reads = PORECHOP_PORECHOP.out.reads
+ .map { meta, reads -> [ meta + [single_end: true], reads ] } // set single_end: true in the meta map of every output
+ ch_versions = ch_versions.mix(PORECHOP_PORECHOP.out.versions.first()) // first(): keep a single versions.yml
+ ch_multiqc_files = ch_multiqc_files.mix( PORECHOP_PORECHOP.out.log )
+ } else {
+ ch_processed_reads = reads // any other tool value: reads pass through untouched
+ }
+
+ emit:
+ reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
+ versions = ch_versions // channel: [ versions.yml ]
+ mqc = ch_multiqc_files // channel: porechop logs; empty when porechop did not run
+}
diff --git a/subworkflows/local/longread_filtering.nf b/subworkflows/local/longread_filtering.nf
new file mode 100644
index 00000000..67cd8f2b
--- /dev/null
+++ b/subworkflows/local/longread_filtering.nf
@@ -0,0 +1,33 @@
+//
+// Filter long reads by length and quality with Filtlong or nanoq
+//
+
+include { FILTLONG } from '../../modules/nf-core/filtlong/main'
+include { NANOQ } from '../../modules/nf-core/nanoq/main'
+
+workflow LONGREAD_FILTERING { // long-read filtering, using the tool named by params.longread_filter_tool
+ take:
+ reads // [ [ meta ], [ reads ] ]
+
+ main:
+ ch_versions = Channel.empty()
+ ch_multiqc_files = Channel.empty()
+
+ // Dispatch on params.longread_filter_tool; any value other than 'filtlong' or 'nanoq' passes reads through unfiltered
+ if ( params.longread_filter_tool == 'filtlong' ) {
+ ch_filtered_reads = FILTLONG ( reads.map { meta, reads -> [ meta, [], reads ] } ).reads // FILTLONG is fed a 3-element tuple; the middle slot is left empty here
+ ch_versions = ch_versions.mix( FILTLONG.out.versions.first() )
+ ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log )
+ } else if ( params.longread_filter_tool == 'nanoq' ) {
+ ch_filtered_reads = NANOQ ( reads , 'fastq.gz' ).reads // second argument is the output format, used as the suffix of the filtered reads file
+ ch_versions = ch_versions.mix( NANOQ.out.versions.first() )
+ ch_multiqc_files = ch_multiqc_files.mix( NANOQ.out.stats ) // the nanoq stats report is what gets forwarded as 'mqc'
+ } else {
+ ch_filtered_reads = reads
+ }
+
+ emit:
+ reads = ch_filtered_reads // channel: [ val(meta), [ reads ] ]
+ versions = ch_versions // channel: [ versions.yml ]
+ mqc = ch_multiqc_files // channel: Filtlong logs or nanoq stats; empty when neither tool ran
+}
diff --git a/subworkflows/local/longread_preprocessing.nf b/subworkflows/local/longread_preprocessing.nf
index 72261013..c35e0c1f 100644
--- a/subworkflows/local/longread_preprocessing.nf
+++ b/subworkflows/local/longread_preprocessing.nf
@@ -2,11 +2,11 @@
// Process long raw reads with porechop
//
-include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/fastqc/main'
-include { FALCO as FALCO_PROCESSED } from '../../modules/nf-core/falco/main'
+include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/fastqc/main'
+include { FALCO as FALCO_PROCESSED } from '../../modules/nf-core/falco/main'
-include { PORECHOP_PORECHOP } from '../../modules/nf-core/porechop/porechop/main'
-include { FILTLONG } from '../../modules/nf-core/filtlong/main'
+include { LONGREAD_ADAPTERREMOVAL } from './longread_adapterremoval.nf'
+include { LONGREAD_FILTERING } from './longread_filtering.nf'
workflow LONGREAD_PREPROCESSING {
take:
@@ -17,41 +17,32 @@ workflow LONGREAD_PREPROCESSING {
ch_multiqc_files = Channel.empty()
if ( !params.longread_qc_skipadaptertrim && params.longread_qc_skipqualityfilter) {
- PORECHOP_PORECHOP ( reads )
-
- ch_processed_reads = PORECHOP_PORECHOP.out.reads
- .map { meta, reads -> [ meta + [single_end: true], reads ] }
-
- ch_versions = ch_versions.mix(PORECHOP_PORECHOP.out.versions.first())
- ch_multiqc_files = ch_multiqc_files.mix( PORECHOP_PORECHOP.out.log )
-
+ ch_processed_reads = LONGREAD_ADAPTERREMOVAL ( reads ).reads
+ ch_versions = ch_versions.mix(LONGREAD_ADAPTERREMOVAL.out.versions.first())
+ ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_ADAPTERREMOVAL.out.mqc )
} else if ( params.longread_qc_skipadaptertrim && !params.longread_qc_skipqualityfilter) {
-
- ch_processed_reads = FILTLONG ( reads.map { meta, reads -> [meta, [], reads ] } )
- ch_versions = ch_versions.mix(FILTLONG.out.versions.first())
- ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log )
-
+ ch_processed_reads = LONGREAD_FILTERING ( reads ).reads
+ ch_versions = ch_versions.mix(LONGREAD_FILTERING.out.versions.first())
+ ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_FILTERING.out.mqc )
} else {
- PORECHOP_PORECHOP ( reads )
- ch_clipped_reads = PORECHOP_PORECHOP.out.reads
+ LONGREAD_ADAPTERREMOVAL ( reads )
+ ch_clipped_reads = LONGREAD_ADAPTERREMOVAL.out.reads
.map { meta, reads -> [ meta + [single_end: true], reads ] }
-
- ch_processed_reads = FILTLONG ( ch_clipped_reads.map { meta, reads -> [ meta, [], reads ] } ).reads
-
- ch_versions = ch_versions.mix(PORECHOP_PORECHOP.out.versions.first())
- ch_versions = ch_versions.mix(FILTLONG.out.versions.first())
- ch_multiqc_files = ch_multiqc_files.mix( PORECHOP_PORECHOP.out.log )
- ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log )
+ ch_processed_reads = LONGREAD_FILTERING ( ch_clipped_reads ).reads
+ ch_versions = ch_versions.mix(LONGREAD_ADAPTERREMOVAL.out.versions.first())
+ ch_versions = ch_versions.mix(LONGREAD_FILTERING.out.versions.first())
+ ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_ADAPTERREMOVAL.out.mqc )
+ ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_FILTERING.out.mqc )
}
if (params.preprocessing_qc_tool == 'fastqc') {
FASTQC_PROCESSED ( ch_processed_reads )
- ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
+ ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
} else if (params.preprocessing_qc_tool == 'falco') {
FALCO_PROCESSED ( ch_processed_reads )
- ch_versions = ch_versions.mix( FALCO_PROCESSED.out.versions )
+ ch_versions = ch_versions.mix( FALCO_PROCESSED.out.versions )
ch_multiqc_files = ch_multiqc_files.mix( FALCO_PROCESSED.out.txt )
}
@@ -60,4 +51,3 @@ workflow LONGREAD_PREPROCESSING {
versions = ch_versions // channel: [ versions.yml ]
mqc = ch_multiqc_files
}
-
diff --git a/subworkflows/local/utils_nfcore_taxprofiler_pipeline/main.nf b/subworkflows/local/utils_nfcore_taxprofiler_pipeline/main.nf
index eb143651..1e3d08c3 100644
--- a/subworkflows/local/utils_nfcore_taxprofiler_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_taxprofiler_pipeline/main.nf
@@ -211,8 +211,8 @@ def toolCitationText() {
def text_longread_qc = [
"Long read preprocessing was performed with:",
!params.longread_qc_skipadaptertrim ? "Porechop (Wick et al. 2017)," : "",
- !params.longread_qc_skipqualityfilter ? "Filtlong (Wick 2021)," : "",
- "."
+ !params.longread_qc_skipqualityfilter && params.longread_filter_tool == "filtlong" ? "Filtlong (Wick 2021)," : "", // cite the filter tool only when quality filtering was not skipped
+ !params.longread_qc_skipqualityfilter && params.longread_filter_tool == "nanoq" ? "Nanoq (Steinig and Coin 2022)," : "", // same guard: longread_filter_tool has a default value even when filtering is skipped
].join(' ').trim()
def text_shortreadcomplexity = [
@@ -290,7 +290,8 @@ def toolBibliographyText() {
def text_longread_qc = [
!params.longread_qc_skipadaptertrim ? "Wick, R. R., Judd, L. M., Gorrie, C. L., & Holt, K. E. (2017). Completing bacterial genome assemblies with multiplex MinION sequencing. Microbial Genomics, 3(10), e000132. 10.1099/mgen.0.000132" : "",
- !params.longread_qc_skipqualityfilter ? "Wick R. (2021) Filtlong, URL: https://github.com/rrwick/Filtlong" : ""
+ !params.longread_qc_skipqualityfilter && params.longread_filter_tool == "filtlong" ? "Wick R. (2021) Filtlong, URL: https://github.com/rrwick/Filtlong" : "", // list the filter tool only when quality filtering was not skipped
+ !params.longread_qc_skipqualityfilter && params.longread_filter_tool == "nanoq" ? "Steinig, E., & Coin, L. (2022). Nanoq: ultra-fast quality control for nanopore reads. Journal of Open Source Software, 7(69). 10.21105/joss.02991" : "" // same guard: longread_filter_tool has a default value even when filtering is skipped
].join(' ').trim()
def text_shortreadcomplexity = [