diff --git a/CHANGELOG.md b/CHANGELOG.md
index bbf061d7..f6d1882a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#466](https://github.com/nf-core/taxprofiler/pull/466) - Input database sheets now require a `db_type` column to distinguish between short- and long-read databases (added by @LilyAnderssonLee)
- [#505](https://github.com/nf-core/taxprofiler/pull/505) - Add small files to the file `tower.yml` (added by @LilyAnderssonLee)
- [#508](https://github.com/nf-core/taxprofiler/pull/508) - Add `nanoq` as a filtering tool for nanopore reads (added by @LilyAnderssonLee)
+- [#511](https://github.com/nf-core/taxprofiler/pull/511) - Add `porechop_abi` as an alternative adapter removal tool for long-read nanopore data (added by @LilyAnderssonLee)
### `Fixed`
diff --git a/CITATIONS.md b/CITATIONS.md
index 36456b25..9079373f 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -38,6 +38,10 @@
> Wick, R. R., Judd, L. M., Gorrie, C. L., & Holt, K. E. (2017). Completing bacterial genome assemblies with multiplex MinION sequencing. Microbial Genomics, 3(10), e000132. https://doi.org/10.1099/mgen.0.000132
+- [Porechop_ABI](https://github.com/bonsai-team/Porechop_ABI)
+
+ > Bonenfant, Q., Noé, L., & Touzet, H. (2023). Porechop_ABI: discovering unknown adapters in Oxford Nanopore Technology sequencing reads for downstream trimming. Bioinformatics Advances, 3(1):vbac085. https://doi.org/10.1093/bioadv/vbac085
+
- [Filtlong](https://github.com/rrwick/Filtlong)
> Wick R (2021) Filtlong, URL: https://github.com/rrwick/Filtlong
diff --git a/README.md b/README.md
index d87c5f63..a3edf867 100644
--- a/README.md
+++ b/README.md
@@ -29,7 +29,7 @@
1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) or [`falco`](https://github.com/smithlabcode/falco) as an alternative option)
2. Performs optional read pre-processing
- - Adapter clipping and merging (short-read: [fastp](https://github.com/OpenGene/fastp), [AdapterRemoval2](https://github.com/MikkelSchubert/adapterremoval); long-read: [porechop](https://github.com/rrwick/Porechop))
+ - Adapter clipping and merging (short-read: [fastp](https://github.com/OpenGene/fastp), [AdapterRemoval2](https://github.com/MikkelSchubert/adapterremoval); long-read: [porechop](https://github.com/rrwick/Porechop), [Porechop_ABI](https://github.com/bonsai-team/Porechop_ABI))
- Low complexity and quality filtering (short-read: [bbduk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/), [PRINSEQ++](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus); long-read: [Filtlong](https://github.com/rrwick/Filtlong)), [Nanoq](https://github.com/esteinig/nanoq)
- Host-read removal (short-read: [BowTie2](http://bowtie-bio.sourceforge.net/bowtie2/); long-read: [Minimap2](https://github.com/lh3/minimap2))
- Run merging
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index 087099ba..6f1bfa10 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -25,6 +25,8 @@ report_section_order:
order: 500
porechop:
order: 400
+ porechop_abi:
+ order: 450
bbduk:
order: 300
prinseqplusplus:
@@ -106,7 +108,21 @@ top_modules:
- "*raw*"
extra: "If used in this run, Falco is a drop-in replacement for FastQC producing the same output, written by Guilherme de Sena Brandine and Andrew D. Smith."
- "porechop":
+ name: "Porechop"
+ anchor: "porechop"
+ target: "Porechop"
+ path_filters:
+ - "*porechop.log"
extra: "ℹ️: if you get the error message 'Error - was not able to plot data.' this means that porechop did not detect any adapters and therefore no statistics generated."
+ - "porechop":
+ name: "Porechop_ABI"
+ anchor: "porechop_abi"
+ target: "Porechop_ABI"
+ doi: "10.1093/bioadv/vbac085"
+ info: "find and remove adapters from Oxford Nanopore reads."
+ path_filters:
+ - "*porechop_abi.log"
+ extra: "ℹ️: if you get the error message 'Error - was not able to plot data.' this means that porechop_abi did not detect any adapters and therefore no statistics generated."
- "bowtie2":
name: "bowtie2"
- "samtools":
@@ -177,6 +193,14 @@ table_columns_placement:
End Trimmed Percent: 440
Middle Split: 450
Middle Split Percent: 460
+ Porechop_ABI:
+ Input Reads: 400
+ Start Trimmed: 410
+ Start Trimmed Percent: 420
+ End Trimmed: 430
+ End Trimmed Percent: 440
+ Middle Split: 450
+ Middle Split Percent: 460
Filtlong:
Target bases: 500
BBDuk:
@@ -250,6 +274,14 @@ table_columns_visible:
End Trimmed Percent: True
Middle Split: False
Middle Split Percent: True
+  porechop_abi:
+    Input Reads: False # same column key casing as in table_columns_placement
+    Start Trimmed: False # explicit value; a bare key parses as null
+    Start Trimmed Percent: True
+    End Trimmed: False
+    End Trimmed Percent: True
+    Middle Split: False
+    Middle Split Percent: True
fastp:
pct_adapter: True
pct_surviving: True
@@ -315,6 +347,8 @@ extra_fn_clean_exts:
- ".bbduk"
- ".unmapped"
- "_filtered"
+  - "_porechop_abi" # longest suffix first; anchored on "_" so sample IDs that merely contain "porechop" are not truncated
+  - "_porechop"
- type: remove
pattern: "_falco"
diff --git a/conf/modules.config b/conf/modules.config
index 22e9fd0a..2252b57f 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -241,12 +241,12 @@ process {
}
withName: PORECHOP_PORECHOP {
- ext.prefix = { "${meta.id}_${meta.run_accession}" }
+ ext.prefix = { "${meta.id}_${meta.run_accession}_porechop" }
publishDir = [
[
path: { "${params.outdir}/porechop" },
mode: params.publish_dir_mode,
- pattern: '*_porechopped.fastq.gz',
+ pattern: '*.fastq.gz',
enabled: params.save_preprocessed_reads
],
[
@@ -257,7 +257,31 @@ process {
[
path: { "${params.outdir}/analysis_ready_fastqs" },
mode: params.publish_dir_mode,
- pattern: '*_porechopped.fastq.gz',
+ pattern: '*.fastq.gz',
+ enabled: params.save_analysis_ready_fastqs,
+ saveAs: { ( params.perform_runmerging == false || ( params.perform_runmerging && !meta.is_multirun ) ) && !params.perform_longread_hostremoval && params.longread_qc_skipqualityfilter && !params.longread_qc_skipadaptertrim && params.perform_longread_qc && params.save_analysis_ready_fastqs ? it : null }
+ ]
+ ]
+ }
+
+ withName: PORECHOP_ABI {
+ ext.prefix = { "${meta.id}_${meta.run_accession}_porechop_abi" }
+ publishDir = [
+ [
+ path: { "${params.outdir}/porechop_abi" },
+ mode: params.publish_dir_mode,
+ pattern: '*.fastq.gz',
+ enabled: params.save_preprocessed_reads
+ ],
+ [
+ path: { "${params.outdir}/porechop_abi" },
+ mode: params.publish_dir_mode,
+ pattern: '*.log'
+ ],
+ [
+ path: { "${params.outdir}/analysis_ready_fastqs" },
+ mode: params.publish_dir_mode,
+ pattern: '*.fastq.gz',
enabled: params.save_analysis_ready_fastqs,
saveAs: { ( params.perform_runmerging == false || ( params.perform_runmerging && !meta.is_multirun ) ) && !params.perform_longread_hostremoval && params.longread_qc_skipqualityfilter && !params.longread_qc_skipadaptertrim && params.perform_longread_qc && params.save_analysis_ready_fastqs ? it : null }
]
diff --git a/docs/output.md b/docs/output.md
index 6cecb839..285c4ec1 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -16,6 +16,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
- [fastp](#fastp) - Adapter trimming for Illumina data
- [AdapterRemoval](#adapterremoval) - Adapter trimming for Illumina data
- [Porechop](#porechop) - Adapter removal for Oxford Nanopore data
+- [Porechop_ABI](#porechop_abi) - Adapter removal for Oxford Nanopore data
- [Nonpareil](#nonpareil) - Read redundancy and metagenome coverage estimation for short reads
- [BBDuk](#bbduk) - Quality trimming and filtering for Illumina data
- [PRINSEQ++](#prinseq) - Quality trimming and filtering for Illunina data
@@ -178,6 +179,23 @@ You will only find the `.fastq` files in the results directory if you provide `
We do **not** recommend using Porechop if you are already trimming the adapters with ONT's basecaller Guppy.
:::
+### Porechop_ABI
+
+[Porechop_ABI](https://github.com/bonsai-team/Porechop_ABI) is an extension of [Porechop](https://github.com/rrwick/Porechop). Porechop_ABI does not use any external knowledge or database for the adapters. Adapters are discovered directly from the reads using approximate k-mers counting and assembly. Then these sequences can be used for trimming, using all standard Porechop options. The software is able to report a combination of distinct sequences if a mix of adapters is used. It can also be used to check whether a dataset has already been trimmed out or not, or to find leftover adapters in datasets that have been previously processed with Guppy.
+
+
+Output files
+
+- `porechop_abi/`
+ - `.log`: Log file containing trimming statistics
+ - `.fastq.gz`: Adapter-trimmed file
+
+
+
+The output logs are saved in the output folder and are part of the MultiQC report. You do not normally need to check these manually.
+
+You will only find the `.fastq` files in the results directory if you provide `--save_preprocessed_reads`. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_fastqs`, in which case the reads will be in the folder `analysis_ready_fastqs`.
+
### BBDuk
[BBDuk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/bbduk-guide/) stands for Decontamination Using Kmers. BBDuk was developed to combine most common data-quality-related trimming, filtering, and masking operations into a single high-performance tool.
diff --git a/docs/usage.md b/docs/usage.md
index 4f1939e3..c6294bd9 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -252,7 +252,7 @@ By default, paired-end merging is not activated. In this case paired-end 'alignm
You can also turn off clipping and only perform paired-end merging, if requested. This can be useful when processing data downloaded from the ENA, SRA, or DDBJ (`--shortread_qc_skipadaptertrim`).
Both tools support length filtering of reads and can be tuned with `--shortread_qc_minlength`. Performing length filtering can be useful to remove short (often low sequencing complexity) sequences that result in unspecific classification and therefore slow down runtime during classification/profiling, with minimal gain.
-There is currently one option for long-read Oxford Nanopore processing: [`porechop`](https://github.com/rrwick/Porechop).
+There are currently two options for long-read Oxford Nanopore processing: [`porechop`](https://github.com/rrwick/Porechop) and [`porechop_abi`](https://github.com/bonsai-team/Porechop_ABI).
For both short-read and long-read preprocessing, you can optionally save the resulting processed reads with `--save_preprocessed_reads`.
diff --git a/modules.json b/modules.json
index d72d4e3d..47792ee0 100644
--- a/modules.json
+++ b/modules.json
@@ -215,6 +215,12 @@
"git_sha": "729335dda8ba226323edc54dec80ae959079207e",
"installed_by": ["modules"]
},
+ "porechop/abi": {
+ "branch": "master",
+ "git_sha": "870f9af2eaf0000c94d74910d762cf153752af98",
+ "installed_by": ["modules"],
+ "patch": "modules/nf-core/porechop/abi/porechop-abi.diff"
+ },
"porechop/porechop": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
diff --git a/modules/nf-core/porechop/abi/environment.yml b/modules/nf-core/porechop/abi/environment.yml
new file mode 100644
index 00000000..4dd2eab1
--- /dev/null
+++ b/modules/nf-core/porechop/abi/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: porechop_abi
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::porechop_abi=0.5.0
diff --git a/modules/nf-core/porechop/abi/main.nf b/modules/nf-core/porechop/abi/main.nf
new file mode 100644
index 00000000..e9fa9e91
--- /dev/null
+++ b/modules/nf-core/porechop/abi/main.nf
@@ -0,0 +1,62 @@
+process PORECHOP_ABI {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/porechop_abi:0.5.0--py310h590eda1_0':
+        'biocontainers/porechop_abi:0.5.0--py310h590eda1_0' }"
+
+    input:
+    tuple val(meta), path(reads)
+
+    output:
+    tuple val(meta), path("*.fastq.gz") , emit: reads
+    tuple val(meta), path("*.log")      , emit: log
+    path "versions.yml"                 , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}.porechop_abi"
+    if ("$reads" == "${prefix}.fastq.gz") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+
+    """
+    ## To ensure ID matches rest of pipeline based on meta.id rather than input file name
+
+    [[ -f ${prefix}.fastq.gz ]] || ln -s $reads ${prefix}.fastq.gz
+
+    ## Write to a temporary file first: the prefix-named fastq.gz is a symlink to the
+    ## staged input, so writing the output straight to it would overwrite the original reads
+    porechop_abi \\
+        --input ${prefix}.fastq.gz \\
+        --threads $task.cpus \\
+        $args \\
+        --output ${prefix}.trimmed_tmp.fastq.gz \\
+        | tee ${prefix}.log
+
+    ## Swap the input symlink for the trimmed reads so the published file name is unchanged
+    rm -f ${prefix}.fastq.gz
+    mv ${prefix}.trimmed_tmp.fastq.gz ${prefix}.fastq.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        porechop_abi: \$( porechop_abi --version )
+    END_VERSIONS
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}.porechop_abi"
+    """
+    echo "" | gzip > ${prefix}.fastq.gz
+    touch ${prefix}.log
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        porechop_abi: \$( porechop_abi --version )
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/porechop/abi/meta.yml b/modules/nf-core/porechop/abi/meta.yml
new file mode 100644
index 00000000..a856ffbe
--- /dev/null
+++ b/modules/nf-core/porechop/abi/meta.yml
@@ -0,0 +1,48 @@
+name: "porechop_abi"
+description: Extension of Porechop whose purpose is to process adapter sequences in ONT reads.
+keywords:
+  - porechop_abi
+  - adapter
+  - nanopore
+tools:
+  - "porechop_abi":
+      description: Extension of Porechop whose purpose is to process adapter sequences in ONT reads.
+      homepage: "https://github.com/bonsai-team/Porechop_ABI"
+      documentation: "https://github.com/bonsai-team/Porechop_ABI"
+      tool_dev_url: "https://github.com/bonsai-team/Porechop_ABI"
+      doi: "10.1101/2022.07.07.499093"
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: fastq/fastq.gz file
+      pattern: "*.{fastq,fastq.gz,fq,fq.gz}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - reads:
+      type: file
+      description: Adapter-trimmed fastq.gz file
+      pattern: "*.fastq.gz"
+  - log:
+      type: file
+      description: Log file containing stdout information
+      pattern: "*.log"
+authors:
+  - "@sofstam"
+  - "LilyAnderssonLee"
+maintainers:
+  - "@sofstam"
+  - "LilyAnderssonLee"
diff --git a/modules/nf-core/porechop/abi/porechop-abi.diff b/modules/nf-core/porechop/abi/porechop-abi.diff
new file mode 100644
index 00000000..4aee2e91
--- /dev/null
+++ b/modules/nf-core/porechop/abi/porechop-abi.diff
@@ -0,0 +1,33 @@
+Changes in module 'nf-core/porechop/abi'
+--- modules/nf-core/porechop/abi/main.nf
++++ modules/nf-core/porechop/abi/main.nf
+@@ -22,13 +22,25 @@
+     def args = task.ext.args ?: ''
+     def prefix = task.ext.prefix ?: "${meta.id}.porechop_abi"
+     if ("$reads" == "${prefix}.fastq.gz") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
++
+     """
++    ## To ensure ID matches rest of pipeline based on meta.id rather than input file name
++
++    [[ -f ${prefix}.fastq.gz ]] || ln -s $reads ${prefix}.fastq.gz
++
++    ## Write to a temporary file first: the prefix-named fastq.gz is a symlink to the
++    ## staged input, so writing the output straight to it would overwrite the original reads
+     porechop_abi \\
+-        --input $reads \\
++        --input ${prefix}.fastq.gz \\
+         --threads $task.cpus \\
+         $args \\
+-        --output ${prefix}.fastq.gz \\
++        --output ${prefix}.trimmed_tmp.fastq.gz \\
+         | tee ${prefix}.log
++
++    ## Swap the input symlink for the trimmed reads so the published file name is unchanged
++    rm -f ${prefix}.fastq.gz
++    mv ${prefix}.trimmed_tmp.fastq.gz ${prefix}.fastq.gz
++
+     cat <<-END_VERSIONS > versions.yml
+     "${task.process}":
+         porechop_abi: \$( porechop_abi --version )
+
+************************************************************
diff --git a/modules/nf-core/porechop/abi/tests/main.nf.test b/modules/nf-core/porechop/abi/tests/main.nf.test
new file mode 100644
index 00000000..b5a29f90
--- /dev/null
+++ b/modules/nf-core/porechop/abi/tests/main.nf.test
@@ -0,0 +1,59 @@
+nextflow_process {
+
+ name "Test Process PORECHOP_ABI"
+ script "../main.nf"
+ process "PORECHOP_ABI"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "porechop"
+ tag "porechop/abi"
+
+ test("sarscov2-nanopore") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test'], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ process.out.reads,
+ file(process.out.log.get(0).get(1)).readLines()[20..40],
+ process.out.versions).match()
+ }
+ )
+ }
+ }
+
+ test("sarscov2-nanopore - stub") {
+
+ options "-stub"
+
+ when {
+
+ process {
+ """
+ input[0] = [
+ [ id:'test'], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+}
diff --git a/modules/nf-core/porechop/abi/tests/main.nf.test.snap b/modules/nf-core/porechop/abi/tests/main.nf.test.snap
new file mode 100644
index 00000000..ad63f4ed
--- /dev/null
+++ b/modules/nf-core/porechop/abi/tests/main.nf.test.snap
@@ -0,0 +1,94 @@
+{
+ "sarscov2-nanopore": {
+ "content": [
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test.porechop_abi.fastq.gz:md5,886fdb859fb50e0dddd35007bcff043e"
+ ]
+ ],
+ [
+ " Best \u001b[0m",
+ " read Best \u001b[0m",
+ " start read end\u001b[0m",
+ " \u001b[4mSet %ID %ID \u001b[0m",
+ " \u001b[32mSQK-NSK007 100.0 73.1\u001b[0m",
+ " Rapid 40.4 0.0",
+ " RBK004_upstream 77.5 0.0",
+ " SQK-MAP006 75.8 72.7",
+ " SQK-MAP006 short 65.5 66.7",
+ " PCR adapters 1 73.9 69.6",
+ " PCR adapters 2 80.0 72.7",
+ " PCR adapters 3 70.8 69.6",
+ " 1D^2 part 1 71.4 70.0",
+ " 1D^2 part 2 84.8 75.8",
+ " cDNA SSP 63.0 61.7",
+ " \u001b[32mBarcode 1 (reverse) 100.0 100.0\u001b[0m",
+ " Barcode 2 (reverse) 70.8 69.2",
+ " Barcode 3 (reverse) 76.0 70.4",
+ " Barcode 4 (reverse) 74.1 71.4",
+ " Barcode 5 (reverse) 77.8 80.8",
+ " Barcode 6 (reverse) 73.1 70.8"
+ ],
+ [
+ "versions.yml:md5,0e9e5e0d35a68ff8e6490c949b257f98"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.04.1"
+ },
+ "timestamp": "2024-07-29T13:50:49.318599"
+ },
+ "sarscov2-nanopore - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.porechop_abi.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test.porechop_abi.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,0e9e5e0d35a68ff8e6490c949b257f98"
+ ],
+ "log": [
+ [
+ {
+ "id": "test"
+ },
+ "test.porechop_abi.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "reads": [
+ [
+ {
+ "id": "test"
+ },
+ "test.porechop_abi.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,0e9e5e0d35a68ff8e6490c949b257f98"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.04.1"
+ },
+ "timestamp": "2024-07-29T13:50:54.425389"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/porechop/abi/tests/tags.yml b/modules/nf-core/porechop/abi/tests/tags.yml
new file mode 100644
index 00000000..e19350cd
--- /dev/null
+++ b/modules/nf-core/porechop/abi/tests/tags.yml
@@ -0,0 +1,2 @@
+porechop/abi:
+ - "modules/nf-core/porechop/abi/**"
diff --git a/nextflow.config b/nextflow.config
index 3903aa4c..8c763bcf 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -79,7 +79,7 @@ params {
shortread_qc_dedup = false
perform_longread_qc = false
- longread_adapterremoval_tool = 'porechop'
+ longread_adapterremoval_tool = 'porechop_abi'
longread_qc_skipadaptertrim = false
longread_qc_skipqualityfilter = false
longread_filter_tool = 'nanoq'
diff --git a/nextflow_schema.json b/nextflow_schema.json
index e2c13e7c..b43cefcb 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -240,10 +240,11 @@
},
"longread_adapterremoval_tool": {
"type": "string",
- "default": "porechop",
- "enum": ["porechop"],
+ "default": "porechop_abi",
+ "enum": ["porechop", "porechop_abi"],
"fa_icon": "fas fa-hammer",
- "description": "Specify which tool to use for adapter trimming. Currently only one option."
+ "description": "Specify which tool to use for adapter trimming.",
+ "help_text": "Porechop and Porechop_ABI perform equally well at removing adapters. However, Porechop is no longer maintained, whereas Porechop_ABI receives regular updates."
},
"longread_qc_skipadaptertrim": {
"type": "boolean",
diff --git a/subworkflows/local/longread_adapterremoval.nf b/subworkflows/local/longread_adapterremoval.nf
index 3e2ffcc4..f32bf8d1 100644
--- a/subworkflows/local/longread_adapterremoval.nf
+++ b/subworkflows/local/longread_adapterremoval.nf
@@ -3,6 +3,7 @@
//
include { PORECHOP_PORECHOP } from '../../modules/nf-core/porechop/porechop/main'
+include { PORECHOP_ABI } from '../../modules/nf-core/porechop/abi/main'
workflow LONGREAD_ADAPTERREMOVAL {
take:
@@ -12,7 +13,13 @@ workflow LONGREAD_ADAPTERREMOVAL {
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()
- if (params.longread_adapterremoval_tool == 'porechop') {
+ if (params.longread_adapterremoval_tool == 'porechop_abi') {
+ PORECHOP_ABI (reads)
+ ch_processed_reads = PORECHOP_ABI.out.reads
+ .map { meta, reads -> [meta + [ single_end: true ], reads ] }
+ ch_versions = ch_versions.mix( PORECHOP_ABI.out.versions.first() )
+ ch_multiqc_files = ch_multiqc_files.mix( PORECHOP_ABI.out.log )
+ } else if (params.longread_adapterremoval_tool == 'porechop') {
PORECHOP_PORECHOP ( reads )
ch_processed_reads = PORECHOP_PORECHOP.out.reads
.map { meta, reads -> [ meta + [single_end: true], reads ] }
diff --git a/subworkflows/local/longread_preprocessing.nf b/subworkflows/local/longread_preprocessing.nf
index c35e0c1f..3b25879d 100644
--- a/subworkflows/local/longread_preprocessing.nf
+++ b/subworkflows/local/longread_preprocessing.nf
@@ -1,5 +1,5 @@
//
-// Process long raw reads with porechop
+// Perform read trimming and filtering
//
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/fastqc/main'
diff --git a/subworkflows/local/utils_nfcore_taxprofiler_pipeline/main.nf b/subworkflows/local/utils_nfcore_taxprofiler_pipeline/main.nf
index 1e3d08c3..b6e8c305 100644
--- a/subworkflows/local/utils_nfcore_taxprofiler_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_taxprofiler_pipeline/main.nf
@@ -210,9 +210,10 @@ def toolCitationText() {
def text_longread_qc = [
"Long read preprocessing was performed with:",
- !params.longread_qc_skipadaptertrim ? "Porechop (Wick et al. 2017)," : "",
- params.longread_filter_tool == "filtlong" ? "Filtlong (Wick 2021)," : "",
- params.longread_filter_tool == "nanoq" ? "Nanoq (Steinig and Coin 2022)," : "",
+        !params.longread_qc_skipadaptertrim && params.longread_adapterremoval_tool == "porechop_abi" ? "Porechop_ABI (Bonenfant et al. 2023)." : "", // cite the adapter trimmer only when trimming actually ran
+        !params.longread_qc_skipadaptertrim && params.longread_adapterremoval_tool == "porechop" ? "Porechop (Wick et al. 2017)." : "",
+ params.longread_filter_tool == "filtlong" ? "Filtlong (Wick 2021)." : "",
+ params.longread_filter_tool == "nanoq" ? "Nanoq (Steinig and Coin 2022)." : "",
].join(' ').trim()
def text_shortreadcomplexity = [
@@ -289,7 +290,8 @@ def toolBibliographyText() {
].join(' ').trim()
def text_longread_qc = [
- !params.longread_qc_skipadaptertrim ? "
Wick, R. R., Judd, L. M., Gorrie, C. L., & Holt, K. E. (2017). Completing bacterial genome assemblies with multiplex MinION sequencing. Microbial Genomics, 3(10), e000132. 10.1099/mgen.0.000132" : "",
+        !params.longread_qc_skipadaptertrim && params.longread_adapterremoval_tool == "porechop_abi" ? "Bonenfant, Q., Noé, L., & Touzet, H. (2023). Porechop_ABI: discovering unknown adapters in Oxford Nanopore Technology sequencing reads for downstream trimming. Bioinformatics Advances, 3(1):vbac085. 10.1093/bioadv/vbac085" : "", // list the adapter trimmer only when trimming actually ran
+        !params.longread_qc_skipadaptertrim && params.longread_adapterremoval_tool == "porechop" ? "Wick, R. R., Judd, L. M., Gorrie, C. L., & Holt, K. E. (2017). Completing bacterial genome assemblies with multiplex MinION sequencing. Microbial Genomics, 3(10), e000132. 10.1099/mgen.0.000132" : "",
params.longread_filter_tool == "filtlong" ? "Wick R. (2021) Filtlong, URL: https://github.com/rrwick/Filtlong" : "",
params.longread_filter_tool == "nanoq" ? "Steinig, E., & Coin, L. (2022). Nanoq: ultra-fast quality control for nanopore reads. Journal of Open Source Software, 7(69). 10.21105/joss.02991" : ""
].join(' ').trim()
diff --git a/subworkflows/local/visualization_krona.nf b/subworkflows/local/visualization_krona.nf
index 77e26a22..eb9b6cf6 100644
--- a/subworkflows/local/visualization_krona.nf
+++ b/subworkflows/local/visualization_krona.nf
@@ -99,6 +99,7 @@ workflow VISUALIZATION_KRONA {
KRONA_KTIMPORTTAXONOMY ( ch_krona_taxonomy_for_input, file(params.krona_taxonomy_directory, checkExists: true) )
ch_krona_html.mix( KRONA_KTIMPORTTAXONOMY.out.html )
+ ch_versions = ch_versions.mix ( GUNZIP.out.versions.first() )
ch_versions = ch_versions.mix( MEGAN_RMA2INFO_KRONA.out.versions.first() )
ch_versions = ch_versions.mix( KRONA_KTIMPORTTAXONOMY.out.versions.first() )
}