diff --git a/modules/ebi-metagenomics/fastp/main.nf b/modules/ebi-metagenomics/fastp/main.nf
new file mode 100644
index 00000000..94f71383
--- /dev/null
+++ b/modules/ebi-metagenomics/fastp/main.nf
@@ -0,0 +1,102 @@
+// This fastp module is simply copied from the already-existing nf-core module (https://nf-co.re/modules/fastp, https://github.com/nf-core/modules/commit/d497a4868ace3302016ea8ed4b395072d5e833cd)
+// This is because there are not currently any nf-core ways of adding modules from more than one nf-core repo
+// One slight change to it compared to the original is I've removed the "adapter_fasta" input as we are unlikely
+// to need it for our purposes
+
+process FASTP {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "bioconda::fastp=0.23.4"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' :
+        'biocontainers/fastp:0.23.4--h5f740d0_0' }"
+
+    input:
+    tuple val(meta), path(reads)
+    val save_trimmed_fail
+    val save_merged
+
+    output:
+    tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads
+    tuple val(meta), path('*.json') , emit: json
+    tuple val(meta), path('*.html') , emit: html
+    tuple val(meta), path('*.log') , emit: log
+    path "versions.yml" , emit: versions
+    tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail
+    tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : ''
+    // Added soft-links to original fastqs for consistent naming in MultiQC
+    // Use single ended for interleaved. Add --interleaved_in in config.
+    if ( task.ext.args?.contains('--interleaved_in') ) {
+        """
+        [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz
+
+        fastp \\
+            --stdout \\
+            --in1 ${prefix}.fastq.gz \\
+            --thread $task.cpus \\
+            --json ${prefix}.fastp.json \\
+            --html ${prefix}.fastp.html \\
+            $fail_fastq \\
+            $args \\
+            2> ${prefix}.fastp.log \\
+        | gzip -c > ${prefix}.fastp.fastq.gz
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+        END_VERSIONS
+        """
+    } else if (meta.single_end) {
+        """
+        [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz
+
+        fastp \\
+            --in1 ${prefix}.fastq.gz \\
+            --out1 ${prefix}.fastp.fastq.gz \\
+            --thread $task.cpus \\
+            --json ${prefix}.fastp.json \\
+            --html ${prefix}.fastp.html \\
+            $fail_fastq \\
+            $args \\
+            2> ${prefix}.fastp.log
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+        END_VERSIONS
+        """
+    } else {
+        def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : ''
+        """
+        [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz
+        [ ! -f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz
+        fastp \\
+            --in1 ${prefix}_1.fastq.gz \\
+            --in2 ${prefix}_2.fastq.gz \\
+            --out1 ${prefix}_1.fastp.fastq.gz \\
+            --out2 ${prefix}_2.fastp.fastq.gz \\
+            --json ${prefix}.fastp.json \\
+            --html ${prefix}.fastp.html \\
+            $fail_fastq \\
+            $merge_fastq \\
+            --thread $task.cpus \\
+            --detect_adapter_for_pe \\
+            $args \\
+            2> ${prefix}.fastp.log
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
+        END_VERSIONS
+        """
+    }
+}
diff --git a/modules/ebi-metagenomics/fastp/meta.yml b/modules/ebi-metagenomics/fastp/meta.yml
new file mode 100644
index 00000000..28c009b6
--- /dev/null
+++ b/modules/ebi-metagenomics/fastp/meta.yml
@@ -0,0 +1,69 @@
+name: fastp
+description: Perform adapter/quality trimming on sequencing reads
+keywords:
+  - trimming
+  - quality control
+  - fastq
+tools:
+  - fastp:
+      description: |
+        A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance.
+      documentation: https://github.com/OpenGene/fastp
+      doi: 10.1093/bioinformatics/bty560
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads.
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+        respectively. If you wish to run interleaved paired-end data, supply as single-end data
+        but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module.
+  - save_trimmed_fail:
+      type: boolean
+      description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz`
+  - save_merged:
+      type: boolean
+      description: Specify true to save all merged reads to a file ending in `*.merged.fastq.gz`
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: The trimmed/modified/unmerged fastq reads
+      pattern: "*fastp.fastq.gz"
+  - json:
+      type: file
+      description: Results in JSON format
+      pattern: "*.json"
+  - html:
+      type: file
+      description: Results in HTML format
+      pattern: "*.html"
+  - log:
+      type: file
+      description: fastp log file
+      pattern: "*.log"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - reads_fail:
+      type: file
+      description: Reads that failed the preprocessing
+      pattern: "*fail.fastq.gz"
+  - reads_merged:
+      type: file
+      description: Reads that were successfully merged
+      pattern: "*.merged.fastq.gz"
+authors:
+  - "@drpatelh"
+  - "@kevinmenden"
diff --git a/modules/ebi-metagenomics/seqtk/seq/main.nf b/modules/ebi-metagenomics/seqtk/seq/main.nf
new file mode 100644
index 00000000..9e4d63a4
--- /dev/null
+++ b/modules/ebi-metagenomics/seqtk/seq/main.nf
@@ -0,0 +1,46 @@
+// This seqtk/seq module is simply copied from the already-existing nf-core module (https://nf-co.re/modules/seqtk_seq/, https://github.com/nf-core/modules/commit/726ee59cd9360a965d96ea9ea8770f16b8ddd6cc)
+// This is because there are not currently any nf-core ways of adding modules from more than one nf-core repo
+// Local changes to the original: the conda pin matches the seqtk 1.3 containers, and -a/-A is detected anywhere in ext.args
+
+process SEQTK_SEQ {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "bioconda::seqtk=1.3"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/seqtk:1.3--h5bf99c6_3' :
+        'biocontainers/seqtk:1.3--h5bf99c6_3' }"
+
+    input:
+    tuple val(meta), path(fastx)
+
+    output:
+    tuple val(meta), path("*.gz") , emit: fastx
+    path "versions.yml" , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    // The output is named after the format seqtk actually writes: FASTA when the input
+    // is FASTA, or when -a/-A (force FASTA output) is one of the options in ext.args.
+    def extension = "fastq"
+    if ("$fastx" ==~ /.+\.fasta|.+\.fasta.gz|.+\.fa|.+\.fa.gz|.+\.fas|.+\.fas.gz|.+\.fna|.+\.fna.gz/ || ("$args" =~ /(?<!\S)-[aA](?!\S)/).find() ) {
+        extension = "fasta"
+    }
+    """
+    seqtk \\
+        seq \\
+        $args \\
+        $fastx | \\
+        gzip -c > ${prefix}.seqtk-seq.${extension}.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/ebi-metagenomics/seqtk/seq/meta.yml b/modules/ebi-metagenomics/seqtk/seq/meta.yml
new file mode 100644
index 00000000..1dba3457
--- /dev/null
+++ b/modules/ebi-metagenomics/seqtk/seq/meta.yml
@@ -0,0 +1,44 @@
+name: seqtk_seq
+description: Common transformation operations on FASTA or FASTQ files.
+keywords:
+  - seq
+  - fasta
+  - fastq
+tools:
+  - seqtk:
+      description: Seqtk is a fast and lightweight tool for processing sequences in the FASTA or FASTQ format. The seqtk seq command enables common transformation operations on FASTA or FASTQ files.
+      homepage: https://github.com/lh3/seqtk
+      documentation: https://docs.csc.fi/apps/seqtk/
+      tool_dev_url: https://github.com/lh3/seqtk
+      licence: ["MIT"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test' ]
+  - fastx:
+      type: file
+      description: A FASTQ or FASTA file
+      pattern: "*.{fastq,fastq.gz,fq,fq.gz,fasta,fasta.gz,fa,fa.gz,fas,fas.gz,fna,fna.gz}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test' ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - fastx:
+      type: file
+      description: Gzip-compressed FASTQ/FASTA file containing the transformed sequences
+      pattern: "*.{fastq.gz,fasta.gz}"
+
+authors:
+  - "@hseabolt"
+  - "@mjcipriano"
+  - "@sateeshperi"
diff --git a/subworkflows/ebi-metagenomics/reads_qc/main.nf b/subworkflows/ebi-metagenomics/reads_qc/main.nf
new file mode 100644
index 00000000..39e0c830
--- /dev/null
+++ b/subworkflows/ebi-metagenomics/reads_qc/main.nf
@@ -0,0 +1,37 @@
+
+include { FASTP } from '../../../modules/ebi-metagenomics/fastp/main'
+include { SEQTK_SEQ } from '../../../modules/ebi-metagenomics/seqtk/seq/main'
+
+// Runs fastp on the input reads, then passes the single-end reads and the merged
+// paired-end reads through seqtk seq (FASTA output is requested via ext.args in the config).
+workflow READS_QC {
+
+    take:
+    ch_reads // channel: [ val(meta), [ fastq ] ]
+
+    main:
+
+    ch_versions = Channel.empty()
+
+    FASTP ( ch_reads, params.save_trimmed_fail, params.save_merged )
+    ch_versions = ch_versions.mix(FASTP.out.versions.first())
+
+    ch_se_fastp_reads = FASTP
+        .out.reads
+        .filter { it[0].single_end }
+
+    // mix (not concat) so merged reads are forwarded as soon as they are emitted,
+    // instead of being held back until the single-end channel has completed
+    ch_reads_se_and_merged = ch_se_fastp_reads.mix(FASTP.out.reads_merged)
+
+    SEQTK_SEQ(ch_reads_se_and_merged)
+    ch_versions = ch_versions.mix(SEQTK_SEQ.out.versions.first())
+
+    emit:
+    reads = FASTP.out.reads // channel: [ val(meta), [ fastq ] ]
+    reads_se_and_merged = ch_reads_se_and_merged // channel: [ val(meta), [ fastq ] ]
+    fastp_summary_json = FASTP.out.json // channel: [ val(meta), [ json ] ]
+    reads_fasta = SEQTK_SEQ.out.fastx // channel: [ val(meta), [ fasta ] ]
+    versions = ch_versions // channel: [ versions.yml ]
+}
+
diff --git a/subworkflows/ebi-metagenomics/reads_qc/meta.yml b/subworkflows/ebi-metagenomics/reads_qc/meta.yml
new file mode 100644
index 00000000..546348e3
--- /dev/null
+++ b/subworkflows/ebi-metagenomics/reads_qc/meta.yml
@@ -0,0 +1,52 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: "reads_qc"
+description: |
+  Quality control and merging of fastq-format short-reads using fastp, generating fasta
+keywords:
+  - trimming
+  - quality control
+  - merging
+  - fastq
+  - fasta
+components:
+  - fastp
+  - seqtk/seq
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads.
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+        respectively.
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test' ]`
+  - reads:
+      type: file
+      description: The trimmed/modified/unmerged fastq reads
+      pattern: "*fastp.fastq.gz"
+  - reads_se_and_merged:
+      type: file
+      description: fastp-cleaned single-end reads and merged paired-end reads
+      pattern: "*.{fastp,merged}.fastq.gz"
+  - fastp_summary_json:
+      type: file
+      description: fastp results in JSON format
+      pattern: "*.json"
+  - reads_fasta:
+      type: file
+      description: FASTA file converted from FASTQ
+      pattern: "*.fasta.gz"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@chrisata"
diff --git a/subworkflows/ebi-metagenomics/reads_qc/nextflow.config b/subworkflows/ebi-metagenomics/reads_qc/nextflow.config
new file mode 100644
index 00000000..9d8f666e
--- /dev/null
+++ b/subworkflows/ebi-metagenomics/reads_qc/nextflow.config
@@ -0,0 +1,12 @@
+process {
+
+    withName: SEQTK_SEQ {
+        ext.args = '-a'
+    }
+}
+
+params {
+
+    save_trimmed_fail = true
+    save_merged = true
+}
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 846ed625..1817867a 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -18,6 +18,10 @@ eggnogmapper:
   - modules/ebi-metagenomics/eggnogmapper/**
   - tests/modules/ebi-metagenomics/eggnogmapper/**
 
+fastp:
+  - modules/ebi-metagenomics/fastp/**
+  - tests/modules/ebi-metagenomics/fastp/**
+
 fetchtool/assembly:
   - modules/ebi-metagenomics/fetchtool/assembly/**
   - tests/modules/ebi-metagenomics/fetchtool/assembly/**
@@ -30,6 +34,14 @@ infernal/cmsearch:
   - modules/ebi-metagenomics/infernal/cmsearch/**
   - tests/modules/ebi-metagenomics/infernal/cmsearch/**
 
+seqtk/seq:
+  - modules/ebi-metagenomics/seqtk/seq/**
+  - tests/modules/ebi-metagenomics/seqtk/seq/**
+
 subworkflows/combined_gene_caller:
   - subworkflows/ebi-metagenomics/combined_gene_caller/**
   - tests/subworkflows/ebi-metagenomics/combined_gene_caller/**
+
+subworkflows/reads_qc:
+  - subworkflows/ebi-metagenomics/reads_qc/**
+  - tests/subworkflows/ebi-metagenomics/reads_qc/**
diff --git a/tests/modules/ebi-metagenomics/fastp/data/SRR21814853_1.fastq.gz b/tests/modules/ebi-metagenomics/fastp/data/SRR21814853_1.fastq.gz
new file mode 100644
index 00000000..d86962c3
Binary files /dev/null and b/tests/modules/ebi-metagenomics/fastp/data/SRR21814853_1.fastq.gz differ
diff --git a/tests/modules/ebi-metagenomics/fastp/data/SRR21814853_2.fastq.gz b/tests/modules/ebi-metagenomics/fastp/data/SRR21814853_2.fastq.gz
new file mode 100644
index 00000000..9887c261
Binary files /dev/null and b/tests/modules/ebi-metagenomics/fastp/data/SRR21814853_2.fastq.gz differ
diff --git a/tests/modules/ebi-metagenomics/fastp/main.nf b/tests/modules/ebi-metagenomics/fastp/main.nf
new file mode 100644
index 00000000..6ae22dbc
--- /dev/null
+++ b/tests/modules/ebi-metagenomics/fastp/main.nf
@@ -0,0 +1,20 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { FASTP } from '../../../../modules/ebi-metagenomics/fastp/main.nf'
+
+workflow test_fastp {
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        [ file('tests/modules/ebi-metagenomics/fastp/data/SRR21814853_1.fastq.gz', checkIfExists: true),
+          file('tests/modules/ebi-metagenomics/fastp/data/SRR21814853_2.fastq.gz', checkIfExists: true) ]
+    ]
+
+    FASTP (
+        input,
+        params.save_trimmed_fail,
+        params.save_merged
+    )
+}
diff --git a/tests/modules/ebi-metagenomics/fastp/nextflow.config b/tests/modules/ebi-metagenomics/fastp/nextflow.config
new file mode 100644
index 00000000..14b440b4
--- /dev/null
+++ b/tests/modules/ebi-metagenomics/fastp/nextflow.config
@@ -0,0 +1,11 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}
+
+params {
+
+    save_trimmed_fail = true
+    save_merged = true
+}
\ No newline at end of file
diff --git a/tests/modules/ebi-metagenomics/fastp/test.yml b/tests/modules/ebi-metagenomics/fastp/test.yml
new file mode 100644
index 00000000..6fb87483
--- /dev/null
+++ b/tests/modules/ebi-metagenomics/fastp/test.yml
@@ -0,0 +1,24 @@
+- name: fastp test_fastp
+  command: nextflow run ./tests/modules/ebi-metagenomics/fastp -entry test_fastp -c ./tests/config/nextflow.config
+  tags:
+    - fastp
+  files:
+    - path: output/fastp/test.fastp.html
+      contains:
+        - "1.789477 M (97.392334%)"
+    - path: output/fastp/test.fastp.json
+      md5sum: 552903446c04b38bd8f2f8976119fc42
+    - path: output/fastp/test.fastp.log
+      contains:
+        - "Q30 bases: 6864820(64.3075%)"
+    - path: output/fastp/test.merged.fastq.gz
+      md5sum: bc9ed1744b5c680ef2eb4428e9e5f0ef
+    - path: output/fastp/test_1.fail.fastq.gz
+      md5sum: 5f8a4ee1f2baee9f8214b4504358a583
+    - path: output/fastp/test_1.fastp.fastq.gz
+      md5sum: 400ca3c0d3e01d1cdd66af6ba51e083c
+    - path: output/fastp/test_2.fail.fastq.gz
+      md5sum: 4f2f1fbc7bac46520a848af89ea87b52
+    - path: output/fastp/test_2.fastp.fastq.gz
+      md5sum: bac3f9e14935b54e09003dcc1843d6e1
+    - path: output/fastp/versions.yml
diff --git a/tests/modules/ebi-metagenomics/seqtk/seq/data/SRR21814853.merged.fastq.gz b/tests/modules/ebi-metagenomics/seqtk/seq/data/SRR21814853.merged.fastq.gz
new file mode 100644
index 00000000..7f0aa3d1
Binary files /dev/null and b/tests/modules/ebi-metagenomics/seqtk/seq/data/SRR21814853.merged.fastq.gz differ
diff --git a/tests/modules/ebi-metagenomics/seqtk/seq/main.nf b/tests/modules/ebi-metagenomics/seqtk/seq/main.nf
new file mode 100644
index 00000000..197265a4
--- /dev/null
+++ b/tests/modules/ebi-metagenomics/seqtk/seq/main.nf
@@ -0,0 +1,15 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { SEQTK_SEQ } from '../../../../../modules/ebi-metagenomics/seqtk/seq/main.nf'
+
+workflow test_seqtk_seq {
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        file('tests/modules/ebi-metagenomics/seqtk/seq/data/SRR21814853.merged.fastq.gz', checkIfExists: true)
+    ]
+
+    SEQTK_SEQ ( input )
+}
diff --git a/tests/modules/ebi-metagenomics/seqtk/seq/nextflow.config b/tests/modules/ebi-metagenomics/seqtk/seq/nextflow.config
new file mode 100644
index 00000000..7a875a5a
--- /dev/null
+++ b/tests/modules/ebi-metagenomics/seqtk/seq/nextflow.config
@@ -0,0 +1,8 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+    withName: SEQTK_SEQ {
+        ext.args = '-a'
+    }
+}
\ No newline at end of file
diff --git a/tests/modules/ebi-metagenomics/seqtk/seq/test.yml b/tests/modules/ebi-metagenomics/seqtk/seq/test.yml
new file mode 100644
index 00000000..fea0adef
--- /dev/null
+++ b/tests/modules/ebi-metagenomics/seqtk/seq/test.yml
@@ -0,0 +1,9 @@
+- name: seqtk seq test_seqtk_seq
+  command: nextflow run ./tests/modules/ebi-metagenomics/seqtk/seq -entry test_seqtk_seq -c ./tests/config/nextflow.config
+  tags:
+    - seqtk
+    - seqtk/seq
+  files:
+    - path: output/seqtk/test.seqtk-seq.fasta.gz
+      md5sum: afb4460aa8f0bda84dba3244760155a8
+    - path: output/seqtk/versions.yml
diff --git a/tests/subworkflows/ebi-metagenomics/reads_qc/data/SRR21814853_1.fastq.gz b/tests/subworkflows/ebi-metagenomics/reads_qc/data/SRR21814853_1.fastq.gz
new file mode 100644
index 00000000..d86962c3
Binary files /dev/null and b/tests/subworkflows/ebi-metagenomics/reads_qc/data/SRR21814853_1.fastq.gz differ
diff --git a/tests/subworkflows/ebi-metagenomics/reads_qc/data/SRR21814853_2.fastq.gz b/tests/subworkflows/ebi-metagenomics/reads_qc/data/SRR21814853_2.fastq.gz
new file mode 100644
index 00000000..9887c261
Binary files /dev/null and b/tests/subworkflows/ebi-metagenomics/reads_qc/data/SRR21814853_2.fastq.gz differ
diff --git a/tests/subworkflows/ebi-metagenomics/reads_qc/data/SRR9674626.fastq.gz b/tests/subworkflows/ebi-metagenomics/reads_qc/data/SRR9674626.fastq.gz
new file mode 100644
index 00000000..70af57e2
Binary files /dev/null and b/tests/subworkflows/ebi-metagenomics/reads_qc/data/SRR9674626.fastq.gz differ
diff --git a/tests/subworkflows/ebi-metagenomics/reads_qc/main.nf b/tests/subworkflows/ebi-metagenomics/reads_qc/main.nf
new file mode 100644
index 00000000..faa1dfd7
--- /dev/null
+++ b/tests/subworkflows/ebi-metagenomics/reads_qc/main.nf
@@ -0,0 +1,44 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { READS_QC } from '../../../../subworkflows/ebi-metagenomics/reads_qc/main.nf'
+
+workflow test_reads_qc_pe {
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        [ file('tests/subworkflows/ebi-metagenomics/reads_qc/data/SRR21814853_1.fastq.gz', checkIfExists: true),
+          file('tests/subworkflows/ebi-metagenomics/reads_qc/data/SRR21814853_2.fastq.gz', checkIfExists: true) ]
+    ]
+
+    READS_QC ( input )
+}
+
+workflow test_reads_qc_se {
+
+    input = [
+        [ id:'test', single_end:true ], // meta map
+        file('tests/subworkflows/ebi-metagenomics/reads_qc/data/SRR9674626.fastq.gz', checkIfExists: true),
+    ]
+
+    READS_QC ( input )
+}
+
+workflow test_reads_qc_pe_and_se {
+
+    input_pe = [
+        [ id:'test_pe', single_end:false ], // meta map
+        [ file('tests/subworkflows/ebi-metagenomics/reads_qc/data/SRR21814853_1.fastq.gz', checkIfExists: true),
+          file('tests/subworkflows/ebi-metagenomics/reads_qc/data/SRR21814853_2.fastq.gz', checkIfExists: true) ]
+    ]
+
+    input_se = [
+        [ id:'test_se', single_end:true ], // meta map
+        file('tests/subworkflows/ebi-metagenomics/reads_qc/data/SRR9674626.fastq.gz', checkIfExists: true),
+    ]
+
+    input_pe_and_se = Channel.of( input_pe, input_se )
+
+    READS_QC ( input_pe_and_se )
+}
\ No newline at end of file
diff --git a/tests/subworkflows/ebi-metagenomics/reads_qc/nextflow.config b/tests/subworkflows/ebi-metagenomics/reads_qc/nextflow.config
new file mode 100644
index 00000000..ebe94722
--- /dev/null
+++ b/tests/subworkflows/ebi-metagenomics/reads_qc/nextflow.config
@@ -0,0 +1,14 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+    withName: SEQTK_SEQ {
+        ext.args = '-a'
+    }
+}
+
+params {
+
+    save_trimmed_fail = true
+    save_merged = true
+}
\ No newline at end of file
diff --git a/tests/subworkflows/ebi-metagenomics/reads_qc/test.yml b/tests/subworkflows/ebi-metagenomics/reads_qc/test.yml
new file mode 100644
index 00000000..89086c5d
--- /dev/null
+++ b/tests/subworkflows/ebi-metagenomics/reads_qc/test.yml
@@ -0,0 +1,93 @@
+- name: reads_qc test_reads_qc_pe
+  command: nextflow run ./tests/subworkflows/ebi-metagenomics/reads_qc -entry test_reads_qc_pe -c ./tests/config/nextflow.config
+  tags:
+    - fastp
+    - seqtk
+    - seqtk/seq
+    - subworkflows
+    - subworkflows/reads_qc
+  files:
+    - path: output/fastp/test.fastp.html
+      contains:
+        - "1.789477 M (97.392334%)"
+    - path: output/fastp/test.fastp.json
+      md5sum: 552903446c04b38bd8f2f8976119fc42
+    - path: output/fastp/test.fastp.log
+      contains:
+        - "Q30 bases: 6864820(64.3075%)"
+    - path: output/fastp/test.merged.fastq.gz
+      md5sum: bc9ed1744b5c680ef2eb4428e9e5f0ef
+    - path: output/fastp/test_1.fail.fastq.gz
+      md5sum: 5f8a4ee1f2baee9f8214b4504358a583
+    - path: output/fastp/test_1.fastp.fastq.gz
+      md5sum: 400ca3c0d3e01d1cdd66af6ba51e083c
+    - path: output/fastp/test_2.fail.fastq.gz
+      md5sum: 4f2f1fbc7bac46520a848af89ea87b52
+    - path: output/fastp/test_2.fastp.fastq.gz
+      md5sum: bac3f9e14935b54e09003dcc1843d6e1
+    - path: output/seqtk/test.seqtk-seq.fasta.gz
+      md5sum: afb4460aa8f0bda84dba3244760155a8
+
+- name: reads_qc test_reads_qc_se
+  command: nextflow run ./tests/subworkflows/ebi-metagenomics/reads_qc -entry test_reads_qc_se -c ./tests/config/nextflow.config
+  tags:
+    - fastp
+    - seqtk
+    - seqtk/seq
+    - subworkflows
+    - subworkflows/reads_qc
+  files:
+    - path: output/fastp/test.fastp.fastq.gz
+      md5sum: d43e36bb4dc60ef1b4094731d76fcfa9
+    - path: output/fastp/test.fastp.html
+      contains:
+        - "10.034037 M (97.324196%)"
+    - path: output/fastp/test.fastp.json
+      md5sum: 59cc86d2287da9bf5190cd49b01e8311
+    - path: output/fastp/test.fastp.log
+      contains:
+        - "Q30 bases: 7949076(77.1013%)"
+    - path: output/seqtk/test.seqtk-seq.fasta.gz
+      md5sum: a8a5ccd137561b692a92acf4924275f1
+
+- name: reads_qc test_reads_qc_pe_and_se
+  command: nextflow run ./tests/subworkflows/ebi-metagenomics/reads_qc -entry test_reads_qc_pe_and_se -c ./tests/config/nextflow.config
+  tags:
+    - fastp
+    - seqtk
+    - seqtk/seq
+    - subworkflows
+    - subworkflows/reads_qc
+  files:
+    - path: output/fastp/test_pe.fastp.html
+      contains:
+        - "1.789477 M (97.392334%)"
+    - path: output/fastp/test_pe.fastp.json
+      md5sum: 72bf9dbfa31230bb103331b1e39b52c0
+    - path: output/fastp/test_pe.fastp.log
+      contains:
+        - "Q30 bases: 6864820(64.3075%)"
+    - path: output/fastp/test_pe.merged.fastq.gz
+      md5sum: bc9ed1744b5c680ef2eb4428e9e5f0ef
+    - path: output/fastp/test_pe_1.fail.fastq.gz
+      md5sum: 5f8a4ee1f2baee9f8214b4504358a583
+    - path: output/fastp/test_pe_1.fastp.fastq.gz
+      md5sum: 400ca3c0d3e01d1cdd66af6ba51e083c
+    - path: output/fastp/test_pe_2.fail.fastq.gz
+      md5sum: 4f2f1fbc7bac46520a848af89ea87b52
+    - path: output/fastp/test_pe_2.fastp.fastq.gz
+      md5sum: bac3f9e14935b54e09003dcc1843d6e1
+    - path: output/fastp/test_se.fastp.fastq.gz
+      md5sum: d43e36bb4dc60ef1b4094731d76fcfa9
+    - path: output/fastp/test_se.fastp.html
+      contains:
+        - "10.034037 M (97.324196%)"
+    - path: output/fastp/test_se.fastp.json
+      md5sum: f6b2cfdf44dd961f0b5117cdfcf95f85
+    - path: output/fastp/test_se.fastp.log
+      contains:
+        - "Q30 bases: 7949076(77.1013%)"
+    - path: output/seqtk/test_pe.seqtk-seq.fasta.gz
+      md5sum: afb4460aa8f0bda84dba3244760155a8
+    - path: output/seqtk/test_se.seqtk-seq.fasta.gz
+      md5sum: a8a5ccd137561b692a92acf4924275f1