diff --git a/subworkflows/nf-core/fastq_align_parabricks/main.nf b/subworkflows/nf-core/fastq_align_parabricks/main.nf
new file mode 100644
index 00000000000..e27192bdbf8
--- /dev/null
+++ b/subworkflows/nf-core/fastq_align_parabricks/main.nf
@@ -0,0 +1,50 @@
+//
+// Alignment and BQSR with Nvidia CLARA Parabricks
+//
+include { PARABRICKS_FQ2BAM    } from '../../../modules/nf-core/parabricks/fq2bam/main'
+include { PARABRICKS_APPLYBQSR } from '../../../modules/nf-core/parabricks/applybqsr/main'
+
+workflow FASTQ_ALIGN_PARABRICKS {
+
+    take:
+    ch_reads         // channel: [mandatory] meta, reads
+    ch_fasta         // channel: [mandatory] meta, fasta
+    ch_index         // channel: [mandatory] meta, index
+    ch_interval_file // channel: [optional]  meta, intervals_bed_combined
+    ch_known_sites   // channel: [optional]  known_sites_indels
+
+    main:
+    ch_versions = Channel.empty()
+
+    // Run FQ2BAM on the reads with the reference, its index and the optional
+    // interval / known-sites inputs
+    PARABRICKS_FQ2BAM(
+        ch_reads,
+        ch_fasta,
+        ch_index,
+        ch_interval_file,
+        ch_known_sites
+    )
+
+    // Collect only the FQ2BAM outputs that are consumed by APPLYBQSR below
+    ch_bam        = PARABRICKS_FQ2BAM.out.bam
+    ch_bai        = PARABRICKS_FQ2BAM.out.bai
+    ch_bqsr_table = PARABRICKS_FQ2BAM.out.bqsr_table
+    ch_versions   = ch_versions.mix(PARABRICKS_FQ2BAM.out.versions)
+
+    // Apply BQSR; an empty list is substituted when the bqsr_table channel is
+    // empty (presumably when no known sites were supplied - confirm)
+    PARABRICKS_APPLYBQSR(
+        ch_bam,
+        ch_bai,
+        ch_bqsr_table.ifEmpty([]),
+        ch_interval_file,
+        ch_fasta
+    )
+    ch_versions = ch_versions.mix(PARABRICKS_APPLYBQSR.out.versions)
+
+    emit:
+    bam      = PARABRICKS_APPLYBQSR.out.bam // channel: [ [meta], bam ]
+    bai      = PARABRICKS_APPLYBQSR.out.bai // channel: [ [meta], bai ]
+    versions = ch_versions                  // channel: [ versions.yml ]
+}
diff --git a/subworkflows/nf-core/fastq_align_parabricks/meta.yml b/subworkflows/nf-core/fastq_align_parabricks/meta.yml
new file mode 100644
index 00000000000..27fe1ab34df
--- /dev/null
+++ b/subworkflows/nf-core/fastq_align_parabricks/meta.yml
@@ -0,0 +1,63 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: "fastq_align_parabricks"
+description: Align a fastq file and apply base quality score recalibration (BQSR) using GPU-based acceleration
+keywords:
+  - fastq
+  - align
+  - parabricks
+  - gpu
+  - preprocessing
+components:
+  - parabricks/fq2bam
+  - parabricks/applybqsr
+input:
+  - ch_reads:
+      type: file
+      description: |
+        Channel containing reads (either one file for se or two files for pe)
+        Structure: [ val(meta), [ path(fastq1), path(fastq2) ] ]
+  - ch_fasta:
+      type: file
+      description: |
+        Channel containing reference fasta file
+        Structure: [ val(meta), path(fasta) ]
+  - ch_index:
+      type: file
+      description: |
+        Channel containing reference BWA index
+        Structure: [ val(meta), path(.{amb,ann,bwt,pac,sa}) ]
+  - ch_interval_file:
+      type: file
+      description: |
+        (optional) file(s) containing genomic intervals for use in base
+        quality score recalibration (BQSR)
+        Structure: [ val(meta), path(.{bed,interval_list,picard,list,intervals}) ]
+  - ch_known_sites:
+      type: file
+      description: |
+        (optional) known sites file(s) for calculating BQSR. markdups must
+        be true to perform BQSR.
+        Structure: [ path(vcf) ]
+output:
+  - bam:
+      type: file
+      description: |
+        Channel containing BAM files
+        Structure: [ val(meta), path(bam) ]
+      pattern: "*.bam"
+  - bai:
+      type: file
+      description: |
+        Channel containing BAM index (BAI) files
+        Structure: [ val(meta), path(bai) ]
+      pattern: "*.bai"
+  - versions:
+      type: file
+      description: |
+        File containing software versions
+        Structure: [ path(versions.yml) ]
+      pattern: "versions.yml"
+authors:
+  - "@famosab"
+maintainers:
+  - "@famosab"
diff --git a/subworkflows/nf-core/fastq_align_parabricks/tests/main.nf.test b/subworkflows/nf-core/fastq_align_parabricks/tests/main.nf.test
new file mode 100644
index 00000000000..7f102f528a9
--- /dev/null
+++ b/subworkflows/nf-core/fastq_align_parabricks/tests/main.nf.test
@@ -0,0 +1,106 @@
+nextflow_workflow {
+
+    name "Test Subworkflow FASTQ_ALIGN_PARABRICKS"
+    script "../main.nf"
+    workflow "FASTQ_ALIGN_PARABRICKS"
+    config "./nextflow.config"
+
+    tag "subworkflows"
+    tag "subworkflows_nfcore"
+    tag "subworkflows/fastq_align_parabricks"
+    tag "parabricks"
+    tag "parabricks/fq2bam"
+    tag "parabricks/applybqsr"
+    tag "bwa"
+    tag "bwa/index"
+    tag "gpu"
+
+    setup {
+        run("BWA_INDEX") {
+            script "../../../../modules/nf-core/bwa/index/main.nf"
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                ])
+                """
+            }
+        }
+    }
+
+    test("sarscov2 single-end [fastq_gz]") {
+
+        when {
+            workflow {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:true ],
+                    [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)]
+                ])
+                input[1] = Channel.value([
+                    [id: 'reference'],
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                ])
+                input[2] = BWA_INDEX.out.index
+                input[3] = Channel.value([
+                    [id: 'intervals'],
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/picard/baits.interval_list', checkIfExists: true)
+                ])
+                input[4] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true)
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(
+                    workflow.out.bam.collect { meta, bamfile -> bam(bamfile).getReadsMD5() },
+                    workflow.out.bai.collect { meta, bai -> file(bai).name },
+                    workflow.out.versions
+                    ).match()
+                }
+            )
+        }
+    }
+
+    test("sarscov2 paired-end [fastq_gz]") {
+
+        when {
+            workflow {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+                    ]
+                ])
+                input[1] = Channel.value([
+                    [id: 'reference'],
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+                ])
+                input[2] = BWA_INDEX.out.index
+                input[3] = Channel.value([
+                    [id: 'intervals'],
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/picard/baits.interval_list', checkIfExists: true)
+                ])
+                input[4] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true)
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(
+                    workflow.out.bam.collect { meta, bamfile -> bam(bamfile).getReadsMD5() },
+                    workflow.out.bai.collect { meta, bai -> file(bai).name },
+                    workflow.out.versions
+                    ).match()
+                }
+            )
+        }
+    }
+}
diff --git a/subworkflows/nf-core/fastq_align_parabricks/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_align_parabricks/tests/main.nf.test.snap
new file mode 100644
index 00000000000..0b536a63317
--- /dev/null
+++ b/subworkflows/nf-core/fastq_align_parabricks/tests/main.nf.test.snap
@@ -0,0 +1,40 @@
+{
+    "sarscov2 single-end [fastq_gz]": {
+        "content": [
+            [
+                "7e2bd786d964e42ddbc2ab0c9f340b09"
+            ],
+            [
+                "test.bqsr.bam.bai"
+            ],
+            [
+                "versions.yml:md5,4d671c4d60b6a0279cfca507525daa77",
+                "versions.yml:md5,df165e28f025dad39d826caead132115"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.0"
+        },
+        "timestamp": "2024-11-19T15:25:23.622710503"
+    },
+    "sarscov2 paired-end [fastq_gz]": {
+        "content": [
+            [
+                "73e8e89cda8fce1cf07bdebff0f793ec"
+            ],
+            [
+                "test.bqsr.bam.bai"
+            ],
+            [
+                "versions.yml:md5,4d671c4d60b6a0279cfca507525daa77",
+                "versions.yml:md5,df165e28f025dad39d826caead132115"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.0"
+        },
+        "timestamp": "2024-11-19T15:26:09.183487496"
+    }
+}
\ No newline at end of file
diff --git a/subworkflows/nf-core/fastq_align_parabricks/tests/nextflow.config b/subworkflows/nf-core/fastq_align_parabricks/tests/nextflow.config
new file mode 100644
index 00000000000..59e6276120c
--- /dev/null
+++ b/subworkflows/nf-core/fastq_align_parabricks/tests/nextflow.config
@@ -0,0 +1,10 @@
+process {
+
+    // Parabricks' fq2bam requires 24GB of memory,
+    // so --low-memory is used for testing.
+    // Ref: https://forums.developer.nvidia.com/t/problem-with-gpu/256825/6
+    withName: 'PARABRICKS_FQ2BAM' {
+        ext.args = '--low-memory'
+    }
+
+}