diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c48d11e7c4..15fca38295 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -247,6 +247,7 @@ jobs:
- tags: "gatk4spark/applybqsr"
- tags: "gatk4spark/markduplicates"
- tags: "gawk"
+ - tags: "lofreq/callparallel"
- tags: "mosdepth"
- tags: "multiqc"
- tags: "ngscheckmate/ncm"
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 540e4d9826..201cbd3079 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,54 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## dev
+
+### Added
+
+- [1620](https://github.com/nf-core/sarek/pull/1620) - Add `lofreq` as a tumor-only variant caller.
+- [1642](https://github.com/nf-core/sarek/pull/1642) - Back to dev
+
+### Changed
+
+### Fixed
+
+### Removed
+
+### Dependencies
+
+| Dependency | Old version | New version |
+| ---------- | ----------- | ----------- |
+| `lofreq` | | 2.1.5 |
+
+### Parameters
+
+## [3.4.4](https://github.com/nf-core/sarek/releases/tag/3.4.4) - Ruopsokjåkhå
+
+Ruopsokjåkhå is another peak of the Pårte massif.
+
+### Added
+
+- [1614](https://github.com/nf-core/sarek/pull/1614) - Back to dev
+- [1639](https://github.com/nf-core/sarek/pull/1639) - Bump version to prepare release
+
+### Changed
+
+- [1627](https://github.com/nf-core/sarek/pull/1627) - Correct tower reports/snpeff format
+
+### Fixed
+
+- [1623](https://github.com/nf-core/sarek/pull/1623) - Update docs to clarify vep cache folder organisation
+- [1628](https://github.com/nf-core/sarek/pull/1628) - Fix dbsnp channel mapping in germline variant calling subworkflow
+
+### Removed
+
+### Dependencies
+
+| Dependency | Old version | New version |
+| ---------- | ----------- | ----------- |
+
+### Parameters
+
## [3.4.3](https://github.com/nf-core/sarek/releases/tag/3.4.3) - Loametjåhkkå
Loametjåhkkå is another one of the main peaks of the Pårte massif.
diff --git a/CITATIONS.md b/CITATIONS.md
index f72966d98c..1c4a22cade 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -146,6 +146,10 @@
> Danecek P, Auton A, Abecasis G, et al.: The variant call format and VCFtools. Bioinformatics. 2011 Aug 1;27(15):2156-8. doi: 10.1093/bioinformatics/btr330. Epub 2011 Jun 7. PubMed PMID: 21653522; PubMed Central PMCID: PMC3137218.
+- [Lofreq](https://pubmed.ncbi.nlm.nih.gov/23066108/)
+
+ > Wilm et al. LoFreq: A sequence-quality aware, ultra-sensitive variant caller for uncovering cell-population heterogeneity from high-throughput sequencing datasets. Nucleic Acids Res. 2012; 40(22):11189-201.
+
## R packages
- [R](https://www.R-project.org/)
diff --git a/README.md b/README.md
index e36cafa38a..a508b7ceb2 100644
--- a/README.md
+++ b/README.md
@@ -61,6 +61,7 @@ Depending on the options and samples provided, the pipeline can currently perfor
- `Sentieon Haplotyper`
- `Strelka2`
- `TIDDIT`
+ - `Lofreq`
- Variant filtering and annotation (`SnpEff`, `Ensembl VEP`, `BCFtools annotate`)
- Summarise and represent QC (`MultiQC`)
@@ -132,6 +133,7 @@ We thank the following people for their extensive assistance in the development
- [Abhinav Sharma](https://github.com/abhi18av)
- [Adam Talbot](https://github.com/adamrtalbot)
- [Adrian Lärkeryd](https://github.com/adrlar)
+- [Àitor Olivares](https://github.com/AitorPeseta)
- [Alexander Peltzer](https://github.com/apeltzer)
- [Alison Meynert](https://github.com/ameynert)
- [Anders Sune Pedersen](https://github.com/asp8200)
@@ -145,6 +147,7 @@ We thank the following people for their extensive assistance in the development
- [Edmund Miller](https://github.com/edmundmiller)
- [Famke Bäuerle](https://github.com/famosab)
- [Francesco Lescai](https://github.com/lescai)
+- [Francisco Martínez](https://github.com/nevinwu)
- [Gavin Mackenzie](https://github.com/GCJMackenzie)
- [Gisela Gabernet](https://github.com/ggabernet)
- [Grant Neilson](https://github.com/grantn5)
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index ea1fbb0aae..ce1a6e388e 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -3,9 +3,9 @@ custom_logo_url: https://github.com/nf-core/sarek/
custom_logo_title: "nf-core/sarek"
report_comment: >
- This report has been generated by the nf-core/sarek
+ This report has been generated by the nf-core/sarek
analysis pipeline. For information about how to interpret these results, please see the
- documentation.
+ documentation.
report_section_order:
"nf-core-sarek-methods-description":
order: -1000
diff --git a/conf/modules/lofreq.config b/conf/modules/lofreq.config
new file mode 100644
index 0000000000..253b252b3b
--- /dev/null
+++ b/conf/modules/lofreq.config
@@ -0,0 +1,45 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Config file for defining DSL2 per module options and publishing paths
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Available keys to override module options:
+ ext.args = Additional arguments appended to command in module.
+ ext.args2 = Second set of arguments appended to command in module (multi-tool modules).
+ ext.args3 = Third set of arguments appended to command in module (multi-tool modules).
+ ext.prefix = File name prefix for output files.
+ ext.when = When to run the module.
+----------------------------------------------------------------------------------------
+*/
+
+// LOFREQ: module options and publishing paths, applied only when 'lofreq' is listed in params.tools
+
+process {
+ if (params.tools && params.tools.split(',').contains('lofreq')) {
+ // Per-interval calls: output is named per interval and only published directly when num_intervals <= 1 (saveAs yields null otherwise)
+ withName: "LOFREQ_CALLPARALLEL" {
+ ext.args = { "--call-indels" }
+ ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.lofreq" : "${meta.id}.lofreq.${intervals.baseName}" }
+ ext.when = { params.tools && params.tools.split(',').contains('lofreq') } // NOTE(review): repeats the enclosing if-condition
+ publishDir = [
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/variant_calling/" },
+ pattern: "*{vcf.gz,vcf.gz.tbi}",
+ saveAs: { meta.num_intervals > 1 ? null : "lofreq/${meta.id}/${it}" }
+ ]
+ }
+ // NOTE(review): selector is not lofreq-specific; presumably it also relaxes error handling for other callers' VCFs while 'lofreq' is in --tools — confirm intended
+ withName:'VCFTOOLS_TSTV_COUNT'{
+ errorStrategy = 'ignore'
+ }
+ // Merge step: output is named <meta.id>.lofreq and published under variant_calling/lofreq/<meta.id>
+ withName: 'MERGE_LOFREQ.*' {
+ ext.prefix = { "${meta.id}.lofreq" }
+ publishDir = [
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/variant_calling/lofreq/${meta.id}" },
+ pattern: "*{vcf.gz,vcf.gz.tbi}"
+ ]
+ }
+ }
+
+}
diff --git a/docs/images/sarek_workflow.png b/docs/images/sarek_workflow.png
index 7fb4cd52c2..03709a89d6 100644
Binary files a/docs/images/sarek_workflow.png and b/docs/images/sarek_workflow.png differ
diff --git a/docs/images/sarek_workflow.svg b/docs/images/sarek_workflow.svg
index 5f4cbd2ddd..fc7e2a8f18 100644
--- a/docs/images/sarek_workflow.svg
+++ b/docs/images/sarek_workflow.svg
@@ -4,7 +4,7 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Preprocessing
Preprocessing
+
+
+
+
+
+
+
+bcftools annotate, snpeff, vep
bcftools annotate, snpeff, vep
+
+
+Annotation
Annotation
+
+
+
+
+
+
+
+Reports
Reports
+
+
+
+
+
+
+
+
+
+
+
+
+
+Variant Calling
Variant Calling
+
+
+ strelka2 strelka2, lofreq• msisensorpro
• msisensorpro
+
+
+Germline
Germline
+• deepvariant, •freebayes
+ deepvariant, freebayes
GATK GATK haplotypecaller,
haplotypecaller,
+ mpileup, strelka2,
mpileup, strelka2,
+ Sentieon haplotyper
Sentieon haplotyper
+• manta, tiddit
• manta, tiddit
+• cnvkit• cnvkit
+
+
+
+
+
+
+
+
diff --git a/docs/output.md b/docs/output.md
index 4056a6c860..4c9181bc87 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -41,6 +41,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
- [Sentieon DNAscope joint germline variant calling](#sentieon-dnascope-joint-germline-variant-calling)
- [Sentieon Haplotyper](#sentieon-haplotyper)
- [Sentieon Haplotyper joint germline variant calling](#sentieon-haplotyper-joint-germline-variant-calling)
+ - [Lofreq](#lofreq)
- [Strelka](#strelka)
- [Structural Variants](#structural-variants)
- [Indexcov](#indexcov)
@@ -571,6 +572,20 @@ For further downstream analysis, take a look [here](https://github.com/Illumina/
+#### Lofreq
+
+[Lofreq](https://github.com/CSB5/lofreq) is a fast and sensitive variant-caller for inferring SNVs and indels from next-generation sequencing data. It makes full use of base-call qualities and other sources of errors inherent in sequencing, which are usually ignored by other methods or only used for filtering. For further reading and documentation see the [Lofreq user guide](https://csb5.github.io/lofreq/).
+
+<details markdown="1">
+<summary>Output files for tumor-only samples</summary>
+
+**Output directory: `{outdir}/variant_calling/lofreq/<sample>/`**
+
+- `<sample>.lofreq.vcf.gz`
+  - VCF which provides a detailed description of the detected genetic variants.
+
+</details>
+
### Structural Variants
#### indexcov
diff --git a/docs/usage.md b/docs/usage.md
index 8e1acdbed2..208d04e44c 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -581,6 +581,7 @@ This list is by no means exhaustive and it will depend on the specific analysis
| [FreeBayes](https://github.com/ekg/freebayes) | x | x | x | x | x | x |
| [GATK HaplotypeCaller](https://gatk.broadinstitute.org/hc/en-us/articles/5358864757787-HaplotypeCaller) | x | x | x | x | - | - |
| [GATK Mutect2](https://gatk.broadinstitute.org/hc/en-us/articles/5358911630107-Mutect2) | x | x | x | - | x | x |
+| [lofreq](https://github.com/CSB5/lofreq) | x | x | x | - | x | - |
| [mpileup](https://www.htslib.org/doc/samtools-mpileup.html) | x | x | x | x | x | - |
| [Strelka](https://github.com/Illumina/strelka) | x | x | x | x | x | x |
| [Manta](https://github.com/Illumina/manta) | x | x | x | x | x | x |
@@ -873,7 +874,7 @@ To use these, supply the parameters `--vep_cache` and/or `--snpeff_cache` with t
### Specify the cache location
Params `--snpeff_cache` and `--vep_cache` are used to specify the locations to the root of the annotation cache folder.
-The cache will be located within a subfolder with the path `${snpeff_species}.${snpeff_version}` for SnpEff and `${vep_species}/${vep_genome}_${vep_cache_version}` for VEP.
+The cache will be located within a subfolder with the path `${snpeff_species}.${snpeff_version}` for SnpEff and `${vep_species}/${vep_cache_version}_${vep_genome}` for VEP.
If this directory is missing, Sarek will raise an error.
For example this is a typical folder structure for `GRCh38` and `WBCel235`, with SNPeff cache version 105 and VEP cache version 110:
diff --git a/modules.json b/modules.json
index 9ef6a5c996..68a92f6739 100644
--- a/modules.json
+++ b/modules.json
@@ -314,6 +314,11 @@
"git_sha": "a941aa24517960d7b9eeab4c3a5adfb3f70a5e4b",
"installed_by": ["modules"]
},
+ "lofreq/callparallel": {
+ "branch": "master",
+ "git_sha": "bf918b32044472c9346ec3444d894c69700ced6d",
+ "installed_by": ["modules"]
+ },
"manta/germline": {
"branch": "master",
"git_sha": "ebc1733b77c702f19fe42076a5edfcbaa0d84f66",
diff --git a/modules/nf-core/lofreq/callparallel/environment.yml b/modules/nf-core/lofreq/callparallel/environment.yml
new file mode 100644
index 0000000000..222d145021
--- /dev/null
+++ b/modules/nf-core/lofreq/callparallel/environment.yml
@@ -0,0 +1,7 @@
+name: lofreq_callparallel
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::lofreq=2.1.5
diff --git a/modules/nf-core/lofreq/callparallel/main.nf b/modules/nf-core/lofreq/callparallel/main.nf
new file mode 100644
index 0000000000..93f9a3dfb1
--- /dev/null
+++ b/modules/nf-core/lofreq/callparallel/main.nf
@@ -0,0 +1,70 @@
+process LOFREQ_CALLPARALLEL {
+ tag "$meta.id"
+ label 'process_high'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/lofreq:2.1.5--py38h588ecb2_4' :
+ 'biocontainers/lofreq:2.1.5--py38h588ecb2_4' }"
+
+ input:
+ tuple val(meta) , path(bam), path(bai), path(intervals)
+ tuple val(meta2), path(fasta)
+ tuple val(meta3), path(fai)
+
+ output:
+ tuple val(meta), path("*.vcf.gz") , emit: vcf
+ tuple val(meta), path("*.vcf.gz.tbi"), emit: tbi
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def options_intervals = intervals ? "-l ${intervals}" : ""
+
+ def alignment_cram = bam.Extension == "cram" ? true : false
+ def alignment_bam = bam.Extension == "bam" ? true : false
+ def alignment_out = alignment_cram ? bam.BaseName + ".bam" : "${bam}"
+
+ def samtools_cram_convert = ''
+ samtools_cram_convert += alignment_cram ? " samtools view -T ${fasta} ${bam} -@ $task.cpus -o ${alignment_out}\n" : ''
+ samtools_cram_convert += alignment_cram ? " samtools index ${alignment_out}\n" : ''
+
+ def samtools_cram_remove = ''
+ samtools_cram_remove += alignment_cram ? " rm ${alignment_out}\n" : ''
+ samtools_cram_remove += alignment_cram ? " rm ${alignment_out}.bai\n " : ''
+ """
+ $samtools_cram_convert
+
+ lofreq \\
+ call-parallel \\
+ --pp-threads $task.cpus \\
+ $args \\
+ $options_intervals \\
+ -f $fasta \\
+ -o ${prefix}.vcf.gz \\
+ $alignment_out
+
+ $samtools_cram_remove
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ lofreq: \$(echo \$(lofreq version 2>&1) | sed 's/^version: //; s/ *commit.*\$//')
+ END_VERSIONS
+ """
+
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ echo "" | gzip > ${prefix}.vcf.gz
+ echo "" | gzip > ${prefix}.vcf.gz.tbi
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ lofreq: \$(echo \$(lofreq version 2>&1) | sed 's/^version: //; s/ *commit.*\$//')
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/lofreq/callparallel/meta.yml b/modules/nf-core/lofreq/callparallel/meta.yml
new file mode 100644
index 0000000000..2884773231
--- /dev/null
+++ b/modules/nf-core/lofreq/callparallel/meta.yml
@@ -0,0 +1,76 @@
+name: lofreq_callparallel
+description: It predicts variants using multiple processors
+keywords:
+ - variant calling
+ - low frequency variant calling
+ - call
+ - variants
+tools:
+ - lofreq:
+ description: Lofreq is a fast and sensitive variant-caller for inferring SNVs and indels from next-generation sequencing data. It's call-parallel programme predicts variants using multiple processors
+ homepage: https://csb5.github.io/lofreq/
+ documentation: https://csb5.github.io/lofreq/
+ doi: "10.1093/nar/gks918"
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test' ]
+ - bam:
+ type: file
+ description: Tumor sample sorted BAM file
+ pattern: "*.{bam}"
+ - bai:
+ type: file
+ description: BAM index file
+ pattern: "*.{bam.bai}"
+ - intervals:
+ type: file
+ description: BED file containing target regions for variant calling
+ pattern: "*.{bed}"
+ - meta2:
+ type: map
+ description: |
+ Groovy Map containing sample information about the reference fasta
+ e.g. [ id:'reference' ]
+ - fasta:
+ type: file
+ description: Reference genome FASTA file
+ pattern: "*.{fasta}"
+ - meta3:
+ type: map
+ description: |
+ Groovy Map containing sample information about the reference fasta fai
+ e.g. [ id:'reference' ]
+ - fai:
+ type: file
+ description: Reference genome FASTA index file
+ pattern: "*.{fai}"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - vcf:
+ type: file
+ description: Predicted variants file
+ pattern: "*.{vcf}"
+ - tbi:
+ type: file
+ description: Index of vcf file
+ pattern: "*.{vcf.gz.tbi}"
+authors:
+ - "@kaurravneet4123"
+ - "@bjohnnyd"
+maintainers:
+ - "@kaurravneet4123"
+ - "@bjohnnyd"
+ - "@nevinwu"
+ - "@AitorPeseta"
diff --git a/modules/nf-core/lofreq/callparallel/tests/main.nf.test b/modules/nf-core/lofreq/callparallel/tests/main.nf.test
new file mode 100644
index 0000000000..31f199208b
--- /dev/null
+++ b/modules/nf-core/lofreq/callparallel/tests/main.nf.test
@@ -0,0 +1,109 @@
+nextflow_process {
+
+ name "Test Process LOFREQ_CALLPARALLEL"
+ script "../main.nf"
+ process "LOFREQ_CALLPARALLEL"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "lofreq"
+ tag "lofreq/callparallel"
+
+ test("sarscov2 - bam") {
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ []
+ ]
+ input[1] = [ [ id:'fasta' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = [ [ id:'fai' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ process.out.versions,
+ process.out.tbi,
+ path(process.out.vcf[0][1]).vcf.summary
+ ).match() },
+ )
+ }
+
+ }
+
+ test("sarscov2 - bam - bed") {
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true)
+ ]
+ input[1] = [ [ id:'fasta' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = [ [ id:'fai' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ process.out.versions,
+ process.out.tbi,
+ path(process.out.vcf[0][1]).vcf.summary
+ ).match() },
+ )
+ }
+
+ }
+
+ test("sarscov2 - bam - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ []
+ ]
+ input[1] = [ [ id:'fasta' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = [ [ id:'fai' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/lofreq/callparallel/tests/main.nf.test.snap b/modules/nf-core/lofreq/callparallel/tests/main.nf.test.snap
new file mode 100644
index 0000000000..11587f4ace
--- /dev/null
+++ b/modules/nf-core/lofreq/callparallel/tests/main.nf.test.snap
@@ -0,0 +1,93 @@
+{
+ "sarscov2 - bam - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test.vcf.gz.tbi:md5,1a60c330fb42841e8dcf3cd507a70bfc"
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,56d45e0015add277b2689f071a4fe3e4"
+ ],
+ "tbi": [
+ [
+ {
+ "id": "test"
+ },
+ "test.vcf.gz.tbi:md5,1a60c330fb42841e8dcf3cd507a70bfc"
+ ]
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test"
+ },
+ "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,56d45e0015add277b2689f071a4fe3e4"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.04.0"
+ },
+ "timestamp": "2024-08-28T12:01:24.268196316"
+ },
+ "sarscov2 - bam": {
+ "content": [
+ [
+ "versions.yml:md5,56d45e0015add277b2689f071a4fe3e4"
+ ],
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test.vcf.gz.tbi:md5,4cb176febbc8c26d717a6c6e67b9c905"
+ ]
+ ],
+ "VcfFile [chromosomes=[], sampleCount=0, variantCount=0, phased=true, phasedAutodetect=true]"
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.04.0"
+ },
+ "timestamp": "2024-08-28T14:14:55.381365088"
+ },
+ "sarscov2 - bam - bed": {
+ "content": [
+ [
+ "versions.yml:md5,56d45e0015add277b2689f071a4fe3e4"
+ ],
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test.vcf.gz.tbi:md5,4cb176febbc8c26d717a6c6e67b9c905"
+ ]
+ ],
+ "VcfFile [chromosomes=[], sampleCount=0, variantCount=0, phased=true, phasedAutodetect=true]"
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.04.0"
+ },
+ "timestamp": "2024-08-28T14:15:18.221515296"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/lofreq/callparallel/tests/tags.yml b/modules/nf-core/lofreq/callparallel/tests/tags.yml
new file mode 100644
index 0000000000..14c36bc274
--- /dev/null
+++ b/modules/nf-core/lofreq/callparallel/tests/tags.yml
@@ -0,0 +1,2 @@
+lofreq/callparallel:
+ - "modules/nf-core/lofreq/callparallel/**"
diff --git a/nextflow.config b/nextflow.config
index 33776d277c..6c81e7b9a7 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -396,7 +396,7 @@ manifest {
description = """An open-source analysis pipeline to detect germline or somatic variants from whole genome or targeted sequencing"""
mainScript = 'main.nf'
nextflowVersion = '!>=23.04.0'
- version = '3.4.3'
+ version = '3.5.0dev'
doi = '10.12688/f1000research.16665.2, 10.1093/nargab/lqae031, 10.5281/zenodo.3476425'
}
@@ -443,6 +443,7 @@ includeConfig 'conf/modules/sentieon_haplotyper_joint_germline.config'
includeConfig 'conf/modules/strelka.config'
includeConfig 'conf/modules/tiddit.config'
includeConfig 'conf/modules/post_variant_calling.config'
+includeConfig 'conf/modules/lofreq.config'
//annotate
includeConfig 'conf/modules/annotate.config'
diff --git a/nextflow_schema.json b/nextflow_schema.json
index d14f023b05..9571c93125 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -111,8 +111,8 @@
"type": "string",
"fa_icon": "fas fa-toolbox",
"description": "Tools to use for duplicate marking, variant calling and/or for annotation.",
- "help_text": "Multiple tools separated with commas.\n\n**Variant Calling:**\n\nGermline variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: DeepVariant, FreeBayes, GATK HaplotypeCaller, mpileup, Sentieon Haplotyper, Strelka\n- Structural Variants: indexcov, Manta, TIDDIT\n- Copy-number: CNVKit\n\nTumor-only somatic variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, mpileup, Mutect2, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit, ControlFREEC\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, Mutect2, Strelka2\n- Structural variants: Manta, TIDDIT\n- Copy-Number: ASCAT, CNVKit, Control-FREEC\n- Microsatellite Instability: MSIsensorpro\n\n> **NB** Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- snpEff, VEP, merge (both consecutively), and bcftools annotate (needs `--bcftools_annotation`).\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.",
- "pattern": "^((ascat|bcfann|cnvkit|controlfreec|deepvariant|freebayes|haplotypecaller|sentieon_dnascope|sentieon_haplotyper|manta|indexcov|merge|mpileup|msisensorpro|mutect2|ngscheckmate|sentieon_dedup|snpeff|strelka|tiddit|vep)?,?)*(? **NB** Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- snpEff, VEP, merge (both consecutively), and bcftools annotate (needs `--bcftools_annotation`).\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.",
+ "pattern": "^((ascat|bcfann|cnvkit|controlfreec|deepvariant|freebayes|haplotypecaller|lofreq|sentieon_dnascope|sentieon_haplotyper|manta|indexcov|merge|mpileup|msisensorpro|mutect2|ngscheckmate|sentieon_dedup|snpeff|strelka|tiddit|vep)?,?)*(? [[id:it[0].baseName], it] },
- dbsnp_tbi.map{ it -> [[id:it[0].baseName], it] },
+ dbsnp.map{it -> [[:], it]},
+ dbsnp_tbi.map{it -> [[:], it]},
intervals)
vcf_haplotypecaller = BAM_VARIANT_CALLING_HAPLOTYPECALLER.out.vcf
diff --git a/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf
index 59b14ed898..c9f1093047 100644
--- a/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf
+++ b/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf
@@ -11,6 +11,7 @@ include { BAM_VARIANT_CALLING_SINGLE_TIDDIT } from '../bam_variant_cal
include { BAM_VARIANT_CALLING_TUMOR_ONLY_CONTROLFREEC } from '../bam_variant_calling_tumor_only_controlfreec/main'
include { BAM_VARIANT_CALLING_TUMOR_ONLY_MANTA } from '../bam_variant_calling_tumor_only_manta/main'
include { BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 } from '../bam_variant_calling_tumor_only_mutect2/main'
+include { BAM_VARIANT_CALLING_TUMOR_ONLY_LOFREQ } from '../bam_variant_calling_tumor_only_lofreq/main'
workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL {
take:
@@ -47,6 +48,7 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL {
vcf_mutect2 = Channel.empty()
vcf_strelka = Channel.empty()
vcf_tiddit = Channel.empty()
+ vcf_lofreq = Channel.empty()
// MPILEUP
if (tools.split(',').contains('mpileup') || tools.split(',').contains('controlfreec')) {
@@ -134,6 +136,19 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL {
versions = versions.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2.out.versions)
}
+ //LOFREQ
+ if (tools.split(',').contains('lofreq')) {
+ BAM_VARIANT_CALLING_TUMOR_ONLY_LOFREQ(
+ cram,
+ fasta,
+ fasta_fai,
+ intervals,
+ dict
+ )
+ vcf_lofreq = BAM_VARIANT_CALLING_TUMOR_ONLY_LOFREQ.out.vcf
+ versions = versions.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_LOFREQ.out.versions)
+ }
+
// MANTA
if (tools.split(',').contains('manta')) {
BAM_VARIANT_CALLING_TUMOR_ONLY_MANTA(
@@ -176,6 +191,7 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL {
vcf_all = Channel.empty().mix(
vcf_freebayes,
+ vcf_lofreq,
vcf_manta,
vcf_mutect2,
vcf_mpileup,
@@ -186,6 +202,7 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL {
emit:
vcf_all
vcf_freebayes
+ vcf_lofreq
vcf_manta
vcf_mpileup
vcf_mutect2
diff --git a/subworkflows/local/bam_variant_calling_tumor_only_lofreq/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_lofreq/main.nf
new file mode 100644
index 0000000000..b619c1f796
--- /dev/null
+++ b/subworkflows/local/bam_variant_calling_tumor_only_lofreq/main.nf
@@ -0,0 +1,51 @@
+include { LOFREQ_CALLPARALLEL as LOFREQ } from '../../../modules/nf-core/lofreq/callparallel/main.nf'
+include { GATK4_MERGEVCFS as MERGE_LOFREQ } from '../../../modules/nf-core/gatk4/mergevcfs/main.nf'
+// Tumor-only LoFreq calling: combine each input with the intervals, call with LOFREQ, merge multi-interval VCFs with MERGE_LOFREQ
+workflow BAM_VARIANT_CALLING_TUMOR_ONLY_LOFREQ {
+ take:
+ input // channel: [mandatory] [ meta, tumor_cram, tumor_crai ]
+ fasta // channel: [mandatory] [ fasta ] -- NOTE(review): LOFREQ_CALLPARALLEL declares this input as tuple val(meta2), path(fasta); confirm the shape the caller passes
+ fai // channel: [mandatory] [ fasta_fai ] -- NOTE(review): LOFREQ_CALLPARALLEL declares this input as tuple val(meta3), path(fai); confirm the shape the caller passes
+ intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ]
+ dict // channel: /path/to/reference/fasta/dictionary (passed as second input to MERGE_LOFREQ)
+
+ main:
+ versions = Channel.empty()
+
+ // Combine cram and intervals for spread and gather strategy
+ input_intervals = input.combine(intervals)
+ // Move num_intervals to meta map
+ .map {meta, tumor_cram, tumor_crai, intervals, num_intervals -> [meta + [ num_intervals:num_intervals ], tumor_cram, tumor_crai, intervals]}
+
+ LOFREQ(input_intervals, fasta, fai) // Call variants with LoFreq
+
+ // Figuring out if there is one or more vcf(s) from the same sample
+ vcf_branch = LOFREQ.out.vcf.branch{
+ // Use meta.num_intervals to assess number of intervals
+ intervals: it[0].num_intervals > 1
+ no_intervals: it[0].num_intervals <= 1
+ }
+
+ // Figuring out if there is one or more tbi(s) from the same sample. NOTE(review): tbi_branch is not read anywhere below in this workflow
+ tbi_branch = LOFREQ.out.tbi.branch{
+ // Use meta.num_intervals to assess number of intervals
+ intervals: it[0].num_intervals > 1
+ no_intervals: it[0].num_intervals <= 1
+ }
+
+ // Only when using intervals: group the per-interval VCFs by meta, with meta.num_intervals as the group size
+ vcf_to_merge = vcf_branch.intervals.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ] }.groupTuple()
+
+ MERGE_LOFREQ(vcf_to_merge, dict)
+
+ // Mix intervals and no_intervals channels together
+ // Remove num_intervals from meta and tag each VCF with variantcaller:'lofreq'
+ vcf = Channel.empty().mix(MERGE_LOFREQ.out.vcf, vcf_branch.no_intervals).map{ meta, vcf -> [ meta - meta.subMap('num_intervals') + [ variantcaller:'lofreq' ], vcf ] }
+
+ versions = versions.mix(MERGE_LOFREQ.out.versions)
+ versions = versions.mix(LOFREQ.out.versions)
+
+ emit:
+ vcf // channel: [ meta, vcf ] -- meta carries variantcaller:'lofreq' and no num_intervals
+ versions // channel: versions mixed from MERGE_LOFREQ and LOFREQ
+}
diff --git a/tests/config/pytesttags.yml b/tests/config/pytesttags.yml
index 63c96d73a9..56b247bfa3 100644
--- a/tests/config/pytesttags.yml
+++ b/tests/config/pytesttags.yml
@@ -536,6 +536,16 @@ strelka_bp:
- tests/csv/3.0/recalibrated_somatic.csv
- tests/test_strelka_bp.yml
+## lofreq
+lofreq:
+ - conf/modules/lofreq.config
+ - modules/nf-core/mosdepth/**
+ - modules/nf-core/lofreq/callparallel/**
+ - subworkflows/local/bam_variant_calling_tumor_only_all/**
+ - subworkflows/local/bam_variant_calling_tumor_only_lofreq/**
+ - tests/csv/3.0/recalibrated_tumoronly.csv
+ - tests/test_lofreq.yml
+
## tiddit
tiddit:
- conf/modules/tiddit.config
diff --git a/tests/test_lofreq.yml b/tests/test_lofreq.yml
new file mode 100644
index 0000000000..c0ad829101
--- /dev/null
+++ b/tests/test_lofreq.yml
@@ -0,0 +1,64 @@
+- name: Run variant calling on tumor only sample with lofreq
+ command: nextflow run main.nf -profile test,tools_tumoronly --tools lofreq --outdir results
+ tags:
+ - lofreq
+ - tumor_only
+ - variant_calling
+ files:
+ - path: results/csv/variantcalled.csv
+ md5sum: cc7725ef0808ee07002a50ab873ee45c
+ - path: results/multiqc
+ - path: results/reports/bcftools/lofreq/sample2/sample2.lofreq.bcftools_stats.txt
+ md5sum: 9c9de2e4ed2f324adf1912a45d73601f
+ # conda changes md5sums for test
+ - path: results/reports/samtools/sample2/sample2.recal.cram.stats
+ md5sum: b6c2b9056dfc441fddae989cc85f4135
+ # conda changes md5sums for test
+ - path: results/variant_calling/lofreq/sample2/sample2.lofreq.vcf.gz
+ contains:
+ [
+ '##INFO=',
+ ]
+ # conda changes md5sums for test
+ - path: results/reports/vcftools/lofreq/sample2/sample2.lofreq.FILTER.summary
+ md5sum: 8dd8a0c91d5c4a260b462e04f615e502
+ - path: results/reports/vcftools/lofreq/sample2/sample2.lofreq.TsTv.qual
+ md5sum: fe2f1133a9894852603b5252f48bbc05
+ # binary changes md5sums on reruns
+ - path: results/reports/mosdepth/sample2/sample2.recal.mosdepth.global.dist.txt
+ - path: results/reports/mosdepth/sample2/sample2.recal.mosdepth.region.dist.txt
+ - path: results/reports/mosdepth/sample2/sample2.recal.mosdepth.summary.txt
+ - path: results/reports/mosdepth/sample2/sample2.recal.regions.bed.gz
+ - path: results/reports/mosdepth/sample2/sample2.recal.regions.bed.gz.csi
+- name: Run variant calling on tumor only sample with lofreq without intervals
+ command: nextflow run main.nf -profile test,tools_tumoronly --tools lofreq --no_intervals --outdir results
+ tags:
+ - lofreq
+ - no_intervals
+ - tumor_only
+ - variant_calling
+ files:
+ - path: results/csv/variantcalled.csv
+ md5sum: cc7725ef0808ee07002a50ab873ee45c
+ - path: results/multiqc
+ - path: results/reports/bcftools/lofreq/sample2/sample2.lofreq.bcftools_stats.txt
+ md5sum: e838ce412fc091918059e79727b35785
+ # conda changes md5sums for test
+ - path: results/reports/samtools/sample2/sample2.recal.cram.stats
+ md5sum: b6c2b9056dfc441fddae989cc85f4135
+ # conda changes md5sums for test
+ - path: results/variant_calling/lofreq/sample2/sample2.lofreq.vcf.gz
+ contains:
+ [
+ '##INFO=',
+ ]
+ # conda changes md5sums for test
+ - path: results/reports/vcftools/lofreq/sample2/sample2.lofreq.FILTER.summary
+ md5sum: 72beda1b57da053eb352204828605a40
+ - path: results/reports/vcftools/lofreq/sample2/sample2.lofreq.TsTv.qual
+ md5sum: e4cd60cf32b0a24df426d243b337cf90
+ # binary changes md5sums on reruns
+ - path: results/reports/mosdepth/sample2/sample2.recal.mosdepth.global.dist.txt
+ - path: results/reports/mosdepth/sample2/sample2.recal.mosdepth.summary.txt
+ - path: results/reports/mosdepth/sample2/sample2.recal.per-base.bed.gz
+ - path: results/reports/mosdepth/sample2/sample2.recal.per-base.bed.gz.csi
diff --git a/tower.yml b/tower.yml
index acfdf00c2a..5e679d223b 100644
--- a/tower.yml
+++ b/tower.yml
@@ -51,9 +51,9 @@ reports:
display: "Control-FREEC: parsable file with information about FREEC run"
"**/reports/bcftools/*.bcftools_stats.txt":
display: "All samples raw statistics"
- "**/reports/SnpEff/*/*/*_snpEff.html":
+ "**/reports/snpeff/*/*/*_snpEff.html":
display: "Statistics and plots for the SnpEff run"
- "**/reports/SnpEff/*/*/*_snpEff.genes.txt":
+ "**/reports/snpeff/*/*/*_snpEff.genes.txt":
display: "TXT (tab separated) summary counts for variants affecting each transcript and gene"
"**/reports/EnsemblVEP/*/*/*_VEP.summary.html":
display: "Summary of the VEP run"