Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add nf-test to local module STARFUSION_DETECT #586

Merged
merged 12 commits into from
Dec 20, 2024
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add nf-test to local subworkflow: `QC_WORKFLOW` [#568](https://github.com/nf-core/rnafusion/pull/568)
- Add nf-test to local subworkflow: `TRIM_WORKFLOW` [#572](https://github.com/nf-core/rnafusion/pull/572)
- Add nf-test to local module: `FUSIONREPORT_DETECT`. Improve `FUSIONREPORT_DOWNLOAD` module [#577](https://github.com/nf-core/rnafusion/pull/577)
- Add nf-test to local module: `STARFUSION`. [#585](https://github.com/nf-core/rnafusion/pull/585)
- Add nf-test to local subworkflow: `ARRIBA_WORKFLOW` [#578](https://github.com/nf-core/rnafusion/pull/578)

### Changed
Expand Down
1 change: 1 addition & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ process {
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
]
ext.args = "--max_readlength ${params.read_length}"
}

withName: 'STARFUSION_DOWNLOAD' {
Expand Down
3 changes: 3 additions & 0 deletions conf/test_build.config
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,7 @@ params {
all = true

skip_salmon_index = true
starfusion_build = true
fusion_annot_lib = 'https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz'

}
9 changes: 5 additions & 4 deletions modules/local/starfusion/build/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ channels:
dependencies:
- bioconda::dfam=3.7
- bioconda::hmmer=3.4
- bioconda::star-fusion=1.7.0
- bioconda::trinity=2.15.2
- bioconda::samtools=1.21
- bioconda::star=2.7.11b
- bioconda::minimap2=2.28
- bioconda::samtools=1.6
- bioconda::star-fusion=1.14.0
- bioconda::star=2.7.11a
- bioconda::trinity=2.8.5
116 changes: 94 additions & 22 deletions modules/local/starfusion/build/main.nf
Original file line number Diff line number Diff line change
@@ -1,37 +1,29 @@
process STARFUSION_BUILD {
tag 'star-fusion'
tag "$meta.id"
label 'process_high'

conda "${moduleDir}/environment.yml"
container 'community.wave.seqera.io/library/dfam_hmmer_samtools_star-fusion_pruned:5694d82381bf039e'
container 'community.wave.seqera.io/library/dfam_hmmer_minimap2_samtools_pruned:bd39df228dad7086'

input:
tuple val(meta), path(fasta)
tuple val(meta2), path(gtf)
path fusion_annot_lib
val dfam_species

output:
path "*" , emit: reference
tuple val(meta), path("ctat_genome_lib_build_dir"), emit: reference

script:
def binPath = (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) ? "prep_genome_lib.pl" : "/usr/local/src/STAR-Fusion/ctat-genome-lib-builder/prep_genome_lib.pl"
def args = task.ext.args ?: ''
"""
export TMPDIR=/tmp
wget http://ftp.ebi.ac.uk/pub/databases/Pfam/releases/Pfam34.0/Pfam-A.hmm.gz --no-check-certificate
wget https://github.com/FusionAnnotator/CTAT_HumanFusionLib/releases/download/v0.3.0/fusion_lib.Mar2021.dat.gz -O CTAT_HumanFusionLib_Mar2021.dat.gz --no-check-certificate
wget https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/AnnotFilterRule.pm -O AnnotFilterRule.pm --no-check-certificate
wget https://www.dfam.org/releases/Dfam_3.4/infrastructure/dfamscan/homo_sapiens_dfam.hmm --no-check-certificate
wget https://www.dfam.org/releases/Dfam_3.4/infrastructure/dfamscan/homo_sapiens_dfam.hmm.h3f --no-check-certificate
wget https://www.dfam.org/releases/Dfam_3.4/infrastructure/dfamscan/homo_sapiens_dfam.hmm.h3i --no-check-certificate
wget https://www.dfam.org/releases/Dfam_3.4/infrastructure/dfamscan/homo_sapiens_dfam.hmm.h3m --no-check-certificate
wget https://www.dfam.org/releases/Dfam_3.4/infrastructure/dfamscan/homo_sapiens_dfam.hmm.h3p --no-check-certificate
gunzip Pfam-A.hmm.gz && hmmpress Pfam-A.hmm
$binPath \\
prep_genome_lib.pl \\
--genome_fa $fasta \\
--gtf $gtf \\
--annot_filter_rule AnnotFilterRule.pm \\
--fusion_annot_lib CTAT_HumanFusionLib_Mar2021.dat.gz \\
--pfam_db Pfam-A.hmm \\
--dfam_db homo_sapiens_dfam.hmm \\
--max_readlength $params.read_length \\
--dfam_db ${dfam_species} \\
--pfam_db current \\
--fusion_annot_lib $fusion_annot_lib \\
${args} \\
--CPU $task.cpus

cat <<-END_VERSIONS > versions.yml
Expand All @@ -42,8 +34,88 @@ process STARFUSION_BUILD {

stub:
"""
mkdir ctat_genome_lib_build_dir
touch ref_annot.cdna.fa
mkdir -p ctat_genome_lib_build_dir

touch ctat_genome_lib_build_dir/AnnotFilterRule.pm
gzip -c /dev/null > ctat_genome_lib_build_dir/blast_pairs.dat.gz
touch ctat_genome_lib_build_dir/blast_pairs.idx

mkdir -p ctat_genome_lib_build_dir/__chkpts
touch ctat_genome_lib_build_dir/__chkpts/annotfiltrule_cp.ok
touch ctat_genome_lib_build_dir/__chkpts/blast_pairs.idx.ok
touch ctat_genome_lib_build_dir/__chkpts/cp_gene_blast_pairs.ok
touch ctat_genome_lib_build_dir/__chkpts/cp_pfam_dat.ok
touch ctat_genome_lib_build_dir/__chkpts/cp_ref_annot_cdna.ok
touch ctat_genome_lib_build_dir/__chkpts/fusion_annot_lib.cp.ok
touch ctat_genome_lib_build_dir/__chkpts/_fusion_annot_lib.idx.ok
touch ctat_genome_lib_build_dir/__chkpts/index_pfam_hits.ok
touch ctat_genome_lib_build_dir/__chkpts/index_ref_annot_cdna.ok
touch ctat_genome_lib_build_dir/__chkpts/makeblastdb.ok
touch ctat_genome_lib_build_dir/__chkpts/mm2_genome_idx.ok
touch ctat_genome_lib_build_dir/__chkpts/mm2.splice_bed.ok
touch ctat_genome_lib_build_dir/__chkpts/_prot_info_db.ok
touch ctat_genome_lib_build_dir/__chkpts/ref_annot.cdsplus.dfam_masked.fa.cp.ok
touch ctat_genome_lib_build_dir/__chkpts/ref_annot.cdsplus.dfam_masked.fa.idx.ok
touch ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.gene_spans.ok
touch ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.mini.sortu.ok
touch ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.ok
touch ctat_genome_lib_build_dir/__chkpts/ref_genome_fai.ok
touch ctat_genome_lib_build_dir/__chkpts/ref_genome.fa.ok
touch ctat_genome_lib_build_dir/__chkpts/trans.blast.dat.cp.ok
touch ctat_genome_lib_build_dir/__chkpts/trans.blast.dat.index.ok
touch ctat_genome_lib_build_dir/__chkpts/validate_ctat_genome_lib.ok

gzip -c /dev/null > ctat_genome_lib_build_dir/fusion_annot_lib.gz
touch ctat_genome_lib_build_dir/fusion_annot_lib.idx
touch ctat_genome_lib_build_dir/pfam_domains.dbm
gzip -c /dev/null > ctat_genome_lib_build_dir/PFAM.domtblout.dat.gz

touch ctat_genome_lib_build_dir/ref_annot.cdna.fa
touch ctat_genome_lib_build_dir/ref_annot.cdna.fa.idx
touch ctat_genome_lib_build_dir/ref_annot.cds
touch ctat_genome_lib_build_dir/ref_annot.cdsplus.fa
touch ctat_genome_lib_build_dir/ref_annot.cdsplus.fa.idx
touch ctat_genome_lib_build_dir/ref_annot.gtf
touch ctat_genome_lib_build_dir/ref_annot.gtf.gene_spans
touch ctat_genome_lib_build_dir/ref_annot.gtf.mini.sortu
touch ctat_genome_lib_build_dir/ref_annot.gtf.mm2.splice.bed
touch ctat_genome_lib_build_dir/ref_annot.pep
touch ctat_genome_lib_build_dir/ref_annot.prot_info.dbm

touch ctat_genome_lib_build_dir/ref_genome.fa
touch ctat_genome_lib_build_dir/ref_genome.fa.fai
touch ctat_genome_lib_build_dir/ref_genome.fa.mm2
touch ctat_genome_lib_build_dir/ref_genome.fa.ndb
touch ctat_genome_lib_build_dir/ref_genome.fa.nhr
touch ctat_genome_lib_build_dir/ref_genome.fa.nin
touch ctat_genome_lib_build_dir/ref_genome.fa.njs
touch ctat_genome_lib_build_dir/ref_genome.fa.not
touch ctat_genome_lib_build_dir/ref_genome.fa.nsq
touch ctat_genome_lib_build_dir/ref_genome.fa.ntf
touch ctat_genome_lib_build_dir/ref_genome.fa.nto

mkdir -p ctat_genome_lib_build_dir/ref_genome.fa.star.idx
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/build.ok
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrLength.txt
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrNameLength.txt
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrName.txt
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrStart.txt
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/exonGeTrInfo.tab
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/exonInfo.tab
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/geneInfo.tab
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Genome
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/genomeParameters.txt
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Log.out
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/SA
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/SAindex
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbInfo.txt
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbList.fromGTF.out.tab
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbList.out.tab
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/transcriptInfo.tab

touch ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dat
touch ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dbm
gzip -c /dev/null > ctat_genome_lib_build_dir/trans.blast.dat.gz

cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
11 changes: 9 additions & 2 deletions modules/local/starfusion/build/meta.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: starfusion_downloadgenome
name: starfusion_build
description: Build the STAR-Fusion genome resource library (CTAT genome lib) required to run the STAR-Fusion caller
keywords:
- downoad
- download
tools:
- star-fusion:
description: Fusion calling algorithm for RNAseq data
Expand All @@ -20,6 +20,13 @@ input:
type: file
description: genome gtf file
pattern: "*.{gtf}"
- fusion_annot_lib:
type: file
description: Fusion annotation library (key/val pairs, tab-delimited).
pattern: "*.dat.gz"
- dfam_species:
type: string
description: DNA transposable element database (Dfam.hmm), required for repeat masking. Only 'human' or 'mouse' are accepted (will automatically pull the resources from dfam).

output:
- reference:
Expand Down
138 changes: 138 additions & 0 deletions modules/local/starfusion/build/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
// nf-test suite for the local STARFUSION_BUILD process (../main.nf).
//
// Both tests feed the process the same four inputs:
//   input[0] - [meta, fasta]     : STAR-Fusion-Tutorial minigenome FASTA
//   input[1] - [meta2, gtf]      : STAR-Fusion-Tutorial minigenome GTF
//   input[2] - fusion_annot_lib  : mini CTAT human fusion annotation library
//   input[3] - dfam_species      : "human"
nextflow_process {

    name "Test Process STARFUSION_BUILD"
    script "../main.nf"
    process "STARFUSION_BUILD"

    // Real run: builds the genome lib and snapshots its contents.
    test("STARFUSION_BUILD - human - minigenome") {

        when {
            process {
                """
                input[0] = [
                    [ id:'minigenome fasta' ],
                    file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.fa")
                ]
                input[1] = [
                    [ id:'minigenome gtf' ],
                    file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.gtf")
                ]

                input [2] = file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz")
                input [3] = "human"

                """
            }
        }

        then {
            // Fail with a clear message if the process itself did not finish
            // successfully, instead of erroring later while dereferencing
            // process.out.reference below.
            assert process.success

            // Directory emitted on the `reference` channel: element [1] of the
            // first [meta, dir] tuple.
            def libDir = path(process.out.reference[0][1])

            // Entries passed as paths are snapshotted as files; entries wrapped
            // in .exists() contribute only a boolean presence flag.
            // NOTE(review): presumably the .exists() entries are files whose
            // content is not reproducible between runs - confirm before
            // converting any of them to content snapshots.
            assert snapshot(
                libDir.resolve("AnnotFilterRule.pm"),
                libDir.resolve("blast_pairs.dat.gz").exists(),
                libDir.resolve("blast_pairs.idx").exists(),
                libDir.resolve("__chkpts/annotfiltrule_cp.ok"),
                libDir.resolve("__chkpts/blast_pairs.idx.ok"),
                libDir.resolve("__chkpts/cp_gene_blast_pairs.ok"),
                libDir.resolve("__chkpts/cp_pfam_dat.ok"),
                libDir.resolve("__chkpts/cp_ref_annot_cdna.ok"),
                libDir.resolve("__chkpts/fusion_annot_lib.cp.ok"),
                libDir.resolve("__chkpts/_fusion_annot_lib.idx.ok"),
                libDir.resolve("__chkpts/index_pfam_hits.ok"),
                libDir.resolve("__chkpts/index_ref_annot_cdna.ok"),
                libDir.resolve("__chkpts/makeblastdb.ok"),
                libDir.resolve("__chkpts/mm2_genome_idx.ok"),
                libDir.resolve("__chkpts/mm2.splice_bed.ok"),
                libDir.resolve("__chkpts/_prot_info_db.ok"),
                libDir.resolve("__chkpts/ref_annot.cdsplus.dfam_masked.fa.cp.ok"),
                libDir.resolve("__chkpts/ref_annot.cdsplus.dfam_masked.fa.idx.ok"),
                libDir.resolve("__chkpts/ref_annot.gtf.gene_spans.ok"),
                libDir.resolve("__chkpts/ref_annot.gtf.mini.sortu.ok"),
                libDir.resolve("__chkpts/ref_annot.gtf.ok"),
                libDir.resolve("__chkpts/ref_genome_fai.ok"),
                libDir.resolve("__chkpts/ref_genome.fa.ok"),
                libDir.resolve("__chkpts/trans.blast.dat.cp.ok"),
                libDir.resolve("__chkpts/trans.blast.dat.index.ok"),
                libDir.resolve("__chkpts/validate_ctat_genome_lib.ok"),
                libDir.resolve("fusion_annot_lib.gz"),
                libDir.resolve("fusion_annot_lib.idx").exists(),
                libDir.resolve("pfam_domains.dbm").exists(),
                libDir.resolve("PFAM.domtblout.dat.gz").exists(),
                libDir.resolve("ref_annot.cdna.fa").exists(),
                libDir.resolve("ref_annot.cdna.fa.idx").exists(),
                libDir.resolve("ref_annot.cds").exists(),
                libDir.resolve("ref_annot.cdsplus.fa").exists(),
                libDir.resolve("ref_annot.cdsplus.fa.idx").exists(),
                libDir.resolve("ref_annot.gtf"),
                libDir.resolve("ref_annot.gtf.gene_spans").exists(),
                libDir.resolve("ref_annot.gtf.mini.sortu"),
                libDir.resolve("ref_annot.gtf.mm2.splice.bed"),
                libDir.resolve("ref_annot.pep").exists(),
                libDir.resolve("ref_annot.prot_info.dbm").exists(),
                libDir.resolve("ref_genome.fa"),
                libDir.resolve("ref_genome.fa.fai"),
                libDir.resolve("ref_genome.fa.mm2"),
                libDir.resolve("ref_genome.fa.ndb"),
                libDir.resolve("ref_genome.fa.nhr"),
                libDir.resolve("ref_genome.fa.nin").exists(),
                libDir.resolve("ref_genome.fa.njs").exists(),
                libDir.resolve("ref_genome.fa.not"),
                libDir.resolve("ref_genome.fa.nsq"),
                libDir.resolve("ref_genome.fa.ntf"),
                libDir.resolve("ref_genome.fa.nto"),
                libDir.resolve("ref_genome.fa.star.idx").exists(),
                libDir.resolve("ref_genome.fa.star.idx/build.ok"),
                libDir.resolve("ref_genome.fa.star.idx/chrLength.txt"),
                libDir.resolve("ref_genome.fa.star.idx/chrNameLength.txt"),
                libDir.resolve("ref_genome.fa.star.idx/chrName.txt"),
                libDir.resolve("ref_genome.fa.star.idx/chrStart.txt"),
                libDir.resolve("ref_genome.fa.star.idx/exonGeTrInfo.tab"),
                libDir.resolve("ref_genome.fa.star.idx/exonInfo.tab"),
                libDir.resolve("ref_genome.fa.star.idx/geneInfo.tab"),
                libDir.resolve("ref_genome.fa.star.idx/Genome"),
                libDir.resolve("ref_genome.fa.star.idx/genomeParameters.txt").exists(),
                libDir.resolve("ref_genome.fa.star.idx/Log.out").exists(),
                libDir.resolve("ref_genome.fa.star.idx/SA"),
                libDir.resolve("ref_genome.fa.star.idx/SAindex"),
                libDir.resolve("ref_genome.fa.star.idx/sjdbInfo.txt"),
                libDir.resolve("ref_genome.fa.star.idx/sjdbList.fromGTF.out.tab"),
                libDir.resolve("ref_genome.fa.star.idx/sjdbList.out.tab"),
                libDir.resolve("ref_genome.fa.star.idx/transcriptInfo.tab"),
                libDir.resolve("trans.blast.align_coords.align_coords.dat"),
                libDir.resolve("trans.blast.align_coords.align_coords.dbm").exists(),
                libDir.resolve("trans.blast.dat.gz"),
                process.out.versions
            ).match()
        }

    }

    // Stub run (-stub): same inputs, snapshots the whole output channel map.
    test("STARFUSION_BUILD - human - minigenome - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = [
                    [ id:'minigenome fasta' ],
                    file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.fa")
                ]
                input[1] = [
                    [ id:'minigenome gtf' ],
                    file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.gtf")
                ]

                input [2] = file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz")
                input [3] = "human"

                """
            }
        }

        then {
            // Guard against a failed stub run being reported only as a
            // snapshot mismatch.
            assert process.success
            assert snapshot(process.out).match()
        }

    }

}
Loading
Loading