Skip to content

Commit

Permalink
markduplicates
Browse files Browse the repository at this point in the history
  • Loading branch information
maxulysse committed Sep 20, 2023
1 parent 4e44c19 commit 65b3343
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 101 deletions.
8 changes: 4 additions & 4 deletions subworkflows/local/bam_markduplicates/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,22 @@ workflow BAM_MARKDUPLICATES {
versions = Channel.empty()
reports = Channel.empty()

// RUN MARKDUPLICATES
// RUN MARKDUPLICATES
GATK4_MARKDUPLICATES(bam, fasta, fasta_fai)

// Join with the crai file
cram = GATK4_MARKDUPLICATES.out.cram.join(GATK4_MARKDUPLICATES.out.crai, failOnDuplicate: true, failOnMismatch: true)

// QC on CRAM
CRAM_QC_MOSDEPTH_SAMTOOLS(cram, fasta, intervals_bed_combined)
// CRAM_QC_MOSDEPTH_SAMTOOLS(cram, fasta, intervals_bed_combined)

// Gather all reports generated
reports = reports.mix(GATK4_MARKDUPLICATES.out.metrics)
reports = reports.mix(CRAM_QC_MOSDEPTH_SAMTOOLS.out.reports)
// reports = reports.mix(CRAM_QC_MOSDEPTH_SAMTOOLS.out.reports)

// Gather versions of all tools used
versions = versions.mix(GATK4_MARKDUPLICATES.out.versions)
versions = versions.mix(CRAM_QC_MOSDEPTH_SAMTOOLS.out.versions)
// versions = versions.mix(CRAM_QC_MOSDEPTH_SAMTOOLS.out.versions)

emit:
cram
Expand Down
16 changes: 4 additions & 12 deletions subworkflows/local/bam_stats_samtools/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,13 @@ workflow BAM_STATS_SAMTOOLS {
main:
ch_versions = Channel.empty()

SAMTOOLS_STATS (
ch_bam_bai,
[]
)
ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first())
SAMTOOLS_FLAGSTAT(ch_bam_bai)
SAMTOOLS_IDXSTATS(ch_bam_bai)
SAMTOOLS_STATS(ch_bam_bai, [[],[]])

SAMTOOLS_FLAGSTAT (
ch_bam_bai
)
ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions.first())

SAMTOOLS_IDXSTATS (
ch_bam_bai
)
ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions.first())
ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first())

emit:
stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), [ stats ] ]
Expand Down
169 changes: 84 additions & 85 deletions workflows/rnavar.nf
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,8 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/

// // Check STAR alignment parameters
// def prepareToolIndices = params.aligner
// def seq_platform = params.seq_platform ?: []
// def seq_center = params.seq_center ?: []
def seq_platform = params.seq_platform ?: []
def seq_center = params.seq_center ?: []

// // Initialize file channels based on params
// ch_dbsnp = params.dbsnp ? Channel.fromPath(params.dbsnp).collect() : Channel.empty()
Expand Down Expand Up @@ -164,86 +164,85 @@ workflow RNAVAR {
ch_genome_bed = params.exon_bed ? Channel.fromPath(params.exon_bed).map{ it -> [ [id:'exon_bed'], it ] }.collect()
: PREPARE_GENOME.out.exon_bed
ch_dict = params.dict ? Channel.fromPath(params.dict).map{ it -> [ [id:'dict'], it ] }.collect()
: PREPARE_GENOME.out.dict
: ch_dict
ch_fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai).collect()
: PREPARE_GENOME.out.fasta_fai

// // MODULE: Concatenate FastQ files from same sample if required
// MODULE: Concatenate FastQ files from same sample if required

CAT_FASTQ(ch_fastq.multiple)

ch_cat_fastq = CAT_FASTQ.out.reads.mix(ch_fastq.single)

ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first().ifEmpty(null))

// // MODULE: Generate QC summary using FastQC
// MODULE: Generate QC summary using FastQC
FASTQC(ch_cat_fastq)
ch_reports = ch_reports.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
ch_versions = ch_versions.mix(FASTQC.out.versions.first())

// //
// // MODULE: Prepare the interval list from the GTF file using GATK4 BedToIntervalList
// //
//
// MODULE: Prepare the interval list from the GTF file using GATK4 BedToIntervalList
//

GATK4_BEDTOINTERVALLIST(ch_genome_bed, ch_dict)
ch_interval_list = GATK4_BEDTOINTERVALLIST.out.interval_list
ch_versions = ch_versions.mix(GATK4_BEDTOINTERVALLIST.out.versions.first().ifEmpty(null))

// //
// // MODULE: Scatter one interval-list into many interval-files using GATK4 IntervalListTools
// //
// ch_interval_list_split = Channel.empty()
// if (!params.skip_intervallisttools) {
// GATK4_INTERVALLISTTOOLS(ch_interval_list)
// ch_interval_list_split = GATK4_INTERVALLISTTOOLS.out.interval_list.map{ meta, bed -> [bed] }.flatten()
// }
// else ch_interval_list_split = ch_interval_list

// //
// // SUBWORKFLOW: Perform read alignment using STAR aligner
// //
// ch_genome_bam = Channel.empty()
// ch_genome_bam_index = Channel.empty()
// ch_samtools_stats = Channel.empty()
// ch_samtools_flagstat = Channel.empty()
// ch_samtools_idxstats = Channel.empty()
// ch_star_multiqc = Channel.empty()
// ch_aligner_pca_multiqc = Channel.empty()
// ch_aligner_clustering_multiqc = Channel.empty()

// if (params.aligner == 'star') {
// ALIGN_STAR(
// ch_cat_fastq,
// PREPARE_GENOME.out.star_index,
// PREPARE_GENOME.out.gtf,
// params.star_ignore_sjdbgtf,
// seq_platform,
// seq_center
// )
// ch_genome_bam = ALIGN_STAR.out.bam
// ch_genome_bam_index = ALIGN_STAR.out.bai
// ch_transcriptome_bam = ALIGN_STAR.out.bam_transcript

// // Gather QC reports
// ch_reports = ch_reports.mix(ALIGN_STAR.out.stats.collect{it[1]}.ifEmpty([]))
// ch_reports = ch_reports.mix(ALIGN_STAR.out.log_final.collect{it[1]}.ifEmpty([]))
// ch_versions = ch_versions.mix(ALIGN_STAR.out.versions.first().ifEmpty(null))

// //
// // SUBWORKFLOW: Mark duplicates with GATK4
// //
// BAM_MARKDUPLICATES(
// ch_genome_bam,
// PREPARE_GENOME.out.fasta,
// PREPARE_GENOME.out.fai,
// [])

// ch_genome_bam = BAM_MARKDUPLICATES.out.bam_bai

// //Gather QC reports
// ch_reports = ch_reports.mix(BAM_MARKDUPLICATES.out.stats.collect{it[1]}.ifEmpty([]))
// ch_reports = ch_reports.mix(BAM_MARKDUPLICATES.out.metrics.collect{it[1]}.ifEmpty([]))
// ch_versions = ch_versions.mix(BAM_MARKDUPLICATES.out.versions.first().ifEmpty(null))
//
// MODULE: Scatter one interval-list into many interval-files using GATK4 IntervalListTools
//
ch_interval_list_split = Channel.empty()
if (!params.skip_intervallisttools) {
GATK4_INTERVALLISTTOOLS(ch_interval_list)
ch_interval_list_split = GATK4_INTERVALLISTTOOLS.out.interval_list.map{ meta, bed -> [bed] }.flatten()
}
else ch_interval_list_split = ch_interval_list

//
// SUBWORKFLOW: Perform read alignment using STAR aligner
//
ch_genome_bam = Channel.empty()
ch_genome_bam_index = Channel.empty()
ch_samtools_stats = Channel.empty()
ch_samtools_flagstat = Channel.empty()
ch_samtools_idxstats = Channel.empty()
ch_star_multiqc = Channel.empty()
ch_aligner_pca_multiqc = Channel.empty()
ch_aligner_clustering_multiqc = Channel.empty()

if (params.aligner == 'star') {
ALIGN_STAR(
ch_cat_fastq,
PREPARE_GENOME.out.star_index,
PREPARE_GENOME.out.gtf,
params.star_ignore_sjdbgtf,
seq_platform,
seq_center
)
ch_genome_bam = ALIGN_STAR.out.bam
ch_genome_bam_index = ALIGN_STAR.out.bai
ch_transcriptome_bam = ALIGN_STAR.out.bam_transcript

// Gather QC reports
ch_reports = ch_reports.mix(ALIGN_STAR.out.stats.collect{it[1]}.ifEmpty([]))
ch_reports = ch_reports.mix(ALIGN_STAR.out.log_final.collect{it[1]}.ifEmpty([]))
ch_versions = ch_versions.mix(ALIGN_STAR.out.versions.first().ifEmpty(null))

//
// SUBWORKFLOW: Mark duplicates with GATK4
//
BAM_MARKDUPLICATES(
ch_genome_bam,
ch_fasta.map{ meta, fasta -> [fasta] },
ch_fasta_fai,
[])

ch_genome_bam = BAM_MARKDUPLICATES.out.cram

// Gather QC reports
ch_reports = ch_reports.mix(BAM_MARKDUPLICATES.out.reports.collect{it[1]}.ifEmpty([]))
ch_versions = ch_versions.mix(BAM_MARKDUPLICATES.out.versions.first().ifEmpty(null))

// //
// // SUBWORKFLOW: SplitNCigarReads from GATK4 over the intervals
Expand All @@ -252,9 +251,9 @@ workflow RNAVAR {
// ch_splitncigar_bam_bai = Channel.empty()
// SPLITNCIGAR(
// ch_genome_bam,
// PREPARE_GENOME.out.fasta,
// PREPARE_GENOME.out.fai,
// PREPARE_GENOME.out.dict,
// ch_fasta,
// ch_fasta_fai,
// ch_dict,
// ch_interval_list_split
// )
// ch_splitncigar_bam_bai = SPLITNCIGAR.out.bam_bai
Expand All @@ -279,9 +278,9 @@ workflow RNAVAR {

// GATK4_BASERECALIBRATOR(
// ch_splitncigar_bam_bai_interval,
// PREPARE_GENOME.out.fasta,
// PREPARE_GENOME.out.fai,
// PREPARE_GENOME.out.dict,
// ch_fasta,
// ch_fasta_fai,
// ch_dict,
// ch_known_sites,
// ch_known_sites_tbi
// )
Expand All @@ -306,8 +305,8 @@ workflow RNAVAR {
// RECALIBRATE(
// params.skip_multiqc,
// ch_applybqsr_bam_bai_interval,
// PREPARE_GENOME.out.dict,
// PREPARE_GENOME.out.fai,
// ch_dict,
// ch_fasta_fai,
// PREPARE_GENOME.out.fasta
// )

Expand All @@ -319,7 +318,7 @@ workflow RNAVAR {
// ch_versions = ch_versions.mix(RECALIBRATE.out.versions.first().ifEmpty(null))
// } else {
// ch_bam_variant_calling = ch_splitncigar_bam_bai
// }
}

// interval_flag = params.no_intervals
// // Run haplotyper even in the absence of dbSNP files
Expand All @@ -344,9 +343,9 @@ workflow RNAVAR {

// GATK4_HAPLOTYPECALLER(
// ch_haplotypecaller_interval_bam,
// PREPARE_GENOME.out.fasta,
// PREPARE_GENOME.out.fai,
// PREPARE_GENOME.out.dict,
// ch_fasta,
// ch_fasta_fai,
// ch_dict,
// ch_dbsnp,
// ch_dbsnp_tbi
// )
Expand All @@ -364,16 +363,16 @@ workflow RNAVAR {
// // MODULE: MergeVCFS from GATK4
// // Merge multiple VCF files into one VCF
// //
// GATK4_MERGEVCFS(ch_haplotypecaller_raw, PREPARE_GENOME.out.dict)
// GATK4_MERGEVCFS(ch_haplotypecaller_raw, ch_dict)
// ch_haplotypecaller_vcf = GATK4_MERGEVCFS.out.vcf
// ch_versions = ch_versions.mix(GATK4_MERGEVCFS.out.versions.first().ifEmpty(null))

// if (params.generate_gvcf){
// GATK4_HAPLOTYPECALLERGVCF(
// ch_haplotypecaller_interval_bam,
// PREPARE_GENOME.out.fasta,
// PREPARE_GENOME.out.fai,
// PREPARE_GENOME.out.dict,
// ch_fasta,
// ch_fasta_fai,
// ch_dict,
// ch_dbsnp,
// ch_dbsnp_tbi
// )
Expand Down Expand Up @@ -415,9 +414,9 @@ workflow RNAVAR {
// GATK4_COMBINEGVCFS(
// ch_haplotypecallergvcf_raw,
// ch_haplotypecallergvcf_raw_index,
// PREPARE_GENOME.out.fasta,
// PREPARE_GENOME.out.fai,
// PREPARE_GENOME.out.dict
// ch_fasta,
// ch_fasta_fai,
// ch_dict
// )
// ch_haplotypecaller_gvcf = GATK4_COMBINEGVCFS.out.combined_gvcf
// ch_versions = ch_versions.mix(GATK4_COMBINEGVCFS.out.versions.first().ifEmpty(null))
Expand Down Expand Up @@ -463,9 +462,9 @@ workflow RNAVAR {

// GATK4_VARIANTFILTRATION(
// ch_haplotypecaller_vcf_tbi,
// PREPARE_GENOME.out.fasta,
// PREPARE_GENOME.out.fai,
// PREPARE_GENOME.out.dict
// ch_fasta,
// ch_fasta_fai,
// ch_dict
// )

// ch_filtered_vcf = GATK4_VARIANTFILTRATION.out.vcf
Expand Down

0 comments on commit 65b3343

Please sign in to comment.