Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Chipseq updates #111

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 34 additions & 13 deletions workflows/chipseq/chipseq-standard.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ import "../../tools/samtools.wdl"
import "../../tools/util.wdl"
import "../general/bam-to-fastqs.wdl" as b2fq
#@ except: LineWidth
import "https://raw.githubusercontent.com/stjude/seaseq/2.3/workflows/workflows/mapping.wdl" as seaseq_map
import "https://raw.githubusercontent.com/stjude/seaseq/3.1/workflows/tasks/samtools.wdl" as seaseq_samtools
#@ except: LineWidth
import "https://raw.githubusercontent.com/stjude/seaseq/3.0/workflows/tasks/samtools.wdl" as seaseq_samtools
import "https://raw.githubusercontent.com/stjude/seaseq/3.1/workflows/tasks/seaseq_util.wdl" as seaseq_util
#@ except: LineWidth
import "https://raw.githubusercontent.com/stjude/seaseq/3.0/workflows/tasks/seaseq_util.wdl" as seaseq_util
import "https://raw.githubusercontent.com/stjude/seaseq/3.1/workflows/workflows/mapping.wdl" as seaseq_map

workflow chipseq_standard {
meta {
Expand All @@ -31,6 +31,7 @@ workflow chipseq_standard {
parameter_meta {
bam: "Input BAM format file to realign with bowtie"
bowtie_indexes: "Database of v1 reference files for the bowtie aligner. Can be generated with https://github.com/stjude/seaseq/blob/master/workflows/tasks/bowtie.wdl. [*.ebwt]"
paired_end: "Is the data paired-end (true) or single-end (false)?"
excludelist: "Optional list of regions that will be excluded after reference alignment"
prefix: "Prefix for output files"
validate_input: "Run Picard ValidateSamFile on the input BAM"
Expand All @@ -43,6 +44,7 @@ workflow chipseq_standard {
Array[File] bowtie_indexes
File? excludelist
String prefix = basename(bam, ".bam")
Boolean paired_end = false
Boolean validate_input = true
Boolean use_all_cores = false
Int subsample_n_reads = -1
Expand Down Expand Up @@ -84,28 +86,36 @@ workflow chipseq_standard {
bam_index = samtools_index_input.bam_index,
}

scatter (pair in zip(bam_to_fastqs.read1s, get_read_groups.read_groups)){
scatter (tuple in zip(
zip(
bam_to_fastqs.read1s,
bam_to_fastqs.read2s
),
get_read_groups.read_groups)){
call seaseq_util.basicfastqstats as basic_stats { input:
fastqfile = pair.left
fastqfile = tuple.left.left
}
call seaseq_map.mapping as bowtie_single_end_mapping { input:
fastqfile = pair.left,
#@ except: LineWidth
call seaseq_map.mapping as bowtie_mapping { input:
fastqfile = tuple.left.left, # tuple.left is the nested (R1, R2) FASTQ pair: tuple.left.left is R1 and tuple.left.right is R2
fastqfile_R2 = tuple.left.right,
index_files = bowtie_indexes,
metricsfile = basic_stats.metrics_out,
blacklist = excludelist,
paired_end,
}
File chosen_bam = select_first(
[
bowtie_single_end_mapping.bklist_bam,
bowtie_single_end_mapping.mkdup_bam,
bowtie_single_end_mapping.sorted_bam,
bowtie_mapping.bklist_bam,
bowtie_mapping.mkdup_bam,
bowtie_mapping.sorted_bam
]
)
call util.add_to_bam_header { input:
bam = chosen_bam,
additional_header = pair.right,
additional_header = tuple.right,
}
String rg_id_field = sub(sub(pair.right, ".*ID:", "ID:"), "\t.*", "")
String rg_id_field = sub(sub(tuple.right, ".*ID:", "ID:"), "\t.*", "")
String rg_id = sub(rg_id_field, "ID:", "")
call samtools.addreplacerg as single_end { input:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rename the `single_end` call alias to something else: this workflow now accepts paired-end data as well, so the name is misleading.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

poke

bam = add_to_bam_header.reheadered_bam,
Expand Down Expand Up @@ -133,8 +143,19 @@ workflow chipseq_standard {
bam = markdup.mkdupbam,
use_all_cores,
}

#@ except: UnusedCall
call picard.validate_bam { input: bam = markdup.mkdupbam }
call picard.validate_bam { input:
bam = markdup.mkdupbam,
ignore_list = [
"MISSING_PLATFORM_VALUE",
"INVALID_PLATFORM_VALUE",
"INVALID_MAPPING_QUALITY",
"MATES_ARE_SAME_END",
"MISMATCH_FLAG_MATE_NEG_STRAND",
"MISMATCH_MATE_ALIGNMENT_START"
Comment on lines +151 to +156
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The first two seem OK to ignore, but I'm less comfortable ignoring the last four. Are all of these still being produced by seaseq?

],
}

call md5sum.compute_checksum { input:
file = markdup.mkdupbam,
Expand Down
1 change: 1 addition & 0 deletions workflows/general/bam-to-fastqs.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ workflow bam_to_fastqs {
bam,
use_all_cores,
}

scatter (split_bam in split.split_bams) {
call samtools.bam_to_fastq { input:
bam = split_bam,
Expand Down