Skip to content

Commit

Permalink
chore: give workflows names and categories
Browse files Browse the repository at this point in the history
  • Loading branch information
a-frantz committed Dec 6, 2024
1 parent ec291a8 commit 254d7e8
Show file tree
Hide file tree
Showing 21 changed files with 52 additions and 20 deletions.
1 change: 1 addition & 0 deletions data_structures/flag_filter.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ task validate_string_is_12bit_oct_dec_or_hex {

workflow validate_flag_filter {
meta {
name: "Validate FlagFilter"
description: "Validates a FlagFilter struct."
outputs: {
check: "Dummy output to enable caching."
Expand Down
2 changes: 1 addition & 1 deletion tools/kraken2.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ task kraken {
parameter_meta {
read_one_fastq_gz: "Gzipped FASTQ file with 1st reads in pair"
read_two_fastq_gz: "Gzipped FASTQ file with 2nd reads in pair"
db: "Kraken2 database. Can be generated with `make-qc-reference.wdl`. Must be a tarball without a root directory."
db: "Kraken2 database. Can be generated with `qc-reference.wdl`. Must be a tarball without a root directory."
prefix: "Prefix for the Kraken2 output files. The extensions `.kraken2.txt` and `.kraken2.sequences.txt.gz` will be added."
store_sequences: {
description: "Store and output main Kraken2 output in addition to the summary report?",
Expand Down
2 changes: 2 additions & 0 deletions workflows/chipseq/chipseq-standard.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ import "https://raw.githubusercontent.com/stjude/seaseq/3.0/workflows/tasks/seas

workflow chipseq_standard {
meta {
name: "ChIP-Seq Standard"
description: "Runs the BWA ChIP-Seq alignment workflow for St. Jude Cloud."
category: "Harmonization"
outputs: {
harmonized_bam: "A harmonized BWA aligned ChIP-Seq BAM file",
bam_checksum: "STDOUT of the `md5sum` command run on the input BAM that has been redirected to a file",
Expand Down
1 change: 1 addition & 0 deletions workflows/dnaseq/dnaseq-core.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import "../general/samtools-merge.wdl" as samtools_merge_wf

workflow dnaseq_core_experimental {
meta {
name: "DNA-Seq Core (Experimental)"
description: "Aligns DNA reads using bwa"
outputs: {
harmonized_bam: "Harmonized DNA-Seq BAM, aligned with bwa",
Expand Down
2 changes: 2 additions & 0 deletions workflows/dnaseq/dnaseq-standard-fastq.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ import "./dnaseq-core.wdl" as dnaseq_core_wf

workflow dnaseq_standard_fastq_experimental {
meta {
name: "DNA-Seq Standard (FASTQ, Experimental)"
description: "Aligns DNA reads using bwa"
category: "Harmonization"
outputs: {
harmonized_bam: "Harmonized DNA-Seq BAM, aligned with bwa",
harmonized_bam_index: "Index for the harmonized DNA-Seq BAM file",
Expand Down
2 changes: 2 additions & 0 deletions workflows/dnaseq/dnaseq-standard.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ import "./dnaseq-core.wdl" as dnaseq_core_wf

workflow dnaseq_standard_experimental {
meta {
name: "DNA-Seq Standard (Experimental)"
description: "Aligns DNA reads using bwa"
category: "Harmonization"
outputs: {
harmonized_bam: "Harmonized DNA-Seq BAM, aligned with bwa",
harmonized_bam_index: "Index for the harmonized DNA-Seq BAM file",
Expand Down
1 change: 1 addition & 0 deletions workflows/general/alignment-post.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import "https://raw.githubusercontent.com/stjude/XenoCP/4.0.0-alpha/wdl/workflow

workflow alignment_post {
meta {
name: "Alignment Post"
description: "Runs a series of standard processing tools that should immediately follow alignment, regardless of data-type"
outputs: {
processed_bam: "Input BAM after being transformed by standard processing",
Expand Down
2 changes: 2 additions & 0 deletions workflows/general/bam-to-fastqs.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ import "../../tools/samtools.wdl"

workflow bam_to_fastqs {
meta {
name: "BAM to FASTQs"
description: "Converts an input BAM file to one or more FASTQ files, performing QC checks along the way"
category: "Utility"
outputs: {
read1s: "Array of FASTQ files corresponding to either `first` reads (if `paired_end = true`) or all reads (if `paired_end = false`)",
read2s: "Array of FASTQ files corresponding to `last` reads (if `paired_end = true`)",
Expand Down
2 changes: 2 additions & 0 deletions workflows/general/samtools-merge.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ import "../../tools/samtools.wdl"

workflow samtools_merge {
meta{
name: "Samtools Merge"
description: "Runs `samtools merge`, with optional iteration to avoid maximum command line argument length"
category: "Utility"
outputs: {
merged_bam: "The BAM resulting from merging all the input BAMs"
}
Expand Down
1 change: 1 addition & 0 deletions workflows/qc/markdups-post.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import "../../tools/samtools.wdl"

workflow markdups_post {
meta {
name: "Mark Duplicates Post"
description: "Runs QC analyses which are impacted by duplicate marking"
outputs: {
insert_size_metrics: "`*.txt` output file of `picard collectInsertSizeMetrics`",
Expand Down
8 changes: 5 additions & 3 deletions workflows/qc/quality-check-standard.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ import "./markdups-post.wdl" as markdups_post_wf

workflow quality_check {
meta {
name: "Quality Check Standard"
description: "Performs comprehensive quality checks, aggregating all analyses and metrics into a final MultiQC report."
category: "Harmonization"
help: "Assumes that input BAM is position-sorted."
external_help: "https://multiqc.info/"
outputs: {
Expand Down Expand Up @@ -79,14 +81,14 @@ workflow quality_check {
parameter_meta {
bam: "Input BAM format file to quality check"
bam_index: "BAM index file corresponding to the input BAM"
kraken_db: "Kraken2 database. Can be generated with `../reference/make-qc-reference.wdl`. Must be a tarball without a root directory."
coverage_beds: "An array of 3 column BEDs which are passed to the `-b` flag of mosdepth, in order to restrict coverage analysis to select regions. Any regional analysis enabled by this option is _in addition_ to whole genome coverage, which is calculated regardless of this setting. An exon BED and a Coding Sequence BED are examples of regions you may wish to restrict coverage analysis to. Those two BEDs can be created with the workflow in `../reference/make-qc-reference.wdl`."
kraken_db: "Kraken2 database. Can be generated with `../reference/qc-reference.wdl`. Must be a tarball without a root directory."
coverage_beds: "An array of 3 column BEDs which are passed to the `-b` flag of mosdepth, in order to restrict coverage analysis to select regions. Any regional analysis enabled by this option is _in addition_ to whole genome coverage, which is calculated regardless of this setting. An exon BED and a Coding Sequence BED are examples of regions you may wish to restrict coverage analysis to. Those two BEDs can be created with the workflow in `../reference/qc-reference.wdl`."
gtf: "GTF features file. Gzipped or uncompressed. **Required** for RNA-Seq data."
standard_filter: "Filter to apply to the input BAM while converting to FASTQ, before running Kraken2 and `librarian` (if `run_librarian == true`). This is a `FlagFilter` object (see ../../data_structures/flag_filter.wdl for more information). By default, it will **remove secondary and supplementary reads** from the created FASTQs. **WARNING:** These filters can be tricky to configure; please read documentation thoroughly before changing the defaults. **WARNING:** If you have set `run_librarian` to `true`, we **strongly** recommend leaving this filter at the default value. `librarian` is trained on a specific set of reads, and changing this filter may produce nonsensical results."
comparative_filter: "Filter to apply to the input BAM while performing a second FASTQ conversion, before running Kraken2 another time. This is a `FlagFilter` object (see ../../data_structures/flag_filter.wdl for more information). By default, it will **remove unmapped, secondary, and supplementary reads** from the created FASTQs. **WARNING:** These filters can be tricky to configure; please read documentation thoroughly before changing the defaults."
multiqc_config: "YAML file for configuring MultiQC"
extra_multiqc_inputs: "An array of additional files to pass directly into MultiQC"
coverage_labels: "An array of equal length to `coverage_beds` which determines the prefix label applied to the output files. If omitted, defaults of `regions1`, `regions2`, etc. will be used. If using the BEDs created by `../reference/make-qc-reference.wdl`, the labels [\"exon\", \"CDS\"] are appropriate. Make sure to provide the coverage BEDs **in the same order** as the labels."
coverage_labels: "An array of equal length to `coverage_beds` which determines the prefix label applied to the output files. If omitted, defaults of `regions1`, `regions2`, etc. will be used. If using the BEDs created by `../reference/qc-reference.wdl`, the labels [\"exon\", \"CDS\"] are appropriate. Make sure to provide the coverage BEDs **in the same order** as the labels."
prefix: "Prefix for all results files"
rna: "Is the sequenced molecule RNA? Enabling this option adds RNA-Seq specific analyses to the workflow. If `true`, a GTF file must be provided. If `false`, the GTF file is ignored."
mark_duplicates: "Mark duplicates before select analyses? Default behavior is to set this to the value of the `rna` parameter. This is because DNA files are often duplicate marked already, and RNA-Seq files are usually _not_ duplicate marked. If set to `true`, a BAM will be generated and passed to selected downstream analyses. For more details about what analyses are run, review `./markdups-post.wdl`. **WARNING, this duplicate marked BAM is _not_ output by default.** If you would like to output this file, set `output_intermediate_files = true`."
Expand Down
2 changes: 2 additions & 0 deletions workflows/reference/bwa-db-build.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ import "../../tools/util.wdl"

workflow bwa_db_build {
meta {
name: "BWA Database Build"
description: "Generates a set of genome reference files usable by the BWA aligner from an input reference file in FASTA format."
category: "Reference"
outputs: {
reference_fa: "FASTA format reference file used to generate `bwa_db_tar_gz`",
bwa_db_tar_gz: "Gzipped tar archive of the BWA reference files. Files are at the root of the archive.",
Expand Down
2 changes: 2 additions & 0 deletions workflows/reference/gatk-reference.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ import "../../tools/util.wdl"

workflow gatk_reference {
meta {
name: "GATK Reference"
description: "Fetches reference files for GATK."
category: "Reference"
outputs: {
fasta: "FASTA file for the reference genome.",
fasta_index: "Index for the FASTA file for the reference genome.",
Expand Down
30 changes: 15 additions & 15 deletions workflows/reference/inputs/make-qc-reference-inputs.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"make_qc_reference.reference_fa_url": "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/001/405/GCA_000001405.15_GRCh38/seqs_for_alignment_pipelines.ucsc_ids/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.gz",
"make_qc_reference.reference_fa_name": "GRCh38_no_alt.fa.gz",
"make_qc_reference.gtf_url": "ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_31/gencode.v31.annotation.gtf.gz",
"make_qc_reference.gtf_name": "gencode.v31.gtf.gz",
"make_qc_reference.protein": false,
"make_qc_reference.kraken_libraries": [
"qc_reference.reference_fa_url": "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/001/405/GCA_000001405.15_GRCh38/seqs_for_alignment_pipelines.ucsc_ids/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.gz",
"qc_reference.reference_fa_name": "GRCh38_no_alt.fa.gz",
"qc_reference.gtf_url": "ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_31/gencode.v31.annotation.gtf.gz",
"qc_reference.gtf_name": "gencode.v31.gtf.gz",
"qc_reference.protein": false,
"qc_reference.kraken_libraries": [
"archaea",
"bacteria",
"plasmid",
Expand All @@ -14,19 +14,19 @@
"protozoa",
"UniVec_Core"
],
"make_qc_reference.kraken_fasta_urls": [
"qc_reference.kraken_fasta_urls": [
"https://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_mammalian/Mus_musculus/reference/GCF_000001635.27_GRCm39/GCF_000001635.27_GRCm39_genomic.fna.gz",
"https://ftp.ncbi.nlm.nih.gov/genomes/refseq/plant/Arabidopsis_thaliana/reference/GCF_000001735.4_TAIR10.1/GCF_000001735.4_TAIR10.1_genomic.fna.gz",
"https://ftp.ncbi.nlm.nih.gov/genomes/refseq/invertebrate/Drosophila_melanogaster/reference/GCF_000001215.4_Release_6_plus_ISO1_MT/GCF_000001215.4_Release_6_plus_ISO1_MT_genomic.fna.gz",
"https://ftp.ncbi.nlm.nih.gov/genomes/refseq/invertebrate/Caenorhabditis_elegans/reference/GCF_000002985.6_WBcel235/GCF_000002985.6_WBcel235_genomic.fna.gz",
"https://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_other/Danio_rerio/reference/GCF_000002035.6_GRCz11/GCF_000002035.6_GRCz11_genomic.fna.gz"
],
"make_qc_reference.kraken_fastas": [],
"make_qc_reference.kraken_build_db.db_name": "custom_kraken2_db",
"make_qc_reference.kraken_build_db.kmer_len": 35,
"make_qc_reference.kraken_build_db.minimizer_len": 31,
"make_qc_reference.kraken_build_db.minimizer_spaces": 7,
"make_qc_reference.kraken_build_db.max_db_size_gb": -1,
"make_qc_reference.kraken_build_db.ncpu": 8,
"make_qc_reference.kraken_build_db.use_all_cores": false
"qc_reference.kraken_fastas": [],
"qc_reference.kraken_build_db.db_name": "custom_kraken2_db",
"qc_reference.kraken_build_db.kmer_len": 35,
"qc_reference.kraken_build_db.minimizer_len": 31,
"qc_reference.kraken_build_db.minimizer_spaces": 7,
"qc_reference.kraken_build_db.max_db_size_gb": -1,
"qc_reference.kraken_build_db.ncpu": 8,
"qc_reference.kraken_build_db.use_all_cores": false
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@ version 1.1
import "../../tools/kraken2.wdl"
import "../../tools/util.wdl"

workflow make_qc_reference {
workflow qc_reference {
meta {
name: "Quality Check Reference"
description: "Downloads and creates all reference files needed to run the `quality_check` workflow"
warning: "See `kraken2.download_library.meta.warning` for information regarding common failures."
category: "Reference"
outputs: {
reference_fa: "FASTA format reference file",
gtf: "GTF feature file",
Expand Down
2 changes: 2 additions & 0 deletions workflows/reference/star-db-build.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ import "../../tools/util.wdl"

workflow star_db_build {
meta {
name: "STAR Database Build"
description: "Builds a database suitable for running the STAR alignment program"
category: "Reference"
outputs: {
reference_fa: "FASTA format reference file",
gtf: "GTF feature file",
Expand Down
1 change: 1 addition & 0 deletions workflows/rnaseq/ESTIMATE.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import "../../tools/htseq.wdl"

workflow estimate {
meta {
name: "ESTIMATE"
description: "**[DEPRECATED]** Runs the ESTIMATE software package on a feature counts file"
external_help: "https://bioinformatics.mdanderson.org/estimate/"
outputs: {
Expand Down
1 change: 1 addition & 0 deletions workflows/rnaseq/rnaseq-core.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import "../general/alignment-post.wdl" as alignment_post_wf

workflow rnaseq_core {
meta {
name: "RNA-Seq Core"
description: "Main processing of RNA-Seq data, starting with FASTQs. We recommend against calling this workflow directly, and would suggest instead running `rnaseq_standard` or `rnaseq_standard_fastq`. Both wrapper workflows provide a nicer user experience than this workflow and will get you equivalent results."
outputs: {
bam: "Harmonized RNA-Seq BAM",
Expand Down
2 changes: 2 additions & 0 deletions workflows/rnaseq/rnaseq-standard-fastq.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ import "./rnaseq-standard.wdl" as rnaseq_standard

workflow rnaseq_standard_fastq {
meta {
name: "RNA-Seq Standard (FASTQ)"
description: "Runs the STAR RNA-Seq alignment workflow for St. Jude Cloud from FASTQ input"
category: "Harmonization"
outputs: {
bam: "Harmonized RNA-Seq BAM",
bam_index: "BAI index file associated with `bam`",
Expand Down
2 changes: 2 additions & 0 deletions workflows/rnaseq/rnaseq-standard.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ import "./rnaseq-core.wdl" as rnaseq_core_wf

workflow rnaseq_standard {
meta {
name: "RNA-Seq Standard"
description: "Runs the STAR RNA-Seq alignment workflow for St. Jude Cloud"
category: "Harmonization"
outputs: {
harmonized_bam: "Harmonized RNA-Seq BAM",
bam_index: "BAI index file associated with `harmonized_bam`",
Expand Down
2 changes: 2 additions & 0 deletions workflows/rnaseq/rnaseq-variant-calling.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ import "../../tools/picard.wdl"

workflow rnaseq_variant_calling {
meta {
name: "RNA-Seq Variant Calling"
description: "Call short germline variants from RNA-Seq data. Produces a VCF file of variants. Based on GATK RNA-Seq short variant calling best practices pipeline."
category: "Variant Calling"
outputs: {
recalibrated_bam: "BAM that has undergone recalibration of base quality scores",
recalibrated_bam_index: "Index file for recalibrated BAM file",
Expand Down

0 comments on commit 254d7e8

Please sign in to comment.