chore: give workflows names and categories

stjudecloud · Dec 6, 2024 · 254d7e8
1 parent ec291a8
commit 254d7e8
Show file tree

Hide file tree

Showing 21 changed files with 52 additions and 20 deletions.
diff --git a/data_structures/flag_filter.wdl b/data_structures/flag_filter.wdl
@@ -125,6 +125,7 @@ task validate_string_is_12bit_oct_dec_or_hex {
 
 workflow validate_flag_filter {
     meta {
+        name: "Validate FlagFilter"
         description: "Validates a FlagFilter struct."
         outputs: {
             check: "Dummy output to enable caching."

diff --git a/tools/kraken2.wdl b/tools/kraken2.wdl
@@ -307,7 +307,7 @@ task kraken {
     parameter_meta {
         read_one_fastq_gz: "Gzipped FASTQ file with 1st reads in pair"
         read_two_fastq_gz: "Gzipped FASTQ file with 2nd reads in pair"
-        db: "Kraken2 database. Can be generated with `make-qc-reference.wdl`. Must be a tarball without a root directory."
+        db: "Kraken2 database. Can be generated with `qc-reference.wdl`. Must be a tarball without a root directory."
         prefix: "Prefix for the Kraken2 output files. The extensions `.kraken2.txt` and `.kraken2.sequences.txt.gz` will be added."
         store_sequences: {
             description: "Store and output main Kraken2 output in addition to the summary report?",

diff --git a/workflows/chipseq/chipseq-standard.wdl b/workflows/chipseq/chipseq-standard.wdl
@@ -16,7 +16,9 @@ import "https://raw.githubusercontent.com/stjude/seaseq/3.0/workflows/tasks/seas
 
 workflow chipseq_standard {
     meta {
+        name: "ChIP-Seq Standard"
         description: "Runs the BWA ChIP-Seq alignment workflow for St. Jude Cloud."
+        category: "Harmonization"
         outputs: {
             harmonized_bam: "A harmonized BWA aligned ChIP-Seq BAM file",
             bam_checksum: "STDOUT of the `md5sum` command run on the input BAM that has been redirected to a file",

diff --git a/workflows/dnaseq/dnaseq-core.wdl b/workflows/dnaseq/dnaseq-core.wdl
@@ -11,6 +11,7 @@ import "../general/samtools-merge.wdl" as samtools_merge_wf
 
 workflow dnaseq_core_experimental {
     meta {
+        name: "DNA-Seq Core (Experimental)"
         description: "Aligns DNA reads using bwa"
         outputs: {
             harmonized_bam: "Harmonized DNA-Seq BAM, aligned with bwa",

diff --git a/workflows/dnaseq/dnaseq-standard-fastq.wdl b/workflows/dnaseq/dnaseq-standard-fastq.wdl
@@ -8,7 +8,9 @@ import "./dnaseq-core.wdl" as dnaseq_core_wf
 
 workflow dnaseq_standard_fastq_experimental {
     meta {
+        name: "DNA-Seq Standard (FASTQ, Experimental)"
         description: "Aligns DNA reads using bwa"
+        category: "Harmonization"
         outputs: {
             harmonized_bam: "Harmonized DNA-Seq BAM, aligned with bwa",
             harmonized_bam_index: "Index for the harmonized DNA-Seq BAM file",

diff --git a/workflows/dnaseq/dnaseq-standard.wdl b/workflows/dnaseq/dnaseq-standard.wdl
@@ -10,7 +10,9 @@ import "./dnaseq-core.wdl" as dnaseq_core_wf
 
 workflow dnaseq_standard_experimental {
     meta {
+        name: "DNA-Seq Standard (Experimental)"
         description: "Aligns DNA reads using bwa"
+        category: "Harmonization"
         outputs: {
             harmonized_bam: "Harmonized DNA-Seq BAM, aligned with bwa",
             harmonized_bam_index: "Index for the harmonized DNA-Seq BAM file",

diff --git a/workflows/general/alignment-post.wdl b/workflows/general/alignment-post.wdl
@@ -8,6 +8,7 @@ import "https://raw.githubusercontent.com/stjude/XenoCP/4.0.0-alpha/wdl/workflow
 
 workflow alignment_post {
     meta {
+        name: "Alignment Post"
         description: "Runs a series of standard processing tools that should immediately follow alignment, regardless of data-type"
         outputs: {
             processed_bam: "Input BAM after being transformed by standard processing",

diff --git a/workflows/general/bam-to-fastqs.wdl b/workflows/general/bam-to-fastqs.wdl
@@ -5,7 +5,9 @@ import "../../tools/samtools.wdl"
 
 workflow bam_to_fastqs {
     meta {
+        name: "BAM to FASTQs"
         description: "Converts an input BAM file to one or more FASTQ files, performing QC checks along the way"
+        category: "Utility"
         outputs: {
             read1s: "Array of FASTQ files corresponding to either `first` reads (if `paired_end = true`) or all reads (if `paired_end = false`)",
             read2s: "Array of FASTQ files corresponding to `last` reads (if `paired_end = true`)",

diff --git a/workflows/general/samtools-merge.wdl b/workflows/general/samtools-merge.wdl
@@ -6,7 +6,9 @@ import "../../tools/samtools.wdl"
 
 workflow samtools_merge {
     meta{
+        name: "Samtools Merge"
         description: "Runs `samtools merge`, with optional iteration to avoid maximum command line argument length"
+        category: "Utility"
         outputs: {
             merged_bam: "The BAM resulting from merging all the input BAMs"
         }

diff --git a/workflows/qc/markdups-post.wdl b/workflows/qc/markdups-post.wdl
@@ -14,6 +14,7 @@ import "../../tools/samtools.wdl"
 
 workflow markdups_post {
     meta {
+        name: "Mark Duplicates Post"
         description: "Runs QC analyses which are impacted by duplicate marking"
         outputs: {
             insert_size_metrics: "`*.txt` output file of `picard collectInsertSizeMetrics`",

diff --git a/workflows/qc/quality-check-standard.wdl b/workflows/qc/quality-check-standard.wdl
@@ -17,7 +17,9 @@ import "./markdups-post.wdl" as markdups_post_wf
 
 workflow quality_check {
     meta {
+        name: "Quality Check Standard"
         description: "Performs comprehensive quality checks, aggregating all analyses and metrics into a final MultiQC report."
+        category: "Harmonization"
         help: "Assumes that input BAM is position-sorted."
         external_help: "https://multiqc.info/"
         outputs: {
@@ -79,14 +81,14 @@ workflow quality_check {
     parameter_meta {
         bam: "Input BAM format file to quality check"
         bam_index: "BAM index file corresponding to the input BAM"
-        kraken_db: "Kraken2 database. Can be generated with `../reference/make-qc-reference.wdl`. Must be a tarball without a root directory."
-        coverage_beds: "An array of 3 column BEDs which are passed to the `-b` flag of mosdepth, in order to restrict coverage analysis to select regions. Any regional analysis enabled by this option is _in addition_ to whole genome coverage, which is calculated regardless of this setting. An exon BED and a Coding Sequence BED are examples of regions you may wish to restrict coverage analysis to. Those two BEDs can be created with the workflow in `../reference/make-qc-reference.wdl`."
+        kraken_db: "Kraken2 database. Can be generated with `../reference/qc-reference.wdl`. Must be a tarball without a root directory."
+        coverage_beds: "An array of 3 column BEDs which are passed to the `-b` flag of mosdepth, in order to restrict coverage analysis to select regions. Any regional analysis enabled by this option is _in addition_ to whole genome coverage, which is calculated regardless of this setting. An exon BED and a Coding Sequence BED are examples of regions you may wish to restrict coverage analysis to. Those two BEDs can be created with the workflow in `../reference/qc-reference.wdl`."
         gtf: "GTF features file. Gzipped or uncompressed. **Required** for RNA-Seq data."
         standard_filter: "Filter to apply to the input BAM while converting to FASTQ, before running Kraken2 and `librarian` (if `run_librarian == true`). This is a `FlagFilter` object (see ../../data_structures/flag_filter.wdl for more information). By default, it will **remove secondary and supplementary reads** from the created FASTQs. **WARNING:** These filters can be tricky to configure; please read documentation thoroughly before changing the defaults. **WARNING:** If you have set `run_librarian` to `true`, we **strongly** recommend leaving this filter at the default value. `librarian` is trained on a specific set of reads, and changing this filter may produce nonsensical results."
         comparative_filter: "Filter to apply to the input BAM while performing a second FASTQ conversion, before running Kraken2 another time. This is a `FlagFilter` object (see ../../data_structures/flag_filter.wdl for more information). By default, it will **remove unmapped, secondary, and supplementary reads** from the created FASTQs. **WARNING** These filters can be tricky to configure; please read documentation thoroughly before changing the defaults."
         multiqc_config: "YAML file for configuring MultiQC"
         extra_multiqc_inputs: "An array of additional files to pass directly into MultiQC"
-        coverage_labels: "An array of equal length to `coverage_beds` which determines the prefix label applied to the output files. If omitted, defaults of `regions1`, `regions2`, etc. will be used. If using the BEDs created by `../reference/make-qc-reference.wdl`, the labels [\"exon\", \"CDS\"] are appropriate. Make sure to provide the coverage BEDs **in the same order** as the labels."
+        coverage_labels: "An array of equal length to `coverage_beds` which determines the prefix label applied to the output files. If omitted, defaults of `regions1`, `regions2`, etc. will be used. If using the BEDs created by `../reference/qc-reference.wdl`, the labels [\"exon\", \"CDS\"] are appropriate. Make sure to provide the coverage BEDs **in the same order** as the labels."
         prefix: "Prefix for all results files"
         rna: "Is the sequenced molecule RNA? Enabling this option adds RNA-Seq specific analyses to the workflow. If `true`, a GTF file must be provided. If `false`, the GTF file is ignored."
         mark_duplicates: "Mark duplicates before select analyses? Default behavior is to set this to the value of the `rna` parameter. This is because DNA files are often duplicate marked already, and RNA-Seq files are usually _not_ duplicate marked. If set to `true`, a BAM will be generated and passed to selected downstream analyses. For more details about what analyses are run, review `./markdups-post.wdl`. **WARNING, this duplicate marked BAM is _not_ output by default.** If you would like to output this file, set `output_intermediate_files = true`."

diff --git a/workflows/reference/bwa-db-build.wdl b/workflows/reference/bwa-db-build.wdl
@@ -5,7 +5,9 @@ import "../../tools/util.wdl"
 
 workflow bwa_db_build {
     meta {
+        name: "BWA Database Build"
         description: "Generates a set of genome reference files usable by the BWA aligner from an input reference file in FASTA format."
+        category: "Reference"
         outputs: {
             reference_fa: "FASTA format reference file used to generate `bwa_db_tar_gz`",
             bwa_db_tar_gz: "Gzipped tar archive of the BWA reference files. Files are at the root of the archive.",

diff --git a/workflows/reference/gatk-reference.wdl b/workflows/reference/gatk-reference.wdl
@@ -6,7 +6,9 @@ import "../../tools/util.wdl"
 
 workflow gatk_reference {
     meta {
+        name: "GATK Reference"
         description: "Fetches reference files for GATK."
+        category: "Reference"
         outputs: {
             fasta: "FASTA file for the reference genome.",
             fasta_index: "Index for the FASTA file for the reference genome.",

diff --git a/workflows/reference/inputs/make-qc-reference-inputs.json b/workflows/reference/inputs/make-qc-reference-inputs.json
@@ -1,10 +1,10 @@
 {
-    "make_qc_reference.reference_fa_url": "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/001/405/GCA_000001405.15_GRCh38/seqs_for_alignment_pipelines.ucsc_ids/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.gz",
-    "make_qc_reference.reference_fa_name": "GRCh38_no_alt.fa.gz",
-    "make_qc_reference.gtf_url": "ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_31/gencode.v31.annotation.gtf.gz",
-    "make_qc_reference.gtf_name": "gencode.v31.gtf.gz",
-    "make_qc_reference.protein": false,
-    "make_qc_reference.kraken_libraries": [
+    "qc_reference.reference_fa_url": "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/001/405/GCA_000001405.15_GRCh38/seqs_for_alignment_pipelines.ucsc_ids/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.gz",
+    "qc_reference.reference_fa_name": "GRCh38_no_alt.fa.gz",
+    "qc_reference.gtf_url": "ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_31/gencode.v31.annotation.gtf.gz",
+    "qc_reference.gtf_name": "gencode.v31.gtf.gz",
+    "qc_reference.protein": false,
+    "qc_reference.kraken_libraries": [
         "archaea",
         "bacteria",
         "plasmid",
@@ -14,19 +14,19 @@
         "protozoa",
         "UniVec_Core"
     ],
-    "make_qc_reference.kraken_fasta_urls": [
+    "qc_reference.kraken_fasta_urls": [
         "https://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_mammalian/Mus_musculus/reference/GCF_000001635.27_GRCm39/GCF_000001635.27_GRCm39_genomic.fna.gz",
         "https://ftp.ncbi.nlm.nih.gov/genomes/refseq/plant/Arabidopsis_thaliana/reference/GCF_000001735.4_TAIR10.1/GCF_000001735.4_TAIR10.1_genomic.fna.gz",
         "https://ftp.ncbi.nlm.nih.gov/genomes/refseq/invertebrate/Drosophila_melanogaster/reference/GCF_000001215.4_Release_6_plus_ISO1_MT/GCF_000001215.4_Release_6_plus_ISO1_MT_genomic.fna.gz",
         "https://ftp.ncbi.nlm.nih.gov/genomes/refseq/invertebrate/Caenorhabditis_elegans/reference/GCF_000002985.6_WBcel235/GCF_000002985.6_WBcel235_genomic.fna.gz",
         "https://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_other/Danio_rerio/reference/GCF_000002035.6_GRCz11/GCF_000002035.6_GRCz11_genomic.fna.gz"
     ],
-    "make_qc_reference.kraken_fastas": [],
-    "make_qc_reference.kraken_build_db.db_name": "custom_kraken2_db",
-    "make_qc_reference.kraken_build_db.kmer_len": 35,
-    "make_qc_reference.kraken_build_db.minimizer_len": 31,
-    "make_qc_reference.kraken_build_db.minimizer_spaces": 7,
-    "make_qc_reference.kraken_build_db.max_db_size_gb": -1,
-    "make_qc_reference.kraken_build_db.ncpu": 8,
-    "make_qc_reference.kraken_build_db.use_all_cores": false
+    "qc_reference.kraken_fastas": [],
+    "qc_reference.kraken_build_db.db_name": "custom_kraken2_db",
+    "qc_reference.kraken_build_db.kmer_len": 35,
+    "qc_reference.kraken_build_db.minimizer_len": 31,
+    "qc_reference.kraken_build_db.minimizer_spaces": 7,
+    "qc_reference.kraken_build_db.max_db_size_gb": -1,
+    "qc_reference.kraken_build_db.ncpu": 8,
+    "qc_reference.kraken_build_db.use_all_cores": false
 }
diff --git a/workflows/reference/make-qc-reference.wdl b/workflows/reference/qc-reference.wdl
@@ -3,10 +3,12 @@ version 1.1
 import "../../tools/kraken2.wdl"
 import "../../tools/util.wdl"
 
-workflow make_qc_reference {
+workflow qc_reference {
     meta {
+        name: "Quality Check Reference"
         description: "Downloads and creates all reference files needed to run the `quality_check` workflow"
         warning: "See `kraken2.download_library.meta.warning` for information regarding common failures."
+        category: "Reference"
         outputs: {
             reference_fa: "FASTA format reference file",
             gtf: "GTF feature file",

diff --git a/workflows/reference/star-db-build.wdl b/workflows/reference/star-db-build.wdl
@@ -5,7 +5,9 @@ import "../../tools/util.wdl"
 
 workflow star_db_build {
     meta {
+        name: "STAR Database Build"
         description: "Builds a database suitable for running the STAR alignment program"
+        category: "Reference"
         outputs: {
             reference_fa: "FASTA format reference file",
             gtf: "GTF feature file",

diff --git a/workflows/rnaseq/ESTIMATE.wdl b/workflows/rnaseq/ESTIMATE.wdl
@@ -5,6 +5,7 @@ import "../../tools/htseq.wdl"
 
 workflow estimate {
     meta {
+        name: "ESTIMATE"
         description: "**[DEPRECATED]** Runs the ESTIMATE software package on a feature counts file"
         external_help: "https://bioinformatics.mdanderson.org/estimate/"
         outputs: {

diff --git a/workflows/rnaseq/rnaseq-core.wdl b/workflows/rnaseq/rnaseq-core.wdl
@@ -8,6 +8,7 @@ import "../general/alignment-post.wdl" as alignment_post_wf
 
 workflow rnaseq_core {
     meta {
+        name: "RNA-Seq Core"
         description: "Main processing of RNA-Seq data, starting with FASTQs. We recommend against calling this workflow directly, and would suggest instead running `rnaseq_standard` or `rnaseq_standard_fastq`. Both wrapper workflows provide a nicer user experience than this workflow and will get you equivalent results."
         outputs: {
             bam: "Harmonized RNA-Seq BAM",

diff --git a/workflows/rnaseq/rnaseq-standard-fastq.wdl b/workflows/rnaseq/rnaseq-standard-fastq.wdl
@@ -24,7 +24,9 @@ import "./rnaseq-standard.wdl" as rnaseq_standard
 
 workflow rnaseq_standard_fastq {
     meta {
+        name: "RNA-Seq Standard (FASTQ)"
         description: "Runs the STAR RNA-Seq alignment workflow for St. Jude Cloud from FASTQ input"
+        category: "Harmonization"
         outputs: {
             bam: "Harmonized RNA-Seq BAM",
             bam_index: "BAI index file associated with `bam`",

diff --git a/workflows/rnaseq/rnaseq-standard.wdl b/workflows/rnaseq/rnaseq-standard.wdl
@@ -8,7 +8,9 @@ import "./rnaseq-core.wdl" as rnaseq_core_wf
 
 workflow rnaseq_standard {
     meta {
+        name: "RNA-Seq Standard"
         description: "Runs the STAR RNA-Seq alignment workflow for St. Jude Cloud"
+        category: "Harmonization"
         outputs: {
             harmonized_bam: "Harmonized RNA-Seq BAM",
             bam_index: "BAI index file associated with `bam`",

diff --git a/workflows/rnaseq/rnaseq-variant-calling.wdl b/workflows/rnaseq/rnaseq-variant-calling.wdl
@@ -5,7 +5,9 @@ import "../../tools/picard.wdl"
 
 workflow rnaseq_variant_calling {
     meta {
+        name: "RNA-Seq Variant Calling"
         description: "Call short germline variants from RNA-Seq data. Produces a VCF file of variants. Based on GATK RNA-Seq short variant calling best practices pipeline."
+        category: "Variant Calling"
         outputs: {
             recalibrated_bam: "BAM that has undergone recalibration of base quality scores",
             recalibrated_bam_index: "Index file for recalibrated BAM file",