From e68cf386434b764d59fc4e628176436c397e5fb6 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Fri, 14 Feb 2025 14:51:14 -0500 Subject: [PATCH 1/3] Update star.wdl --- tools/star.wdl | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/tools/star.wdl b/tools/star.wdl index fb874e2e..8b025484 100755 --- a/tools/star.wdl +++ b/tools/star.wdl @@ -152,17 +152,36 @@ task alignment { description: "An array of `String`s where each `String` corresponds to one read group.", help: "Each read group string should start with the `ID` field followed by any other read group fields, where fields are delimited by a space. See `../data_structures/read_group.wdl` for information about possible fields and utility tasks for constructing, validating, and \"stringifying\" read groups.", warning: "The `ID` field for each read group _must_ be contained in the basename of a FASTQ file or pair of FASTQ files if Paired-End. Example: `[\"ID:rg1 PU:flowcell1.lane1 SM:sample1 PL:illumina LB:sample1_lib1\", \"ID:rg2 PU:flowcell1.lane2 SM:sample1 PL:illumina LB:sample1_lib1\"]`. These two read groups could be associated with the following four FASTQs: `[\"sample1.rg1.R1.fastq\", \"sample1.rg2.R1.fastq\"]` and `[\"sample1.rg1.R2.fastq\", \"sample1.rg2.R2.fastq\"]`", + group: "common", } read_two_fastqs_gz: { description: "An array of gzipped FASTQ files containing read two information", group: "common", } - out_sj_filter_intron_max_vs_read_n: "maximum gap allowed for junctions supported by 1,2,3,,,N reads. i.e. by default junctions supported by 1 read can have gaps <=50000b, by 2 reads: <=100000b, by 3 reads: <=200000b. by >=4 reads any gap <=alignIntronMax. Does not apply to annotated junctions." - out_sj_filter_overhang_min: "minimum overhang length for splice junctions on both sides for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif. Does not apply to annotated junctions." - out_sj_filter_count_unique_min: "minimum uniquely mapping read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif. Junctions are output if one of outSJfilterCountUniqueMin *OR* outSJfilterCountTotalMin conditions are satisfied. Does not apply to annotated junctions." - out_sj_filter_count_total_min: "minimum total (multi-mapping+unique) read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif. Junctions are output if one of outSJfilterCountUniqueMin *OR* outSJfilterCountTotalMin conditions are satisfied. Does not apply to annotated junctions." - out_sj_filter_dist_to_other_sj_min: "minimum allowed distance to other junctions' donor/acceptor for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. Does not apply to annotated junctions." - align_sj_stitch_mismatch_n_max: "maximum number of mismatches for stitching of the splice junctions (-1: no limit) for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif" + out_sj_filter_intron_max_vs_read_n: { + description: "maximum gap allowed for junctions supported by 1,2,3,,,N reads. i.e. by default junctions supported by 1 read can have gaps <=50000b, by 2 reads: <=100000b, by 3 reads: <=200000b. by >=4 reads any gap <=alignIntronMax. Does not apply to annotated junctions.", + group: "Splice Junctions", + } + out_sj_filter_overhang_min: { + description: "minimum overhang length for splice junctions on both sides for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif. Does not apply to annotated junctions.", + group: "Splice Junctions", + } + out_sj_filter_count_unique_min: { + description: "minimum uniquely mapping read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif. Junctions are output if one of outSJfilterCountUniqueMin *OR* outSJfilterCountTotalMin conditions are satisfied. Does not apply to annotated junctions.", + group: "Splice Junctions", + } + out_sj_filter_count_total_min: { + description: "minimum total (multi-mapping+unique) read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif. Junctions are output if one of outSJfilterCountUniqueMin *OR* outSJfilterCountTotalMin conditions are satisfied. Does not apply to annotated junctions.", + group: "Splice Junctions", + } + out_sj_filter_dist_to_other_sj_min: { + description: "minimum allowed distance to other junctions' donor/acceptor for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. Does not apply to annotated junctions.", + group: "Splice Junctions", + } + align_sj_stitch_mismatch_n_max: { + description: "maximum number of mismatches for stitching of the splice junctions (-1: no limit) for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif", + group: "Splice Junctions", + } clip_3p_adapter_seq: { description: "adapter sequences to clip from 3p of each mate. `left` applies to read one and `right` applies to read two.", choices: { @@ -339,7 +358,7 @@ task alignment { } use_all_cores: { description: "Use all cores? Recommended for cloud environments.", - group: "common", + group: "Resources", } out_filter_mismatch_n_over_l_max: "alignment will be output only if its ratio of mismatches to *mapped* length is less than or equal to this value" out_filter_mismatch_n_over_read_l_max: "alignment will be output only if its ratio of mismatches to *read* length is less than or equal to this value" @@ -472,9 +491,12 @@ task alignment { } ncpu: { description: "Number of cores to allocate for task", - group: "common", + group: "Resources", + } + modify_disk_size_gb: { + description: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB.", + group: "Resources", } - modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } input { From 1965882153a16af845533185c86e3e6d9181e3f2 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Fri, 14 Feb 2025 14:54:39 -0500 Subject: [PATCH 2/3] revise: Capitalize group: Common --- tools/bwa.wdl | 20 +++---- tools/deeptools.wdl | 4 +- tools/fastqc.wdl | 4 +- tools/fq.wdl | 6 +- tools/gatk4.wdl | 2 +- tools/htseq.wdl | 16 +++--- tools/kraken2.wdl | 12 ++-- tools/mosdepth.wdl | 2 +- tools/ngsderive.wdl | 36 ++++++------ tools/picard.wdl | 16 +++--- tools/qualimap.wdl | 4 +- tools/sambamba.wdl | 16 +++--- tools/samtools.wdl | 94 ++++++++++++++++---------------- tools/star.wdl | 78 +++++++++++++------------- tools/util.wdl | 6 +- workflows/rnaseq/rnaseq-core.wdl | 10 ++-- 16 files changed, 163 insertions(+), 163 deletions(-) diff --git a/tools/bwa.wdl b/tools/bwa.wdl index a9cdefc0..729dccab 100644 --- a/tools/bwa.wdl +++ b/tools/bwa.wdl @@ -16,15 +16,15 @@ task bwa_aln { prefix: "Prefix for the BAM file. The extension `.bam` will be added." read_group: { description: "Read group information for BWA to insert into the header. BWA format: '@RG\tID:foo\tSM:bar'", - group: "common", + group: "Common", } use_all_cores: { description: "Use all cores? Recommended for cloud environments.", - group: "common", + group: "Common", } ncpu: { description: "Number of cores to allocate for task", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } @@ -112,15 +112,15 @@ task bwa_aln_pe { prefix: "Prefix for the BAM file. The extension `.bam` will be added." read_group: { description: "Read group information for BWA to insert into the header. BWA format: '@RG\tID:foo\tSM:bar'", - group: "common", + group: "Common", } use_all_cores: { description: "Use all cores? Recommended for cloud environments.", - group: "common", + group: "Common", } ncpu: { description: "Number of cores to allocate for task", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } @@ -207,15 +207,15 @@ task bwa_mem { prefix: "Prefix for the BAM file. The extension `.bam` will be added." read_group: { description: "Read group information for BWA to insert into the header. BWA format: '@RG\tID:foo\tSM:bar'", - group: "common", + group: "Common", } use_all_cores: { description: "Use all cores? Recommended for cloud environments.", - group: "common", + group: "Common", } ncpu: { description: "Number of cores to allocate for task", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } @@ -300,7 +300,7 @@ task build_bwa_db { reference_fasta: "Input reference Fasta file to index with bwa. Should be compressed with gzip." db_name: { description: "Name of the output gzipped tar archive of the bwa reference files. The extension `.tar.gz` will be added.", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } diff --git a/tools/deeptools.wdl b/tools/deeptools.wdl index 26323613..395cdd6d 100755 --- a/tools/deeptools.wdl +++ b/tools/deeptools.wdl @@ -16,11 +16,11 @@ task bam_coverage { prefix: "Prefix for the BigWig file. The extension `.bw` will be added." use_all_cores: { description: "Use all cores? Recommended for cloud environments.", - group: "common", + group: "Common", } ncpu: { description: "Number of cores to allocate for task", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } diff --git a/tools/fastqc.wdl b/tools/fastqc.wdl index 761d5ee0..c8c6925e 100755 --- a/tools/fastqc.wdl +++ b/tools/fastqc.wdl @@ -16,11 +16,11 @@ task fastqc { prefix: "Prefix for the FastQC results directory. The extension `.tar.gz` will be added." use_all_cores: { description: "Use all cores? Recommended for cloud environments.", - group: "common", + group: "Common", } ncpu: { description: "Number of cores to allocate for task", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } diff --git a/tools/fq.wdl b/tools/fq.wdl index d9f9caba..ec5168b6 100755 --- a/tools/fq.wdl +++ b/tools/fq.wdl @@ -50,7 +50,7 @@ task fqlint { } panic: { description: "Panic on first error (true) or log all errors (false)?", - group: "common", + group: "Common", } modify_memory_gb: "Add to or subtract from dynamic memory allocation. Default memory is determined by the size of the inputs. Specified in GB." modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." @@ -113,11 +113,11 @@ task subsample { prefix: "Prefix for the output FASTQ file(s). The extension `.R1.subsampled.fastq.gz` and `.R2.subsampled.fastq.gz` will be added." probability: { description: "The probability a record is kept, as a decimal (0.0, 1.0). Cannot be used with `record-count`. Any `probability<=0.0` or `probability>=1.0` to disable.", - group: "common", + group: "Common", } record_count: { description: "The exact number of records to keep. Cannot be used with `probability`. Any `record_count<=0` to disable.", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } diff --git a/tools/gatk4.wdl b/tools/gatk4.wdl index 4250af6e..e67edd2e 100644 --- a/tools/gatk4.wdl +++ b/tools/gatk4.wdl @@ -419,7 +419,7 @@ task mark_duplicates_spark { } create_bam: { description: "Enable BAM creation (true)? Or only output MarkDuplicates metrics (false)?", - group: "common", + group: "Common", } optical_distance: "Maximum distance between read coordinates to consider them optical duplicates. If `0`, then optical duplicate marking is disabled. Suggested settings of 100 for unpatterned versions of the Illumina platform (e.g. HiSeq) or 2500 for patterned flowcell models (e.g. NovaSeq). Calculation of distance depends on coordinate data embedded in the read names, typically produced by the Illumina sequencing machines. Optical duplicate detection will not work on non-standard names without modifying `read_name_regex`." modify_memory_gb: "Add to or subtract from the default memory allocation. Default memory allocation is determined by the size of the input BAM. Specified in GB." diff --git a/tools/htseq.wdl b/tools/htseq.wdl index fa33e5dd..76ffdce0 100755 --- a/tools/htseq.wdl +++ b/tools/htseq.wdl @@ -25,11 +25,11 @@ task count { prefix: "Prefix for the feature counts file. The extension `.feature-counts.txt` will be added." feature_type: { description: "Feature type (3rd column in GTF file) to be used, all features of other type are ignored", - group: "common", + group: "Common", } idattr: { description: "GFF attribute to be used as feature ID", - group: "common", + group: "Common", } mode: { description: "Mode to handle reads overlapping more than one feature. `union` is recommended for most use-cases.", @@ -42,27 +42,27 @@ task count { } include_custom_header: { description: "Include a custom header for the output file? This is not an official feature of HTSeq. If true, the first line of the output file will be `~{idattr}\t~{prefix}`. This may break downstream tools that expect the typical headerless HTSeq output format.", - group: "common", + group: "Common", } pos_sorted: { description: "Is the BAM position sorted (true) or name sorted (false)? It is **highly** recommended to use a name sorted BAM file. This is because HTSeq will re-sort position-sorted BAMs with an inefficient algorithm, causing very large memory and disk space allocations (especially for large BAMs).", - group: "common", + group: "Common", } nonunique: { description: "Score reads that align to or are assigned to more than one feature?", - group: "common", + group: "Common", } secondary_alignments: { description: "Score secondary alignments (SAM flag 0x100)?", - group: "common", + group: "Common", } supplementary_alignments: { description: "Score supplementary/chimeric alignments (SAM flag 0x800)?", - group: "common", + group: "Common", } minaqual: { description: "Skip all reads with alignment quality lower than the given minimum value", - group: "common", + group: "Common", } modify_memory_gb: "Add to or subtract from dynamic memory allocation. Default memory is determined by the size of the inputs. Specified in GB." modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." diff --git a/tools/kraken2.wdl b/tools/kraken2.wdl index da938611..915f83de 100644 --- a/tools/kraken2.wdl +++ b/tools/kraken2.wdl @@ -191,12 +191,12 @@ task build_db { tarballs: "Tarballs containing the NCBI taxonomy (generated by the `download_taxonomy` task) and at least one library (generated by the `download_library` or `create_library_from_fastas` task). Tarballs must not have a root directory." db_name: { description: "Name for output in compressed, archived format. The suffix `.tar.gz` will be added.", - group: "common", + group: "Common", } protein: "Construct a protein database?" use_all_cores: { description: "Use all cores? Recommended for cloud environments.", - group: "common", + group: "Common", } kmer_len: "K-mer length in bp that will be used to build the database" minimizer_len: "Minimizer length in bp that will be used to build the database" @@ -204,7 +204,7 @@ task build_db { max_db_size_gb: "Maximum number of GBs for Kraken 2 hash table; if the Kraken 2 estimator determines more would normally be needed, the reference library will be downsampled to fit." ncpu: { description: "Number of cores to allocate for task", - group: "common", + group: "Common", } modify_memory_gb: "Add to or subtract from dynamic memory allocation. Default memory is determined by the size of the inputs. Specified in GB." modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." @@ -311,17 +311,17 @@ task kraken { prefix: "Prefix for the Kraken2 output files. The extensions `.kraken2.txt` and `.kraken2.sequences.txt.gz` will be added." store_sequences: { description: "Store and output main Kraken2 output in addition to the summary report?", - group: "common", + group: "Common", } use_names: "Print scientific names instead of just taxids?" use_all_cores: { description: "Use all cores? Recommended for cloud environments.", - group: "common", + group: "Common", } min_base_quality: "Minimum base quality used in classification" ncpu: { description: "Number of cores to allocate for task", - group: "common", + group: "Common", } modify_memory_gb: "Add to or subtract from dynamic memory allocation. Default memory is determined by the size of the inputs. Specified in GB." modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." diff --git a/tools/mosdepth.wdl b/tools/mosdepth.wdl index 6ad3c08e..823fb8e0 100644 --- a/tools/mosdepth.wdl +++ b/tools/mosdepth.wdl @@ -20,7 +20,7 @@ task coverage { use_fast_mode: "Use Mosdepth's 'fast mode'? This enables the `-x` flag." min_mapping_quality: { description: "Minimum mapping quality to pass to the `-Q` flag of `mosdepth`", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } diff --git a/tools/ngsderive.wdl b/tools/ngsderive.wdl index 68fbf904..3212f4d5 100644 --- a/tools/ngsderive.wdl +++ b/tools/ngsderive.wdl @@ -18,19 +18,19 @@ task strandedness { outfile_name: "Name for the strandedness TSV file" split_by_rg: { description: "Contain one entry in the output TSV per read group, in addition to an `overall` entry", - group: "common", + group: "Common", } min_reads_per_gene: { description: "Filter any genes that don't have at least `min_reads_per_gene` reads mapping to them", - group: "common", + group: "Common", } num_genes: { description: "How many genes to sample", - group: "common", + group: "Common", } min_mapq: { description: "Minimum MAPQ to consider for supporting reads", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } @@ -103,7 +103,7 @@ task instrument { outfile_name: "Name for the instrument TSV file" num_reads: { description: "How many reads to analyze from the start of the file. Any n < 1 to parse whole file.", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } @@ -156,11 +156,11 @@ task read_length { outfile_name: "Name for the readlen TSV file" majority_vote_cutoff: { description: "To call a majority readlen, the maximum read length must have at least `majority-vote-cutoff`% reads in support", - group: "common", + group: "Common", } num_reads: { description: "How many reads to analyze from the start of the file. Any n < 1 to parse whole file.", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } @@ -219,7 +219,7 @@ task encoding { outfile_name: "Name for the encoding TSV file" num_reads: { description: "How many reads to analyze from the start of the file(s). Any n < 1 to parse whole file(s).", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } @@ -271,19 +271,19 @@ task junction_annotation { prefix: "Prefix for the summary TSV and junction files. The extensions `.junction_summary.tsv` and `.junctions.tsv` will be added." min_intron: { description: "Minimum size of intron to be considered a splice", - group: "common", + group: "Common", } min_mapq: { description: "Minimum MAPQ to consider for supporting reads", - group: "common", + group: "Common", } min_reads: { description: "Filter any junctions that don't have at least `min_reads` reads supporting them", - group: "common", + group: "Common", } fuzzy_junction_match_range: { description: "Consider found splices within `+-k` bases of a known splice event annotated", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } @@ -355,27 +355,27 @@ task endedness { outfile_name: "Name for the endedness TSV file" lenient: { description: "Return a zero exit code on unknown results", - group: "common", + group: "Common", } calc_rpt: { description: "Calculate and output Reads-Per-Template. This will produce a more sophisticated estimate for endedness, but uses substantially more memory (can reach up to 200% of BAM size in memory consumption for some inputs).", - group: "common", + group: "Common", } round_rpt: { description: "Round RPT to the nearest INT before comparing to expected values. Appropriate if using `--num-reads` > 0.", - group: "common", + group: "Common", } split_by_rg: { description: "Contain one entry per read group", - group: "common", + group: "Common", } paired_deviance: { description: "Distance from 0.5 split between number of f+l- reads and f-l+ reads allowed to be called 'Paired-End'. Default of `0.0` only appropriate if the whole file is being processed.", - group: "common", + group: "Common", } num_reads: { description: "How many reads to analyze from the start of the file. Any n < 1 to parse whole file.", - group: "common", + group: "Common", } modify_memory_gb: "Add to or subtract from dynamic memory allocation. Default memory is determined by value of `calc_rpt` and the size of the input. Specified in GB." modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." diff --git a/tools/picard.wdl b/tools/picard.wdl index fb8740ee..e2cc704f 100755 --- a/tools/picard.wdl +++ b/tools/picard.wdl @@ -49,7 +49,7 @@ task mark_duplicates { } create_bam: { description: "Enable BAM creation (true)? Or only output MarkDuplicates metrics (false)?", - group: "common", + group: "Common", } clear_dt: "Clear the `DT` tag from the input BAM? For increased performance, if the input BAM does not have the `DT` tag, set to `false`." remove_duplicates: "Remove duplicate reads from the output BAM? If `true`, the output BAM will not contain any duplicate reads." @@ -142,7 +142,7 @@ task validate_bam { ignore_list: { description: "List of Picard errors and warnings to ignore. Possible values can be found on the GATK website (see `external_help`).", external_help: "https://gatk.broadinstitute.org/hc/en-us/articles/360035891231-Errors-in-SAM-or-BAM-files-can-be-diagnosed-with-ValidateSamFile", - group: "common", + group: "Common", } outfile_name: "Name for the ValidateSamFile report file" validation_stringency: { @@ -156,15 +156,15 @@ task validate_bam { } succeed_on_errors: { description: "Succeed the task even if errors *and/or* warnings are detected", - group: "common", + group: "Common", } succeed_on_warnings: { description: "Succeed the task if warnings are detected and there are no errors. Overridden by `succeed_on_errors`", - group: "common", + group: "Common", } summary_mode: { description: "Enable SUMMARY mode?", - group: "common", + group: "Common", } index_validation_stringency_less_exhaustive: "Set `INDEX_VALIDATION_STRINGENCY=LESS_EXHAUSTIVE`?" max_errors: "Set the value of MAX_OUTPUT for `picard ValidateSamFile`. The Picard default is 100, a lower number can enable fast fail behavior" @@ -272,7 +272,7 @@ task sort { "coordinate", "duplicate" ], - group: "common", + group: "Common", } prefix: "Prefix for the sorted BAM file and accessory files. The extensions `.bam`, `.bam.bai`, and `.bam.md5` will be added." validation_stringency: { @@ -357,7 +357,7 @@ task merge_sam_files { "coordinate", "duplicate" ], - group: "common", + group: "Common", } validation_stringency: { description: "Validation stringency for parsing the input BAM.", @@ -819,7 +819,7 @@ task bam_to_fastq { prefix: "Prefix for the file. The extension `` will be added." paired: { description: "Is the data Paired-End (true) or Single-End (false)?", - group: "common", + group: "Common", } memory_gb: "RAM to allocate for task, specified in GB" modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." diff --git a/tools/qualimap.wdl b/tools/qualimap.wdl index dacc80aa..80c0ae5f 100755 --- a/tools/qualimap.wdl +++ b/tools/qualimap.wdl @@ -19,11 +19,11 @@ task rnaseq { memory_gb: "RAM to allocate for task" name_sorted: { description: "Is the BAM name sorted? QualiMap has an inefficient sorting algorithm. In order to save resources we recommend collating your input BAM before QualiMap and setting this parameter to true.", - group: "common", + group: "Common", } paired_end: { description: "Is the BAM paired end?", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } diff --git a/tools/sambamba.wdl b/tools/sambamba.wdl index 734ba4b5..9138131b 100644 --- a/tools/sambamba.wdl +++ b/tools/sambamba.wdl @@ -14,11 +14,11 @@ task index { bam: "Input BAM format file to index" use_all_cores: { description: "Use all cores? Recommended for cloud environments. Not recommended for cluster environments.", - group: "common", + group: "Common", } ncpu: { description: "Number of cores to allocate for task", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } @@ -72,11 +72,11 @@ task merge { prefix: "Prefix for the BAM file. The extension `.bam` will be added." use_all_cores: { description: "Use all cores? Recommended for cloud environments. Not recommended for cluster environments.", - group: "common", + group: "Common", } ncpu: { description: "Number of cores to allocate for task", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } @@ -132,7 +132,7 @@ task sort { prefix: "Prefix for the sorted BAM file. The extension `.bam` will be added." queryname_sort: { description: "If true, sort the BAM by queryname. If false, sort by coordinate.", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." ncpu: "Number of cores to allocate for task" @@ -189,7 +189,7 @@ task markdup { prefix: "Prefix for the markdup result files. The extensions `markdup.bam` will be added." remove_duplicates: { description: "If true, remove duplicates instead of marking them.", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." ncpu: "Number of cores to allocate for task" @@ -243,11 +243,11 @@ task flagstat { outfile_name: "Name for the flagstat report file" use_all_cores: { description: "Use all cores? Recommended for cloud environments. Not recommended for cluster environments.", - group: "common", + group: "Common", } ncpu: { description: "Number of cores to allocate for task", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } diff --git a/tools/samtools.wdl b/tools/samtools.wdl index cea87e1c..d9d73dfd 100755 --- a/tools/samtools.wdl +++ b/tools/samtools.wdl @@ -57,19 +57,19 @@ task split { prefix: "Prefix for the split BAM files. The extensions will contain read group IDs, and will end in `.bam`." reject_unaccounted_reads: { description: "If true, error if there are reads present that do not have read group information matching the header.", - group: "common", + group: "Common", } reject_empty_output: { description: "If true, error if any output BAMs are empty.", - group: "common", + group: "Common", } use_all_cores: { description: "Use all cores? Recommended for cloud environments.", - group: "common", + group: "Common", } ncpu: { description: "Number of cores to allocate for task", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } @@ -168,11 +168,11 @@ task flagstat { outfile_name: "Name for the flagstat report file" use_all_cores: { description: "Use all cores? Recommended for cloud environments.", - group: "common", + group: "Common", } ncpu: { description: "Number of cores to allocate for task", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } @@ -225,11 +225,11 @@ task index { bam: "Input BAM format file to index" use_all_cores: { description: "Use all cores? Recommended for cloud environments.", - group: "common", + group: "Common", } ncpu: { description: "Number of cores to allocate for task", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } @@ -288,11 +288,11 @@ task subsample { prefix: "Prefix for the BAM file. The extension `.sampled.bam` will be added." use_all_cores: { description: "Use all cores? Recommended for cloud environments.", - group: "common", + group: "Common", } ncpu: { description: "Number of cores to allocate for task", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } @@ -433,11 +433,11 @@ task filter { prefix: "Prefix for the filtered BAM file. The extension `.bam` will be added." use_all_cores: { description: "Use all cores? Recommended for cloud environments.", - group: "common", + group: "Common", } ncpu: { description: "Number of cores to allocate for task", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } @@ -520,27 +520,27 @@ task merge { region: "Merge files in the specified region (Format: `chr:start-end`)" attach_rg: { description: "Attach an RG tag to each alignment. The tag value is inferred from file names.", - group: "common", + group: "Common", } name_sorted: { description: "Are _all_ input BAMs `queryname` sorted (true)? Or are _all_ input BAMs `coordinate` sorted (false)?", - group: "common", + group: "Common", } combine_rg: { description: "When several input files contain @RG headers with the same ID, emit only one of them (namely, the header line from the first file we find that ID in) to the merged output file. Combining these similar headers is usually the right thing to do when the files being merged originated from the same file. Without `-c`, all @RG headers appear in the output file, with random suffixes added to their IDs where necessary to differentiate them.", - group: "common", + group: "Common", } combine_pg: { description: "Similarly to `combine_rg`: for each @PG ID in the set of files to merge, use the @PG line of the first file we find that ID in rather than adding a suffix to differentiate similar IDs.", - group: "common", + group: "Common", } use_all_cores: { description: "Use all cores? Recommended for cloud environments.", - group: "common", + group: "Common", } ncpu: { description: "Number of cores to allocate for task", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } @@ -620,24 +620,24 @@ task addreplacerg { read_group_id: "Allows you to specify the read group ID of an existing @RG line and applies it to the reads specified by the `orphan_only` option" read_group_line: { description: "Allows you to specify a read group line to append to (or replace in) the header and applies it to the reads specified by the `orphan_only` option. Each String in the Array should correspond to one field of the read group line. Tab literals will be inserted between each entry in the final BAM. Only **one** read group line can be supplied per invocation of this task.", - group: "common", + group: "Common", } prefix: "Prefix for the BAM file. The extension `.bam` will be added." orphan_only: { description: "Only add RG tags to orphans (true)? Or _also_ overwrite all existing RG tags (including any in the header) (false)?", - group: "common", + group: "Common", } overwrite_header_record: { description: "Overwrite an existing @RG line, if a new one with the same ID value is provided?", - group: "common", + group: "Common", } use_all_cores: { description: "Use all cores? Recommended for cloud environments.", - group: "common", + group: "Common", } ncpu: { description: "Number of cores to allocate for task", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } @@ -705,15 +705,15 @@ task collate { prefix: "Prefix for the collated BAM file. The extension `.bam` will be added." fast_mode: { description: "Use fast mode (output primary alignments only)?", - group: "common", + group: "Common", } use_all_cores: { description: "Use all cores? Recommended for cloud environments.", - group: "common", + group: "Common", } ncpu: { description: "Number of cores to allocate for task", - group: "common", + group: "Common", } modify_memory_gb: "Add to or subtract from dynamic memory allocation. Default memory is determined by the size of the inputs. Specified in GB." modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." @@ -785,41 +785,41 @@ task bam_to_fastq { prefix: "Prefix for the collated BAM and FASTQ files. The extensions `.collated.bam` and `[,.R1,.R2,.singleton].fastq.gz` will be added." paired_end: { description: "Is the data Paired-End? If `paired_end == false`, then _all_ reads in the BAM will be output to a single FASTQ file. Use `bitwise_filter` argument to remove any unwanted reads.", - group: "common", + group: "Common", } collated: { description: "Is the BAM collated (or name-sorted)? If `collated == true`, then the input BAM will be run through `samtools fastq` without preprocessing. If `collated == false`, then `samtools collate` must be run on the input BAM before conversion to FASTQ. Ignored if `paired_end == false`.", - group: "common", + group: "Common", } retain_collated_bam: { description: "Save the collated BAM to disk and output it (true)? This slows performance and **substantially** increases storage requirements. Be aware that collated BAMs occupy much more space than either position sorted or name sorted BAMs (due to the compression algorithm). Ignored if `collated == true` **or** `paired_end == false`.", - group: "common", + group: "Common", } fast_mode: { description: "Fast mode for `samtools collate`? If `true`, this **removes secondary and supplementary reads** during the `collate` step. If `false`, secondary and supplementary reads will be retained in the `collated_bam` output (if created). Defaults to the opposite of `retain_collated_bam`. Ignored if `collated == true` **or** `paired_end == false`.", - group: "common", + group: "Common", } append_read_number: { description: "Append /1 and /2 suffixes to read names?", - group: "common", + group: "Common", } interleaved: { description: "Create an interleaved FASTQ file from Paired-End data? Ignored if `paired_end == false`.", - group: "common", + group: "Common", } output_singletons: "Output singleton reads as their own FASTQ? Ignored if `paired_end == false`." fail_on_unexpected_reads: { description: "Should the task fail if reads with an unexpected `first`/`last` bit setting are discovered?", help: "The definition of 'unexpected' depends on whether the values of `paired_end` and `output_singletons` are true or false. If `paired_end` is `false`, no reads are considered unexpected, and _every_ read (not caught by `bitwise_filter`) will be present in the resulting FASTQ regardless of `first`/`last` bit settings. This setting will be ignored in that case. If `paired_end` is `true` then reads that don't satisfy `first` XOR `last` are considered unexpected (i.e. reads that have neither `first` nor `last` set or reads that have both `first` and `last` set). If `output_singletons` is `false`, singleton reads are considered unexpected. A singleton read is a read with either the `first` or the `last` bit set (but not both) and that possesses a _unique_ QNAME; i.e. it is a read without a pair when all reads are expected to be paired. But if `output_singletons` is `true`, these singleton reads will be output as their own FASTQ instead of causing the task to fail. If `fail_on_unexpected_reads` is `false`, then all the above cases will be ignored. Any 'unexpected' reads will be silently discarded.", - group: "common", + group: "Common", } use_all_cores: { description: "Use all cores? Recommended for cloud environments.", - group: "common", + group: "Common", } ncpu: { description: "Number of cores to allocate for task", - group: "common", + group: "Common", } modify_memory_gb: "Add to or subtract from dynamic memory allocation. Default memory is determined by the size of the inputs. Specified in GB." modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." @@ -985,28 +985,28 @@ task fixmate { ".bam", ".cram" ], - group: "common", + group: "Common", } add_cigar: { description: "Add template cigar `ct` tag", tool_default: false, - group: "common", + group: "Common", } add_mate_score: { description: "Add mate score tags. These are used by `markdup` to select the best reads to keep.", tool_default: false, - group: "common", + group: "Common", } disable_flag_sanitization: "Disable all flag sanitization?" disable_proper_pair_check: "Disable proper pair check [ensure one forward and one reverse read in each pair]" remove_unaligned_and_secondary: "Remove unmapped and secondary reads" use_all_cores: { description: "Use all cores? Recommended for cloud environments.", - group: "common", + group: "Common", } ncpu: { description: "Number of cores to allocate for task", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } @@ -1075,28 +1075,28 @@ task position_sorted_fixmate { prefix: "Prefix for the output file. The extension `.bam` will be added." fast_mode: { description: "Use fast mode (output primary alignments only)?", - group: "common", + group: "Common", } add_cigar: { description: "Add template cigar `ct` tag", tool_default: false, - group: "common", + group: "Common", } add_mate_score: { description: "Add mate score tags. These are used by `markdup` to select the best reads to keep.", tool_default: false, - group: "common", + group: "Common", } disable_flag_sanitization: "Disable all flag sanitization?" disable_proper_pair_check: "Disable proper pair check [ensure one forward and one reverse read in each pair]?" remove_unaligned_and_secondary: "Remove unmapped and secondary reads" use_all_cores: { description: "Use all cores? Recommended for cloud environments.", - group: "common", + group: "Common", } ncpu: { description: "Number of cores to allocate for task", - group: "common", + group: "Common", } modify_memory_gb: "Add to or subtract from dynamic memory allocation. Default memory is determined by the size of the inputs. Specified in GB." modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." @@ -1205,13 +1205,13 @@ task markdup { use_read_groups: "Only mark duplicates _within_ the same Read Group? Ignored if `create_bam == false`." use_all_cores: { description: "Use all cores? Recommended for cloud environments.", - group: "common", + group: "Common", } max_readlen: "Expected maximum read length." optical_distance: "Maximum distance between read coordinates to consider them optical duplicates. If `0`, then optical duplicate marking is disabled. Suggested settings of 100 for HiSeq style platforms or about 2500 for NovaSeq ones. When set above `0`, duplicate reads are tagged with `dt:Z:SQ` for optical duplicates and `dt:Z:LB` otherwise. Calculation of distance depends on coordinate data embedded in the read names, typically produced by the Illumina sequencing machines. Optical duplicate detection will not work on non-standard names without modifying `read_coords_regex`. If changing `read_coords_regex`, make sure that `coordinates_order` matches." ncpu: { description: "Number of cores to allocate for task", - group: "common", + group: "Common", } modify_memory_gb: "Add to or subtract from dynamic memory allocation. Default memory is determined by the size of the inputs. Specified in GB." modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." diff --git a/tools/star.wdl b/tools/star.wdl index 8b025484..dfc433f1 100755 --- a/tools/star.wdl +++ b/tools/star.wdl @@ -15,11 +15,11 @@ task build_star_db { gtf: "GTF format feature file" db_name: { description: "Name for output in compressed, archived format. The suffix `.tar.gz` will be added.", - group: "common", + group: "Common", } sjdb_gtf_chr_prefix: { description: "prefix for chromosome names in a GTF file (e.g. 'chr' for using ENSEMBL annotations with UCSC genomes)", - group: "common", + group: "Common", } sjdb_gtf_feature_exon: "feature type in GTF file to be used as exons for building transcripts" sjdb_gtf_tag_exon_parant_transcript: "GTF attribute name for parent transcript ID" @@ -28,7 +28,7 @@ task build_star_db { sjdb_gtf_tag_exon_parent_gene_type: "GTF attribute name for parent gene type" use_all_cores: { description: "Use all cores? Recommended for cloud environments.", - group: "common", + group: "Common", } genome_chr_bin_n_bits: "=log2(chrBin), where chrBin is the size of the bins for genome storage: each chromosome will occupy an integer number of bins. For a genome with large number of contigs, it is recommended to scale this parameter as min(18, log2[max(GenomeLength/NumberOfReferences,ReadLength)])." genome_SA_index_n_bases: "length (bases) of the SA pre-indexing string. Typically between 10 and 15. Longer strings will use much more memory, but allow faster searches. For small genomes, the parameter `--genomeSAindexNbases` must be scaled down to `min(14, log2(GenomeLength)/2 - 1)`." @@ -36,11 +36,11 @@ task build_star_db { genome_suffix_length_max: "maximum length of the suffixes, has to be longer than read length. -1 = infinite." sjdb_overhang: { description: "length of the donor/acceptor sequence on each side of the junctions, ideally = (mate_length - 1). **[STAR default]**: `100`. **[WDL default]**: `125`.", - group: "common", + group: "Common", } ncpu: { description: "Number of cores to allocate for task", - group: "common", + group: "Common", } memory_gb: "RAM to allocate for task, specified in GB" modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." @@ -152,11 +152,11 @@ task alignment { description: "An array of `String`s where each `String` corresponds to one read group.", help: "Each read group string should start with the `ID` field followed by any other read group fields, where fields are delimited by a space. See `../data_structures/read_group.wdl` for information about possible fields and utility tasks for constructing, validating, and \"stringifying\" read groups.", warning: "The `ID` field for each read group _must_ be contained in the basename of a FASTQ file or pair of FASTQ files if Paired-End. Example: `[\"ID:rg1 PU:flowcell1.lane1 SM:sample1 PL:illumina LB:sample1_lib1\", \"ID:rg2 PU:flowcell1.lane2 SM:sample1 PL:illumina LB:sample1_lib1\"]`. These two read groups could be associated with the following four FASTQs: `[\"sample1.rg1.R1.fastq\", \"sample1.rg2.R1.fastq\"]` and `[\"sample1.rg1.R2.fastq\", \"sample1.rg2.R2.fastq\"]`", - group: "common", + group: "Common", } read_two_fastqs_gz: { description: "An array of gzipped FASTQ files containing read two information", - group: "common", + group: "Common", } out_sj_filter_intron_max_vs_read_n: { description: "maximum gap allowed for junctions supported by 1,2,3,,,N reads. i.e. by default junctions supported by 1 read can have gaps <=50000b, by 2 reads: <=100000b, by 3 reads: <=200000b. by >=4 reads any gap <=alignIntronMax. Does not apply to annotated junctions.", @@ -189,7 +189,7 @@ task alignment { sequence: "A nucleotide sequence string of any length, matching the regex `/[ATCG]+/`", polyA: "polyA sequence with the length equal to read length", }, - group: "common", + group: "Common", } clip_3p_adapter_mmp: "max proportion of mismatches for 3p adapter clipping for each mate. `left` applies to read one and `right` applies to read two." align_ends_protrude: { @@ -217,7 +217,7 @@ task alignment { None: "not used", intronMotif: "strand derived from the intron motif. This option changes the output alignments: reads with inconsistent and/or non-canonical introns are filtered out.", }, - group: "common", + group: "Common", } out_sam_attributes: { description: "a string of desired SAM attributes, in the order desired for the output SAM. Tags can be listed in any combination/order. **[STAR defaults]**: `NH HI AS nM`. **[WDL default]**: `NH HI AS nM NM MD XS`.", @@ -235,7 +235,7 @@ task alignment { ch: "marks all segments of all chimeric alignments for --chimOutType WithinBAM output.", cN: "number of bases clipped from the read ends: 5' and 3'", }, - group: "common", + group: "Common", } out_sam_unmapped: { description: "output of unmapped reads in the SAM format.", @@ -271,7 +271,7 @@ task alignment { Normal: "standard filtering using only current alignment", BySJout: "keep only those reads that contain junctions that passed filtering into SJ.out.tab", }, - group: "common", + group: "Common", } out_filter_intron_motifs: { description: "filter alignment using their motifs", @@ -280,7 +280,7 @@ task alignment { RemoveNoncanonical: "filter out alignments that contain non-canonical junctions", RemoveNoncanonicalUnannotated: "filter out alignments that contain non-canonical unannotated junctions when using annotated splice junctions database. The annotated non-canonical junctions will be kept.", }, - group: "common", + group: "Common", } out_filter_intron_strands: { description: "filter alignments", @@ -288,7 +288,7 @@ task alignment { None: "no filtering", RemoveInconsistentStrands: "remove alignments that have junctions with inconsistent strands", }, - group: "common", + group: "Common", } out_sj_filter_reads: { description: "which reads to consider for collapsed splice junctions output", @@ -296,7 +296,7 @@ task alignment { All: "all reads, unique- and multi-mappers", Unique: "uniquely mapping reads only", }, - group: "common", + group: "Common", } align_ends_type: { description: "type of read ends alignment", @@ -313,7 +313,7 @@ task alignment { Yes: "allow", No: "prohibit, useful for compatibility with Cufflinks", }, - group: "common", + group: "Common", } align_insertion_flush: { description: "how to flush ambiguous insertion positions", @@ -321,7 +321,7 @@ task alignment { None: "insertions are not flushed", Right: "insertions are flushed to the right", }, - group: "common", + group: "Common", } chim_out_type: { description: "type of chimeric output", @@ -331,7 +331,7 @@ task alignment { WithinBAM_SoftClip: "output into main aligned BAM files (Aligned.*.bam). Soft-clipping in the CIGAR for supplemental chimeric alignments.", }, tool_default: "Junctions", - group: "common", + group: "Common", } chim_filter: { description: "different filters for chimeric alignments", @@ -346,7 +346,7 @@ task alignment { plain: "no comment lines/headers", comments: "comment lines at the end of the file: command line and Nreads: total, unique/multi-mapping", }, - group: "common", + group: "Common", } twopass_mode: { description: "2-pass mapping mode", @@ -354,7 +354,7 @@ task alignment { None: "1-pass mapping **[STAR default]**", Basic: "basic 2-pass mapping, with all 1st pass junctions inserted into the genome indices on the fly **[WDL default]**", }, - group: "common", + group: "Common", } use_all_cores: { description: "Use all cores? Recommended for cloud environments.", @@ -370,15 +370,15 @@ task alignment { pe_overlap_mmp: "maximum proportion of mismatched bases in the overlap area" run_rng_seed: { description: "random number generator seed", - group: "common", + group: "Common", } sjdb_score: { description: "extra alignment score for alignments that cross database junctions", - group: "common", + group: "Common", } read_map_number: { description: "number of reads to map from the beginning of the file. -1 to map all reads", - group: "common", + group: "Common", } read_quality_score_base: "number to be subtracted from the ASCII code to get Phred quality score" limit_out_sj_one_read: "max number of junctions for one read (including all multi-mappers)" @@ -392,19 +392,19 @@ task alignment { out_filter_multimap_score_range: "the score range below the maximum score for multimapping alignments" out_filter_multimap_n_max: { description: "maximum number of loci the read is allowed to map to. Alignments (all of them) will be output only if the read maps to no more loci than this value. Otherwise no alignments will be output, and the read will be counted as 'mapped to too many loci' in the Log.final.out. **[STAR default]**: `10`. **[WDL default]**: `20`.", - group: "common", + group: "Common", } out_filter_mismatch_n_max: { description: "alignment will be output only if it has no more mismatches than this value", - group: "common", + group: "Common", } out_filter_score_min: { description: "alignment will be output only if its score is higher than or equal to this value", - group: "common", + group: "Common", } out_filter_match_n_min: { description: "alignment will be output only if the number of matched bases is higher than or equal to this value", - group: "common", + group: "Common", } score_gap: "splice junction penalty (independent on intron motif)" score_gap_noncanon: "non-canonical junction penalty (in addition to scoreGap)" @@ -425,23 +425,23 @@ task alignment { seed_map_min: "min length of seeds to be mapped" align_intron_min: { description: "minimum intron size: genomic gap is considered intron if its length>=alignIntronMin, otherwise it is considered Deletion", - group: "common", + group: "Common", } align_intron_max: { description: "maximum intron size, if 0, max intron size will be determined by (2^winBinNbits)*winAnchorDistNbins. **[STAR default]**: `0`. **[WDL default]**: `500000`.", - group: "common", + group: "Common", } align_mates_gap_max: { description: "maximum gap between two mates, if 0, max intron gap will be determined by (2^winBinNbits)*winAnchorDistNbins. **[STAR default]**: `0`. **[WDL default]**: `1000000`", - group: "common", + group: "Common", } align_sj_overhang_min: { description: "minimum overhang (i.e. block size) for spliced alignments", - group: "common", + group: "Common", } align_sjdb_overhang_min: { description: "minimum overhang (i.e. block size) for annotated (sjdb) spliced alignments. **[STAR default]**: `3`. **[WDL default]**: `1`.", - group: "common", + group: "Common", } align_spliced_mate_map_l_min: "minimum mapped length for a read mate that is spliced" align_windows_per_read_n_max: "max number of windows per read" @@ -455,39 +455,39 @@ task alignment { chim_segment_min: { description: "minimum length of chimeric segment length, if ==0, no chimeric output", tool_default: 0, - group: "common", + group: "Common", } chim_score_min: { description: "minimum total (summed) score of the chimeric segments", - group: "common", + group: "Common", } chim_score_drop_max: { description: "max drop (difference) of chimeric score (the sum of scores of all chimeric segments) from the read length", - group: "common", + group: "Common", } chim_score_separation: "minimum difference (separation) between the best chimeric score and the next one" chim_score_junction_nonGTAG: "penalty for a non-GT/AG chimeric junction" chim_junction_overhang_min: { description: "minimum overhang for a chimeric junction", - group: "common", + group: "Common", } chim_segment_read_gap_max: { description: "maximum gap in the read sequence between chimeric segments", - group: "common", + group: "Common", } chim_main_segment_multi_n_max: { description: "maximum number of multi-alignments for the main chimeric segment. =1 will prohibit multimapping main segments.", - group: "common", + group: "Common", } chim_multimap_n_max: { description: "maximum number of chimeric multi-alignments. `0`: use the old scheme for chimeric detection which only considered unique alignments", - group: "common", + group: "Common", } chim_multimap_score_range: "the score range for multi-mapping chimeras below the best chimeric score. Only works with --chimMultimapNmax > 1." chim_nonchim_score_drop_min: "to trigger chimeric detection, the drop in the best non-chimeric alignment score with respect to the read length has to be greater than this value" twopass1_reads_n: { description: "number of reads to process for the 1st step. Use default (`-1`) to map all reads in the first step", - group: "common", + group: "Common", } ncpu: { description: "Number of cores to allocate for task", diff --git a/tools/util.wdl b/tools/util.wdl index b131751f..fe28df92 100644 --- a/tools/util.wdl +++ b/tools/util.wdl @@ -63,7 +63,7 @@ task get_read_groups { clean: { description: "Clean @RG lines to remove the `@RG\t` prefix and use spaces instead of tabs (true) or output @RG lines of the header without further processing (false)?", help: "`clean = true` output matches the formatting of the `read_group_to_string` task in `../data_structures/read_group.wdl`", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } @@ -117,7 +117,7 @@ task split_string { string: "String to split on occurences of `delimiter`" delimiter: { description: "Delimiter on which to split `input_string`", - group: "common", + group: "Common", } } @@ -158,7 +158,7 @@ task calc_gene_lengths { outfile_name: "Name of the gene lengths file" idattr: { description: "GTF attribute to be used as feature ID. The value of this attribute will be used as the first column in the output file.", - group: "common", + group: "Common", } modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } diff --git a/workflows/rnaseq/rnaseq-core.wdl b/workflows/rnaseq/rnaseq-core.wdl index eeb5f22e..1255904e 100644 --- a/workflows/rnaseq/rnaseq-core.wdl +++ b/workflows/rnaseq/rnaseq-core.wdl @@ -74,7 +74,7 @@ workflow rnaseq_core { description: "This overrides the STAR alignment default. Maximum number of loci the read is allowed to map to. Alignments (all of them) will be output only if the read maps to no more loci than this value. Otherwise no alignments will be output, and the read will be counted as 'mapped to too many loci' in the Log.final.out.", tool: "star", tool_default: 10, - group: "common", + group: "Common", } pe_overlap_n_bases_min: { description: "This overrides the STAR alignment default. Minimum number of overlap bases to trigger mates merging and realignment. Specify >0 value to switch on the 'merging of overlapping mates' algorithm.", @@ -95,25 +95,25 @@ workflow rnaseq_core { description: "This overrides the STAR alignment default. Minimum overhang for a chimeric junction", tool: "star", tool_default: 20, - group: "common", + group: "Common", } chim_segment_read_gap_max: { description: "This overrides the STAR alignment default. Maximum gap in the read sequence between chimeric segments", tool: "star", tool_default: 0, - group: "common", + group: "Common", } chim_multimap_n_max: { description: "This overrides the STAR alignment default. Maximum number of chimeric multi-alignments. `0`: use the old scheme for chimeric detection which only considered unique alignments", tool: "star", tool_default: 0, - group: "common", + group: "Common", } chim_score_drop_max: { description: "max drop (difference) of chimeric score (the sum of scores of all chimeric segments) from the read length", tool: "star", tool_default: 20, - group: "common", + group: "Common", } } From 1ff962009422c0f7bee3103586a54a83f9dcc76f Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Mon, 17 Feb 2025 16:25:13 -0500 Subject: [PATCH 3/3] WIP --- tools/star.wdl | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tools/star.wdl b/tools/star.wdl index dfc433f1..2447a356 100755 --- a/tools/star.wdl +++ b/tools/star.wdl @@ -28,7 +28,7 @@ task build_star_db { sjdb_gtf_tag_exon_parent_gene_type: "GTF attribute name for parent gene type" use_all_cores: { description: "Use all cores? Recommended for cloud environments.", - group: "Common", + group: "Resources", } genome_chr_bin_n_bits: "=log2(chrBin), where chrBin is the size of the bins for genome storage: each chromosome will occupy an integer number of bins. For a genome with large number of contigs, it is recommended to scale this parameter as min(18, log2[max(GenomeLength/NumberOfReferences,ReadLength)])." genome_SA_index_n_bases: "length (bases) of the SA pre-indexing string. Typically between 10 and 15. Longer strings will use much more memory, but allow faster searches. For small genomes, the parameter `--genomeSAindexNbases` must be scaled down to `min(14, log2(GenomeLength)/2 - 1)`." @@ -40,10 +40,16 @@ task build_star_db { } ncpu: { description: "Number of cores to allocate for task", - group: "Common", + group: "Resources", + } + memory_gb: { + description: "RAM to allocate for task, specified in GB", + group: "Resources", + } + modify_disk_size_gb: { + description: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB.", + group: "Resources", } - memory_gb: "RAM to allocate for task, specified in GB" - modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB." } input {