diff --git a/.github/workflows/validate_cwls.yml b/.github/workflows/validate_cwls.yml new file mode 100644 index 0000000..06d299f --- /dev/null +++ b/.github/workflows/validate_cwls.yml @@ -0,0 +1,31 @@ +name: Validate CWL Files + +on: + push: + branches: [master, develop] + pull_request: + branches: [master, develop] + +jobs: + validate_cwls: + runs-on: macos-latest + strategy: + fail-fast: false + matrix: + python-version: [3.6, 3.7] + steps: + - uses: actions/checkout@v2 + with: + submodules: recursive + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install cwltool + run: | + python -m pip install toil[cwl]==4.2.0 + - uses: actions/checkout@v2 + - name: Validate + run: | + pip install cwltool + find . -name '*.cwl' | xargs -n 1 -P 8 cwltool --validate diff --git a/alignment/alignment__packed.cwl b/alignment/alignment__packed.cwl index 35952b5..ff8ef6f 100644 --- a/alignment/alignment__packed.cwl +++ b/alignment/alignment__packed.cwl @@ -12,7 +12,7 @@ "boolean" ], "https://www.sevenbridges.com/x": 319.15625, - "https://www.sevenbridges.com/y": 852.0390625 + "https://www.sevenbridges.com/y": 958.8671875 }, { "id": "#output_file_name", @@ -21,7 +21,7 @@ "string" ], "https://www.sevenbridges.com/x": 319.15625, - "https://www.sevenbridges.com/y": 745.2109375 + "https://www.sevenbridges.com/y": 852.0390625 }, { "id": "#read_group_description", @@ -30,25 +30,25 @@ "string" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 1388.765625 + "https://www.sevenbridges.com/y": 1495.59375 }, { "id": "#read_group_identifier", "type": "string", "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 1281.9375 + "https://www.sevenbridges.com/y": 1388.765625 }, { "id": "#read_group_library", "type": "string", "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 1175.109375 + "https://www.sevenbridges.com/y": 1281.9375 
}, { "id": "#read_group_platform_unit", "type": "string", "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 1068.28125 + "https://www.sevenbridges.com/y": 1175.109375 }, { "id": "#read_group_run_date", @@ -57,25 +57,25 @@ "string" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 961.453125 + "https://www.sevenbridges.com/y": 1068.28125 }, { "id": "#read_group_sample_name", "type": "string", "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 854.625 + "https://www.sevenbridges.com/y": 961.453125 }, { "id": "#read_group_sequencing_center", "type": "string", "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 747.796875 + "https://www.sevenbridges.com/y": 854.625 }, { "id": "#read_group_sequencing_platform", "type": "string", "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 640.96875 + "https://www.sevenbridges.com/y": 747.796875 }, { "id": "#sort_order", @@ -84,7 +84,7 @@ "string" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 320.484375 + "https://www.sevenbridges.com/y": 427.3125 }, { "id": "#validation_stringency", @@ -98,8 +98,17 @@ { "id": "#reference", "type": "File", + "secondaryFiles": [ + ".amb", + ".fai", + ".sa", + "^.dict", + ".ann", + ".bwt", + ".pac" + ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 427.3125 + "https://www.sevenbridges.com/y": 534.140625 }, { "id": "#reads", @@ -108,7 +117,7 @@ "items": "File" }, "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 534.140625 + "https://www.sevenbridges.com/y": 640.96875 }, { "id": "#output", @@ -117,7 +126,7 @@ "string" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 1602.421875 + "https://www.sevenbridges.com/y": 1709.25 }, { "id": "#P", @@ -126,7 +135,7 @@ "boolean" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 1495.59375 + 
"https://www.sevenbridges.com/y": 1602.421875 }, { "id": "#M", @@ -135,7 +144,7 @@ "boolean" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 1709.25 + "https://www.sevenbridges.com/y": 1816.078125 }, { "id": "#T", @@ -144,7 +153,7 @@ "int" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 213.65625 + "https://www.sevenbridges.com/y": 320.484375 }, { "id": "#Y", @@ -162,7 +171,7 @@ "int" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 1816.078125 + "https://www.sevenbridges.com/y": 1922.90625 }, { "id": "#bwa_number_of_threads", @@ -171,7 +180,16 @@ "int" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 1922.90625 + "https://www.sevenbridges.com/y": 2029.734375 + }, + { + "id": "#temporary_directory", + "type": [ + "null", + "string" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 213.65625 } ], "outputs": [ @@ -181,8 +199,11 @@ "#picard_add_or_replace_read_groups_4_1_8_1/picard_add_or_replace_read_groups_bam" ], "type": "File", - "https://www.sevenbridges.com/x": 1379.46142578125, - "https://www.sevenbridges.com/y": 961.453125 + "secondaryFiles": [ + "^.bai" + ], + "https://www.sevenbridges.com/x": 1389.239501953125, + "https://www.sevenbridges.com/y": 1014.8671875 } ], "steps": [ @@ -240,6 +261,10 @@ { "id": "#picard_add_or_replace_read_groups_4_1_8_1/create_bam_index", "source": "#create_bam_index" + }, + { + "id": "#picard_add_or_replace_read_groups_4_1_8_1/temporary_directory", + "source": "#temporary_directory" } ], "out": [ @@ -250,7 +275,7 @@ "run": "#picard_add_or_replace_read_groups_4.1.8.1.cwl", "label": "picard_add_or_replace_read_groups_4.1.8.1", "https://www.sevenbridges.com/x": 737.3328857421875, - "https://www.sevenbridges.com/y": 870.453125 + "https://www.sevenbridges.com/y": 923.8671875 }, { "id": "#bwa_mem_0_7_17", @@ -302,7 +327,7 @@ "run": "#bwa_mem_0.7.17.cwl", "label": "bwa_mem_0.7.17", 
"https://www.sevenbridges.com/x": 319.15625, - "https://www.sevenbridges.com/y": 1014.8671875 + "https://www.sevenbridges.com/y": 1121.6953125 } ], "requirements": [], @@ -891,12 +916,12 @@ "requirements": [ { "class": "ResourceRequirement", - "ramMin": "${ if(inputs.memory_per_job && inputs.memory_overhead) { return inputs.memory_per_job + inputs.memory_overhead } else if (inputs.memory_per_job && !inputs.memory_overhead){ return inputs.memory_per_job + 2000 } else if(!inputs.memory_per_job && inputs.memory_overhead){ return 32000 + inputs.memory_overhead } else { return 32000 } }", - "coresMin": "${ if (inputs.number_of_threads) { return inputs.number_of_threads } else { return 16 } }" + "ramMin": 34000, + "coresMin": 16 }, { "class": "DockerRequirement", - "dockerPull": "mskaccess/bwa_mem_0.7.17:0.1.0" + "dockerPull": "ghcr.io/msk-access/bwa:0.7.17" }, { "class": "InlineJavascriptRequirement" @@ -1135,6 +1160,14 @@ "prefix": "--CREATE_INDEX" }, "doc": "Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value:false. This option can be set to 'null' to clear the default value. Possible values:{true, false}" + }, + { + "id": "#picard_add_or_replace_read_groups_4.1.8.1.cwl/temporary_directory", + "type": [ + "null", + "string" + ], + "doc": "Default value: null. This option may be specified 0 or more times." 
} ], "outputs": [ @@ -1157,13 +1190,14 @@ }, { "position": 0, - "valueFrom": "-XX:-UseGCOverheadLimit", - "shellQuote": false + "prefix": "-Djava.io.tmpdir=", + "separate": false, + "valueFrom": "${\n if(inputs.temporary_directory)\n return inputs.temporary_directory;\n return runtime.tmpdir\n}" }, { "position": 0, - "valueFrom": "-Djava.io.tmpdir=$(runtime.tmpdir)", - "shellQuote": false + "shellQuote": false, + "valueFrom": "-XX:-UseGCOverheadLimit" }, { "position": 0, @@ -1177,7 +1211,7 @@ { "position": 0, "prefix": "--TMP_DIR", - "valueFrom": "$(runtime.tmpdir)" + "valueFrom": "${\n if(inputs.temporary_directory)\n return inputs.temporary_directory;\n return runtime.tmpdir\n}" }, { "position": 0, @@ -1186,14 +1220,17 @@ } ], "requirements": [ + { + "class": "ShellCommandRequirement" + }, { "class": "ResourceRequirement", - "ramMin": 25000, + "ramMin": 17000, "coresMin": 2 }, { "class": "DockerRequirement", - "dockerPull": "broadinstitute/gatk:4.1.8.1" + "dockerPull": "ghcr.io/msk-access/gatk:4.1.8.1" }, { "class": "InlineJavascriptRequirement" @@ -1236,6 +1273,6 @@ ], "cwlVersion": "v1.0", "$schemas": [ - "http://schema.org/version/9.0/schemaorg-current-http.rdf" + "http://schema.org/version/latest/schemaorg-current-http.rdf" ] } \ No newline at end of file diff --git a/bam_qc_stats/README.md b/bam_qc_stats/README.md new file mode 100644 index 0000000..7692954 --- /dev/null +++ b/bam_qc_stats/README.md @@ -0,0 +1,51 @@ +--- +description: Specifications for performing Indel Re-alignment on a BAM file. 
+--- + +## Indel Re-alignment sub-workflow specification - abra_fx.cwl + +### Tools used: + +- [bedtools genomecov](https://msk-access.gitbook.io/command-line-tools-cwl/bedtools/bedtools_genomecov_v2.28.0_cv2) +- [bedtools merge](https://msk-access.gitbook.io/command-line-tools-cwl/bedtools/bedtools_merge_v2.28.0_cv2) +- [ABRA2](https://msk-access.gitbook.io/command-line-tools-cwl/abra2/abra2_2.22) +- [GATK - FixMateInformation](https://msk-access.gitbook.io/command-line-tools-cwl/picard-tools/picard_fix_mate_information_4.1.8.1) + +### Usage + +```bash +usage: indel_realignment.cwl [-h] [--window_size WINDOW_SIZE] + [--soft_clip_contig SOFT_CLIP_CONTIG] + [--scoring_gap_alignments SCORING_GAP_ALIGNMENTS] + --reference_fasta REFERENCE_FASTA [--no_sort] + [--maximum_mixmatch_rate MAXIMUM_MIXMATCH_RATE] + [--maximum_average_depth MAXIMUM_AVERAGE_DEPTH] + [--ignore_bad_assembly] + [--contig_anchor CONTIG_ANCHOR] + [--consensus_sequence] [--bam_index] + [--number_of_threads NUMBER_OF_THREADS] + [--option_bedgraph] [--no_edge_complex_indel] + [--distance_between_features DISTANCE_BETWEEN_FEATURES] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --window_size WINDOW_SIZE + --soft_clip_contig SOFT_CLIP_CONTIG + --scoring_gap_alignments SCORING_GAP_ALIGNMENTS + --reference_fasta REFERENCE_FASTA + --no_sort + --maximum_mixmatch_rate MAXIMUM_MIXMATCH_RATE + --maximum_average_depth MAXIMUM_AVERAGE_DEPTH + --ignore_bad_assembly + --contig_anchor CONTIG_ANCHOR + --consensus_sequence + --bam_index + --number_of_threads NUMBER_OF_THREADS + --option_bedgraph + --no_edge_complex_indel + --distance_between_features DISTANCE_BETWEEN_FEATURES +``` diff --git a/bam_qc_stats/bam_qc_stats.cwl b/bam_qc_stats/bam_qc_stats.cwl index 8ead8f5..9365b07 100644 --- a/bam_qc_stats/bam_qc_stats.cwl +++ b/bam_qc_stats/bam_qc_stats.cwl @@ -7,7 +7,12 @@ $namespaces: sbg: 'https://www.sevenbridges.com/' 
inputs: - id: input - type: File + type: + - File + - type: array + items: File + secondaryFiles: + - ^.bai 'sbg:x': 0 'sbg:y': 374.0625 - id: target_intervals @@ -29,46 +34,79 @@ inputs: type: string? 'sbg:x': 0 'sbg:y': 53.4375 + - id: hsmetrics_minimum_mapping_quality + type: int? + label: hsmetrics_minimum_mapping_quality + 'sbg:x': 1 + 'sbg:y': 613 + - id: hsmetrics_minimum_base_quality + type: int? + label: hsmetrics_minimum_base_quality + 'sbg:x': 3 + 'sbg:y': 743 + - id: hsmetrics_coverage_cap + type: int? + label: hsmetrics_coverage_cap + 'sbg:x': 2 + 'sbg:y': 872 outputs: - id: gatk_collect_insert_size_metrics_histogram_pdf outputSource: - >- gatk_collect_insert_size_metrics_4_1_8_0/gatk_collect_insert_size_metrics_histogram_pdf - type: File + type: + - File + - type: array + items: File 'sbg:x': 700.636962890625 'sbg:y': 106.875 - id: gatk_collect_insert_size_metrics_txt outputSource: - >- gatk_collect_insert_size_metrics_4_1_8_0/gatk_collect_insert_size_metrics_txt - type: File + type: + - File + - type: array + items: File 'sbg:x': 700.636962890625 'sbg:y': 0 - id: gatk_collect_hs_metrics_txt outputSource: - gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_txt - type: File + type: + - File + - type: array + items: File 'sbg:x': 700.636962890625 'sbg:y': 213.75 - id: gatk_collect_hs_metrics_per_base_coverage_txt outputSource: - >- gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_per_base_coverage_txt - type: File + type: + - File + - type: array + items: File 'sbg:x': 700.636962890625 'sbg:y': 427.5 - id: gatk_collect_hs_metrics_per_target_coverage_txt outputSource: - >- gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_per_target_coverage_txt - type: File + type: + - File + - type: array + items: File 'sbg:x': 700.636962890625 'sbg:y': 320.625 - id: gatk_collect_alignment_summary_metrics_txt outputSource: - >- gatk_collect_alignment_summary_metrics_4_1_3_0/gatk_collect_alignment_summary_metrics_txt - type: File + type: + - File + - 
type: array + items: File 'sbg:x': 700.636962890625 'sbg:y': 534.375 steps: @@ -85,8 +123,8 @@ steps: run: >- ../command_line_tools/gatk_collect_alignment_summary_metrics_4.1.8.0/gatk_collect_alignment_summary_metrics_4.1.8.0.cwl label: GATK-CollectAlignmentSummaryMetrics - 'sbg:x': 208.8125 - 'sbg:y': 402.0625 + 'sbg:x': 334.2886657714844 + 'sbg:y': 560.505126953125 - id: gatk_collect_hs_metrics_4_1_8_0 in: - id: input @@ -95,6 +133,12 @@ steps: source: bait_intervals - id: target_intervals source: target_intervals + - id: coverage_cap + source: hsmetrics_coverage_cap + - id: minimum_base_quality + source: hsmetrics_minimum_base_quality + - id: minimum_mapping_quality + source: hsmetrics_minimum_mapping_quality - id: reference source: reference - id: temporary_directory @@ -106,8 +150,8 @@ steps: run: >- ../command_line_tools/gatk_collect_hs_metrics_4.1.8.0/gatk_collect_hs_metrics_4.1.8.0.cwl label: GATK-CollectHsMetrics - 'sbg:x': 208.8125 - 'sbg:y': 253.1875 + 'sbg:x': 327.8453674316406 + 'sbg:y': 372.8453674316406 - id: gatk_collect_insert_size_metrics_4_1_8_0 in: - id: input @@ -122,8 +166,8 @@ steps: run: >- ../command_line_tools/gatk_collect_insert_size_metrics_4.1.8.0/gatk_collect_insert_size_metrics_4.1.8.0.cwl label: GATK-CollectInsertSizeMetrics - 'sbg:x': 208.8125 - 'sbg:y': 111.3125 + 'sbg:x': 335.57733154296875 + 'sbg:y': 194.7628936767578 requirements: [] $schemas: - 'http://schema.org/version/latest/schemaorg-current-http.rdf' diff --git a/bam_qc_stats/bam_qc_stats__packed.cwl b/bam_qc_stats/bam_qc_stats__packed.cwl index eeb070f..186340d 100644 --- a/bam_qc_stats/bam_qc_stats__packed.cwl +++ b/bam_qc_stats/bam_qc_stats__packed.cwl @@ -7,21 +7,30 @@ "inputs": [ { "id": "#input", - "type": "File", - "https://www.sevenbridges.com/x": -496.41986083984375, - "https://www.sevenbridges.com/y": -282.843994140625 + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "secondaryFiles": [ + "^.bai" + ], + "https://www.sevenbridges.com/x": 
0, + "https://www.sevenbridges.com/y": 374.0625 }, { "id": "#target_intervals", "type": "File", - "https://www.sevenbridges.com/x": -490.1000671386719, - "https://www.sevenbridges.com/y": -133.69674682617188 + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 160.3125 }, { "id": "#bait_intervals", "type": "File", - "https://www.sevenbridges.com/x": -485.0442199707031, - "https://www.sevenbridges.com/y": 11.658624649047852 + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 480.9375 }, { "id": "#reference", @@ -30,8 +39,47 @@ "^.fasta.fai", "^.dict" ], - "https://www.sevenbridges.com/x": -504.0036315917969, - "https://www.sevenbridges.com/y": -426.9353942871094 + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 267.1875 + }, + { + "id": "#temporary_directory", + "type": [ + "null", + "string" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 53.4375 + }, + { + "id": "#hsmetrics_minimum_mapping_quality", + "type": [ + "null", + "int" + ], + "label": "hsmetrics_minimum_mapping_quality", + "https://www.sevenbridges.com/x": 1, + "https://www.sevenbridges.com/y": 613 + }, + { + "id": "#hsmetrics_minimum_base_quality", + "type": [ + "null", + "int" + ], + "label": "hsmetrics_minimum_base_quality", + "https://www.sevenbridges.com/x": 3, + "https://www.sevenbridges.com/y": 743 + }, + { + "id": "#hsmetrics_coverage_cap", + "type": [ + "null", + "int" + ], + "label": "hsmetrics_coverage_cap", + "https://www.sevenbridges.com/x": 2, + "https://www.sevenbridges.com/y": 872 } ], "outputs": [ @@ -40,54 +88,90 @@ "outputSource": [ "#gatk_collect_insert_size_metrics_4_1_8_0/gatk_collect_insert_size_metrics_histogram_pdf" ], - "type": "File", - "https://www.sevenbridges.com/x": 395.9356689453125, - "https://www.sevenbridges.com/y": 146.90231323242188 + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + 
"https://www.sevenbridges.com/y": 106.875 }, { "id": "#gatk_collect_insert_size_metrics_txt", "outputSource": [ "#gatk_collect_insert_size_metrics_4_1_8_0/gatk_collect_insert_size_metrics_txt" ], - "type": "File", - "https://www.sevenbridges.com/x": 389.6158752441406, - "https://www.sevenbridges.com/y": 17.978422164916992 + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + "https://www.sevenbridges.com/y": 0 }, { "id": "#gatk_collect_hs_metrics_txt", "outputSource": [ "#gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_txt" ], - "type": "File", - "https://www.sevenbridges.com/x": 384.5600280761719, - "https://www.sevenbridges.com/y": -112.20942687988281 + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + "https://www.sevenbridges.com/y": 213.75 }, { "id": "#gatk_collect_hs_metrics_per_base_coverage_txt", "outputSource": [ "#gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_per_base_coverage_txt" ], - "type": "File", - "https://www.sevenbridges.com/x": 378.240234375, - "https://www.sevenbridges.com/y": -244.92520141601562 + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + "https://www.sevenbridges.com/y": 427.5 }, { "id": "#gatk_collect_hs_metrics_per_target_coverage_txt", "outputSource": [ "#gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_per_target_coverage_txt" ], - "type": "File", - "https://www.sevenbridges.com/x": 371.9204406738281, - "https://www.sevenbridges.com/y": -373.8490905761719 + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + "https://www.sevenbridges.com/y": 320.625 }, { "id": "#gatk_collect_alignment_summary_metrics_txt", "outputSource": [ 
"#gatk_collect_alignment_summary_metrics_4_1_3_0/gatk_collect_alignment_summary_metrics_txt" ], - "type": "File", - "https://www.sevenbridges.com/x": 373.18438720703125, - "https://www.sevenbridges.com/y": -520.4683837890625 + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + "https://www.sevenbridges.com/y": 534.375 } ], "steps": [ @@ -101,6 +185,10 @@ { "id": "#gatk_collect_alignment_summary_metrics_4_1_3_0/reference", "source": "#reference" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4_1_3_0/temporary_directory", + "source": "#temporary_directory" } ], "out": [ @@ -110,8 +198,8 @@ ], "run": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl", "label": "GATK-CollectAlignmentSummaryMetrics", - "https://www.sevenbridges.com/x": -63.445003509521484, - "https://www.sevenbridges.com/y": -424.1755676269531 + "https://www.sevenbridges.com/x": 334.2886657714844, + "https://www.sevenbridges.com/y": 560.505126953125 }, { "id": "#gatk_collect_hs_metrics_4_1_8_0", @@ -128,9 +216,25 @@ "id": "#gatk_collect_hs_metrics_4_1_8_0/target_intervals", "source": "#target_intervals" }, + { + "id": "#gatk_collect_hs_metrics_4_1_8_0/coverage_cap", + "source": "#hsmetrics_coverage_cap" + }, + { + "id": "#gatk_collect_hs_metrics_4_1_8_0/minimum_base_quality", + "source": "#hsmetrics_minimum_base_quality" + }, + { + "id": "#gatk_collect_hs_metrics_4_1_8_0/minimum_mapping_quality", + "source": "#hsmetrics_minimum_mapping_quality" + }, { "id": "#gatk_collect_hs_metrics_4_1_8_0/reference", "source": "#reference" + }, + { + "id": "#gatk_collect_hs_metrics_4_1_8_0/temporary_directory", + "source": "#temporary_directory" } ], "out": [ @@ -146,8 +250,8 @@ ], "run": "#gatk_collect_hs_metrics_4.1.8.0.cwl", "label": "GATK-CollectHsMetrics", - "https://www.sevenbridges.com/x": -61.321895599365234, - "https://www.sevenbridges.com/y": -194.27346801757812 + "https://www.sevenbridges.com/x": 327.8453674316406, + 
"https://www.sevenbridges.com/y": 372.8453674316406 }, { "id": "#gatk_collect_insert_size_metrics_4_1_8_0", @@ -159,6 +263,10 @@ { "id": "#gatk_collect_insert_size_metrics_4_1_8_0/histogram_file", "default": "histogram.pdf" + }, + { + "id": "#gatk_collect_insert_size_metrics_4_1_8_0/temporary_directory", + "source": "#temporary_directory" } ], "out": [ @@ -171,52 +279,39 @@ ], "run": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl", "label": "GATK-CollectInsertSizeMetrics", - "https://www.sevenbridges.com/x": -52.185672760009766, - "https://www.sevenbridges.com/y": 62.291622161865234 + "https://www.sevenbridges.com/x": 335.57733154296875, + "https://www.sevenbridges.com/y": 194.7628936767578 } ], "requirements": [], - "doap:release": [ + "https://schema.org/author": [ { - "class": "doap:Version", - "doap:name": "bam_qc_stats", - "doap:revision": 1.0 - } - ], - "http://purl.org/dc/terms/contributor": [ - { - "class": "foaf:Organization", - "foaf:member": [ - { - "class": "foaf:Person", - "foaf:mbox": "mailto:murphyc4@mskcc.org", - "foaf:name": "Charles Murphy" - } - ], - "foaf:name": "Memorial Sloan Kettering Cancer Center" + "class": "https://schema.org/Person", + "https://schema.org/email": "mailto:murphyc4@mskcc.org", + "https://schema.org/identifier": "", + "https://schema.org/name": "Charles Murphy" } ], - "http://purl.org/dc/terms/creator": [ - { - "class": "foaf:Organization", - "foaf:member": [ - { - "class": "foaf:Person", - "foaf:mbox": "mailto:murphyc4@mskcc.org", - "foaf:name": "Charles Murphy" - } - ], - "foaf:name": "Memorial Sloan Kettering Cancer Center" + "https://schema.org/citation": "", + "https://schema.org/codeRepository": "https://github.com/msk-access/uncollapsed_bam_generation", + "https://schema.org/contributor": [ + { + "class": "https://schema.org/Person", + "https://schema.org/email": "mailto:shahr2@mskcc.org", + "https://schema.org/identifier": "https://orcid.org/0000-0001-9042-6213", + "https://schema.org/name": "Ronak Shah" } ], + 
"https://schema.org/dateCreated": "2020-09-23", + "https://schema.org/license": "https://spdx.org/licenses/Apache-2.0", "$namespaces": { + "s": "https://schema.org/", "sbg": "https://www.sevenbridges.com/" } }, { "class": "CommandLineTool", "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl", - "label": "GATK-CollectAlignmentSummaryMetrics", "baseCommand": [ "gatk", "CollectAlignmentSummaryMetrics" @@ -272,11 +367,11 @@ "position": 0, "prefix": "-R" }, + "doc": "Reference sequence file. Note that while this argument is not required, without it only a small subset of the metrics will be calculated. Note also that if a reference sequence is provided, it must be accompanied by a sequence dictionary. Default value: null.", "secondaryFiles": [ "^.fasta.fai", "^.dict" - ], - "doc": "Reference sequence file. Note that while this argument is not required, without it only a small subset of the metrics will be calculated. Note also that if a reference sequence is provided, it must be accompanied by a sequence dictionary. Default value: null." + ] }, { "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/adaptor_sequence", @@ -351,8 +446,8 @@ "doc": "Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded. Default value: STRICT. This option can be set to 'null' to clear the default value. 
Possible values: {STRICT,LENIENT, SILENT}" }, { - "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/assume_sorted", "default": true, + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/assume_sorted", "type": [ "null", "boolean" @@ -422,6 +517,14 @@ "prefix": "--USE_JDK_INFLATER" }, "doc": "Use the JDK Inflater instead of the Intel Inflater for reading compressed input" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/temporary_directory", + "type": [ + "null", + "string" + ], + "doc": "Default value: null. This option may be specified 0 or more times." } ], "outputs": [ @@ -433,6 +536,7 @@ } } ], + "label": "GATK-CollectAlignmentSummaryMetrics", "arguments": [ { "position": 0, @@ -442,20 +546,10 @@ { "position": 0, "prefix": "--TMP_DIR", - "valueFrom": "." + "valueFrom": "${\n if(inputs.temporary_directory)\n return inputs.temporary_directory;\n return runtime.tmpdir\n}" }, { "position": 0, - "prefix": "--COMPRESSION_LEVEL", - "valueFrom": "2" - }, - { - "position": 0, - "prefix": "--MAX_RECORDS_IN_RAM", - "valueFrom": "50000" - }, - { - "position": 2, "prefix": "-O", "valueFrom": "${\n if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_alignment_summary_metrics.txt')\n }\n}" } @@ -468,7 +562,7 @@ }, { "class": "DockerRequirement", - "dockerPull": "broadinstitute/gatk:4.1.8.0" + "dockerPull": "ghcr.io/msk-access/gatk:4.1.8.0" }, { "class": "InlineJavascriptRequirement" @@ -511,7 +605,6 @@ { "class": "CommandLineTool", "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl", - "label": "GATK-CollectHsMetrics", "baseCommand": [ "gatk", "CollectHsMetrics" @@ -698,11 +791,11 @@ "position": 0, "prefix": "-R" }, + "doc": "Reference sequence file. Note that while this argument is not required, without it only a small subset of the metrics will be calculated. Note also that if a reference sequence is provided, it must be accompanied by a sequence dictionary. 
Default value: null.", "secondaryFiles": [ "^.fasta.fai", "^.dict" - ], - "doc": "Reference sequence file. Note that while this argument is not required, without it only a small subset of the metrics will be calculated. Note also that if a reference sequence is provided, it must be accompanied by a sequence dictionary. Default value: null." + ] }, { "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/metrics_acciumulation_level", @@ -774,6 +867,14 @@ "null", "int" ] + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/temporary_directory", + "type": [ + "null", + "string" + ], + "doc": "Default value: null. This option may be specified 0 or more times." } ], "outputs": [ @@ -799,6 +900,7 @@ } } ], + "label": "GATK-CollectHsMetrics", "arguments": [ { "position": 0, @@ -808,30 +910,20 @@ { "position": 0, "prefix": "--TMP_DIR", - "valueFrom": "." + "valueFrom": "${\n if(inputs.temporary_directory)\n return inputs.temporary_directory;\n return runtime.tmpdir\n}" }, { "position": 0, - "prefix": "--COMPRESSION_LEVEL", - "valueFrom": "2" - }, - { - "position": 0, - "prefix": "--MAX_RECORDS_IN_RAM", - "valueFrom": "50000" - }, - { - "position": 2, "prefix": "-O", "valueFrom": "${\n if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_hs_metrics.txt')\n }\n}" }, { - "position": 2, + "position": 0, "prefix": "--PER_TARGET_COVERAGE", "valueFrom": "${\n if(inputs.per_target_coverage){\n return inputs.per_target_coverage\n } else {\n return inputs.input.basename.replace(/.bam/, '_per_target_coverage.txt')\n }\n}" }, { - "position": 2, + "position": 0, "prefix": "--PER_BASE_COVERAGE", "valueFrom": "${\n if(inputs.per_base_coverage){\n return inputs.per_base_coverage\n } else {\n return inputs.input.basename.replace(/.bam/, '_per_base_coverage.txt')\n }\n}" } @@ -844,7 +936,7 @@ }, { "class": "DockerRequirement", - "dockerPull": "broadinstitute/gatk:4.1.8.0" + "dockerPull": "ghcr.io/msk-access/gatk:4.1.8.0" }, { 
"class": "InlineJavascriptRequirement" @@ -887,7 +979,6 @@ { "class": "CommandLineTool", "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl", - "label": "GATK-CollectInsertSizeMetrics", "baseCommand": [ "gatk", "CollectInsertSizeMetrics" @@ -1014,8 +1105,8 @@ "doc": "Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded. Default value: STRICT. This option can be set to 'null' to clear the default value. Possible values: {STRICT,LENIENT, SILENT}" }, { - "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/assume_sorted", "default": true, + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/assume_sorted", "type": [ "null", "boolean" @@ -1085,6 +1176,14 @@ "prefix": "--USE_JDK_INFLATER" }, "doc": "Use the JDK Inflater instead of the Intel Inflater for reading compressed input" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/temporary_directory", + "type": [ + "null", + "string" + ], + "doc": "Default value: null. This option may be specified 0 or more times." } ], "outputs": [ @@ -1103,6 +1202,7 @@ } } ], + "label": "GATK-CollectInsertSizeMetrics", "arguments": [ { "position": 0, @@ -1112,17 +1212,7 @@ { "position": 0, "prefix": "--TMP_DIR", - "valueFrom": "." 
- }, - { - "position": 0, - "prefix": "--COMPRESSION_LEVEL", - "valueFrom": "2" - }, - { - "position": 0, - "prefix": "--MAX_RECORDS_IN_RAM", - "valueFrom": "50000" + "valueFrom": "${\n if(inputs.temporary_directory)\n return inputs.temporary_directory;\n return runtime.tmpdir\n}" }, { "position": 2, @@ -1143,7 +1233,7 @@ }, { "class": "DockerRequirement", - "dockerPull": "broadinstitute/gatk:4.1.8.0" + "dockerPull": "ghcr.io/msk-access/gatk:4.1.8.0" }, { "class": "InlineJavascriptRequirement" @@ -1184,5 +1274,8 @@ ] } ], - "cwlVersion": "v1.0" + "cwlVersion": "v1.0", + "$schemas": [ + "http://schema.org/version/latest/schemaorg-current-http.rdf" + ] } \ No newline at end of file diff --git a/base_quality_recalibration/base_quality_recalibration.cwl b/base_quality_recalibration/base_quality_recalibration.cwl index da38e0b..36ca114 100644 --- a/base_quality_recalibration/base_quality_recalibration.cwl +++ b/base_quality_recalibration/base_quality_recalibration.cwl @@ -24,16 +24,12 @@ inputs: - 'null' - type: array items: string - inputBinding: - prefix: '--read-filter' 'sbg:x': 0 'sbg:y': 213.375 - id: known_sites type: type: array items: File - inputBinding: - prefix: '--known-sites' secondaryFiles: - .idx 'sbg:x': 0 @@ -51,8 +47,6 @@ inputs: - 'null' - type: array items: string - inputBinding: - prefix: '--disable-read-filter' 'sbg:x': 0 'sbg:y': 640.0625 - id: lenient diff --git a/base_quality_recalibration/base_quality_recalibration__packed.cwl b/base_quality_recalibration/base_quality_recalibration__packed.cwl index bdfb325..dcac958 100644 --- a/base_quality_recalibration/base_quality_recalibration__packed.cwl +++ b/base_quality_recalibration/base_quality_recalibration__packed.cwl @@ -12,7 +12,7 @@ "^.bai" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 427.4375 + "https://www.sevenbridges.com/y": 533.390625 }, { "id": "#reference", @@ -22,7 +22,7 @@ "^.dict" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 
0 + "https://www.sevenbridges.com/y": 106.703125 }, { "id": "#read_filter", @@ -30,26 +30,23 @@ "null", { "type": "array", - "items": "string", - "inputBinding": { - "prefix": "--read-filter" - } + "items": "string" } ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 106.859375 + "https://www.sevenbridges.com/y": 213.375 }, { "id": "#known_sites", "type": { "type": "array", - "items": "File", - "inputBinding": { - "prefix": "--known-sites" - } + "items": "File" }, + "secondaryFiles": [ + ".idx" + ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 320.578125 + "https://www.sevenbridges.com/y": 426.71875 }, { "id": "#base_recalibrator_output_file_name", @@ -58,7 +55,7 @@ "string" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 641.15625 + "https://www.sevenbridges.com/y": 746.734375 }, { "id": "#add_output_sam_program_record", @@ -67,7 +64,7 @@ "boolean" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 961.734375 + "https://www.sevenbridges.com/y": 853.4375 }, { "id": "#disable_read_filter", @@ -75,14 +72,11 @@ "null", { "type": "array", - "items": "string", - "inputBinding": { - "prefix": "--disable-read-filter" - } + "items": "string" } ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 534.296875 + "https://www.sevenbridges.com/y": 640.0625 }, { "id": "#lenient", @@ -91,7 +85,7 @@ "boolean" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 213.71875 + "https://www.sevenbridges.com/y": 320.046875 }, { "id": "#apply_bqsr_create_output_bam_index", @@ -99,8 +93,8 @@ "null", "boolean" ], - "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 854.875 + "https://www.sevenbridges.com/x": 337.34375, + "https://www.sevenbridges.com/y": 533.453125 }, { "id": "#apply_bqsr_output_file_name", @@ -108,8 +102,17 @@ "null", "string" ], + "https://www.sevenbridges.com/x": 337.34375, + 
"https://www.sevenbridges.com/y": 426.71875 + }, + { + "id": "#temporary_directory", + "type": [ + "null", + "string" + ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 748.015625 + "https://www.sevenbridges.com/y": 0 } ], "outputs": [ @@ -118,12 +121,12 @@ "outputSource": [ "#gatk_apply_bqsr_4_1_8_1/gatk_apply_bqsr_bam" ], - "type": [ - "null", - "File" + "type": "File", + "secondaryFiles": [ + "^.bai" ], - "https://www.sevenbridges.com/x": 1060.585205078125, - "https://www.sevenbridges.com/y": 772.228271484375 + "https://www.sevenbridges.com/x": 1269.836181640625, + "https://www.sevenbridges.com/y": 426.71875 } ], "steps": [ @@ -167,6 +170,10 @@ "source": [ "#read_filter" ] + }, + { + "id": "#gatk_base_recalibrator_4_1_8_1/temporary_directory", + "source": "#temporary_directory" } ], "out": [ @@ -176,8 +183,8 @@ ], "run": "#gatk_base_recalibrator_4.1.8.1.cwl", "label": "gatk_base_recalibrator_4.1.8.1", - "https://www.sevenbridges.com/x": 356.59375, - "https://www.sevenbridges.com/y": 350.4375 + "https://www.sevenbridges.com/x": 337.34375, + "https://www.sevenbridges.com/y": 263.8515625 }, { "id": "#gatk_apply_bqsr_4_1_8_1", @@ -217,6 +224,10 @@ "source": [ "#read_filter" ] + }, + { + "id": "#gatk_apply_bqsr_4_1_8_1/temporary_directory", + "source": "#temporary_directory" } ], "out": [ @@ -226,8 +237,8 @@ ], "run": "#gatk_apply_bqsr_4.1.8.1.cwl", "label": "gatk_apply_bqsr_4.1.8.1", - "https://www.sevenbridges.com/x": 589.6504516601562, - "https://www.sevenbridges.com/y": 741.6892700195312 + "https://www.sevenbridges.com/x": 837.3018188476562, + "https://www.sevenbridges.com/y": 370.5859375 } ], "requirements": [], @@ -260,7 +271,8 @@ "class": "CommandLineTool", "id": "#gatk_apply_bqsr_4.1.8.1.cwl", "baseCommand": [ - "gatk" + "gatk", + "ApplyBQSR" ], "inputs": [ { @@ -733,15 +745,20 @@ "null", "int" ] + }, + { + "id": "#gatk_apply_bqsr_4.1.8.1.cwl/temporary_directory", + "type": [ + "null", + "string" + ], + "doc": "Default value: null. 
This option may be specified 0 or more times." } ], "outputs": [ { "id": "#gatk_apply_bqsr_4.1.8.1.cwl/gatk_apply_bqsr_bam", - "type": [ - "null", - "File" - ], + "type": "File", "outputBinding": { "glob": "${\n if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_bqsr.bam')\n }\n}" }, @@ -755,34 +772,28 @@ { "position": 0, "prefix": "--java-options", - "valueFrom": "${\n if(inputs.memory_per_job && inputs.memory_overhead){\n if(inputs.memory_per_job % 1000 == 0){\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n } else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n } else if (inputs.memory_per_job && !inputs.memory_overhead){\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n } else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n } else if(!inputs.memory_per_job && inputs.memory_overhead){\n return \"-Xmx4G\"\n } else {\n return \"-Xmx4G\"\n }\n}" - }, - { - "position": 2, - "prefix": "--output", - "valueFrom": "${\n if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_bqsr.bam')\n }\n}" + "valueFrom": "${\n if(inputs.memory_per_job && inputs.memory_overhead){\n if(inputs.memory_per_job % 1000 == 0){\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n } else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n } else if (inputs.memory_per_job && !inputs.memory_overhead){\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n } else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n } else if(!inputs.memory_per_job && inputs.memory_overhead){\n return \"-Xmx12G\"\n } else {\n return \"-Xmx12G\"\n }\n}" }, { "position": 2, 
"prefix": "--tmp-dir", - "valueFrom": "$(runtime.tmpdir)" + "valueFrom": "${\n if(inputs.temporary_directory)\n return inputs.temporary_directory;\n return runtime.tmpdir\n}" }, { - "position": 1, - "prefix": "", - "separate": false, - "valueFrom": "ApplyBQSR" + "position": 2, + "prefix": "--output", + "valueFrom": "${\n if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_bqsr.bam')\n }\n}" } ], "requirements": [ { "class": "ResourceRequirement", - "ramMin": 10000, - "coresMin": 8 + "ramMin": 16000, + "coresMin": 4 }, { "class": "DockerRequirement", - "dockerPull": "broadinstitute/gatk:4.1.8.1" + "dockerPull": "ghcr.io/msk-access/gatk:4.1.8.1" }, { "class": "InlineJavascriptRequirement" @@ -837,7 +848,8 @@ "class": "CommandLineTool", "id": "#gatk_base_recalibrator_4.1.8.1.cwl", "baseCommand": [ - "gatk" + "gatk", + "BaseRecalibrator" ], "inputs": [ { @@ -866,7 +878,7 @@ }, "doc": "One or more databases of known polymorphic sites used to exclude regions around known polymorphisms from analysis", "secondaryFiles": [ - "^.idx" + ".idx" ] }, { @@ -1375,6 +1387,14 @@ "null", "int" ] + }, + { + "id": "#gatk_base_recalibrator_4.1.8.1.cwl/temporary_directory", + "type": [ + "null", + "string" + ], + "doc": "Default value: null. This option may be specified 0 or more times." 
} ], "outputs": [ @@ -1391,28 +1411,17 @@ { "position": 0, "prefix": "--java-options", - "valueFrom": "${\n if(inputs.memory_per_job && inputs.memory_overhead){\n if(inputs.memory_per_job % 1000 == 0){\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n } else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n } else if (inputs.memory_per_job && !inputs.memory_overhead){\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n } else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n } else if(!inputs.memory_per_job && inputs.memory_overhead){\n return \"-Xmx4G\"\n } else {\n return \"-Xmx4G\"\n }\n}" - }, - { - "position": 1, - "prefix": "", - "separate": false, - "valueFrom": "BaseRecalibrator" + "valueFrom": "${\n if(inputs.memory_per_job && inputs.memory_overhead){\n if(inputs.memory_per_job % 1000 == 0){\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n } else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n } else if (inputs.memory_per_job && !inputs.memory_overhead){\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n } else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n } else if(!inputs.memory_per_job && inputs.memory_overhead){\n return \"-Xmx12G\"\n } else {\n return \"-Xmx12G\"\n }\n}" }, { "position": 2, "prefix": "--tmp-dir", - "valueFrom": "$(runtime.tmpdir)" + "valueFrom": "${\n if(inputs.temporary_directory)\n return inputs.temporary_directory;\n return runtime.tmpdir\n}" }, { "position": 2, "prefix": "--output", "valueFrom": "${\n if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_bqsr.table')\n }\n}" - }, - { - "position": 2, - "prefix": "--verbosity", - "valueFrom": "INFO" } 
], "requirements": [ @@ -1423,7 +1432,7 @@ }, { "class": "DockerRequirement", - "dockerPull": "broadinstitute/gatk:4.1.8.1" + "dockerPull": "ghcr.io/msk-access/gatk:4.1.8.1" }, { "class": "InlineJavascriptRequirement" @@ -1470,6 +1479,6 @@ ], "cwlVersion": "v1.0", "$schemas": [ - "http://schema.org/version/9.0/schemaorg-current-http.rdf" + "http://schema.org/version/latest/schemaorg-current-http.rdf" ] } \ No newline at end of file diff --git a/command_line_tools b/command_line_tools index 28551d5..0a0e020 160000 --- a/command_line_tools +++ b/command_line_tools @@ -1 +1 @@ -Subproject commit 28551d5bfbbcd4cd2e6755395a01c7494fa244a1 +Subproject commit 0a0e020ab646d049d424df432058b43a8b9bc927 diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 0b9bd44..56bdb3d 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -3,9 +3,14 @@ - [MSK-ACCESS sub-workflows](../README.md) - [Installation and Usage](install.md) - [Alignment sub-workflow](../alignment/README.md) - - [INDEL re-alignment sub-workflow](../indel_realignment/README.md) - [Base Quality Score Recalibration sub-workflow](../base_quality_recalibration/README.md) + - [Collapsed BAM QC sub-workflow](../qc_collapsed_bam/README.md) + - [Duplex BAM QC sub-workflow](../qc_duplex_bam/README.md) - [Fgbio Separate Bams](../fgbio_separate_bams/README.md) + - [GetBaseCountsMultiSample Genotyping](../gbcms_genotyping/README.md) + - [INDEL re-alignment sub-workflow](../indel_realignment/README.md) + - [Simplex BAM QC sub-workflow](../qc_simplex_bam/README.md) + - [Uncollapsed BAM QC sub-workflow](../qc_uncollapsed_bam/README.md) ## Github Specifications diff --git a/fgbio_separate_bams/fgbio_separate_bams__packed.cwl b/fgbio_separate_bams/fgbio_separate_bams__packed.cwl index f45f199..98b6f08 100644 --- a/fgbio_separate_bams/fgbio_separate_bams__packed.cwl +++ b/fgbio_separate_bams/fgbio_separate_bams__packed.cwl @@ -34,7 +34,7 @@ "id": "#fgbio_filter_consensus_reads_1.2.0.cwl/input", "type": "File", "inputBinding": { - 
"position": 0, + "position": 2, "prefix": "--input", "shellQuote": false }, @@ -52,12 +52,12 @@ "id": "#fgbio_filter_consensus_reads_1.2.0.cwl/reference_fasta", "type": "File", "inputBinding": { - "position": 0, + "position": 2, "prefix": "--ref" }, "doc": "Reference fasta file.", "secondaryFiles": [ - "^.fai", + ".fai", "^.dict" ] }, @@ -68,7 +68,7 @@ "boolean" ], "inputBinding": { - "position": 0, + "position": 2, "prefix": "--reverse-per-base-tags" }, "doc": "Reverse [complement] per base tags on reverse strand reads." @@ -83,9 +83,10 @@ } ], "inputBinding": { - "position": 0, + "position": 2, "prefix": "--min-reads", - "itemSeparator": " " + "itemSeparator": " ", + "shellQuote": false }, "doc": "The minimum number of reads supporting a consensus base/read. (Max 3 values)" }, @@ -99,7 +100,7 @@ } ], "inputBinding": { - "position": 0, + "position": 2, "prefix": "--max-read-error-rate", "itemSeparator": " " }, @@ -115,7 +116,7 @@ } ], "inputBinding": { - "position": 0, + "position": 2, "prefix": "--max-base-error-rate", "itemSeparator": " " }, @@ -123,12 +124,9 @@ }, { "id": "#fgbio_filter_consensus_reads_1.2.0.cwl/min_base_quality", - "type": [ - "null", - "int" - ], + "type": "int", "inputBinding": { - "position": 0, + "position": 2, "prefix": "--min-base-quality" }, "doc": "Mask (make N) consensus bases with quality less than this threshold." @@ -140,7 +138,7 @@ "float" ], "inputBinding": { - "position": 0, + "position": 2, "prefix": "--max-no-call-fraction" }, "doc": "Maximum fraction of no-calls in the read after filtering" @@ -152,7 +150,7 @@ "int" ], "inputBinding": { - "position": 0, + "position": 2, "prefix": "--min-mean-base-quality" }, "doc": "The minimum mean base quality across the consensus read" @@ -164,10 +162,31 @@ "boolean" ], "inputBinding": { - "position": 0, + "position": 2, "prefix": "--require-single-strand-agreement" }, "doc": "Mask (make N) consensus bases where the AB and BA consensus reads disagree (for duplex-sequencing only)." 
+ }, + { + "id": "#fgbio_filter_consensus_reads_1.2.0.cwl/temporary_directory", + "type": [ + "null", + "string" + ], + "doc": "Default value: null." + }, + { + "id": "#fgbio_filter_consensus_reads_1.2.0.cwl/async_io", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "separate": false, + "prefix": "--async-io=" + }, + "doc": "'Use asynchronous I/O where possible, e.g. for SAM and BAM files [=true|false].'" } ], "outputs": [ @@ -187,35 +206,27 @@ "arguments": [ { "position": 0, - "prefix": "", - "valueFrom": "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n return \"-Xmx10G\"\n }\n else {\n return \"-Xmx10G\"\n }\n}" + "valueFrom": "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n return \"-Xmx12G\"\n }\n else {\n return \"-Xmx12G\"\n }\n}" }, { "position": 0, "valueFrom": "-XX:-UseGCOverheadLimit" }, { - "position": 0, - "prefix": 
"-Djava.io.tmpdir=", - "separate": false, - "shellQuote": false, - "valueFrom": "${ return runtime.tmpdir}" + "position": 1, + "valueFrom": "FilterConsensusReads" }, { "position": 0, - "prefix": "", - "valueFrom": "FilterConsensusReads" + "prefix": "--tmp-dir=", + "separate": false, + "valueFrom": "${\n if(inputs.temporary_directory)\n return inputs.temporary_directory;\n return runtime.tmpdir\n}" }, { - "position": 0, + "position": 2, "prefix": "--output", "shellQuote": false, "valueFrom": "${\n if(inputs.output_file_name)\n return inputs.output_file_name;\n return inputs.input.basename.replace(/.bam/,'_filtered.bam');\n}" - }, - { - "position": 0, - "prefix": "--threads", - "valueFrom": "${\n if(inputs.number_of_threads)\n return inputs.number_of_threads\n return runtime.cores\n}" } ], "requirements": [ @@ -224,12 +235,12 @@ }, { "class": "ResourceRequirement", - "ramMin": 4000, + "ramMin": 16000, "coresMin": 2 }, { "class": "DockerRequirement", - "dockerPull": "quay.io/biocontainers/fgbio:1.2.0--0" + "dockerPull": "ghcr.io/msk-access/fgbio:1.2.0" }, { "class": "InlineJavascriptRequirement" @@ -284,6 +295,7 @@ "id": "#fgbio_postprocessing_simplex_filter_0.1.8.cwl/input_bam", "type": "File", "inputBinding": { + "position": 0, "prefix": "--input_bam" }, "doc": "Input file (bam or sam). Required.", @@ -298,6 +310,7 @@ "string" ], "inputBinding": { + "position": 0, "prefix": "--output_filename" }, "doc": "Output file (bam or sam)." 
@@ -309,6 +322,7 @@ "int" ], "inputBinding": { + "position": 0, "prefix": "--min_simplex_reads" }, "doc": "Minimum number of simplex reads to pass filter for consensus reads" @@ -330,12 +344,12 @@ "requirements": [ { "class": "ResourceRequirement", - "ramMin": 2000, - "coresMin": 1 + "ramMin": 16000, + "coresMin": 2 }, { "class": "DockerRequirement", - "dockerPull": "mskaccess/fgbio_postprocessing:0.2.0" + "dockerPull": "ghcr.io/msk-access/fgbio_postprocessing:0.2.1" }, { "class": "InlineJavascriptRequirement" @@ -378,7 +392,6 @@ { "class": "CommandLineTool", "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl", - "label": "GATK-CollectAlignmentSummaryMetrics", "baseCommand": [ "gatk", "CollectAlignmentSummaryMetrics" @@ -434,11 +447,11 @@ "position": 0, "prefix": "-R" }, + "doc": "Reference sequence file. Note that while this argument is not required, without it only a small subset of the metrics will be calculated. Note also that if a reference sequence is provided, it must be accompanied by a sequence dictionary. Default value: null.", "secondaryFiles": [ "^.fasta.fai", "^.dict" - ], - "doc": "Reference sequence file. Note that while this argument is not required, without it only a small subset of the metrics will be calculated. Note also that if a reference sequence is provided, it must be accompanied by a sequence dictionary. Default value: null." + ] }, { "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/adaptor_sequence", @@ -513,8 +526,8 @@ "doc": "Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded. Default value: STRICT. This option can be set to 'null' to clear the default value. 
Possible values: {STRICT,LENIENT, SILENT}" }, { - "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/assume_sorted", "default": true, + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/assume_sorted", "type": [ "null", "boolean" @@ -584,6 +597,14 @@ "prefix": "--USE_JDK_INFLATER" }, "doc": "Use the JDK Inflater instead of the Intel Inflater for reading compressed input" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/temporary_directory", + "type": [ + "null", + "string" + ], + "doc": "Default value: null. This option may be specified 0 or more times." } ], "outputs": [ @@ -595,6 +616,7 @@ } } ], + "label": "GATK-CollectAlignmentSummaryMetrics", "arguments": [ { "position": 0, @@ -604,20 +626,10 @@ { "position": 0, "prefix": "--TMP_DIR", - "valueFrom": "." + "valueFrom": "${\n if(inputs.temporary_directory)\n return inputs.temporary_directory;\n return runtime.tmpdir\n}" }, { "position": 0, - "prefix": "--COMPRESSION_LEVEL", - "valueFrom": "2" - }, - { - "position": 0, - "prefix": "--MAX_RECORDS_IN_RAM", - "valueFrom": "50000" - }, - { - "position": 2, "prefix": "-O", "valueFrom": "${\n if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_alignment_summary_metrics.txt')\n }\n}" } @@ -630,7 +642,7 @@ }, { "class": "DockerRequirement", - "dockerPull": "broadinstitute/gatk:4.1.8.0" + "dockerPull": "ghcr.io/msk-access/gatk:4.1.8.0" }, { "class": "InlineJavascriptRequirement" @@ -679,11 +691,11 @@ "id": "#reference_fasta", "type": "File", "secondaryFiles": [ - "^.fai", + ".fai", "^.dict" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 747.109375 + "https://www.sevenbridges.com/y": 853.8671875 }, { "id": "#input", @@ -692,7 +704,7 @@ "^.bai" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 3094.265625 + "https://www.sevenbridges.com/y": 3201.7734375 }, { "id": "#reverse_per_base_tags_simplex_duplex", @@ -701,7 
+713,7 @@ "boolean" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 320.203125 + "https://www.sevenbridges.com/y": 426.9375 }, { "id": "#require_single_strand_agreement_simplex_duplex", @@ -710,7 +722,7 @@ "boolean" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 533.671875 + "https://www.sevenbridges.com/y": 640.40625 }, { "id": "#output_file_name_simplex_duplex", @@ -719,7 +731,7 @@ "string" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 853.78125 + "https://www.sevenbridges.com/y": 960.5859375 }, { "id": "#number_of_threads", @@ -728,7 +740,7 @@ "int" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 1387.140625 + "https://www.sevenbridges.com/y": 1494.1796875 }, { "id": "#min_reads_simplex_duplex", @@ -737,7 +749,7 @@ "items": "int" }, "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 1600.484375 + "https://www.sevenbridges.com/y": 1707.6171875 }, { "id": "#min_mean_base_quality_simplex_duplex", @@ -746,7 +758,7 @@ "int" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 1813.859375 + "https://www.sevenbridges.com/y": 1921.0625 }, { "id": "#max_base_error_rate_simplex_duplex", @@ -758,7 +770,7 @@ } ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 2880.921875 + "https://www.sevenbridges.com/y": 2988.3359375 }, { "id": "#max_no_call_fraction_simplex_duplex", @@ -767,7 +779,7 @@ "float" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 2667.578125 + "https://www.sevenbridges.com/y": 2774.8984375 }, { "id": "#min_base_quality_simplex_duplex", @@ -776,7 +788,7 @@ "int" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 2027.328125 + "https://www.sevenbridges.com/y": 2134.53125 }, { "id": "#memory_per_job", @@ -785,7 +797,7 @@ "int" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 2240.796875 + 
"https://www.sevenbridges.com/y": 2348 }, { "id": "#memory_overhead", @@ -794,7 +806,7 @@ "int" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 2347.53125 + "https://www.sevenbridges.com/y": 2454.734375 }, { "id": "#max_read_error_rate_simplex_duplex", @@ -806,7 +818,7 @@ } ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 2454.234375 + "https://www.sevenbridges.com/y": 2561.4609375 }, { "id": "#reverse_per_base_tags_duplex", @@ -815,7 +827,7 @@ "boolean" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 426.9375 + "https://www.sevenbridges.com/y": 533.671875 }, { "id": "#require_single_strand_agreement_duplex", @@ -824,7 +836,7 @@ "boolean" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 640.40625 + "https://www.sevenbridges.com/y": 747.140625 }, { "id": "#output_file_name_duplex", @@ -833,7 +845,7 @@ "string" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 1280.46875 + "https://www.sevenbridges.com/y": 1387.4609375 }, { "id": "#min_reads_duplex", @@ -842,7 +854,7 @@ "items": "int" }, "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 1707.15625 + "https://www.sevenbridges.com/y": 1814.3359375 }, { "id": "#min_mean_base_quality_duplex", @@ -851,7 +863,7 @@ "int" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 1920.59375 + "https://www.sevenbridges.com/y": 2027.796875 }, { "id": "#min_base_quality_duplex", @@ -860,7 +872,7 @@ "int" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 2134.0625 + "https://www.sevenbridges.com/y": 2241.265625 }, { "id": "#max_read_error_rate_duplex", @@ -872,7 +884,7 @@ } ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 2560.90625 + "https://www.sevenbridges.com/y": 2668.1796875 }, { "id": "#max_no_call_fraction_duplex", @@ -881,7 +893,7 @@ "float" ], "https://www.sevenbridges.com/x": 0, - 
"https://www.sevenbridges.com/y": 2774.25 + "https://www.sevenbridges.com/y": 2881.6171875 }, { "id": "#max_base_error_rate_duplex", @@ -893,7 +905,7 @@ } ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 2987.59375 + "https://www.sevenbridges.com/y": 3095.0546875 }, { "id": "#validation_stringency", @@ -929,7 +941,7 @@ "string" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 1173.796875 + "https://www.sevenbridges.com/y": 1280.7421875 }, { "id": "#create_index", @@ -937,8 +949,8 @@ "null", "boolean" ], - "https://www.sevenbridges.com/x": 454.71875, - "https://www.sevenbridges.com/y": 1684.515625 + "https://www.sevenbridges.com/x": 454.671875, + "https://www.sevenbridges.com/y": 1805.625 }, { "id": "#assume_sorted", @@ -946,8 +958,8 @@ "null", "boolean" ], - "https://www.sevenbridges.com/x": 454.71875, - "https://www.sevenbridges.com/y": 1791.1875 + "https://www.sevenbridges.com/x": 454.671875, + "https://www.sevenbridges.com/y": 1912.34375 }, { "id": "#output_file_name_simplex_aln_metrics", @@ -956,7 +968,7 @@ "string" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 960.453125 + "https://www.sevenbridges.com/y": 1067.3046875 }, { "id": "#output_file_name_simpex", @@ -965,7 +977,7 @@ "string" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 1067.125 + "https://www.sevenbridges.com/y": 1174.0234375 }, { "id": "#min_simplex_reads", @@ -974,7 +986,25 @@ "int" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 1493.8125 + "https://www.sevenbridges.com/y": 1600.8984375 + }, + { + "id": "#temporary_directory", + "type": [ + "null", + "string" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 320.203125 + }, + { + "id": "#async_io", + "type": [ + "null", + "string" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 3308.5 } ], "outputs": [ @@ -987,8 +1017,8 @@ "secondaryFiles": [ 
"^.bai" ], - "https://www.sevenbridges.com/x": 1039.097900390625, - "https://www.sevenbridges.com/y": 1721.21875 + "https://www.sevenbridges.com/x": 1072.9705810546875, + "https://www.sevenbridges.com/y": 1828.3515625 }, { "id": "#fgbio_postprocessing_simplex_bam", @@ -996,17 +1026,11 @@ "#fgbio_postprocessing_simplex_filter_0_1_8/fgbio_postprocessing_simplex_bam" ], "type": "File", - "https://www.sevenbridges.com/x": 1543.551025390625, - "https://www.sevenbridges.com/y": 1749.21875 - }, - { - "id": "#gatk_collect_alignment_summary_metrics_txt_simplex", - "outputSource": [ - "#gatk_collect_alignment_summary_metrics_4.1.8.0_duplex/gatk_collect_alignment_summary_metrics_txt" + "secondaryFiles": [ + "^.bai" ], - "type": "File", - "https://www.sevenbridges.com/x": 1543.551025390625, - "https://www.sevenbridges.com/y": 1345.015625 + "https://www.sevenbridges.com/x": 1616.9268798828125, + "https://www.sevenbridges.com/y": 1809.984375 }, { "id": "#gatk_collect_alignment_summary_metrics_txt_duplex", @@ -1014,8 +1038,8 @@ "#gatk_collect_alignment_summary_metrics_4.1.8.0_duplex/gatk_collect_alignment_summary_metrics_txt" ], "type": "File", - "https://www.sevenbridges.com/x": 1543.551025390625, - "https://www.sevenbridges.com/y": 1451.75 + "https://www.sevenbridges.com/x": 1616.9268798828125, + "https://www.sevenbridges.com/y": 1498.515625 }, { "id": "#fgbio_filter_consensus_reads_simplex_duplex_bam", @@ -1026,8 +1050,17 @@ "secondaryFiles": [ "^.bai" ], - "https://www.sevenbridges.com/x": 1039.097900390625, - "https://www.sevenbridges.com/y": 1614.484375 + "https://www.sevenbridges.com/x": 1072.9705810546875, + "https://www.sevenbridges.com/y": 1721.6171875 + }, + { + "id": "#gatk_collect_alignment_summary_metrics_txt_simplex", + "outputSource": [ + "#gatk_collect_alignment_summary_metrics_4.1.8.0_simplex/gatk_collect_alignment_summary_metrics_txt" + ], + "type": "File", + "https://www.sevenbridges.com/x": 2134.5888671875, + "https://www.sevenbridges.com/y": 1654.25 } ], 
"steps": [ @@ -1091,6 +1124,14 @@ { "id": "#fgbio_filter_consensus_reads_1_2_0_duplex/require_single_strand_agreement", "source": "#require_single_strand_agreement_duplex" + }, + { + "id": "#fgbio_filter_consensus_reads_1_2_0_duplex/temporary_directory", + "source": "#temporary_directory" + }, + { + "id": "#fgbio_filter_consensus_reads_1_2_0_duplex/async_io", + "source": "#async_io" } ], "out": [ @@ -1100,8 +1141,8 @@ ], "run": "#fgbio_filter_consensus_reads_1.2.0.cwl", "label": "fgbio_filter_consensus_reads_1.2.0_duplex", - "https://www.sevenbridges.com/x": 454.71875, - "https://www.sevenbridges.com/y": 1493.8125 + "https://www.sevenbridges.com/x": 454.671875, + "https://www.sevenbridges.com/y": 1600.8984375 }, { "id": "#fgbio_filter_consensus_reads_1_2_1_simplex_duplex", @@ -1167,6 +1208,14 @@ { "id": "#fgbio_filter_consensus_reads_1_2_1_simplex_duplex/require_single_strand_agreement", "source": "#require_single_strand_agreement_simplex_duplex" + }, + { + "id": "#fgbio_filter_consensus_reads_1_2_1_simplex_duplex/temporary_directory", + "source": "#temporary_directory" + }, + { + "id": "#fgbio_filter_consensus_reads_1_2_1_simplex_duplex/async_io", + "source": "#async_io" } ], "out": [ @@ -1176,8 +1225,8 @@ ], "run": "#fgbio_filter_consensus_reads_1.2.0.cwl", "label": "fgbio_filter_consensus_reads_1.2.0_simplex_duplex", - "https://www.sevenbridges.com/x": 454.71875, - "https://www.sevenbridges.com/y": 1212.078125 + "https://www.sevenbridges.com/x": 454.671875, + "https://www.sevenbridges.com/y": 1291.1640625 }, { "id": "#fgbio_postprocessing_simplex_filter_0_1_8", @@ -1202,8 +1251,8 @@ ], "run": "#fgbio_postprocessing_simplex_filter_0.1.8.cwl", "label": "fgbio_postprocessing_simplex_filter_0.1.8", - "https://www.sevenbridges.com/x": 1039.097900390625, - "https://www.sevenbridges.com/y": 1493.75 + "https://www.sevenbridges.com/x": 1072.9705810546875, + "https://www.sevenbridges.com/y": 1600.8828125 }, { "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0_duplex", 
@@ -1216,6 +1265,10 @@ "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0_duplex/output_file_name", "source": "#output_file_name_duplex_aln_metrics" }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0_duplex/reference", + "source": "#reference_fasta" + }, { "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0_duplex/validation_stringency", "source": "#validation_stringency" @@ -1235,6 +1288,10 @@ { "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0_duplex/use_jdk_inflater", "source": "#use_jdk_inflater" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0_duplex/temporary_directory", + "source": "#temporary_directory" } ], "out": [ @@ -1244,8 +1301,8 @@ ], "run": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl", "label": "GATK-CollectAlignmentSummaryMetrics", - "https://www.sevenbridges.com/x": 1039.097900390625, - "https://www.sevenbridges.com/y": 1331.015625 + "https://www.sevenbridges.com/x": 1072.9705810546875, + "https://www.sevenbridges.com/y": 1424.1484375 }, { "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0_simplex", @@ -1258,6 +1315,10 @@ "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0_simplex/output_file_name", "source": "#output_file_name_simplex_aln_metrics" }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0_simplex/reference", + "source": "#reference_fasta" + }, { "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0_simplex/validation_stringency", "source": "#validation_stringency" @@ -1286,8 +1347,8 @@ ], "run": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl", "label": "GATK-CollectAlignmentSummaryMetrics", - "https://www.sevenbridges.com/x": 1543.551025390625, - "https://www.sevenbridges.com/y": 1600.484375 + "https://www.sevenbridges.com/x": 1616.9268798828125, + "https://www.sevenbridges.com/y": 1654.25 } ], "requirements": [], @@ -1315,6 +1376,6 @@ ], "cwlVersion": "v1.0", "$schemas": [ - "http://schema.org/version/9.0/schemaorg-current-http.rdf" + 
"http://schema.org/version/latest/schemaorg-current-http.rdf" ] } \ No newline at end of file diff --git a/gbcms_genotyping/README.md b/gbcms_genotyping/README.md new file mode 100644 index 0000000..ab5ee16 --- /dev/null +++ b/gbcms_genotyping/README.md @@ -0,0 +1,43 @@ +--- +description: Specifications for generating counts from Duplex, Simplex and Standard/Unfiltered (Tumor/Normal) BAM +--- + +## Specifications for generating counts from Duplex, Simplex and Standard/Unfiltered (Tumor/Normal) BAM - gbcms_genotyping.cwl + +### Tools used: + +- [GetBaseCountsMultiSample](https://msk-access.gitbook.io/command-line-tools-cwl/getbasecountsmultisample/1.2.5) + +### Usage + +```bash +usage: gbcms_genotyping.cwl [-h] --duplex_bams DUPLEX_BAMS --normal_bams + NORMAL_BAMS --tumor_bams TUMOR_BAMS --simplex_bams + SIMPLEX_BAMS --maf MAF --ref_fasta REF_FASTA + --simplex_genotyping_bams_ids + SIMPLEX_GENOTYPING_BAMS_IDS [--generic_counting] + --normal_genotyping_bams_ids + NORMAL_GENOTYPING_BAMS_IDS + --tumor_genotyping_bams_ids + TUMOR_GENOTYPING_BAMS_IDS + --duplex_genotyping_bams_ids + DUPLEX_GENOTYPING_BAMS_IDS + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --duplex_bams DUPLEX_BAMS + --normal_bams NORMAL_BAMS + --tumor_bams TUMOR_BAMS + --simplex_bams SIMPLEX_BAMS + --maf MAF + --ref_fasta REF_FASTA + --simplex_genotyping_bams_ids SIMPLEX_GENOTYPING_BAMS_IDS + --generic_counting + --normal_genotyping_bams_ids NORMAL_GENOTYPING_BAMS_IDS + --tumor_genotyping_bams_ids TUMOR_GENOTYPING_BAMS_IDS + --duplex_genotyping_bams_ids DUPLEX_GENOTYPING_BAMS_IDS +``` diff --git a/gbcms_genotyping/example_inputs.json b/gbcms_genotyping/example_inputs.json new file mode 100644 index 0000000..59f9120 --- /dev/null +++ b/gbcms_genotyping/example_inputs.json @@ -0,0 +1,51 @@ +{ + "duplex_bams": [ + { + "class": "File", + "path": "/Users/shahr2/Documents/test_reference/bam/duplex/SeraCare_0-5.bam" + }
+ ], + "duplex_genotyping_bams_ids": [ + "test1" + ], + "duplex_output": null, + "generic_counting": null, + "maf": { + "class": "File", + "path": "/Users/shahr2/Downloads/SeraCare_0-5.F22.combined-variants.vep_keptrmv_taggedHotspots.maf" + }, + "normal_bams": [ + { + "class": "File", + "path": "/Users/shahr2/Documents/test_reference/bam/SeraCare_0-5.bam" + } + ], + "normal_genotyping_bams_ids": [ + "test1" + ], + "normal_output": null, + "ref_fasta": { + "class": "File", + "path": "/Users/shahr2/Documents/test_reference/reference/versions/hg19/Homo_sapiens_assembly19.fasta" + }, + "simplex_bams": [ + { + "class": "File", + "path": "/Users/shahr2/Documents/test_reference/bam/SeraCare_0-5-SIMPLEX.bam" + } + ], + "simplex_genotyping_bams_ids": [ + "test1" + ], + "simplex_output": null, + "tumor_bams": [ + { + "class": "File", + "path": "/Users/shahr2/Documents/test_reference/bam/SeraCare_0-5.bam" + } + ], + "tumor_genotyping_bams_ids": [ + "test1" + ], + "tumor_output": null +} diff --git a/gbcms_genotyping/gbcms_genotyping.cwl b/gbcms_genotyping/gbcms_genotyping.cwl new file mode 100644 index 0000000..c2ba7c6 --- /dev/null +++ b/gbcms_genotyping/gbcms_genotyping.cwl @@ -0,0 +1,316 @@ +class: Workflow +cwlVersion: v1.0 +id: gbcms_genotyping +label: gbcms_genotyping +$namespaces: + s: 'https://schema.org/' + sbg: 'https://www.sevenbridges.com/' +inputs: + - id: duplex_bams + type: 'File[]' + secondaryFiles: + - ^.bai + 'sbg:x': 0 + 'sbg:y': 1067.0859375 + - id: normal_bams + type: 'File[]' + secondaryFiles: + - ^.bai + 'sbg:x': 0 + 'sbg:y': 640.2421875 + - id: tumor_bams + type: 'File[]' + secondaryFiles: + - ^.bai + 'sbg:x': 0 + 'sbg:y': 106.7109375 + - id: simplex_bams + type: 'File[]' + secondaryFiles: + - ^.bai + 'sbg:x': 0 + 'sbg:y': 320.1328125 + - id: maf + type: File + 'sbg:x': 0 + 'sbg:y': 746.9296875 + - id: ref_fasta + type: File + 'sbg:x': 0 + 'sbg:y': 426.8203125 + - id: simplex_genotyping_bams_ids + type: 'string[]' + 'sbg:x': 0 + 'sbg:y': 213.421875 + - 
id: generic_counting + type: boolean? + 'sbg:x': 0 + 'sbg:y': 853.640625 + - id: normal_genotyping_bams_ids + type: 'string[]' + 'sbg:x': 0 + 'sbg:y': 533.53125 + - id: tumor_genotyping_bams_ids + type: 'string[]' + 'sbg:x': 0 + 'sbg:y': 0 + - id: duplex_genotyping_bams_ids + type: 'string[]' + 'sbg:x': 0 + 'sbg:y': 960.375 +outputs: + - id: tumor_fillout + outputSource: + - tumor_getbasecountsmultisample_1_2_5/fillout + type: + - File + - type: array + items: File + 'sbg:x': 611.2342529296875 + 'sbg:y': 373.5234375 + - id: simplex_fillout + outputSource: + - simplex_getbasecountsmultisample_1_2_5/fillout + type: + - File + - type: array + items: File + 'sbg:x': 611.2342529296875 + 'sbg:y': 480.2109375 + - id: normal_fillout + outputSource: + - normal_getbasecountsmultisample_1_2_5/fillout + type: + - File + - type: array + items: File + 'sbg:x': 611.2342529296875 + 'sbg:y': 586.8984375 + - id: duplex_fillout + outputSource: + - duplex_getbasecountsmultisample_1_2_5/fillout + type: + - File + - type: array + items: File + 'sbg:x': 611.2342529296875 + 'sbg:y': 693.5859375 +steps: + - id: duplex_getbasecountsmultisample_1_2_5 + in: + - id: genotyping_bams + source: + - duplex_bams + - id: genotyping_bams_ids + source: + - duplex_genotyping_bams_ids + - id: filter_duplicate + default: 0 + - id: fragment_count + default: 1 + - id: maf + source: maf + - id: maq + default: 20 + - id: omaf + default: true + - id: output + source: duplex_genotyping_bams_ids + valueFrom: |- + ${ + if (inputs.duplex_output) { + return inputs.duplex_output + } else { + if (typeof(self) == 'object') { + return self.map(function(b, i) { + return b + "_fillout_DUPLEX.maf" + }) + } else { + return self + "_fillout_DUPLEX.maf" + } + } + } + - id: ref_fasta + source: ref_fasta + - id: generic_counting + source: generic_counting + out: + - id: fillout + run: >- + ../command_line_tools/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl + label: duplex_getbasecountsmultisample_1.2.5 + 
scatter: + - genotyping_bams + - genotyping_bams_ids + - output + scatterMethod: dotproduct + 'sbg:x': 295.84375 + 'sbg:y': 763.6328125 + - id: simplex_getbasecountsmultisample_1_2_5 + in: + - id: genotyping_bams + source: + - simplex_bams + - id: genotyping_bams_ids + source: + - simplex_genotyping_bams_ids + - id: filter_duplicate + default: 0 + - id: fragment_count + default: 1 + - id: maf + source: maf + - id: maq + default: 20 + - id: omaf + default: true + - id: output + source: simplex_genotyping_bams_ids + valueFrom: |- + ${ + if (inputs.simplex_output){ + return inputs.simplex_output + } else { + if (typeof(self) == 'object') { + return self.map(function(b, i) { + return b + "_fillout_SIMPLEX.maf" + }) + } else { + return self + "_fillout_SIMPLEX.maf" + } + } + } + - id: ref_fasta + source: ref_fasta + - id: generic_counting + source: generic_counting + out: + - id: fillout + run: >- + ../command_line_tools/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl + label: simplex_getbasecountsmultisample_1.2.5 + scatter: + - genotyping_bams + - genotyping_bams_ids + - output + scatterMethod: dotproduct + 'sbg:x': 295.84375 + 'sbg:y': 410.1640625 + - id: tumor_getbasecountsmultisample_1_2_5 + in: + - id: genotyping_bams + source: + - tumor_bams + - id: genotyping_bams_ids + source: + - tumor_genotyping_bams_ids + - id: filter_duplicate + default: 0 + - id: fragment_count + default: 1 + - id: maf + source: maf + - id: maq + default: 20 + - id: omaf + default: true + - id: output + source: tumor_genotyping_bams_ids + valueFrom: |- + ${ + if (inputs.tumor_output) { + return inputs.tumor_output + } else { + if (typeof(self) == 'object') { + return self.map(function(b, i) { + return b + "_fillout.maf" + }) + } else { + return self + "_fillout.maf" + } + } + } + - id: ref_fasta + source: ref_fasta + - id: generic_counting + source: generic_counting + out: + - id: fillout + run: >- + 
../command_line_tools/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl + label: tumor_getbasecountsmultisample_1.2.5 + scatter: + - genotyping_bams + - genotyping_bams_ids + - output + scatterMethod: dotproduct + 'sbg:x': 295.84375 + 'sbg:y': 233.4296875 + - id: normal_getbasecountsmultisample_1_2_5 + in: + - id: genotyping_bams + source: + - normal_bams + - id: genotyping_bams_ids + source: + - normal_genotyping_bams_ids + - id: filter_duplicate + default: 0 + - id: fragment_count + default: 1 + - id: maf + source: maf + - id: maq + default: 20 + - id: omaf + default: true + - id: output + source: normal_genotyping_bams_ids + valueFrom: |- + ${ + if (inputs.normal_output){ + return inputs.normal_output + } else { + if (typeof(self) == 'object') { + return self.map(function(b, i) { + return b + "_fillout.maf" + }) + } else { + return self + "_fillout.maf" + } + } + } + - id: ref_fasta + source: ref_fasta + - id: generic_counting + source: generic_counting + out: + - id: fillout + run: >- + ../command_line_tools/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl + label: normal_getbasecountsmultisample_1.2.5 + scatter: + - genotyping_bams + - genotyping_bams_ids + - output + scatterMethod: dotproduct + 'sbg:x': 295.84375 + 'sbg:y': 586.8984375 +requirements: + - class: ScatterFeatureRequirement + - class: StepInputExpressionRequirement + - class: InlineJavascriptRequirement +$schemas: + - 'http://schema.org/version/latest/schemaorg-current-http.rdf' +'s:author': + - class: 's:Person' + 's:email': 'mailto:johnsoni@mskcc.org' + 's:name': Ian Johnson +'s:citation': '' +'s:codeRepository': 'https://github.com/msk-access/cwl_subworkflows/gbcms_genotyping' +'s:contributor': + - class: 's:Person' + 's:email': 'mailto:shahr2@mskcc.org' + 's:name': Ronak Shah +'s:dateCreated': '2021-05-28' +'s:license': 'https://spdx.org/licenses/Apache-2.0' diff --git a/gbcms_genotyping/gbcms_genotyping__packed.cwl 
b/gbcms_genotyping/gbcms_genotyping__packed.cwl new file mode 100644 index 0000000..4b5e894 --- /dev/null +++ b/gbcms_genotyping/gbcms_genotyping__packed.cwl @@ -0,0 +1,714 @@ +{ + "$graph": [ + { + "class": "CommandLineTool", + "id": "#getbasecountsmultisample_1.2.5.cwl", + "baseCommand": [ + "GetBaseCountsMultiSample" + ], + "inputs": [ + { + "id": "#getbasecountsmultisample_1.2.5.cwl/memory_per_job", + "type": [ + "null", + "int" + ], + "doc": "Memory per job in megabytes" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/memory_overhead", + "type": [ + "null", + "int" + ], + "doc": "Memory overhead per job in megabytes" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/number_of_threads", + "type": [ + "null", + "int" + ] + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/genotyping_bams", + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "doc": "Input bam file" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/genotyping_bams_ids", + "type": [ + "string", + { + "type": "array", + "items": "string" + } + ], + "doc": "Input bam, sample identifier to be used for \"Tumor Sample Barcode\" for maf or Sample name in the header for vcf" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/filter_duplicate", + "type": "int", + "inputBinding": { + "position": 0, + "prefix": "--filter_duplicate" + }, + "doc": "Whether to filter reads that are marked as duplicate. 0=off, 1=on. Default 1" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/fragment_count", + "type": "int", + "inputBinding": { + "position": 0, + "prefix": "--fragment_count" + }, + "doc": "Whether to output fragment read counts DPF/RDF/ADF. 0=off, 1=on. Default 0" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/maf", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "--maf" + }, + "doc": "Input variant file in TCGA maf format. --maf or --vcf need to be specified at least once. 
But --maf and --vcf are mutually exclusive" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/maq", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--maq" + }, + "doc": "Mapping quality threshold. Default 20" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/omaf", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--omaf" + }, + "doc": "Output the result in maf format" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/output", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--output", + "valueFrom": "${\n if (inputs.output) {\n return inputs.output\n } else if (inputs.genotyping_bams.length) {\n return inputs.maf.basename.replace('.maf', '_fillout.maf')\n } else {\n return inputs.genotyping_bams.basename.replace('.bam', '_fillout.maf')\n }\n}" + }, + "doc": "Filename for output of raw fillout data in MAF/VCF format" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/ref_fasta", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "--fasta" + }, + "doc": "Input reference sequence file" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/vcf", + "type": [ + "null", + "File" + ], + "inputBinding": { + "position": 0, + "prefix": "--vcf" + }, + "doc": "Input variant file in vcf-like format(the first 5 columns are used). --maf or --vcf need to be specified at least once. But --maf and --vcf are mutually exclusive" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/generic_counting", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--generic_counting" + }, + "doc": "Use the newly implemented generic counting algorithm. Works better for complex variants. 
You may get different allele count result from the default counting algorithm" + } + ], + "outputs": [ + { + "id": "#getbasecountsmultisample_1.2.5.cwl/fillout", + "type": "File", + "outputBinding": { + "glob": "${\n if (inputs.output) {\n return inputs.output\n } else if (inputs.genotyping_bams.length) {\n return inputs.maf.basename.replace('.maf', '_fillout.maf')\n } else {\n return inputs.genotyping_bams.basename.replace('.bam', '_fillout.maf')\n }\n}" + } + } + ], + "label": "getbasecountsmultisample_1.2.5", + "arguments": [ + { + "position": 0, + "prefix": "", + "shellQuote": false, + "valueFrom": "$('--bam_fof bam_fof.tsv')\n" + }, + { + "position": 0, + "prefix": "--thread", + "valueFrom": "$(runtime.cores)" + } + ], + "requirements": [ + { + "class": "ShellCommandRequirement" + }, + { + "class": "ResourceRequirement", + "ramMin": 16000, + "coresMin": 2 + }, + { + "class": "DockerRequirement", + "dockerPull": "ghcr.io/msk-access/gbcms:1.2.5" + }, + { + "class": "InitialWorkDirRequirement", + "listing": [ + { + "entryname": "bam_fof.tsv", + "entry": "${\n if (typeof(inputs.genotyping_bams_ids) == 'object') {\n return inputs.genotyping_bams_ids.map(function(sid, i) {\n return sid + \"\\t\" +\n inputs.genotyping_bams[i].path\n }).join(\"\\n\")\n } else {\n return inputs.genotyping_bams_ids + \"\\t\" + inputs.genotyping_bams.path + \"\\n\"\n }\n}", + "writable": false + } + ] + }, + { + "class": "InlineJavascriptRequirement" + }, + { + "class": "StepInputExpressionRequirement" + } + ], + "http://purl.org/dc/terms/contributor": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:shahr2@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Ronak Shah" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://purl.org/dc/terms/creator": [ + { + "class": 
"http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:johnsoni@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Ian Johnson" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://usefulinc.com/ns/doap#release": [ + { + "class": "http://usefulinc.com/ns/doap#Version", + "http://usefulinc.com/ns/doap#name": "GetBaseCountsMultiSample", + "http://usefulinc.com/ns/doap#revision": "1.2.5" + } + ], + "$namespaces": { + "s": "https://schema.org/", + "sbg": "https://www.sevenbridges.com/" + } + }, + { + "class": "Workflow", + "id": "#main", + "label": "gbcms_genotyping", + "inputs": [ + { + "id": "#duplex_bams", + "type": { + "type": "array", + "items": "File" + }, + "secondaryFiles": [ + "^.bai" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 1067.0859375 + }, + { + "id": "#normal_bams", + "type": { + "type": "array", + "items": "File" + }, + "secondaryFiles": [ + "^.bai" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 640.2421875 + }, + { + "id": "#tumor_bams", + "type": { + "type": "array", + "items": "File" + }, + "secondaryFiles": [ + "^.bai" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 106.7109375 + }, + { + "id": "#simplex_bams", + "type": { + "type": "array", + "items": "File" + }, + "secondaryFiles": [ + "^.bai" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 320.1328125 + }, + { + "id": "#maf", + "type": "File", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 746.9296875 + }, + { + "id": "#ref_fasta", + "type": "File", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 426.8203125 + }, + { + "id": "#simplex_genotyping_bams_ids", + "type": { + "type": "array", + "items": "string" + }, + 
"https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 213.421875 + }, + { + "id": "#generic_counting", + "type": [ + "null", + "boolean" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 853.640625 + }, + { + "id": "#normal_genotyping_bams_ids", + "type": { + "type": "array", + "items": "string" + }, + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 533.53125 + }, + { + "id": "#tumor_genotyping_bams_ids", + "type": { + "type": "array", + "items": "string" + }, + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 0 + }, + { + "id": "#duplex_genotyping_bams_ids", + "type": { + "type": "array", + "items": "string" + }, + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 960.375 + } + ], + "outputs": [ + { + "id": "#tumor_fillout", + "outputSource": [ + "#tumor_getbasecountsmultisample_1_2_5/fillout" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 611.2342529296875, + "https://www.sevenbridges.com/y": 373.5234375 + }, + { + "id": "#simplex_fillout", + "outputSource": [ + "#simplex_getbasecountsmultisample_1_2_5/fillout" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 611.2342529296875, + "https://www.sevenbridges.com/y": 480.2109375 + }, + { + "id": "#normal_fillout", + "outputSource": [ + "#normal_getbasecountsmultisample_1_2_5/fillout" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 611.2342529296875, + "https://www.sevenbridges.com/y": 586.8984375 + }, + { + "id": "#duplex_fillout", + "outputSource": [ + "#duplex_getbasecountsmultisample_1_2_5/fillout" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 611.2342529296875, + "https://www.sevenbridges.com/y": 693.5859375 + } + ], + "steps": [ + { + 
"id": "#duplex_getbasecountsmultisample_1_2_5", + "in": [ + { + "id": "#duplex_getbasecountsmultisample_1_2_5/genotyping_bams", + "source": [ + "#duplex_bams" + ] + }, + { + "id": "#duplex_getbasecountsmultisample_1_2_5/genotyping_bams_ids", + "source": [ + "#duplex_genotyping_bams_ids" + ] + }, + { + "id": "#duplex_getbasecountsmultisample_1_2_5/filter_duplicate", + "default": 0 + }, + { + "id": "#duplex_getbasecountsmultisample_1_2_5/fragment_count", + "default": 1 + }, + { + "id": "#duplex_getbasecountsmultisample_1_2_5/maf", + "source": "#maf" + }, + { + "id": "#duplex_getbasecountsmultisample_1_2_5/maq", + "default": 20 + }, + { + "id": "#duplex_getbasecountsmultisample_1_2_5/omaf", + "default": true + }, + { + "id": "#duplex_getbasecountsmultisample_1_2_5/output", + "source": "#duplex_genotyping_bams_ids", + "valueFrom": "${\n if (inputs.duplex_output) {\n return inputs.duplex_output\n } else {\n if (typeof(self) == 'object') {\n return self.map(function(b, i) {\n return b + \"_fillout_DUPLEX.maf\"\n })\n } else {\n return self + \"_fillout_DUPLEX.maf\"\n }\n }\n}" + }, + { + "id": "#duplex_getbasecountsmultisample_1_2_5/ref_fasta", + "source": "#ref_fasta" + }, + { + "id": "#duplex_getbasecountsmultisample_1_2_5/generic_counting", + "source": "#generic_counting" + } + ], + "out": [ + { + "id": "#duplex_getbasecountsmultisample_1_2_5/fillout" + } + ], + "run": "#getbasecountsmultisample_1.2.5.cwl", + "label": "duplex_getbasecountsmultisample_1.2.5", + "scatter": [ + "#duplex_getbasecountsmultisample_1_2_5/genotyping_bams", + "#duplex_getbasecountsmultisample_1_2_5/genotyping_bams_ids", + "#duplex_getbasecountsmultisample_1_2_5/output" + ], + "scatterMethod": "dotproduct", + "https://www.sevenbridges.com/x": 295.84375, + "https://www.sevenbridges.com/y": 763.6328125 + }, + { + "id": "#simplex_getbasecountsmultisample_1_2_5", + "in": [ + { + "id": "#simplex_getbasecountsmultisample_1_2_5/genotyping_bams", + "source": [ + "#simplex_bams" + ] + }, + { + "id": 
"#simplex_getbasecountsmultisample_1_2_5/genotyping_bams_ids", + "source": [ + "#simplex_genotyping_bams_ids" + ] + }, + { + "id": "#simplex_getbasecountsmultisample_1_2_5/filter_duplicate", + "default": 0 + }, + { + "id": "#simplex_getbasecountsmultisample_1_2_5/fragment_count", + "default": 1 + }, + { + "id": "#simplex_getbasecountsmultisample_1_2_5/maf", + "source": "#maf" + }, + { + "id": "#simplex_getbasecountsmultisample_1_2_5/maq", + "default": 20 + }, + { + "id": "#simplex_getbasecountsmultisample_1_2_5/omaf", + "default": true + }, + { + "id": "#simplex_getbasecountsmultisample_1_2_5/output", + "source": "#simplex_genotyping_bams_ids", + "valueFrom": "${\n if (inputs.simplex_output){\n return inputs.simplex_output\n } else {\n if (typeof(self) == 'object') {\n return self.map(function(b, i) {\n return b + \"_fillout_SIMPLEX.maf\"\n })\n } else {\n return self + \"_fillout_SIMPLEX.maf\"\n }\n }\n}" + }, + { + "id": "#simplex_getbasecountsmultisample_1_2_5/ref_fasta", + "source": "#ref_fasta" + }, + { + "id": "#simplex_getbasecountsmultisample_1_2_5/generic_counting", + "source": "#generic_counting" + } + ], + "out": [ + { + "id": "#simplex_getbasecountsmultisample_1_2_5/fillout" + } + ], + "run": "#getbasecountsmultisample_1.2.5.cwl", + "label": "simplex_getbasecountsmultisample_1.2.5", + "scatter": [ + "#simplex_getbasecountsmultisample_1_2_5/genotyping_bams", + "#simplex_getbasecountsmultisample_1_2_5/genotyping_bams_ids", + "#simplex_getbasecountsmultisample_1_2_5/output" + ], + "scatterMethod": "dotproduct", + "https://www.sevenbridges.com/x": 295.84375, + "https://www.sevenbridges.com/y": 410.1640625 + }, + { + "id": "#tumor_getbasecountsmultisample_1_2_5", + "in": [ + { + "id": "#tumor_getbasecountsmultisample_1_2_5/genotyping_bams", + "source": [ + "#tumor_bams" + ] + }, + { + "id": "#tumor_getbasecountsmultisample_1_2_5/genotyping_bams_ids", + "source": [ + "#tumor_genotyping_bams_ids" + ] + }, + { + "id": 
"#tumor_getbasecountsmultisample_1_2_5/filter_duplicate", + "default": 0 + }, + { + "id": "#tumor_getbasecountsmultisample_1_2_5/fragment_count", + "default": 1 + }, + { + "id": "#tumor_getbasecountsmultisample_1_2_5/maf", + "source": "#maf" + }, + { + "id": "#tumor_getbasecountsmultisample_1_2_5/maq", + "default": 20 + }, + { + "id": "#tumor_getbasecountsmultisample_1_2_5/omaf", + "default": true + }, + { + "id": "#tumor_getbasecountsmultisample_1_2_5/output", + "source": "#tumor_genotyping_bams_ids", + "valueFrom": "${\n if (inputs.tumor_output) {\n return inputs.tumor_output\n } else {\n if (typeof(self) == 'object') {\n return self.map(function(b, i) {\n return b + \"_fillout.maf\"\n })\n } else {\n return self + \"_fillout.maf\"\n }\n }\n} " + }, + { + "id": "#tumor_getbasecountsmultisample_1_2_5/ref_fasta", + "source": "#ref_fasta" + }, + { + "id": "#tumor_getbasecountsmultisample_1_2_5/generic_counting", + "source": "#generic_counting" + } + ], + "out": [ + { + "id": "#tumor_getbasecountsmultisample_1_2_5/fillout" + } + ], + "run": "#getbasecountsmultisample_1.2.5.cwl", + "label": "tumor_getbasecountsmultisample_1.2.5", + "scatter": [ + "#tumor_getbasecountsmultisample_1_2_5/genotyping_bams", + "#tumor_getbasecountsmultisample_1_2_5/genotyping_bams_ids", + "#tumor_getbasecountsmultisample_1_2_5/output" + ], + "scatterMethod": "dotproduct", + "https://www.sevenbridges.com/x": 295.84375, + "https://www.sevenbridges.com/y": 233.4296875 + }, + { + "id": "#normal_getbasecountsmultisample_1_2_5", + "in": [ + { + "id": "#normal_getbasecountsmultisample_1_2_5/genotyping_bams", + "source": [ + "#normal_bams" + ] + }, + { + "id": "#normal_getbasecountsmultisample_1_2_5/genotyping_bams_ids", + "source": [ + "#normal_genotyping_bams_ids" + ] + }, + { + "id": "#normal_getbasecountsmultisample_1_2_5/filter_duplicate", + "default": 0 + }, + { + "id": "#normal_getbasecountsmultisample_1_2_5/fragment_count", + "default": 1 + }, + { + "id": 
"#normal_getbasecountsmultisample_1_2_5/maf", + "source": "#maf" + }, + { + "id": "#normal_getbasecountsmultisample_1_2_5/maq", + "default": 20 + }, + { + "id": "#normal_getbasecountsmultisample_1_2_5/omaf", + "default": true + }, + { + "id": "#normal_getbasecountsmultisample_1_2_5/output", + "source": "#normal_genotyping_bams_ids", + "valueFrom": "${\n if (inputs.normal_output){\n return inputs.normal_output\n } else {\n if (typeof(self) == 'object') {\n return self.map(function(b, i) {\n return b + \"_fillout.maf\"\n })\n } else {\n return self + \"_fillout.maf\"\n }\n }\n}" + }, + { + "id": "#normal_getbasecountsmultisample_1_2_5/ref_fasta", + "source": "#ref_fasta" + }, + { + "id": "#normal_getbasecountsmultisample_1_2_5/generic_counting", + "source": "#generic_counting" + } + ], + "out": [ + { + "id": "#normal_getbasecountsmultisample_1_2_5/fillout" + } + ], + "run": "#getbasecountsmultisample_1.2.5.cwl", + "label": "normal_getbasecountsmultisample_1.2.5", + "scatter": [ + "#normal_getbasecountsmultisample_1_2_5/genotyping_bams", + "#normal_getbasecountsmultisample_1_2_5/genotyping_bams_ids", + "#normal_getbasecountsmultisample_1_2_5/output" + ], + "scatterMethod": "dotproduct", + "https://www.sevenbridges.com/x": 295.84375, + "https://www.sevenbridges.com/y": 586.8984375 + } + ], + "requirements": [ + { + "class": "ScatterFeatureRequirement" + }, + { + "class": "StepInputExpressionRequirement" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "https://schema.org/author": [ + { + "class": "https://schema.org/Person", + "https://schema.org/email": "mailto:johnsoni@mskcc.org", + "https://schema.org/name": "Ian Johnson" + } + ], + "https://schema.org/citation": "", + "https://schema.org/codeRepository": "https://github.com/msk-access/cwl_subworkflows/gbcms_genotyping", + "https://schema.org/contributor": [ + { + "class": "https://schema.org/Person", + "https://schema.org/email": "mailto:shahr2@mskcc.org", + "https://schema.org/name": "Ronak Shah" + } + 
], + "https://schema.org/dateCreated": "2021-05-28", + "https://schema.org/license": "https://spdx.org/licenses/Apache-2.0" + } + ], + "cwlVersion": "v1.0", + "$schemas": [ + "http://schema.org/version/latest/schemaorg-current-http.rdf" + ] +} \ No newline at end of file diff --git a/indel_realignment/indel_realignment__packed.cwl b/indel_realignment/indel_realignment__packed.cwl index 9a9e687..626196c 100644 --- a/indel_realignment/indel_realignment__packed.cwl +++ b/indel_realignment/indel_realignment__packed.cwl @@ -28,11 +28,7 @@ "type": [ "null", "int" - ], - "inputBinding": { - "position": 0, - "prefix": "--threads" - } + ] }, { "id": "#abra2_2.22.cwl/input_bam", @@ -56,12 +52,8 @@ "id": "#abra2_2.22.cwl/working_directory", "type": [ "null", - "Directory" + "string" ], - "inputBinding": { - "position": 0, - "prefix": "--tmpdir" - }, "doc": "Set the temp directory (overrides java.io.tmpdir)" }, { @@ -268,7 +260,7 @@ } ], "outputBinding": { - "glob": "*abra.bam" + "glob": "${\n return inputs.output_bams\n}" }, "secondaryFiles": [ "^.bai" @@ -279,23 +271,33 @@ "arguments": [ { "position": 0, - "valueFrom": "${ if(inputs.memory_per_job && inputs.memory_overhead) { if(inputs.memory_per_job % 1000 == 0) { return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\" } else { return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" } } else if (inputs.memory_per_job && !inputs.memory_overhead){ if(inputs.memory_per_job % 1000 == 0) { return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\" } else { return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\" } } else if(!inputs.memory_per_job && inputs.memory_overhead){ return \"-Xmx15G\" } else { return \"-Xmx15G\" } }" + "valueFrom": "${\n if (inputs.memory_per_job && inputs.memory_overhead) {\n\n if (inputs.memory_per_job % 1000 == 0) {\n\n return \"-Xmx\" + (inputs.memory_per_job / 1000).toString() + \"G\"\n }\n else {\n\n return \"-Xmx\" + 
Math.floor((inputs.memory_per_job / 1000)).toString() + \"G\"\n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead) {\n\n if (inputs.memory_per_job % 1000 == 0) {\n\n return \"-Xmx\" + (inputs.memory_per_job / 1000).toString() + \"G\"\n }\n else {\n\n return \"-Xmx\" + Math.floor((inputs.memory_per_job / 1000)).toString() + \"G\"\n }\n }\n else if (!inputs.memory_per_job && inputs.memory_overhead) {\n\n return \"-Xmx20G\"\n }\n else {\n\n return \"-Xmx20G\"\n }\n}" }, { "position": 0, "prefix": "-jar", "valueFrom": "/usr/local/bin/abra2.jar" + }, + { + "position": 0, + "prefix": "--threads", + "valueFrom": "${\n if(inputs.number_of_threads)\n return inputs.number_of_threads\n return runtime.cores\n}" + }, + { + "position": 0, + "prefix": "--tmpdir", + "valueFrom": "${\n if(inputs.working_directory)\n return inputs.working_directory;\n return runtime.tmpdir\n}" } ], "requirements": [ { "class": "ResourceRequirement", - "ramMin": "${ if(inputs.memory_per_job && inputs.memory_overhead) { return inputs.memory_per_job + inputs.memory_overhead } else if (inputs.memory_per_job && !inputs.memory_overhead){ return inputs.memory_per_job + 2000 } else if(!inputs.memory_per_job && inputs.memory_overhead){ return 15000 + inputs.memory_overhead } else { return 17000 } }", - "coresMin": "${ if (inputs.number_of_threads) { return inputs.number_of_threads } else { return 4 } }" + "ramMin": 60000, + "coresMin": 16 }, { "class": "DockerRequirement", - "dockerPull": "mskaccess/abra2:2.22" + "dockerPull": "ghcr.io/msk-access/abra2:2.22" }, { "class": "InlineJavascriptRequirement" @@ -423,7 +425,7 @@ }, { "class": "DockerRequirement", - "dockerPull": "biocontainers/bedtools:v2.28.0_cv2" + "dockerPull": "ghcr.io/msk-access/bedtools:v2.28.0_cv2" }, { "class": "InlineJavascriptRequirement" @@ -528,10 +530,7 @@ "outputs": [ { "id": "#bedtools_merge_v2.28.0_cv2.cwl/bedtools_merge_bed", - "type": [ - "null", - "File" - ], + "type": "File", "outputBinding": { "glob": "${\n if 
(inputs.output_file_name)\n return inputs.output_file_name;\n return inputs.input.basename.replace('.bedgraph', '.bed');\n }" } @@ -549,7 +548,7 @@ }, { "class": "DockerRequirement", - "dockerPull": "biocontainers/bedtools:v2.28.0_cv2" + "dockerPull": "ghcr.io/msk-access/bedtools:v2.28.0_cv2" }, { "class": "InlineJavascriptRequirement" @@ -627,10 +626,7 @@ "position": 0, "prefix": "-I" }, - "doc": "The input file to fix. This option may be specified 0 or more times", - "secondaryFiles": [ - "^.bai" - ] + "doc": "The input file to fix. This option may be specified 0 or more times" }, { "id": "#picard_fix_mate_information_4.1.8.1.cwl/output_file_name", @@ -712,6 +708,14 @@ "prefix": "--CREATE_INDEX" }, "doc": "Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value:false. This option can be set to 'null' to clear the default value. Possible values:{true, false}" + }, + { + "id": "#picard_fix_mate_information_4.1.8.1.cwl/temporary_directory", + "type": [ + "null", + "string" + ], + "doc": "Default value: null. This option may be specified 0 or more times." 
} ], "outputs": [ @@ -730,17 +734,12 @@ "arguments": [ { "position": 0, - "valueFrom": "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n return \"-Xmx15G\"\n }\n else {\n return \"-Xmx15G\"\n }\n}" + "valueFrom": "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n return \"-Xmx20G\"\n }\n else {\n return \"-Xmx20G\"\n }\n}" }, { "position": 0, - "valueFrom": "-XX:-UseGCOverheadLimit", - "shellQuote": false - }, - { - "position": 0, - "valueFrom": "-Djava.io.tmpdir=$(runtime.tmpdir)", - "shellQuote": false + "shellQuote": false, + "valueFrom": "-XX:-UseGCOverheadLimit" }, { "position": 0, @@ -754,7 +753,7 @@ { "position": 0, "prefix": "--TMP_DIR", - "valueFrom": "$(runtime.tmpdir)" + "valueFrom": "${\n if(inputs.temporary_directory)\n return inputs.temporary_directory;\n return runtime.tmpdir\n}" }, { "position": 0, @@ -763,14 +762,17 @@ } ], "requirements": [ + { + "class": 
"ShellCommandRequirement" + }, { "class": "ResourceRequirement", - "ramMin": 25000, - "coresMin": 2 + "ramMin": 30000, + "coresMin": 12 }, { "class": "DockerRequirement", - "dockerPull": "broadinstitute/gatk:4.1.8.1" + "dockerPull": "ghcr.io/msk-access/gatk:4.1.8.1" }, { "class": "InlineJavascriptRequirement" @@ -831,7 +833,7 @@ "string" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 319.96875 + "https://www.sevenbridges.com/y": 426.796875 }, { "id": "#scoring_gap_alignments", @@ -840,16 +842,16 @@ "string" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 426.703125 + "https://www.sevenbridges.com/y": 533.53125 }, { "id": "#reference_fasta", "type": "File", "secondaryFiles": [ - "^.fasta.fai" + ".fai" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 533.359375 + "https://www.sevenbridges.com/y": 640.21875 }, { "id": "#no_sort", @@ -858,7 +860,7 @@ "boolean" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 959.828125 + "https://www.sevenbridges.com/y": 1066.875 }, { "id": "#maximum_mixmatch_rate", @@ -867,7 +869,7 @@ "float" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 1173.140625 + "https://www.sevenbridges.com/y": 1280.25 }, { "id": "#maximum_average_depth", @@ -876,22 +878,16 @@ "int" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 1279.796875 + "https://www.sevenbridges.com/y": 1386.9375 }, { "id": "#input_bam", - "type": [ - "File", - { - "type": "array", - "items": "File" - } - ], + "type": "File", "secondaryFiles": [ "^.bai" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 1386.453125 + "https://www.sevenbridges.com/y": 1493.625 }, { "id": "#ignore_bad_assembly", @@ -900,7 +896,7 @@ "boolean" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 1493.109375 + "https://www.sevenbridges.com/y": 1600.3125 }, { "id": "#contig_anchor", @@ -909,7 
+905,7 @@ "string" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 1706.421875 + "https://www.sevenbridges.com/y": 1813.6875 }, { "id": "#consensus_sequence", @@ -918,7 +914,7 @@ "boolean" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 1813.078125 + "https://www.sevenbridges.com/y": 1920.375 }, { "id": "#bam_index", @@ -927,7 +923,7 @@ "boolean" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 1919.65625 + "https://www.sevenbridges.com/y": 2027.015625 }, { "id": "#number_of_threads", @@ -936,7 +932,7 @@ "int" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 853.25 + "https://www.sevenbridges.com/y": 960.234375 }, { "id": "#option_bedgraph", @@ -945,7 +941,7 @@ "boolean" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 746.59375 + "https://www.sevenbridges.com/y": 853.546875 }, { "id": "#no_edge_complex_indel", @@ -954,7 +950,7 @@ "boolean" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 1066.484375 + "https://www.sevenbridges.com/y": 1173.5625 }, { "id": "#distance_between_features", @@ -963,7 +959,7 @@ "int" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 1599.765625 + "https://www.sevenbridges.com/y": 1707 }, { "id": "#output_bams", @@ -975,7 +971,7 @@ } ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 639.9375 + "https://www.sevenbridges.com/y": 746.859375 }, { "id": "#validation_stringency", @@ -984,7 +980,7 @@ "string" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 106.65625 + "https://www.sevenbridges.com/y": 106.6875 }, { "id": "#sort_order", @@ -993,7 +989,7 @@ "string" ], "https://www.sevenbridges.com/x": 0, - "https://www.sevenbridges.com/y": 213.3125 + "https://www.sevenbridges.com/y": 320.109375 }, { "id": "#output_file_name", @@ -1001,8 +997,8 @@ "null", "string" ], - "https://www.sevenbridges.com/x": 
992.881103515625, - "https://www.sevenbridges.com/y": 748.25 + "https://www.sevenbridges.com/x": 992.927978515625, + "https://www.sevenbridges.com/y": 794.8671875 }, { "id": "#create_bam_index", @@ -1010,8 +1006,17 @@ "null", "boolean" ], - "https://www.sevenbridges.com/x": 992.881103515625, - "https://www.sevenbridges.com/y": 854.828125 + "https://www.sevenbridges.com/x": 992.927978515625, + "https://www.sevenbridges.com/y": 901.5078125 + }, + { + "id": "#temporary_directory", + "type": [ + "null", + "string" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 213.421875 } ], "outputs": [ @@ -1021,8 +1026,11 @@ "#picard_fix_mate_information_4_1_8_1/picard_fix_mate_information_bam" ], "type": "File", - "https://www.sevenbridges.com/x": 1950.827880859375, - "https://www.sevenbridges.com/y": 959.75 + "secondaryFiles": [ + "^.bai" + ], + "https://www.sevenbridges.com/x": 1981.323974609375, + "https://www.sevenbridges.com/y": 1013.4609375 } ], "steps": [ @@ -1039,6 +1047,10 @@ "#input_bam" ] }, + { + "id": "#abra2_2_22/working_directory", + "source": "#temporary_directory" + }, { "id": "#abra2_2_22/reference_fasta", "source": "#reference_fasta" @@ -1105,8 +1117,8 @@ ], "run": "#abra2_2.22.cwl", "label": "abra2_2.22", - "https://www.sevenbridges.com/x": 992.881103515625, - "https://www.sevenbridges.com/y": 1066.40625 + "https://www.sevenbridges.com/x": 992.927978515625, + "https://www.sevenbridges.com/y": 1120.1484375 }, { "id": "#bedtools_genomecov", @@ -1127,8 +1139,8 @@ ], "run": "#bedtools_genomecov_v2.28.0_cv2.cwl", "label": "bedtools_genomecov", - "https://www.sevenbridges.com/x": 269.546875, - "https://www.sevenbridges.com/y": 952.75 + "https://www.sevenbridges.com/x": 269.59375, + "https://www.sevenbridges.com/y": 1006.4609375 }, { "id": "#bedtools_merge", @@ -1149,8 +1161,8 @@ ], "run": "#bedtools_merge_v2.28.0_cv2.cwl", "label": "bedtools_merge", - "https://www.sevenbridges.com/x": 635.4639892578125, - 
"https://www.sevenbridges.com/y": 952.75 + "https://www.sevenbridges.com/x": 635.5108642578125, + "https://www.sevenbridges.com/y": 1006.4609375 }, { "id": "#picard_fix_mate_information_4_1_8_1", @@ -1174,6 +1186,10 @@ { "id": "#picard_fix_mate_information_4_1_8_1/create_bam_index", "source": "#create_bam_index" + }, + { + "id": "#picard_fix_mate_information_4_1_8_1/temporary_directory", + "source": "#temporary_directory" } ], "out": [ @@ -1183,8 +1199,8 @@ ], "run": "#picard_fix_mate_information_4.1.8.1.cwl", "label": "picard_fix_mate_information_4.1.8.1", - "https://www.sevenbridges.com/x": 1534.827880859375, - "https://www.sevenbridges.com/y": 931.6171875 + "https://www.sevenbridges.com/x": 1546.70458984375, + "https://www.sevenbridges.com/y": 978.328125 } ], "requirements": [], @@ -1210,6 +1226,6 @@ ], "cwlVersion": "v1.0", "$schemas": [ - "http://schema.org/version/9.0/schemaorg-current-http.rdf" + "http://schema.org/version/latest/schemaorg-current-http.rdf" ] } \ No newline at end of file diff --git a/qc_collapsed_bam/README.md b/qc_collapsed_bam/README.md new file mode 100644 index 0000000..0011e45 --- /dev/null +++ b/qc_collapsed_bam/README.md @@ -0,0 +1,73 @@ +### Introduction +The sub-workflow calculates quality control metrics for collapsed BAMs. The main outputs are the following: + +1. Targeted capture metrics. +2. Insert size metrics. +3. Alignment metrics. +4. Duplex sequencing metrics (via Fgbio). +5. Extracted genotype information used for fingerprinting and contamination estimation. +6. Genotype metrics to be used for hotspot mutation metrics. + +**Note:** This sub-workflow was originally designed for MSK-ACCESS data. Hence, in addition to the collapsed BAM, it expects two sets of bait/target regions (referred to as pool A and pool B for MSK-ACCESS). 
+ +### Tools used: + +- [GetBaseCountsMultiSample](../command_line_tools/getbasecountsmultisample/1.2.5) +- [Fgbio-CollectDuplexSeqMetrics](../command_line_tools/fgbio_collect_duplex_seq_metrics_1.2.0) +- [bam_qc_stats](../bam_qc_stats/README.md) +- [Biometrics](https://msk-access.gitbook.io/biometrics/) + +### Usage + +```bash +usage: qc_collapsed_bam.cwl [-h] --reference REFERENCE + --pool_b_target_intervals POOL_B_TARGET_INTERVALS + --pool_a_target_intervals POOL_A_TARGET_INTERVALS + [--pool_a_bait_intervals POOL_A_BAIT_INTERVALS] + [--pool_b_bait_intervals POOL_B_BAIT_INTERVALS] + [--json] [--plot] + [--minor_threshold MINOR_THRESHOLD] + [--coverage_threshold COVERAGE_THRESHOLD] + [--hsmetrics_minimum_mapping_quality HSMETRICS_MINIMUM_MAPPING_QUALITY] + [--hsmetrics_minimum_base_quality HSMETRICS_MINIMUM_BASE_QUALITY] + [--hsmetrics_coverage_cap HSMETRICS_COVERAGE_CAP] + [--prefix PREFIX] + [--major_threshold MAJOR_THRESHOLD] [--json_1] + --vcf_file VCF_FILE --sample_name SAMPLE_NAME + [--sample_sex SAMPLE_SEX] + [--sample_group SAMPLE_GROUP] --maf MAF + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --reference REFERENCE + --pool_b_target_intervals POOL_B_TARGET_INTERVALS + --pool_a_target_intervals POOL_A_TARGET_INTERVALS + --pool_a_bait_intervals POOL_A_BAIT_INTERVALS + Optional set of intervals over which to restrict + analysis. [Optional]. + --pool_b_bait_intervals POOL_B_BAIT_INTERVALS + Optional set of intervals over which to restrict + analysis. [Optional]. + --json Also output data in JSON format. + --plot Also output plots of the data. + --minor_threshold MINOR_THRESHOLD + Minor contamination threshold for bad sample. + --coverage_threshold COVERAGE_THRESHOLD + Samples with Y chromosome above this value will be + considered male.
+ --hsmetrics_minimum_mapping_quality HSMETRICS_MINIMUM_MAPPING_QUALITY + --hsmetrics_minimum_base_quality HSMETRICS_MINIMUM_BASE_QUALITY + --hsmetrics_coverage_cap HSMETRICS_COVERAGE_CAP + --prefix PREFIX + --major_threshold MAJOR_THRESHOLD + --json_1 + --vcf_file VCF_FILE + --sample_name SAMPLE_NAME + --sample_sex SAMPLE_SEX + --sample_group SAMPLE_GROUP + --maf MAF +``` diff --git a/qc_collapsed_bam/qc_collapsed_bam.cwl b/qc_collapsed_bam/qc_collapsed_bam.cwl new file mode 100644 index 0000000..b0c1a50 --- /dev/null +++ b/qc_collapsed_bam/qc_collapsed_bam.cwl @@ -0,0 +1,677 @@ +class: Workflow +cwlVersion: v1.0 +id: qc_collapsed_bam +label: qc_collapsed_bam +$namespaces: + sbg: 'https://www.sevenbridges.com/' +inputs: + - id: reference + type: File + secondaryFiles: + - ^.fasta.fai + - ^.dict + 'sbg:x': 0 + 'sbg:y': 824.90625 + - id: pool_b_target_intervals + type: File + label: pool_b_target_intervals + 'sbg:x': 0 + 'sbg:y': 1038.59375 + - id: pool_a_target_intervals + type: File + label: pool_a_target_intervals + 'sbg:x': 0 + 'sbg:y': 1252.28125 + - id: collapsed_bam + type: + - File + - type: array + items: File + label: collapsed_bam + secondaryFiles: + - ^.bai + 'sbg:x': 0 + 'sbg:y': 2748.09375 + - id: group_reads_by_umi_bam + type: + - File + - type: array + items: File + label: group_reads_by_umi_bam + doc: Input BAM file generated by GroupReadByUmi. + 'sbg:x': 0 + 'sbg:y': 2534.40625 + - id: pool_a_bait_intervals + type: File? + label: pool_a_bait_intervals + doc: 'Optional set of intervals over which to restrict analysis. [Optional].' + 'sbg:x': 0 + 'sbg:y': 1359.125 + - id: pool_b_bait_intervals + type: File? + label: pool_b_bait_intervals + doc: 'Optional set of intervals over which to restrict analysis. [Optional].' + 'sbg:x': 0 + 'sbg:y': 1145.4375 + - id: json + type: boolean? + doc: Also output data in JSON format. + 'sbg:x': 0 + 'sbg:y': 2107.03125 + - id: plot + type: boolean? + doc: Also output plots of the data. 
+ 'sbg:x': 0 + 'sbg:y': 1572.8125 + - id: minor_threshold + type: float? + doc: Minor contamination threshold for bad sample. + 'sbg:x': 0 + 'sbg:y': 1679.65625 + - id: coverage_threshold + type: int? + doc: Samples with Y chromosome above this value will be considered male. + 'sbg:x': 0 + 'sbg:y': 2641.25 + - id: hsmetrics_minimum_mapping_quality + type: int? + 'sbg:x': 0 + 'sbg:y': 2213.875 + - id: hsmetrics_minimum_base_quality + type: int? + 'sbg:x': 0 + 'sbg:y': 2320.71875 + - id: hsmetrics_coverage_cap + type: int? + 'sbg:x': 0 + 'sbg:y': 2427.5625 + - id: prefix + type: string? + 'sbg:x': 0 + 'sbg:y': 931.75 + - id: major_threshold + type: float? + 'sbg:x': 0 + 'sbg:y': 1786.5 + - id: json_1 + type: boolean? + 'sbg:x': 0 + 'sbg:y': 2000.1875 + - id: vcf_file + type: File + 'sbg:x': 0 + 'sbg:y': 397.53125 + - id: sample_name + type: string + 'sbg:x': 0 + 'sbg:y': 611.21875 + - id: sample_sex + type: string? + 'sbg:x': 0 + 'sbg:y': 504.375 + - id: sample_group + type: string? + 'sbg:x': 0 + 'sbg:y': 718.0625 + - id: maf + type: File + 'sbg:x': 0 + 'sbg:y': 1893.34375 + - id: bed_file + type: File? 
+ 'sbg:x': -5.7914533615112305 + 'sbg:y': 1468.1177978515625 +outputs: + - id: fgbio_collect_duplex_seq_metrics_duplex_family_size_pool_a + outputSource: + - >- + fgbio_collect_duplex_seq_metrics_1_2_0/fgbio_collect_duplex_seq_metrics_duplex_family_size + type: + - File + - type: array + items: File + label: fgbio_collect_duplex_seq_metrics_duplex_family_size_pool_a + 'sbg:x': 982.1435546875 + 'sbg:y': 2564.25 + - id: fgbio_collect_duplex_seq_metrics_duplex_qc_pool_a + outputSource: + - >- + fgbio_collect_duplex_seq_metrics_1_2_0/fgbio_collect_duplex_seq_metrics_duplex_qc + type: + - 'null' + - File + - type: array + items: File + label: fgbio_collect_duplex_seq_metrics_duplex_qc_pool_a + 'sbg:x': 982.1435546875 + 'sbg:y': 2243.71875 + - id: fgbio_collect_duplex_seq_metrics_duplex_pool_a + outputSource: + - >- + fgbio_collect_duplex_seq_metrics_1_2_0/fgbio_collect_duplex_seq_metrics_duplex_umi_counts + type: + - 'null' + - File + - type: array + items: File + label: fgbio_collect_duplex_seq_metrics_duplex_pool_a + 'sbg:x': 982.1435546875 + 'sbg:y': 2350.5625 + - id: fgbio_collect_duplex_seq_metrics_duplex_yield_metrics_pool_a + outputSource: + - >- + fgbio_collect_duplex_seq_metrics_1_2_0/fgbio_collect_duplex_seq_metrics_duplex_yield_metrics + type: + - File + - type: array + items: File + label: fgbio_collect_duplex_seq_metrics_duplex_yield_metrics_pool_a + 'sbg:x': 982.1435546875 + 'sbg:y': 1923.1875 + - id: fgbio_collect_duplex_seq_metrics_family_size_pool_a + outputSource: + - >- + fgbio_collect_duplex_seq_metrics_1_2_0/fgbio_collect_duplex_seq_metrics_family_size + type: + - File + - type: array + items: File + label: fgbio_collect_duplex_seq_metrics_family_size_pool_a + 'sbg:x': 982.1435546875 + 'sbg:y': 1709.5 + - id: fgbio_collect_duplex_seq_metrics_umi_counts_pool_a + outputSource: + - >- + fgbio_collect_duplex_seq_metrics_1_2_0/fgbio_collect_duplex_seq_metrics_umi_counts + type: + - File + - type: array + items: File + label: 
fgbio_collect_duplex_seq_metrics_umi_counts_pool_a + 'sbg:x': 982.1435546875 + 'sbg:y': 1495.8125 + - id: fgbio_collect_duplex_seq_metrics_duplex_family_size_pool_b + outputSource: + - >- + fgbio_collect_duplex_seq_metrics_1_2_1/fgbio_collect_duplex_seq_metrics_duplex_family_size + type: + - File + - type: array + items: File + label: fgbio_collect_duplex_seq_metrics_duplex_family_size_pool_b + 'sbg:x': 982.1435546875 + 'sbg:y': 2457.40625 + - id: fgbio_collect_duplex_seq_metrics_duplex_qc_pool_b + outputSource: + - >- + fgbio_collect_duplex_seq_metrics_1_2_1/fgbio_collect_duplex_seq_metrics_duplex_qc + type: + - 'null' + - File + - type: array + items: File + label: fgbio_collect_duplex_seq_metrics_duplex_qc_pool_b + 'sbg:x': 982.1435546875 + 'sbg:y': 2136.875 + - id: fgbio_collect_duplex_seq_metrics_duplex_umi_counts_pool_b + outputSource: + - >- + fgbio_collect_duplex_seq_metrics_1_2_1/fgbio_collect_duplex_seq_metrics_duplex_umi_counts + type: + - 'null' + - File + - type: array + items: File + label: fgbio_collect_duplex_seq_metrics_duplex_umi_counts_pool_b + 'sbg:x': 982.1435546875 + 'sbg:y': 2030.03125 + - id: fgbio_collect_duplex_seq_metrics_duplex_yield_metrics_pool_b + outputSource: + - >- + fgbio_collect_duplex_seq_metrics_1_2_1/fgbio_collect_duplex_seq_metrics_duplex_yield_metrics + type: + - File + - type: array + items: File + label: fgbio_collect_duplex_seq_metrics_duplex_yield_metrics_pool_b + 'sbg:x': 982.1435546875 + 'sbg:y': 1816.34375 + - id: fgbio_collect_duplex_seq_metrics_family_size_pool_b + outputSource: + - >- + fgbio_collect_duplex_seq_metrics_1_2_1/fgbio_collect_duplex_seq_metrics_family_size + type: + - File + - type: array + items: File + label: fgbio_collect_duplex_seq_metrics_family_size_pool_b + 'sbg:x': 982.1435546875 + 'sbg:y': 1602.65625 + - id: fgbio_collect_duplex_seq_metrics_umi_counts_pool_b + outputSource: + - >- + fgbio_collect_duplex_seq_metrics_1_2_1/fgbio_collect_duplex_seq_metrics_umi_counts + type: + - File + - type: 
array + items: File + label: fgbio_collect_duplex_seq_metrics_umi_counts_pool_b + 'sbg:x': 982.1435546875 + 'sbg:y': 1388.96875 + - id: biometrics_minor_csv + outputSource: + - biometrics_minor/biometrics_minor_csv + type: + - File + - type: array + items: File + 'sbg:x': 1547.1123046875 + 'sbg:y': 1679.65625 + - id: biometrics_minor_json + outputSource: + - biometrics_minor/biometrics_minor_json + type: + - 'null' + - File + - type: array + items: File + 'sbg:x': 1547.1123046875 + 'sbg:y': 1572.8125 + - id: biometrics_minor_plot + outputSource: + - biometrics_minor/biometrics_minor_plot + type: + - 'null' + - File + - type: array + items: File + 'sbg:x': 1547.1123046875 + 'sbg:y': 1465.96875 + - id: biometrics_minor_sites_plot + outputSource: + - biometrics_minor/biometrics_minor_sites_plot + type: + - 'null' + - File + - type: array + items: File + 'sbg:x': 1547.1123046875 + 'sbg:y': 1359.125 + - id: biometrics_sexmismatch_json + outputSource: + - biometrics_sexmismatch/biometrics_sexmismatch_json + type: + - 'null' + - File + - type: array + items: File + 'sbg:x': 1547.1123046875 + 'sbg:y': 1145.4375 + - id: biometrics_sexmismatch_csv + outputSource: + - biometrics_sexmismatch/biometrics_sexmismatch_csv + type: + - File + - type: array + items: File + 'sbg:x': 1547.1123046875 + 'sbg:y': 1252.28125 + - id: gatk_collect_insert_size_metrics_txt_pool_b + outputSource: + - bam_qc_stats_pool_b/gatk_collect_insert_size_metrics_txt + type: + - File + - type: array + items: File + label: gatk_collect_insert_size_metrics_txt_pool_b + 'sbg:x': 982.1435546875 + 'sbg:y': 0 + - id: gatk_collect_insert_size_metrics_histogram_pdf_pool_b + outputSource: + - bam_qc_stats_pool_b/gatk_collect_insert_size_metrics_histogram_pdf + type: + - File + - type: array + items: File + label: gatk_collect_insert_size_metrics_histogram_pdf_pool_b + 'sbg:x': 982.1435546875 + 'sbg:y': 213.6875 + - id: gatk_collect_hs_metrics_txt_pool_b + outputSource: + - 
bam_qc_stats_pool_b/gatk_collect_hs_metrics_txt + type: + - File + - type: array + items: File + label: gatk_collect_hs_metrics_txt_pool_b + 'sbg:x': 982.1435546875 + 'sbg:y': 427.375 + - id: gatk_collect_hs_metrics_per_target_coverage_txt_pool_b + outputSource: + - bam_qc_stats_pool_b/gatk_collect_hs_metrics_per_target_coverage_txt + type: + - File + - type: array + items: File + label: gatk_collect_hs_metrics_per_target_coverage_txt_pool_b + 'sbg:x': 982.1435546875 + 'sbg:y': 641.0625 + - id: gatk_collect_hs_metrics_per_base_coverage_txt_pool_b + outputSource: + - bam_qc_stats_pool_b/gatk_collect_hs_metrics_per_base_coverage_txt + type: + - File + - type: array + items: File + label: gatk_collect_hs_metrics_per_base_coverage_txt_pool_b + 'sbg:x': 982.1435546875 + 'sbg:y': 854.75 + - id: gatk_collect_alignment_summary_metrics_txt_pool_b + outputSource: + - bam_qc_stats_pool_b/gatk_collect_alignment_summary_metrics_txt + type: + - File + - type: array + items: File + label: gatk_collect_alignment_summary_metrics_txt_pool_b + 'sbg:x': 982.1435546875 + 'sbg:y': 1068.4375 + - id: gatk_collect_insert_size_metrics_txt_pool_a + outputSource: + - bam_qc_stats_pool_a/gatk_collect_insert_size_metrics_txt + type: + - File + - type: array + items: File + label: gatk_collect_insert_size_metrics_txt_pool_a + 'sbg:x': 982.1435546875 + 'sbg:y': 106.84375 + - id: gatk_collect_insert_size_metrics_histogram_pdf_pool_a + outputSource: + - bam_qc_stats_pool_a/gatk_collect_insert_size_metrics_histogram_pdf + type: + - File + - type: array + items: File + label: gatk_collect_insert_size_metrics_histogram_pdf_pool_a + 'sbg:x': 982.1435546875 + 'sbg:y': 320.53125 + - id: gatk_collect_hs_metrics_txt_pool_a + outputSource: + - bam_qc_stats_pool_a/gatk_collect_hs_metrics_txt + type: + - File + - type: array + items: File + label: gatk_collect_hs_metrics_txt_pool_a + 'sbg:x': 982.1435546875 + 'sbg:y': 534.21875 + - id: gatk_collect_hs_metrics_per_target_coverage_txt_pool_a + outputSource: + - 
bam_qc_stats_pool_a/gatk_collect_hs_metrics_per_target_coverage_txt + type: + - File + - type: array + items: File + label: gatk_collect_hs_metrics_per_target_coverage_txt_pool_a + 'sbg:x': 982.1435546875 + 'sbg:y': 747.90625 + - id: gatk_collect_hs_metrics_per_base_coverage_txt_pool_a + outputSource: + - bam_qc_stats_pool_a/gatk_collect_hs_metrics_per_base_coverage_txt + type: + - File + - type: array + items: File + label: gatk_collect_hs_metrics_per_base_coverage_txt_pool_a + 'sbg:x': 982.1435546875 + 'sbg:y': 961.59375 + - id: gatk_collect_alignment_summary_metrics_txt_pool_a + outputSource: + - bam_qc_stats_pool_a/gatk_collect_alignment_summary_metrics_txt + type: + - File + - type: array + items: File + label: gatk_collect_alignment_summary_metrics_txt_pool_a + 'sbg:x': 982.1435546875 + 'sbg:y': 1175.28125 + - id: biometrics_major_plot + outputSource: + - biometrics_major_0_2_13/biometrics_major_plot + type: File? + 'sbg:x': 1547.1123046875 + 'sbg:y': 1786.5 + - id: biometrics_major_json + outputSource: + - biometrics_major_0_2_13/biometrics_major_json + type: File? 
+ 'sbg:x': 1547.1123046875 + 'sbg:y': 1893.34375 + - id: biometrics_major_csv + outputSource: + - biometrics_major_0_2_13/biometrics_major_csv + type: File + 'sbg:x': 1547.1123046875 + 'sbg:y': 2000.1875 + - id: biometrics_extract_pickle + outputSource: + - biometrics_extract_0_2_13/biometrics_extract_pickle + type: File + 'sbg:x': 982.1435546875 + 'sbg:y': 3145.625 + - id: fillout_maf + outputSource: + - getbasecountsmultisample_1_2_5/fillout + type: File + 'sbg:x': 982.1435546875 + 'sbg:y': 1282.125 +steps: + - id: bam_qc_stats_pool_b + in: + - id: input + source: + - collapsed_bam + - id: target_intervals + source: pool_b_target_intervals + - id: bait_intervals + source: pool_b_bait_intervals + - id: reference + source: reference + - id: hsmetrics_minimum_mapping_quality + source: hsmetrics_minimum_mapping_quality + - id: hsmetrics_minimum_base_quality + source: hsmetrics_minimum_base_quality + - id: hsmetrics_coverage_cap + source: hsmetrics_coverage_cap + out: + - id: gatk_collect_insert_size_metrics_histogram_pdf + - id: gatk_collect_insert_size_metrics_txt + - id: gatk_collect_hs_metrics_txt + - id: gatk_collect_hs_metrics_per_base_coverage_txt + - id: gatk_collect_hs_metrics_per_target_coverage_txt + - id: gatk_collect_alignment_summary_metrics_txt + run: ../bam_qc_stats/bam_qc_stats.cwl + label: bam_qc_stats_pool_b + 'sbg:x': 351.4375 + 'sbg:y': 1796.078125 + - id: bam_qc_stats_pool_a + in: + - id: input + source: + - collapsed_bam + - id: target_intervals + source: pool_a_target_intervals + - id: bait_intervals + source: pool_a_bait_intervals + - id: reference + source: reference + - id: hsmetrics_minimum_mapping_quality + source: hsmetrics_minimum_mapping_quality + - id: hsmetrics_minimum_base_quality + source: hsmetrics_minimum_base_quality + - id: hsmetrics_coverage_cap + source: hsmetrics_coverage_cap + out: + - id: gatk_collect_insert_size_metrics_histogram_pdf + - id: gatk_collect_insert_size_metrics_txt + - id: gatk_collect_hs_metrics_txt + - id: 
gatk_collect_hs_metrics_per_base_coverage_txt + - id: gatk_collect_hs_metrics_per_target_coverage_txt + - id: gatk_collect_alignment_summary_metrics_txt + run: ../bam_qc_stats/bam_qc_stats.cwl + label: bam_qc_stats_pool_a + 'sbg:x': 351.4375 + 'sbg:y': 1986.921875 + - id: fgbio_collect_duplex_seq_metrics_1_2_0 + in: + - id: input + source: group_reads_by_umi_bam + - id: intervals + source: pool_a_bait_intervals + out: + - id: fgbio_collect_duplex_seq_metrics_family_size + - id: fgbio_collect_duplex_seq_metrics_duplex_family_size + - id: fgbio_collect_duplex_seq_metrics_duplex_yield_metrics + - id: fgbio_collect_duplex_seq_metrics_umi_counts + - id: fgbio_collect_duplex_seq_metrics_duplex_qc + - id: fgbio_collect_duplex_seq_metrics_duplex_umi_counts + run: >- + ../command_line_tools/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl + label: fgbio_collect_duplex_seq_metrics_1.2.0 + 'sbg:x': 351.4375 + 'sbg:y': 1435.390625 + - id: fgbio_collect_duplex_seq_metrics_1_2_1 + in: + - id: input + source: group_reads_by_umi_bam + - id: intervals + source: pool_b_bait_intervals + out: + - id: fgbio_collect_duplex_seq_metrics_family_size + - id: fgbio_collect_duplex_seq_metrics_duplex_family_size + - id: fgbio_collect_duplex_seq_metrics_duplex_yield_metrics + - id: fgbio_collect_duplex_seq_metrics_umi_counts + - id: fgbio_collect_duplex_seq_metrics_duplex_qc + - id: fgbio_collect_duplex_seq_metrics_duplex_umi_counts + run: >- + ../command_line_tools/fgbio_collect_duplex_seq_metrics_1.2.0/fgbio_collect_duplex_seq_metrics_1.2.0.cwl + label: fgbio_collect_duplex_seq_metrics_1.2.0 + 'sbg:x': 351.4375 + 'sbg:y': 1258.546875 + - id: biometrics_minor + in: + - id: input + linkMerge: merge_nested + source: + - biometrics_extract_0_2_13/biometrics_extract_pickle + - id: minor_threshold + source: minor_threshold + - id: prefix + default: collapsed + source: prefix + - id: plot + default: false + source: plot + - id: json + default: true + source: json + 
out: + - id: biometrics_minor_csv + - id: biometrics_minor_json + - id: biometrics_minor_plot + - id: biometrics_minor_sites_plot + run: ../command_line_tools/biometrics_minor/0.2.13/biometrics_minor.cwl + 'sbg:x': 982.1435546875 + 'sbg:y': 2847.9375 + - id: biometrics_sexmismatch + in: + - id: input + linkMerge: merge_flattened + source: + - biometrics_extract_0_2_13/biometrics_extract_pickle + - id: coverage_threshold + source: coverage_threshold + - id: prefix + default: collapsed + source: prefix + - id: json + source: json + out: + - id: biometrics_sexmismatch_csv + - id: biometrics_sexmismatch_json + run: >- + ../command_line_tools/biometrics_sexmismatch/0.2.13/biometrics_sexmismatch.cwl + 'sbg:x': 982.1435546875 + 'sbg:y': 2692.09375 + - id: biometrics_major_0_2_13 + in: + - id: input + linkMerge: merge_nested + source: + - biometrics_extract_0_2_13/biometrics_extract_pickle + - id: major_threshold + source: major_threshold + - id: prefix + source: prefix + - id: plot + source: plot + - id: json + source: json_1 + out: + - id: biometrics_major_csv + - id: biometrics_major_json + - id: biometrics_major_plot + run: ../command_line_tools/biometrics_major/0.2.13/biometrics_major.cwl + 'sbg:x': 982.1435546875 + 'sbg:y': 3010.78125 + - id: biometrics_extract_0_2_13 + in: + - id: sample_bam + source: collapsed_bam + - id: sample_sex + source: sample_sex + - id: sample_group + source: sample_group + - id: sample_name + source: sample_name + - id: fafile + source: reference + - id: vcf_file + source: vcf_file + - id: bed_file + source: bed_file + out: + - id: biometrics_extract_pickle + run: ../command_line_tools/biometrics_extract/0.2.13/biometrics_extract.cwl + 'sbg:x': 351.4375 + 'sbg:y': 1612.234375 + - id: getbasecountsmultisample_1_2_5 + in: + - id: genotyping_bams + source: + - collapsed_bam + - id: genotyping_bams_ids + source: + - sample_name + - id: filter_duplicate + default: 0 + - id: fragment_count + default: 1 + - id: maf + source: maf + - id: output + 
source: sample_name + valueFrom: $(self + '_collapsed_hotspots_fillout.maf') + - id: ref_fasta + source: reference + out: + - id: fillout + run: >- + ../command_line_tools/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl + label: getbasecountsmultisample_1.2.5 + 'sbg:x': 351.4375 + 'sbg:y': 1102.703125 +requirements: + - class: SubworkflowFeatureRequirement + - class: InlineJavascriptRequirement diff --git a/qc_collapsed_bam/qc_collapsed_bam__packed.cwl b/qc_collapsed_bam/qc_collapsed_bam__packed.cwl new file mode 100644 index 0000000..39fbd39 --- /dev/null +++ b/qc_collapsed_bam/qc_collapsed_bam__packed.cwl @@ -0,0 +1,3695 @@ +{ + "$graph": [ + { + "class": "Workflow", + "id": "#bam_qc_stats.cwl", + "label": "bam_qc_stats", + "inputs": [ + { + "id": "#bam_qc_stats.cwl/input", + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "secondaryFiles": [ + "^.bai" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 374.0625 + }, + { + "id": "#bam_qc_stats.cwl/target_intervals", + "type": "File", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 160.3125 + }, + { + "id": "#bam_qc_stats.cwl/bait_intervals", + "type": "File", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 480.9375 + }, + { + "id": "#bam_qc_stats.cwl/reference", + "type": "File", + "secondaryFiles": [ + "^.fasta.fai", + "^.dict" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 267.1875 + }, + { + "id": "#bam_qc_stats.cwl/temporary_directory", + "type": [ + "null", + "string" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 53.4375 + }, + { + "id": "#bam_qc_stats.cwl/hsmetrics_minimum_mapping_quality", + "type": [ + "null", + "int" + ], + "label": "hsmetrics_minimum_mapping_quality", + "https://www.sevenbridges.com/x": 1, + "https://www.sevenbridges.com/y": 613 + }, + { + "id": "#bam_qc_stats.cwl/hsmetrics_minimum_base_quality", + 
"type": [ + "null", + "int" + ], + "label": "hsmetrics_minimum_base_quality", + "https://www.sevenbridges.com/x": 3, + "https://www.sevenbridges.com/y": 743 + }, + { + "id": "#bam_qc_stats.cwl/hsmetrics_coverage_cap", + "type": [ + "null", + "int" + ], + "label": "hsmetrics_coverage_cap", + "https://www.sevenbridges.com/x": 2, + "https://www.sevenbridges.com/y": 872 + } + ], + "outputs": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_histogram_pdf", + "outputSource": [ + "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0/gatk_collect_insert_size_metrics_histogram_pdf" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + "https://www.sevenbridges.com/y": 106.875 + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_txt", + "outputSource": [ + "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0/gatk_collect_insert_size_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + "https://www.sevenbridges.com/y": 0 + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_txt", + "outputSource": [ + "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + "https://www.sevenbridges.com/y": 213.75 + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_per_base_coverage_txt", + "outputSource": [ + "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_per_base_coverage_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + "https://www.sevenbridges.com/y": 427.5 + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_per_target_coverage_txt", + "outputSource": [ + 
"#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_per_target_coverage_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + "https://www.sevenbridges.com/y": 320.625 + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_alignment_summary_metrics_txt", + "outputSource": [ + "#bam_qc_stats.cwl/gatk_collect_alignment_summary_metrics_4_1_3_0/gatk_collect_alignment_summary_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + "https://www.sevenbridges.com/y": 534.375 + } + ], + "steps": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_alignment_summary_metrics_4_1_3_0", + "in": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_alignment_summary_metrics_4_1_3_0/input", + "source": "#bam_qc_stats.cwl/input" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_alignment_summary_metrics_4_1_3_0/reference", + "source": "#bam_qc_stats.cwl/reference" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_alignment_summary_metrics_4_1_3_0/temporary_directory", + "source": "#bam_qc_stats.cwl/temporary_directory" + } + ], + "out": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_alignment_summary_metrics_4_1_3_0/gatk_collect_alignment_summary_metrics_txt" + } + ], + "run": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl", + "label": "GATK-CollectAlignmentSummaryMetrics", + "https://www.sevenbridges.com/x": 334.2886657714844, + "https://www.sevenbridges.com/y": 560.505126953125 + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0", + "in": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/input", + "source": "#bam_qc_stats.cwl/input" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/bait_intervals", + "source": "#bam_qc_stats.cwl/bait_intervals" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/target_intervals", + "source": 
"#bam_qc_stats.cwl/target_intervals" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/coverage_cap", + "source": "#bam_qc_stats.cwl/hsmetrics_coverage_cap" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/minimum_base_quality", + "source": "#bam_qc_stats.cwl/hsmetrics_minimum_base_quality" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/minimum_mapping_quality", + "source": "#bam_qc_stats.cwl/hsmetrics_minimum_mapping_quality" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/reference", + "source": "#bam_qc_stats.cwl/reference" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/temporary_directory", + "source": "#bam_qc_stats.cwl/temporary_directory" + } + ], + "out": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_txt" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_per_base_coverage_txt" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_per_target_coverage_txt" + } + ], + "run": "#gatk_collect_hs_metrics_4.1.8.0.cwl", + "label": "GATK-CollectHsMetrics", + "https://www.sevenbridges.com/x": 327.8453674316406, + "https://www.sevenbridges.com/y": 372.8453674316406 + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0", + "in": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0/input", + "source": "#bam_qc_stats.cwl/input" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0/histogram_file", + "default": "histogram.pdf" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0/temporary_directory", + "source": "#bam_qc_stats.cwl/temporary_directory" + } + ], + "out": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0/gatk_collect_insert_size_metrics_txt" + }, + { + "id": 
"#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0/gatk_collect_insert_size_metrics_histogram_pdf" + } + ], + "run": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl", + "label": "GATK-CollectInsertSizeMetrics", + "https://www.sevenbridges.com/x": 335.57733154296875, + "https://www.sevenbridges.com/y": 194.7628936767578 + } + ], + "requirements": [], + "https://schema.org/author": [ + { + "class": "https://schema.org/Person", + "https://schema.org/email": "mailto:murphyc4@mskcc.org", + "https://schema.org/identifier": "", + "https://schema.org/name": "Charles Murphy" + } + ], + "https://schema.org/citation": "", + "https://schema.org/codeRepository": "https://github.com/msk-access/uncollapsed_bam_generation", + "https://schema.org/contributor": [ + { + "class": "https://schema.org/Person", + "https://schema.org/email": "mailto:shahr2@mskcc.org", + "https://schema.org/identifier": "https://orcid.org/0000-0001-9042-6213", + "https://schema.org/name": "Ronak Shah" + } + ], + "https://schema.org/dateCreated": "2020-09-23", + "https://schema.org/license": "https://spdx.org/licenses/Apache-2.0", + "$namespaces": { + "sbg": "https://www.sevenbridges.com/" + } + }, + { + "class": "CommandLineTool", + "id": "#biometrics_extract.cwl", + "baseCommand": [ + "biometrics", + "extract" + ], + "inputs": [ + { + "id": "#biometrics_extract.cwl/sample_bam", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "--sample-bam" + }, + "doc": "BAM file.", + "secondaryFiles": [ + "^.bai" + ] + }, + { + "id": "#biometrics_extract.cwl/sample_sex", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--sample-sex" + }, + "doc": "Expected sample sex (i.e. M or F)." + }, + { + "id": "#biometrics_extract.cwl/sample_group", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--sample-group" + }, + "doc": "The sample group (e.g. the sample patient ID)." 
+ }, + { + "id": "#biometrics_extract.cwl/sample_name", + "type": "string", + "inputBinding": { + "position": 0, + "prefix": "--sample-name" + }, + "doc": "Sample name. Required by this wrapper, which uses it to locate the '{sample_name}.pickle' output; the underlying tool would otherwise infer it from the BAM file." + }, + { + "id": "#biometrics_extract.cwl/fafile", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "--fafile" + }, + "doc": "Path to reference fasta.", + "secondaryFiles": [ + "^.fasta.fai" + ] + }, + { + "id": "#biometrics_extract.cwl/vcf_file", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "--vcf" + }, + "doc": "VCF file containing the SNPs to be queried." + }, + { + "id": "#biometrics_extract.cwl/bed_file", + "type": [ + "null", + "File" + ], + "inputBinding": { + "position": 0, + "prefix": "--bed" + }, + "doc": "BED file containing the intervals to be queried." + }, + { + "id": "#biometrics_extract.cwl/database", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--database" + }, + "doc": "Directory to store the intermediate files after running the extraction step." + }, + { + "default": 1, + "id": "#biometrics_extract.cwl/min_mapping_quality", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--min-mapping-quality" + }, + "doc": "Minimum mapping quality of reads to be used for pileup." + }, + { + "default": 1, + "id": "#biometrics_extract.cwl/min_base_quality", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--min-base-quality" + }, + "doc": "Minimum base quality of reads to be used for pileup." + }, + { + "default": 10, + "id": "#biometrics_extract.cwl/min_coverage", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--min-coverage" + }, + "doc": "Minimum coverage to count a site."
+ }, + { + "default": 0.1, + "id": "#biometrics_extract.cwl/min_homozygous_thresh", + "type": [ + "null", + "float" + ], + "inputBinding": { + "position": 0, + "prefix": "--min-homozygous-thresh" + }, + "doc": "Minimum threshold to define homozygous." + }, + { + "id": "#biometrics_extract.cwl/default_genotype", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--default-genotype" + }, + "doc": "Default genotype if coverage is too low (options are Het or Hom)." + } + ], + "outputs": [ + { + "id": "#biometrics_extract.cwl/biometrics_extract_pickle", + "type": "File", + "outputBinding": { + "glob": "${\n if (inputs.database) {\n return inputs.database + '/' + inputs.sample_name + '.pickle';\n } else {\n return inputs.sample_name + '.pickle';\n }\n}" + } + } + ], + "requirements": [ + { + "class": "ResourceRequirement", + "ramMin": 16000, + "coresMin": 2 + }, + { + "class": "DockerRequirement", + "dockerPull": "ghcr.io/msk-access/biometrics:0.2.13" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "http://purl.org/dc/terms/contributor": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charlie Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://purl.org/dc/terms/creator": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charlie Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://usefulinc.com/ns/doap#release": [ + { + "class": "http://usefulinc.com/ns/doap#Version", + "http://usefulinc.com/ns/doap#name": 
"biometrics", + "http://usefulinc.com/ns/doap#revision": "0.2.13" + } + ] + }, + { + "class": "CommandLineTool", + "id": "#biometrics_major.cwl", + "baseCommand": [ + "biometrics", + "major" + ], + "inputs": [ + { + "id": "#biometrics_major.cwl/input", + "type": { + "type": "array", + "items": "File", + "inputBinding": { + "prefix": "--input" + } + }, + "inputBinding": { + "position": 0 + }, + "doc": "Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once." + }, + { + "id": "#biometrics_major.cwl/database", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--database" + }, + "doc": "Directory to store the intermediate files after running the extraction step." + }, + { + "default": 0.6, + "id": "#biometrics_major.cwl/major_threshold", + "type": [ + "null", + "float" + ], + "inputBinding": { + "position": 0, + "prefix": "--major-threshold" + }, + "doc": "Major contamination threshold for bad sample." + }, + { + "id": "#biometrics_major.cwl/prefix", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--prefix" + }, + "doc": "Output file prefix." + }, + { + "id": "#biometrics_major.cwl/plot", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--plot" + }, + "doc": "Also output plots of the data." + }, + { + "id": "#biometrics_major.cwl/json", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--json" + }, + "doc": "Also output data in JSON format." 
+ }, + { + "id": "#biometrics_major.cwl/no_db_comparison", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--no-db-compare" + }, + "doc": "Do not compare the sample(s) you provided to all samples in the database, only compare them with each other." + } + ], + "outputs": [ + { + "id": "#biometrics_major.cwl/biometrics_major_csv", + "type": "File", + "outputBinding": { + "glob": "${\n if (inputs.prefix) {\n return inputs.prefix + '_major_contamination.csv'\n } else {\n return 'major_contamination.csv'\n }\n}" + } + }, + { + "id": "#biometrics_major.cwl/biometrics_major_json", + "type": [ + "null", + "File" + ], + "outputBinding": { + "glob": "${\n if (inputs.prefix) {\n return inputs.prefix + '_major_contamination.json'\n } else {\n return 'major_contamination.json'\n }\n}" + } + }, + { + "id": "#biometrics_major.cwl/biometrics_major_plot", + "type": [ + "null", + "File" + ], + "outputBinding": { + "glob": "${\n return 'major_contamination.html'\n}" + } + } + ], + "requirements": [ + { + "class": "ResourceRequirement", + "ramMin": 16000, + "coresMin": 2 + }, + { + "class": "DockerRequirement", + "dockerPull": "ghcr.io/msk-access/biometrics:0.2.13" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "http://purl.org/dc/terms/contributor": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charlie Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://purl.org/dc/terms/creator": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charlie Murphy" + } + ], + 
"http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://usefulinc.com/ns/doap#release": [ + { + "class": "http://usefulinc.com/ns/doap#Version", + "http://usefulinc.com/ns/doap#name": "biometrics", + "http://usefulinc.com/ns/doap#revision": "0.2.13" + } + ] + }, + { + "class": "CommandLineTool", + "id": "#biometrics_minor.cwl", + "baseCommand": [ + "biometrics", + "minor" + ], + "inputs": [ + { + "id": "#biometrics_minor.cwl/input", + "type": { + "type": "array", + "items": "File", + "inputBinding": { + "prefix": "--input" + } + }, + "inputBinding": { + "position": 0 + }, + "doc": "Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once." + }, + { + "id": "#biometrics_minor.cwl/database", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--database" + }, + "doc": "Directory to store the intermediate files after running the extraction step." + }, + { + "default": 0.002, + "id": "#biometrics_minor.cwl/minor_threshold", + "type": [ + "null", + "float" + ], + "inputBinding": { + "position": 0, + "prefix": "--minor-threshold" + }, + "doc": "Minor contamination threshold for bad sample." + }, + { + "id": "#biometrics_minor.cwl/prefix", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--prefix" + }, + "doc": "Output file prefix." + }, + { + "id": "#biometrics_minor.cwl/plot", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--plot" + }, + "doc": "Also output plots of the data." 
+ }, + { + "id": "#biometrics_minor.cwl/json", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--json" + }, + "doc": "Also output data in JSON format." + }, + { + "id": "#biometrics_minor.cwl/no_db_comparison", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--no-db-compare" + }, + "doc": "Do not compare the sample(s) you provided to all samples in the database, only compare them with each other." + } + ], + "outputs": [ + { + "id": "#biometrics_minor.cwl/biometrics_minor_csv", + "type": "File", + "outputBinding": { + "glob": "${\n if (inputs.prefix) {\n return inputs.prefix + '_minor_contamination.csv'\n } else {\n return 'minor_contamination.csv'\n }\n}" + } + }, + { + "id": "#biometrics_minor.cwl/biometrics_minor_json", + "type": [ + "null", + "File" + ], + "outputBinding": { + "glob": "${\n if (inputs.prefix) {\n return inputs.prefix + '_minor_contamination.json'\n } else {\n return 'minor_contamination.json'\n }\n}" + } + }, + { + "id": "#biometrics_minor.cwl/biometrics_minor_plot", + "type": [ + "null", + "File" + ], + "outputBinding": { + "glob": "${\n return 'minor_contamination.html'\n}" + } + }, + { + "id": "#biometrics_minor.cwl/biometrics_minor_sites_plot", + "type": [ + "null", + "File" + ], + "outputBinding": { + "glob": "${\n return 'minor_contamination_sites.html'\n}" + } + } + ], + "requirements": [ + { + "class": "ResourceRequirement", + "ramMin": 16000, + "coresMin": 2 + }, + { + "class": "DockerRequirement", + "dockerPull": "ghcr.io/msk-access/biometrics:0.2.13" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "http://purl.org/dc/terms/contributor": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charlie Murphy" + } + ], + 
"http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://purl.org/dc/terms/creator": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charlie Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://usefulinc.com/ns/doap#release": [ + { + "class": "http://usefulinc.com/ns/doap#Version", + "http://usefulinc.com/ns/doap#name": "biometrics", + "http://usefulinc.com/ns/doap#revision": "0.2.13" + } + ] + }, + { + "class": "CommandLineTool", + "id": "#biometrics_sexmismatch.cwl", + "baseCommand": [ + "biometrics", + "sexmismatch" + ], + "inputs": [ + { + "id": "#biometrics_sexmismatch.cwl/input", + "type": { + "type": "array", + "items": "File", + "inputBinding": { + "prefix": "--input" + } + }, + "inputBinding": { + "position": 0 + }, + "doc": "Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once." + }, + { + "id": "#biometrics_sexmismatch.cwl/database", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--database" + }, + "doc": "Directory to store the intermediate files after running the extraction step." + }, + { + "default": 50, + "id": "#biometrics_sexmismatch.cwl/coverage_threshold", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--coverage-threshold" + }, + "doc": "Samples with Y chromosome above this value will be considered male." 
+ }, + { + "id": "#biometrics_sexmismatch.cwl/prefix", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--prefix" + }, + "doc": "Output file prefix." + }, + { + "id": "#biometrics_sexmismatch.cwl/json", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--json" + }, + "doc": "Also output data in JSON format." + }, + { + "id": "#biometrics_sexmismatch.cwl/no_db_comparison", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--no-db-compare" + }, + "doc": "Do not compare the sample(s) you provided to all samples in the database, only compare them with each other." + } + ], + "outputs": [ + { + "id": "#biometrics_sexmismatch.cwl/biometrics_sexmismatch_csv", + "type": "File", + "outputBinding": { + "glob": "${\n if (inputs.prefix) {\n return inputs.prefix + '_sex_mismatch.csv'\n } else {\n return 'sex_mismatch.csv'\n }\n}" + } + }, + { + "id": "#biometrics_sexmismatch.cwl/biometrics_sexmismatch_json", + "type": [ + "null", + "File" + ], + "outputBinding": { + "glob": "${\n if (inputs.prefix) {\n return inputs.prefix + '_sex_mismatch.json'\n } else {\n return 'sex_mismatch.json'\n }\n}" + } + } + ], + "requirements": [ + { + "class": "ResourceRequirement", + "ramMin": 16000, + "coresMin": 2 + }, + { + "class": "DockerRequirement", + "dockerPull": "ghcr.io/msk-access/biometrics:0.2.13" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "http://purl.org/dc/terms/contributor": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charlie Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://purl.org/dc/terms/creator": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + 
"http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charlie Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://usefulinc.com/ns/doap#release": [ + { + "class": "http://usefulinc.com/ns/doap#Version", + "http://usefulinc.com/ns/doap#name": "biometrics", + "http://usefulinc.com/ns/doap#revision": "0.2.13" + } + ] + }, + { + "class": "CommandLineTool", + "id": "#fgbio_collect_duplex_seq_metrics_1.2.0.cwl", + "baseCommand": [ + "fgbio" + ], + "inputs": [ + { + "id": "#fgbio_collect_duplex_seq_metrics_1.2.0.cwl/memory_per_job", + "type": [ + "null", + "int" + ], + "doc": "Memory per job in megabytes" + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1.2.0.cwl/memory_overhead", + "type": [ + "null", + "int" + ], + "doc": "Memory overhead per job in megabytes" + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1.2.0.cwl/number_of_threads", + "type": [ + "null", + "int" + ] + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1.2.0.cwl/input", + "type": "File", + "inputBinding": { + "position": 2, + "prefix": "--input" + }, + "doc": "Input BAM file generated by GroupReadByUmi." + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1.2.0.cwl/output_prefix", + "type": [ + "null", + "string" + ], + "doc": "Prefix of output files to write." + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1.2.0.cwl/intervals", + "type": [ + "null", + "File" + ], + "inputBinding": { + "position": 2, + "prefix": "--intervals" + }, + "doc": "Optional set of intervals over which to restrict analysis. [Optional]." + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1.2.0.cwl/description", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 2, + "prefix": "--description" + }, + "doc": "Description of data set used to label plots. Defaults to sample/library. [Optional]." 
+ }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1.2.0.cwl/duplex_umi_counts", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 2, + "prefix": "--duplex-umi-counts" + }, + "doc": "If true, produce the .duplex_umi_counts.txt file with counts of duplex UMI observations. [Optional]." + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1.2.0.cwl/min_ab_reads", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 2, + "prefix": "--min-ab-reads" + }, + "doc": "Minimum AB reads to call a tag family a 'duplex'. [Optional]." + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1.2.0.cwl/min_ba_reads", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 2, + "prefix": "--min-ba-reads" + }, + "doc": "Minimum BA reads to call a tag family a 'duplex'. [Optional]." + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1.2.0.cwl/umi_tag", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 2, + "prefix": "--umi-tag" + }, + "doc": "The tag containing the raw UMI. [Optional]." + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1.2.0.cwl/mi_tag", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 2, + "prefix": "--mi-tag" + }, + "doc": "The output tag for UMI grouping. [Optional]." + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1.2.0.cwl/temporary_directory", + "type": [ + "null", + "string" + ], + "doc": "Default value: null." + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1.2.0.cwl/async_io", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "separate": false, + "prefix": "--async-io=" + }, + "doc": "'Use asynchronous I/O where possible, e.g. 
for SAM and BAM files [=true|false].'" + } + ], + "outputs": [ + { + "id": "#fgbio_collect_duplex_seq_metrics_1.2.0.cwl/fgbio_collect_duplex_seq_metrics_family_size", + "type": "File", + "outputBinding": { + "glob": "${\n if(inputs.output_prefix){\n return inputs.output_prefix + '.family_sizes.txt'\n }\n else{\n return inputs.input.basename.replace('.bam','.family_sizes.txt')\n }\n}" + } + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1.2.0.cwl/fgbio_collect_duplex_seq_metrics_duplex_family_size", + "type": "File", + "outputBinding": { + "glob": "${\n if(inputs.output_prefix){\n return inputs.output_prefix + '.duplex_family_sizes.txt'\n }\n else{\n return inputs.input.basename.replace('.bam','.duplex_family_sizes.txt')\n }\n}" + } + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1.2.0.cwl/fgbio_collect_duplex_seq_metrics_duplex_yield_metrics", + "type": "File", + "outputBinding": { + "glob": "${\n if(inputs.output_prefix){\n return inputs.output_prefix + '.duplex_yield_metrics.txt'\n }\n else{\n return inputs.input.basename.replace('.bam','.duplex_yield_metrics.txt')\n }\n}" + } + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1.2.0.cwl/fgbio_collect_duplex_seq_metrics_umi_counts", + "type": "File", + "outputBinding": { + "glob": "${\n if(inputs.output_prefix){\n return inputs.output_prefix + '.umi_counts.txt'\n }\n else{\n return inputs.input.basename.replace('.bam','.umi_counts.txt')\n }\n}" + } + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1.2.0.cwl/fgbio_collect_duplex_seq_metrics_duplex_qc", + "type": [ + "null", + "File" + ], + "outputBinding": { + "glob": "${\n if(inputs.output_prefix){\n return inputs.output_prefix + '.duplex_qc.pdf'\n }\n else{\n return inputs.input.basename.replace('.bam','.duplex_qc.pdf')\n }\n}" + } + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1.2.0.cwl/fgbio_collect_duplex_seq_metrics_duplex_umi_counts", + "type": [ + "null", + "File" + ], + "outputBinding": { + "glob": "${\n if (inputs.output_prefix) {\n return 
inputs.output_prefix + '.duplex_umi_counts.txt'\n } else {\n return inputs.input.basename.replace('.bam','.duplex_umi_counts.txt')\n }\n}" + } + } + ], + "doc": "Collects a suite of metrics to QC duplex sequencing data.\nInputs ------\nThe input to this tool must be a BAM file that is either:\n1. The exact BAM output by the 'GroupReadsByUmi' tool (in the sort-order it was produced in) 2. A BAM file that has MI tags present on all reads (usually set by 'GroupReadsByUmi' and has been sorted with\n 'SortBam' into 'TemplateCoordinate' order.\n\nCalculation of metrics may be restricted to a set of regions using the '--intervals' parameter. This can significantly affect results as off-target reads in duplex sequencing experiments often have very different properties than on-target reads due to the lack of enrichment.\nSeveral metrics are calculated related to the fraction of tag families that have duplex coverage. The definition of \"duplex\" is controlled by the '--min-ab-reads' and '--min-ba-reads' parameters. The default is to treat any tag family with at least one observation of each strand as a duplex, but this could be made more stringent, e.g. by setting '--min-ab-reads=3 --min-ba-reads=3'. If different thresholds are used then '--min-ab-reads' must be the higher value.\nOutputs -------\nThe following output files are produced:\n1. .family_sizes.txt: metrics on the frequency of different types of families of different sizes 2. .duplex_family_sizes.txt: metrics on the frequency of duplex tag families by the number of observations\n from each strand\n3. .duplex_yield_metrics.txt: summary QC metrics produced using 5%, 10%, 15%...100% of the data 4. .umi_counts.txt: metrics on the frequency of observations of UMIs within reads and tag families 5. .duplex_qc.pdf: a series of plots generated from the preceding metrics files for visualization 6. .duplex_umi_counts.txt: (optional) metrics on the frequency of observations of duplex UMIs within reads\n and tag families. 
This file is only produced if the '--duplex-umi-counts' option is used as it requires significantly\n more memory to track all pairs of UMIs seen when a large number of UMI sequences are present.\n\nWithin the metrics files the prefixes 'CS', 'SS' and 'DS' are used to mean:\n* CS: tag families where membership is defined solely on matching genome coordinates and strand * SS: single-stranded tag families where membership is defined by genome coordinates, strand and UMI; i.e. 50/A and\n 50/B are considered different tag families.\n* DS: double-stranded tag families where membership is collapsed across single-stranded tag families from the same\n double-stranded source molecule; i.e. 50/A and 50/B become one family\n\nRequirements ------------\nFor plots to be generated R must be installed and the ggplot2 package installed with suggested dependencies. Successfully executing the following in R will ensure a working installation:\ninstall.packages(\"ggplot2\", repos=\"http://cran.us.r-project.org\", dependencies=TRUE)", + "label": "fgbio_collect_duplex_seq_metrics_1.2.0", + "arguments": [ + { + "position": 0, + "valueFrom": "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n return \"-Xmx12G\"\n }\n else {\n return \"-Xmx12G\"\n }\n}" + }, + { + "position": 0, + "valueFrom": "-XX:-UseGCOverheadLimit" + }, + { + "position": 1, + "valueFrom": "CollectDuplexSeqMetrics" + }, + { + "position": 0, + "prefix": "--tmp-dir=", +
"separate": false, + "valueFrom": "${\n if(inputs.temporary_directory)\n return inputs.temporary_directory;\n return runtime.tmpdir\n}" + }, + { + "position": 2, + "prefix": "--output", + "valueFrom": "${\n if(inputs.output_prefix){\n return inputs.output_prefix\n }\n else{\n return inputs.input.basename.replace('.bam','')\n }\n}" + } + ], + "requirements": [ + { + "class": "ResourceRequirement", + "ramMin": 16000, + "coresMin": 2 + }, + { + "class": "DockerRequirement", + "dockerPull": "ghcr.io/msk-access/fgbio:1.2.0" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "http://purl.org/dc/terms/contributor": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://purl.org/dc/terms/creator": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://usefulinc.com/ns/doap#release": [ + { + "class": "http://usefulinc.com/ns/doap#Version", + "http://usefulinc.com/ns/doap#name": "fgbio CollectDuplexSeqMetrics", + "http://usefulinc.com/ns/doap#revision": "1.2.0" + } + ] + }, + { + "class": "CommandLineTool", + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl", + "baseCommand": [ + "gatk", + "CollectAlignmentSummaryMetrics" + ], + "inputs": [ + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/memory_per_job", + "type": [ + "null", + "int" + ], + "doc": "Memory per job in megabytes" + }, + { + "id":
"#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/memory_overhead", + "type": [ + "null", + "int" + ], + "doc": "Memory overhead per job in megabytes" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/number_of_threads", + "type": [ + "null", + "int" + ] + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/input", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "-I" + }, + "doc": "Input file (bam or sam). Required." + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/output_file_name", + "type": [ + "null", + "string" + ], + "doc": "File to write the output to. Required." + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/reference", + "type": [ + "null", + "File" + ], + "inputBinding": { + "position": 0, + "prefix": "-R" + }, + "doc": "Reference sequence file. Note that while this argument is not required, without it only a small subset of the metrics will be calculated. Note also that if a reference sequence is provided, it must be accompanied by a sequence dictionary. Default value: null.", + "secondaryFiles": [ + "^.fasta.fai", + "^.dict" + ] + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/adaptor_sequence", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--ADAPTER_SEQUENCE" + }, + "doc": "List of adapter sequences to use when processing the alignment metrics. This argument may be specified 0 or more times. Default value: [AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG, AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCTGCTTG, AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNNNATCTCGTATGCCGTCTTCTGCTTG]." 
+ }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/metrics_acciumulation_level", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--METRIC_ACCUMULATION_LEVEL" + }, + "doc": "The level(s) at which to accumulate metrics. Default value: [ALL_READS]. This option can be set to 'null' to clear the default value. Possible values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be specified 0 or more times. This option can be set to 'null' to clear the default list." + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/expected_pair_orientations", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--EXPECTED_PAIR_ORIENTATIONS" + }, + "doc": "Paired-end reads that do not have this expected orientation will be considered chimeric. This argument may be specified 0 or more times. Default value: [FR]. Possible values: {FR, RF, TANDEM}" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/is_bisulfite_sequenced", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--IS_BISULFITE_SEQUENCED" + }, + "doc": "Whether the SAM or BAM file consists of bisulfite sequenced reads. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/max_insert_size", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--MAX_INSERT_SIZE" + }, + "doc": "Paired-end reads above this insert size will be considered chimeric along with inter-chromosomal pairs. Default value: 100000." + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/validation_stringency", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--VALIDATION_STRINGENCY" + }, + "doc": "Validation stringency for all SAM files read by this program. 
Setting stringency to SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded. Default value: STRICT. This option can be set to 'null' to clear the default value. Possible values: {STRICT,LENIENT, SILENT}" + }, + { + "default": true, + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/assume_sorted", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--ASSUME_SORTED" + }, + "doc": "If true (default), then the sort order in the header file will be ignored. Default value: true. This option can be set to 'null' to clear the default value. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/stop_after", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--STOP_AFTER" + }, + "doc": "Stop after processing N reads, mainly for debugging. Default value: 0. This option can be set to 'null' to clear the default value." + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/create_index", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CREATE_INDEX" + }, + "doc": "Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/create_md5_file", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CREATE_MD5_FILE" + }, + "doc": "Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: false. 
Possible values: {true, false}" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/use_jdk_deflater", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--USE_JDK_DEFLATER" + }, + "doc": "Use the JDK Deflater instead of the Intel Deflater for writing compressed output" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/use_jdk_inflater", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--USE_JDK_INFLATER" + }, + "doc": "Use the JDK Inflater instead of the Intel Inflater for reading compressed input" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/temporary_directory", + "type": [ + "null", + "string" + ], + "doc": "Default value: null. This option may be specified 0 or more times." + } + ], + "outputs": [ + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/gatk_collect_alignment_summary_metrics_txt", + "type": "File", + "outputBinding": { + "glob": "${\n if (inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_alignment_summary_metrics.txt')\n }\n}" + } + } + ], + "label": "GATK-CollectAlignmentSummaryMetrics", + "arguments": [ + { + "position": 0, + "prefix": "--java-options", + "valueFrom": "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n return \"-Xmx15G\"\n }\n else {\n return \"-Xmx15G\"\n }\n}" + }, 
+ { + "position": 0, + "prefix": "--TMP_DIR", + "valueFrom": "${\n if(inputs.temporary_directory)\n return inputs.temporary_directory;\n return runtime.tmpdir\n}" + }, + { + "position": 0, + "prefix": "-O", + "valueFrom": "${\n if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_alignment_summary_metrics.txt')\n }\n}" + } + ], + "requirements": [ + { + "class": "ResourceRequirement", + "ramMin": 32000, + "coresMin": 1 + }, + { + "class": "DockerRequirement", + "dockerPull": "ghcr.io/msk-access/gatk:4.1.8.0" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "http://purl.org/dc/terms/contributor": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://purl.org/dc/terms/creator": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://usefulinc.com/ns/doap#release": [ + { + "class": "http://usefulinc.com/ns/doap#Version", + "http://usefulinc.com/ns/doap#name": "gatk4", + "http://usefulinc.com/ns/doap#revision": "4.1.8.0" + } + ] + }, + { + "class": "CommandLineTool", + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl", + "baseCommand": [ + "gatk", + "CollectHsMetrics" + ], + "inputs": [ + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/input", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "-I" + }, + "doc": "An aligned SAM or BAM file. Required." 
+ }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/bait_intervals", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "--BAIT_INTERVALS" + }, + "doc": "An interval list file that contains the locations of the baits used. This argument must be specified at least once. Required." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/target_intervals", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "--TARGET_INTERVALS" + }, + "doc": "An interval list file that contains the locations of the targets. This argument must be specified at least once. Required." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/output_file_name", + "type": [ + "null", + "string" + ], + "doc": "The output file to write the metrics to. Required." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/per_base_coverage", + "type": [ + "null", + "string" + ], + "doc": "An optional file to output per base coverage information to. The per-base file contains one line per target base and can grow very large. It is not recommended for use with large target sets. Default value: null." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/per_target_coverage", + "type": [ + "null", + "string" + ], + "doc": "An optional file to output per target coverage information to. Default value: null." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/theoretical_sensitivity_output", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--THEORETICAL_SENSITIVITY_OUTPUT" + }, + "doc": "Output for Theoretical Sensitivity metrics where the allele fractions are provided by the ALLELE_FRACTION argument. Default value: null." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/allele_fraction", + "type": [ + "null", + "float" + ], + "inputBinding": { + "position": 0, + "prefix": "--ALLELE_FRACTION" + }, + "doc": "Allele fraction for which to calculate theoretical sensitivity. This argument may be specified 0 or more times. 
Default value: [0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.3, 0.5]." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/bait_set_name", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--BAIT_SET_NAME" + }, + "doc": "Bait set name. If not provided it is inferred from the filename of the bait intervals. Default value: null." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/clip_overlapping_reads", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CLIP_OVERLAPPING_READS" + }, + "doc": "True if we are to clip overlapping reads, false otherwise. Default value: true. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/coverage_cap", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--COVERAGE_CAP" + }, + "doc": "Parameter to set a max coverage limit for Theoretical Sensitivity calculations. Default is 200. Default value: 200." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/include_indels", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--INCLUDE_INDELS" + }, + "doc": "If true count inserted bases as on target and deleted bases as covered by a read. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/minimum_base_quality", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--MINIMUM_BASE_QUALITY" + }, + "doc": "Minimum base quality for a base to contribute coverage. Default value: 20." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/minimum_mapping_quality", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--MINIMUM_MAPPING_QUALITY" + }, + "doc": "Minimum mapping quality for a read to contribute coverage. Default value: 20." 
+ }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/near_distance", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--NEAR_DISTANCE" + }, + "doc": "The maximum distance between a read and the nearest probe/bait/amplicon for the read to be considered 'near probe' and included in percent selected. Default value: 250." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/sample_size", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--SAMPLE_SIZE" + }, + "doc": "Sample Size used for Theoretical Het Sensitivity sampling. Default is 10000. Default value: 10000." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/reference", + "type": [ + "null", + "File" + ], + "inputBinding": { + "position": 0, + "prefix": "-R" + }, + "doc": "Reference sequence file. Note that while this argument is not required, without it only a small subset of the metrics will be calculated. Note also that if a reference sequence is provided, it must be accompanied by a sequence dictionary. Default value: null.", + "secondaryFiles": [ + "^.fasta.fai", + "^.dict" + ] + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/metrics_acciumulation_level", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--METRIC_ACCUMULATION_LEVEL" + }, + "doc": "The level(s) at which to accumulate metrics. Default value: [ALL_READS]. This option can be set to 'null' to clear the default value. Possible values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be specified 0 or more times. This option can be set to 'null' to clear the default list." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/validation_stringency", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--VALIDATION_STRINGENCY" + }, + "doc": "Validation stringency for all SAM files read by this program. 
Setting stringency to SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded. Default value: STRICT. This option can be set to 'null' to clear the default value. Possible values: {STRICT,LENIENT, SILENT}" + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/create_index", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CREATE_INDEX" + }, + "doc": "Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/create_md5_file", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CREATE_MD5_FILE" + }, + "doc": "Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/memory_per_job", + "type": [ + "null", + "int" + ], + "doc": "Memory per job in megabytes" + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/memory_overhead", + "type": [ + "null", + "int" + ], + "doc": "Memory overhead per job in megabytes" + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/number_of_threads", + "type": [ + "null", + "int" + ] + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/temporary_directory", + "type": [ + "null", + "string" + ], + "doc": "Default value: null. This option may be specified 0 or more times." 
+ } + ], + "outputs": [ + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/gatk_collect_hs_metrics_txt", + "type": "File", + "outputBinding": { + "glob": "${\n if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_hs_metrics.txt')\n }\n}" + } + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/gatk_collect_hs_metrics_per_base_coverage_txt", + "type": "File", + "outputBinding": { + "glob": "${\n if(inputs.per_base_coverage){\n return inputs.per_base_coverage\n } else {\n return inputs.input.basename.replace(/.bam/, '_per_base_coverage.txt')\n }\n}" + } + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/gatk_collect_hs_metrics_per_target_coverage_txt", + "type": "File", + "outputBinding": { + "glob": "${\n if(inputs.per_target_coverage){\n return inputs.per_target_coverage\n } else {\n return inputs.input.basename.replace(/.bam/, '_per_target_coverage.txt')\n }\n}" + } + } + ], + "label": "GATK-CollectHsMetrics", + "arguments": [ + { + "position": 0, + "prefix": "--java-options", + "valueFrom": "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n return \"-Xmx15G\"\n }\n else {\n return \"-Xmx15G\"\n }\n}" + }, + { + "position": 0, + "prefix": "--TMP_DIR", + "valueFrom": "${\n if(inputs.temporary_directory)\n return inputs.temporary_directory;\n return runtime.tmpdir\n}" + }, + { + "position": 0, + "prefix": "-O", + "valueFrom": "${\n 
if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_hs_metrics.txt')\n }\n}" + }, + { + "position": 0, + "prefix": "--PER_TARGET_COVERAGE", + "valueFrom": "${\n if(inputs.per_target_coverage){\n return inputs.per_target_coverage\n } else {\n return inputs.input.basename.replace(/.bam/, '_per_target_coverage.txt')\n }\n}" + }, + { + "position": 0, + "prefix": "--PER_BASE_COVERAGE", + "valueFrom": "${\n if(inputs.per_base_coverage){\n return inputs.per_base_coverage\n } else {\n return inputs.input.basename.replace(/.bam/, '_per_base_coverage.txt')\n }\n}" + } + ], + "requirements": [ + { + "class": "ResourceRequirement", + "ramMin": 32000, + "coresMin": 1 + }, + { + "class": "DockerRequirement", + "dockerPull": "ghcr.io/msk-access/gatk:4.1.8.0" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "http://purl.org/dc/terms/contributor": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://purl.org/dc/terms/creator": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://usefulinc.com/ns/doap#release": [ + { + "class": "http://usefulinc.com/ns/doap#Version", + "http://usefulinc.com/ns/doap#name": "gatk4", + "http://usefulinc.com/ns/doap#revision": "4.1.8.0" + } + ] + }, + { + "class": "CommandLineTool", + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl", + 
"baseCommand": [ + "gatk", + "CollectInsertSizeMetrics" + ], + "inputs": [ + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/memory_per_job", + "type": [ + "null", + "int" + ], + "doc": "Memory per job in megabytes" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/memory_overhead", + "type": [ + "null", + "int" + ], + "doc": "Memory overhead per job in megabytes" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/number_of_threads", + "type": [ + "null", + "int" + ] + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/input", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "-I" + }, + "doc": "Input file (bam or sam). Required." + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/output_file_name", + "type": [ + "null", + "string" + ], + "doc": "File to write the output to. Required." + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/histogram_file", + "type": [ + "null", + "string" + ], + "doc": "File to write insert size Histogram chart to. Required." + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/deviations", + "type": [ + "null", + "float" + ], + "inputBinding": { + "position": 0, + "prefix": "--DEVIATIONS" + }, + "doc": "Generate mean, sd and plots by trimming the data down to MEDIAN + DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION. This is done because insert size data typically includes enough anomalous values from chimeras and other artifacts to make the mean and sd grossly misleading regarding the real distribution. Default value: 10.0. This option can be set to 'null' to clear the default value." + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/histogram_width", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--HISTOGRAM_WIDTH" + }, + "doc": "Explicitly sets the Histogram width, overriding automatic truncation of Histogram tail. 
Also, when calculating mean and standard deviation, only bins <= Histogram_WIDTH will be included. Default value: null." + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/minimum_pct", + "type": [ + "null", + "float" + ], + "inputBinding": { + "position": 0, + "prefix": "--MINIMUM_PCT" + }, + "doc": "When generating the Histogram, discard any data categories (out of FR, TANDEM, RF) that have fewer than this percentage of overall reads. (Range: 0 to 1). Default value: 0.05. This option can be set to 'null' to clear the default value." + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/metrics_acciumulation_level", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--METRIC_ACCUMULATION_LEVEL" + }, + "doc": "The level(s) at which to accumulate metrics. Default value: [ALL_READS]. This option can be set to 'null' to clear the default value. Possible values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be specified 0 or more times. This option can be set to 'null' to clear the default list." + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/include_duplicates", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--INCLUDE_DUPLICATES" + }, + "doc": "If true, also include reads marked as duplicates in the insert size histogram. Default value: false. This option can be set to 'null' to clear the default value. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/validation_stringency", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--VALIDATION_STRINGENCY" + }, + "doc": "Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded. Default value: STRICT. 
This option can be set to 'null' to clear the default value. Possible values: {STRICT,LENIENT, SILENT}" + }, + { + "default": true, + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/assume_sorted", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--ASSUME_SORTED" + }, + "doc": "If true (default), then the sort order in the header file will be ignored. Default value: true. This option can be set to 'null' to clear the default value. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/stop_after", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--STOP_AFTER" + }, + "doc": "Stop after processing N reads, mainly for debugging. Default value: 0. This option can be set to 'null' to clear the default value." + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/create_index", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CREATE_INDEX" + }, + "doc": "Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/create_md5_file", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CREATE_MD5_FILE" + }, + "doc": "Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: false. 
Possible values: {true, false}" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/use_jdk_deflater", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--USE_JDK_DEFLATER" + }, + "doc": "Use the JDK Deflater instead of the Intel Deflater for writing compressed output" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/use_jdk_inflater", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--USE_JDK_INFLATER" + }, + "doc": "Use the JDK Inflater instead of the Intel Inflater for reading compressed input" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/temporary_directory", + "type": [ + "null", + "string" + ], + "doc": "Default value: null. This option may be specified 0 or more times." + } + ], + "outputs": [ + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/gatk_collect_insert_size_metrics_txt", + "type": "File", + "outputBinding": { + "glob": "${\n if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_insert_size_metrics.txt')\n }\n}" + } + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/gatk_collect_insert_size_metrics_histogram_pdf", + "type": "File", + "outputBinding": { + "glob": "${\n if(inputs.histogram_file){\n return inputs.histogram_file\n } else {\n return inputs.input.basename.replace(/.bam/, '_histogram.pdf')\n }\n}" + } + } + ], + "label": "GATK-CollectInsertSizeMetrics", + "arguments": [ + { + "position": 0, + "prefix": "--java-options", + "valueFrom": "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + 
(inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n return \"-Xmx15G\"\n }\n else {\n return \"-Xmx15G\"\n }\n}" + }, + { + "position": 0, + "prefix": "--TMP_DIR", + "valueFrom": "${\n if(inputs.temporary_directory)\n return inputs.temporary_directory;\n return runtime.tmpdir\n}" + }, + { + "position": 2, + "prefix": "-O", + "valueFrom": "${\n if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_insert_size_metrics.txt')\n }\n}" + }, + { + "position": 2, + "prefix": "-H", + "valueFrom": "${\n if(inputs.histogram_file){\n return inputs.histogram_file\n } else {\n return inputs.input.basename.replace(/.bam/, '_histogram.pdf')\n }\n}" + } + ], + "requirements": [ + { + "class": "ResourceRequirement", + "ramMin": 32000, + "coresMin": 1 + }, + { + "class": "DockerRequirement", + "dockerPull": "ghcr.io/msk-access/gatk:4.1.8.0" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "http://purl.org/dc/terms/contributor": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://purl.org/dc/terms/creator": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://usefulinc.com/ns/doap#release": [ + { + "class": 
"http://usefulinc.com/ns/doap#Version", + "http://usefulinc.com/ns/doap#name": "gatk4", + "http://usefulinc.com/ns/doap#revision": "4.1.8.0" + } + ] + }, + { + "class": "CommandLineTool", + "id": "#getbasecountsmultisample_1.2.5.cwl", + "baseCommand": [ + "GetBaseCountsMultiSample" + ], + "inputs": [ + { + "id": "#getbasecountsmultisample_1.2.5.cwl/memory_per_job", + "type": [ + "null", + "int" + ], + "doc": "Memory per job in megabytes" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/memory_overhead", + "type": [ + "null", + "int" + ], + "doc": "Memory overhead per job in megabytes" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/number_of_threads", + "type": [ + "null", + "int" + ] + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/genotyping_bams", + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "doc": "Input bam file" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/genotyping_bams_ids", + "type": [ + "string", + { + "type": "array", + "items": "string" + } + ], + "doc": "Input bam, sample identifier to be used for \"Tumor Sample Barcode\" for maf or Sample name in the header for vcf" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/filter_duplicate", + "type": "int", + "inputBinding": { + "position": 0, + "prefix": "--filter_duplicate" + }, + "doc": "Whether to filter reads that are marked as duplicate. 0=off, 1=on. Default 1" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/fragment_count", + "type": "int", + "inputBinding": { + "position": 0, + "prefix": "--fragment_count" + }, + "doc": "Whether to output fragment read counts DPF/RDF/ADF. 0=off, 1=on. Default 0" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/maf", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "--maf" + }, + "doc": "Input variant file in TCGA maf format. --maf or --vcf need to be specified at least once. 
But --maf and --vcf are mutually exclusive" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/maq", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--maq" + }, + "doc": "Mapping quality threshold. Default 20" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/omaf", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--omaf" + }, + "doc": "Output the result in maf format" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/output", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--output", + "valueFrom": "${\n if (inputs.output) {\n return inputs.output\n } else if (inputs.genotyping_bams.length) {\n return inputs.maf.basename.replace('.maf', '_fillout.maf')\n } else {\n return inputs.genotyping_bams.basename.replace('.bam', '_fillout.maf')\n }\n}" + }, + "doc": "Filename for output of raw fillout data in MAF/VCF format" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/ref_fasta", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "--fasta" + }, + "doc": "Input reference sequence file" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/vcf", + "type": [ + "null", + "File" + ], + "inputBinding": { + "position": 0, + "prefix": "--vcf" + }, + "doc": "Input variant file in vcf-like format(the first 5 columns are used). --maf or --vcf need to be specified at least once. But --maf and --vcf are mutually exclusive" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/generic_counting", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--generic_counting" + }, + "doc": "Use the newly implemented generic counting algorithm. Works better for complex variants. 
You may get different allele count result from the default counting algorithm" + } + ], + "outputs": [ + { + "id": "#getbasecountsmultisample_1.2.5.cwl/fillout", + "type": "File", + "outputBinding": { + "glob": "${\n if (inputs.output) {\n return inputs.output\n } else if (inputs.genotyping_bams.length) {\n return inputs.maf.basename.replace('.maf', '_fillout.maf')\n } else {\n return inputs.genotyping_bams.basename.replace('.bam', '_fillout.maf')\n }\n}" + } + } + ], + "label": "getbasecountsmultisample_1.2.5", + "arguments": [ + { + "position": 0, + "prefix": "", + "shellQuote": false, + "valueFrom": "$('--bam_fof bam_fof.tsv')\n" + }, + { + "position": 0, + "prefix": "--thread", + "valueFrom": "$(runtime.cores)" + } + ], + "requirements": [ + { + "class": "ShellCommandRequirement" + }, + { + "class": "ResourceRequirement", + "ramMin": 16000, + "coresMin": 2 + }, + { + "class": "DockerRequirement", + "dockerPull": "ghcr.io/msk-access/gbcms:1.2.5" + }, + { + "class": "InitialWorkDirRequirement", + "listing": [ + { + "entryname": "bam_fof.tsv", + "entry": "${\n if (typeof(inputs.genotyping_bams_ids) == 'object') {\n return inputs.genotyping_bams_ids.map(function(sid, i) {\n return sid + \"\\t\" +\n inputs.genotyping_bams[i].path\n }).join(\"\\n\")\n } else {\n return inputs.genotyping_bams_ids + \"\\t\" + inputs.genotyping_bams.path + \"\\n\"\n }\n}", + "writable": false + } + ] + }, + { + "class": "InlineJavascriptRequirement" + }, + { + "class": "StepInputExpressionRequirement" + } + ], + "http://purl.org/dc/terms/contributor": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:shahr2@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Ronak Shah" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://purl.org/dc/terms/creator": [ + { + "class": 
"http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:johnsoni@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Ian Johnson" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://usefulinc.com/ns/doap#release": [ + { + "class": "http://usefulinc.com/ns/doap#Version", + "http://usefulinc.com/ns/doap#name": "GetBaseCountsMultiSample", + "http://usefulinc.com/ns/doap#revision": "1.2.5" + } + ] + }, + { + "class": "Workflow", + "id": "#main", + "label": "qc_collapsed_bam", + "inputs": [ + { + "id": "#reference", + "type": "File", + "secondaryFiles": [ + "^.fasta.fai", + "^.dict" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 824.90625 + }, + { + "id": "#pool_b_target_intervals", + "type": "File", + "label": "pool_b_target_intervals", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 1038.59375 + }, + { + "id": "#pool_a_target_intervals", + "type": "File", + "label": "pool_a_target_intervals", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 1252.28125 + }, + { + "id": "#collapsed_bam", + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "collapsed_bam", + "secondaryFiles": [ + "^.bai" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 2748.09375 + }, + { + "id": "#group_reads_by_umi_bam", + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "group_reads_by_umi_bam", + "doc": "Input BAM file generated by GroupReadByUmi.", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 2534.40625 + }, + { + "id": "#pool_a_bait_intervals", + "type": [ + "null", + "File" + ], + "label": "pool_a_bait_intervals", + "doc": "Optional set of intervals over which to restrict analysis. 
[Optional].", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 1359.125 + }, + { + "id": "#pool_b_bait_intervals", + "type": [ + "null", + "File" + ], + "label": "pool_b_bait_intervals", + "doc": "Optional set of intervals over which to restrict analysis. [Optional].", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 1145.4375 + }, + { + "id": "#json", + "type": [ + "null", + "boolean" + ], + "doc": "Also output data in JSON format.", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 2107.03125 + }, + { + "id": "#plot", + "type": [ + "null", + "boolean" + ], + "doc": "Also output plots of the data.", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 1572.8125 + }, + { + "id": "#minor_threshold", + "type": [ + "null", + "float" + ], + "doc": "Minor contamination threshold for bad sample.", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 1679.65625 + }, + { + "id": "#coverage_threshold", + "type": [ + "null", + "int" + ], + "doc": "Samples with Y chromosome above this value will be considered male.", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 2641.25 + }, + { + "id": "#hsmetrics_minimum_mapping_quality", + "type": [ + "null", + "int" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 2213.875 + }, + { + "id": "#hsmetrics_minimum_base_quality", + "type": [ + "null", + "int" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 2320.71875 + }, + { + "id": "#hsmetrics_coverage_cap", + "type": [ + "null", + "int" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 2427.5625 + }, + { + "id": "#prefix", + "type": [ + "null", + "string" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 931.75 + }, + { + "id": "#major_threshold", + "type": [ + "null", + "float" + ], + "https://www.sevenbridges.com/x": 0, + 
"https://www.sevenbridges.com/y": 1786.5 + }, + { + "id": "#json_1", + "type": [ + "null", + "boolean" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 2000.1875 + }, + { + "id": "#vcf_file", + "type": "File", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 397.53125 + }, + { + "id": "#sample_name", + "type": "string", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 611.21875 + }, + { + "id": "#sample_sex", + "type": [ + "null", + "string" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 504.375 + }, + { + "id": "#sample_group", + "type": [ + "null", + "string" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 718.0625 + }, + { + "id": "#maf", + "type": "File", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 1893.34375 + }, + { + "id": "#bed_file", + "type": [ + "null", + "File" + ], + "https://www.sevenbridges.com/x": -5.7914533615112305, + "https://www.sevenbridges.com/y": 1468.1177978515625 + } + ], + "outputs": [ + { + "id": "#fgbio_collect_duplex_seq_metrics_duplex_family_size_pool_a", + "outputSource": [ + "#fgbio_collect_duplex_seq_metrics_1_2_0/fgbio_collect_duplex_seq_metrics_duplex_family_size" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "fgbio_collect_duplex_seq_metrics_duplex_family_size_pool_a", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 2564.25 + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_duplex_qc_pool_a", + "outputSource": [ + "#fgbio_collect_duplex_seq_metrics_1_2_0/fgbio_collect_duplex_seq_metrics_duplex_qc" + ], + "type": [ + "null", + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "fgbio_collect_duplex_seq_metrics_duplex_qc_pool_a", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 2243.71875 + }, + { + "id": 
"#fgbio_collect_duplex_seq_metrics_duplex_pool_a", + "outputSource": [ + "#fgbio_collect_duplex_seq_metrics_1_2_0/fgbio_collect_duplex_seq_metrics_duplex_umi_counts" + ], + "type": [ + "null", + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "fgbio_collect_duplex_seq_metrics_duplex_pool_a", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 2350.5625 + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_duplex_yield_metrics_pool_a", + "outputSource": [ + "#fgbio_collect_duplex_seq_metrics_1_2_0/fgbio_collect_duplex_seq_metrics_duplex_yield_metrics" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "fgbio_collect_duplex_seq_metrics_duplex_yield_metrics_pool_a", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 1923.1875 + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_family_size_pool_a", + "outputSource": [ + "#fgbio_collect_duplex_seq_metrics_1_2_0/fgbio_collect_duplex_seq_metrics_family_size" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "fgbio_collect_duplex_seq_metrics_family_size_pool_a", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 1709.5 + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_umi_counts_pool_a", + "outputSource": [ + "#fgbio_collect_duplex_seq_metrics_1_2_0/fgbio_collect_duplex_seq_metrics_umi_counts" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "fgbio_collect_duplex_seq_metrics_umi_counts_pool_a", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 1495.8125 + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_duplex_family_size_pool_b", + "outputSource": [ + "#fgbio_collect_duplex_seq_metrics_1_2_1/fgbio_collect_duplex_seq_metrics_duplex_family_size" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": 
"fgbio_collect_duplex_seq_metrics_duplex_family_size_pool_b", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 2457.40625 + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_duplex_qc_pool_b", + "outputSource": [ + "#fgbio_collect_duplex_seq_metrics_1_2_1/fgbio_collect_duplex_seq_metrics_duplex_qc" + ], + "type": [ + "null", + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "fgbio_collect_duplex_seq_metrics_duplex_qc_pool_b", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 2136.875 + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_duplex_umi_counts_pool_b", + "outputSource": [ + "#fgbio_collect_duplex_seq_metrics_1_2_1/fgbio_collect_duplex_seq_metrics_duplex_umi_counts" + ], + "type": [ + "null", + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "fgbio_collect_duplex_seq_metrics_duplex_umi_counts_pool_b", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 2030.03125 + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_duplex_yield_metrics_pool_b", + "outputSource": [ + "#fgbio_collect_duplex_seq_metrics_1_2_1/fgbio_collect_duplex_seq_metrics_duplex_yield_metrics" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "fgbio_collect_duplex_seq_metrics_duplex_yield_metrics_pool_b", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 1816.34375 + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_family_size_pool_b", + "outputSource": [ + "#fgbio_collect_duplex_seq_metrics_1_2_1/fgbio_collect_duplex_seq_metrics_family_size" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "fgbio_collect_duplex_seq_metrics_family_size_pool_b", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 1602.65625 + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_umi_counts_pool_b", + 
"outputSource": [ + "#fgbio_collect_duplex_seq_metrics_1_2_1/fgbio_collect_duplex_seq_metrics_umi_counts" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "fgbio_collect_duplex_seq_metrics_umi_counts_pool_b", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 1388.96875 + }, + { + "id": "#biometrics_minor_csv", + "outputSource": [ + "#biometrics_minor/biometrics_minor_csv" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 1547.1123046875, + "https://www.sevenbridges.com/y": 1679.65625 + }, + { + "id": "#biometrics_minor_json", + "outputSource": [ + "#biometrics_minor/biometrics_minor_json" + ], + "type": [ + "null", + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 1547.1123046875, + "https://www.sevenbridges.com/y": 1572.8125 + }, + { + "id": "#biometrics_minor_plot", + "outputSource": [ + "#biometrics_minor/biometrics_minor_plot" + ], + "type": [ + "null", + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 1547.1123046875, + "https://www.sevenbridges.com/y": 1465.96875 + }, + { + "id": "#biometrics_minor_sites_plot", + "outputSource": [ + "#biometrics_minor/biometrics_minor_sites_plot" + ], + "type": [ + "null", + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 1547.1123046875, + "https://www.sevenbridges.com/y": 1359.125 + }, + { + "id": "#biometrics_sexmismatch_json", + "outputSource": [ + "#biometrics_sexmismatch/biometrics_sexmismatch_json" + ], + "type": [ + "null", + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 1547.1123046875, + "https://www.sevenbridges.com/y": 1145.4375 + }, + { + "id": "#biometrics_sexmismatch_csv", + "outputSource": [ + "#biometrics_sexmismatch/biometrics_sexmismatch_csv" + ], + "type": [ + "File", + { + "type": 
"array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 1547.1123046875, + "https://www.sevenbridges.com/y": 1252.28125 + }, + { + "id": "#gatk_collect_insert_size_metrics_txt_pool_b", + "outputSource": [ + "#bam_qc_stats_pool_b/gatk_collect_insert_size_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_insert_size_metrics_txt_pool_b", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 0 + }, + { + "id": "#gatk_collect_insert_size_metrics_histogram_pdf_pool_b", + "outputSource": [ + "#bam_qc_stats_pool_b/gatk_collect_insert_size_metrics_histogram_pdf" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_insert_size_metrics_histogram_pdf_pool_b", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 213.6875 + }, + { + "id": "#gatk_collect_hs_metrics_txt_pool_b", + "outputSource": [ + "#bam_qc_stats_pool_b/gatk_collect_hs_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_hs_metrics_txt_pool_b", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 427.375 + }, + { + "id": "#gatk_collect_hs_metrics_per_target_coverage_txt_pool_b", + "outputSource": [ + "#bam_qc_stats_pool_b/gatk_collect_hs_metrics_per_target_coverage_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_hs_metrics_per_target_coverage_txt_pool_b", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 641.0625 + }, + { + "id": "#gatk_collect_hs_metrics_per_base_coverage_txt_pool_b", + "outputSource": [ + "#bam_qc_stats_pool_b/gatk_collect_hs_metrics_per_base_coverage_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_hs_metrics_per_base_coverage_txt_pool_b", + 
"https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 854.75 + }, + { + "id": "#gatk_collect_alignment_summary_metrics_txt_pool_b", + "outputSource": [ + "#bam_qc_stats_pool_b/gatk_collect_alignment_summary_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_alignment_summary_metrics_txt_pool_b", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 1068.4375 + }, + { + "id": "#gatk_collect_insert_size_metrics_txt_pool_a", + "outputSource": [ + "#bam_qc_stats_pool_a/gatk_collect_insert_size_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_insert_size_metrics_txt_pool_a", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 106.84375 + }, + { + "id": "#gatk_collect_insert_size_metrics_histogram_pdf_pool_a", + "outputSource": [ + "#bam_qc_stats_pool_a/gatk_collect_insert_size_metrics_histogram_pdf" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_insert_size_metrics_histogram_pdf_pool_a", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 320.53125 + }, + { + "id": "#gatk_collect_hs_metrics_txt_pool_a", + "outputSource": [ + "#bam_qc_stats_pool_a/gatk_collect_hs_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_hs_metrics_txt_pool_a", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 534.21875 + }, + { + "id": "#gatk_collect_hs_metrics_per_target_coverage_txt_pool_a", + "outputSource": [ + "#bam_qc_stats_pool_a/gatk_collect_hs_metrics_per_target_coverage_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_hs_metrics_per_target_coverage_txt_pool_a", + "https://www.sevenbridges.com/x": 
982.1435546875, + "https://www.sevenbridges.com/y": 747.90625 + }, + { + "id": "#gatk_collect_hs_metrics_per_base_coverage_txt_pool_a", + "outputSource": [ + "#bam_qc_stats_pool_a/gatk_collect_hs_metrics_per_base_coverage_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_hs_metrics_per_base_coverage_txt_pool_a", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 961.59375 + }, + { + "id": "#gatk_collect_alignment_summary_metrics_txt_pool_a", + "outputSource": [ + "#bam_qc_stats_pool_a/gatk_collect_alignment_summary_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_alignment_summary_metrics_txt_pool_a", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 1175.28125 + }, + { + "id": "#biometrics_major_plot", + "outputSource": [ + "#biometrics_major_0_2_13/biometrics_major_plot" + ], + "type": [ + "null", + "File" + ], + "https://www.sevenbridges.com/x": 1547.1123046875, + "https://www.sevenbridges.com/y": 1786.5 + }, + { + "id": "#biometrics_major_json", + "outputSource": [ + "#biometrics_major_0_2_13/biometrics_major_json" + ], + "type": [ + "null", + "File" + ], + "https://www.sevenbridges.com/x": 1547.1123046875, + "https://www.sevenbridges.com/y": 1893.34375 + }, + { + "id": "#biometrics_major_csv", + "outputSource": [ + "#biometrics_major_0_2_13/biometrics_major_csv" + ], + "type": "File", + "https://www.sevenbridges.com/x": 1547.1123046875, + "https://www.sevenbridges.com/y": 2000.1875 + }, + { + "id": "#biometrics_extract_pickle", + "outputSource": [ + "#biometrics_extract_0_2_13/biometrics_extract_pickle" + ], + "type": "File", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 3145.625 + }, + { + "id": "#fillout_maf", + "outputSource": [ + "#getbasecountsmultisample_1_2_5/fillout" + ], + "type": "File", + 
"https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 1282.125 + } + ], + "steps": [ + { + "id": "#bam_qc_stats_pool_b", + "in": [ + { + "id": "#bam_qc_stats_pool_b/input", + "source": [ + "#collapsed_bam" + ] + }, + { + "id": "#bam_qc_stats_pool_b/target_intervals", + "source": "#pool_b_target_intervals" + }, + { + "id": "#bam_qc_stats_pool_b/bait_intervals", + "source": "#pool_b_bait_intervals" + }, + { + "id": "#bam_qc_stats_pool_b/reference", + "source": "#reference" + }, + { + "id": "#bam_qc_stats_pool_b/hsmetrics_minimum_mapping_quality", + "source": "#hsmetrics_minimum_mapping_quality" + }, + { + "id": "#bam_qc_stats_pool_b/hsmetrics_minimum_base_quality", + "source": "#hsmetrics_minimum_base_quality" + }, + { + "id": "#bam_qc_stats_pool_b/hsmetrics_coverage_cap", + "source": "#hsmetrics_coverage_cap" + } + ], + "out": [ + { + "id": "#bam_qc_stats_pool_b/gatk_collect_insert_size_metrics_histogram_pdf" + }, + { + "id": "#bam_qc_stats_pool_b/gatk_collect_insert_size_metrics_txt" + }, + { + "id": "#bam_qc_stats_pool_b/gatk_collect_hs_metrics_txt" + }, + { + "id": "#bam_qc_stats_pool_b/gatk_collect_hs_metrics_per_base_coverage_txt" + }, + { + "id": "#bam_qc_stats_pool_b/gatk_collect_hs_metrics_per_target_coverage_txt" + }, + { + "id": "#bam_qc_stats_pool_b/gatk_collect_alignment_summary_metrics_txt" + } + ], + "run": "#bam_qc_stats.cwl", + "label": "bam_qc_stats_pool_b", + "https://www.sevenbridges.com/x": 351.4375, + "https://www.sevenbridges.com/y": 1796.078125 + }, + { + "id": "#bam_qc_stats_pool_a", + "in": [ + { + "id": "#bam_qc_stats_pool_a/input", + "source": [ + "#collapsed_bam" + ] + }, + { + "id": "#bam_qc_stats_pool_a/target_intervals", + "source": "#pool_a_target_intervals" + }, + { + "id": "#bam_qc_stats_pool_a/bait_intervals", + "source": "#pool_a_bait_intervals" + }, + { + "id": "#bam_qc_stats_pool_a/reference", + "source": "#reference" + }, + { + "id": "#bam_qc_stats_pool_a/hsmetrics_minimum_mapping_quality", + 
"source": "#hsmetrics_minimum_mapping_quality" + }, + { + "id": "#bam_qc_stats_pool_a/hsmetrics_minimum_base_quality", + "source": "#hsmetrics_minimum_base_quality" + }, + { + "id": "#bam_qc_stats_pool_a/hsmetrics_coverage_cap", + "source": "#hsmetrics_coverage_cap" + } + ], + "out": [ + { + "id": "#bam_qc_stats_pool_a/gatk_collect_insert_size_metrics_histogram_pdf" + }, + { + "id": "#bam_qc_stats_pool_a/gatk_collect_insert_size_metrics_txt" + }, + { + "id": "#bam_qc_stats_pool_a/gatk_collect_hs_metrics_txt" + }, + { + "id": "#bam_qc_stats_pool_a/gatk_collect_hs_metrics_per_base_coverage_txt" + }, + { + "id": "#bam_qc_stats_pool_a/gatk_collect_hs_metrics_per_target_coverage_txt" + }, + { + "id": "#bam_qc_stats_pool_a/gatk_collect_alignment_summary_metrics_txt" + } + ], + "run": "#bam_qc_stats.cwl", + "label": "bam_qc_stats_pool_a", + "https://www.sevenbridges.com/x": 351.4375, + "https://www.sevenbridges.com/y": 1986.921875 + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1_2_0", + "in": [ + { + "id": "#fgbio_collect_duplex_seq_metrics_1_2_0/input", + "source": "#group_reads_by_umi_bam" + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1_2_0/intervals", + "source": "#pool_a_bait_intervals" + } + ], + "out": [ + { + "id": "#fgbio_collect_duplex_seq_metrics_1_2_0/fgbio_collect_duplex_seq_metrics_family_size" + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1_2_0/fgbio_collect_duplex_seq_metrics_duplex_family_size" + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1_2_0/fgbio_collect_duplex_seq_metrics_duplex_yield_metrics" + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1_2_0/fgbio_collect_duplex_seq_metrics_umi_counts" + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1_2_0/fgbio_collect_duplex_seq_metrics_duplex_qc" + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1_2_0/fgbio_collect_duplex_seq_metrics_duplex_umi_counts" + } + ], + "run": "#fgbio_collect_duplex_seq_metrics_1.2.0.cwl", + "label": "fgbio_collect_duplex_seq_metrics_1.2.0", + 
"https://www.sevenbridges.com/x": 351.4375, + "https://www.sevenbridges.com/y": 1435.390625 + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1_2_1", + "in": [ + { + "id": "#fgbio_collect_duplex_seq_metrics_1_2_1/input", + "source": "#group_reads_by_umi_bam" + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1_2_1/intervals", + "source": "#pool_b_bait_intervals" + } + ], + "out": [ + { + "id": "#fgbio_collect_duplex_seq_metrics_1_2_1/fgbio_collect_duplex_seq_metrics_family_size" + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1_2_1/fgbio_collect_duplex_seq_metrics_duplex_family_size" + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1_2_1/fgbio_collect_duplex_seq_metrics_duplex_yield_metrics" + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1_2_1/fgbio_collect_duplex_seq_metrics_umi_counts" + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1_2_1/fgbio_collect_duplex_seq_metrics_duplex_qc" + }, + { + "id": "#fgbio_collect_duplex_seq_metrics_1_2_1/fgbio_collect_duplex_seq_metrics_duplex_umi_counts" + } + ], + "run": "#fgbio_collect_duplex_seq_metrics_1.2.0.cwl", + "label": "fgbio_collect_duplex_seq_metrics_1.2.0", + "https://www.sevenbridges.com/x": 351.4375, + "https://www.sevenbridges.com/y": 1258.546875 + }, + { + "id": "#biometrics_minor", + "in": [ + { + "id": "#biometrics_minor/input", + "linkMerge": "merge_nested", + "source": [ + "#biometrics_extract_0_2_13/biometrics_extract_pickle" + ] + }, + { + "id": "#biometrics_minor/minor_threshold", + "source": "#minor_threshold" + }, + { + "id": "#biometrics_minor/prefix", + "default": "collapsed", + "source": "#prefix" + }, + { + "id": "#biometrics_minor/plot", + "default": false, + "source": "#plot" + }, + { + "id": "#biometrics_minor/json", + "default": true, + "source": "#json" + } + ], + "out": [ + { + "id": "#biometrics_minor/biometrics_minor_csv" + }, + { + "id": "#biometrics_minor/biometrics_minor_json" + }, + { + "id": "#biometrics_minor/biometrics_minor_plot" + }, + { + "id": 
"#biometrics_minor/biometrics_minor_sites_plot" + } + ], + "run": "#biometrics_minor.cwl", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 2847.9375 + }, + { + "id": "#biometrics_sexmismatch", + "in": [ + { + "id": "#biometrics_sexmismatch/input", + "linkMerge": "merge_flattened", + "source": [ + "#biometrics_extract_0_2_13/biometrics_extract_pickle" + ] + }, + { + "id": "#biometrics_sexmismatch/coverage_threshold", + "source": "#coverage_threshold" + }, + { + "id": "#biometrics_sexmismatch/prefix", + "default": "collapsed", + "source": "#prefix" + }, + { + "id": "#biometrics_sexmismatch/json", + "source": "#json" + } + ], + "out": [ + { + "id": "#biometrics_sexmismatch/biometrics_sexmismatch_csv" + }, + { + "id": "#biometrics_sexmismatch/biometrics_sexmismatch_json" + } + ], + "run": "#biometrics_sexmismatch.cwl", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 2692.09375 + }, + { + "id": "#biometrics_major_0_2_13", + "in": [ + { + "id": "#biometrics_major_0_2_13/input", + "linkMerge": "merge_nested", + "source": [ + "#biometrics_extract_0_2_13/biometrics_extract_pickle" + ] + }, + { + "id": "#biometrics_major_0_2_13/major_threshold", + "source": "#major_threshold" + }, + { + "id": "#biometrics_major_0_2_13/prefix", + "source": "#prefix" + }, + { + "id": "#biometrics_major_0_2_13/plot", + "source": "#plot" + }, + { + "id": "#biometrics_major_0_2_13/json", + "source": "#json_1" + } + ], + "out": [ + { + "id": "#biometrics_major_0_2_13/biometrics_major_csv" + }, + { + "id": "#biometrics_major_0_2_13/biometrics_major_json" + }, + { + "id": "#biometrics_major_0_2_13/biometrics_major_plot" + } + ], + "run": "#biometrics_major.cwl", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 3010.78125 + }, + { + "id": "#biometrics_extract_0_2_13", + "in": [ + { + "id": "#biometrics_extract_0_2_13/sample_bam", + "source": "#collapsed_bam" + }, + { + "id": 
"#biometrics_extract_0_2_13/sample_sex", + "source": "#sample_sex" + }, + { + "id": "#biometrics_extract_0_2_13/sample_group", + "source": "#sample_group" + }, + { + "id": "#biometrics_extract_0_2_13/sample_name", + "source": "#sample_name" + }, + { + "id": "#biometrics_extract_0_2_13/fafile", + "source": "#reference" + }, + { + "id": "#biometrics_extract_0_2_13/vcf_file", + "source": "#vcf_file" + }, + { + "id": "#biometrics_extract_0_2_13/bed_file", + "source": "#bed_file" + } + ], + "out": [ + { + "id": "#biometrics_extract_0_2_13/biometrics_extract_pickle" + } + ], + "run": "#biometrics_extract.cwl", + "https://www.sevenbridges.com/x": 351.4375, + "https://www.sevenbridges.com/y": 1612.234375 + }, + { + "id": "#getbasecountsmultisample_1_2_5", + "in": [ + { + "id": "#getbasecountsmultisample_1_2_5/genotyping_bams", + "source": [ + "#collapsed_bam" + ] + }, + { + "id": "#getbasecountsmultisample_1_2_5/genotyping_bams_ids", + "source": [ + "#sample_name" + ] + }, + { + "id": "#getbasecountsmultisample_1_2_5/filter_duplicate", + "default": 0 + }, + { + "id": "#getbasecountsmultisample_1_2_5/fragment_count", + "default": 1 + }, + { + "id": "#getbasecountsmultisample_1_2_5/maf", + "source": "#maf" + }, + { + "id": "#getbasecountsmultisample_1_2_5/output", + "source": "#sample_name", + "valueFrom": "$(self + '_collapsed_hotspots_fillout.maf')" + }, + { + "id": "#getbasecountsmultisample_1_2_5/ref_fasta", + "source": "#reference" + } + ], + "out": [ + { + "id": "#getbasecountsmultisample_1_2_5/fillout" + } + ], + "run": "#getbasecountsmultisample_1.2.5.cwl", + "label": "getbasecountsmultisample_1.2.5", + "https://www.sevenbridges.com/x": 351.4375, + "https://www.sevenbridges.com/y": 1102.703125 + } + ], + "requirements": [ + { + "class": "SubworkflowFeatureRequirement" + }, + { + "class": "InlineJavascriptRequirement" + } + ] + } + ], + "cwlVersion": "v1.0", + "$schemas": [ + "http://schema.org/version/latest/schemaorg-current-http.rdf" + ] +} \ No newline at end of 
file diff --git a/qc_duplex_bam/README.md b/qc_duplex_bam/README.md new file mode 100644 index 0000000..3dd2e15 --- /dev/null +++ b/qc_duplex_bam/README.md @@ -0,0 +1,70 @@ +### Introduction +The sub-workflow calculates quality control metrics for duplex BAMs. The main outputs are the following: + +1. Targeted capture metrics. +2. Insert size metrics. +3. Alignment metrics. +4. Extracted genotype information used for fingerprinting and contamination estimation. +5. Genotype metrics to be used for hotspot mutation metrics. + +**Note:** This sub-workflow was originally designed for MSK-ACCESS data. Hence, in addition to the duplex BAM, it expects two sets of bait/target regions (referred to as pool A and pool B for MSK-ACCESS). + +### Tools used: + +- [GetBaseCountsMultiSample](../command_line_tools/getbasecountsmultisample/1.2.5) +- [bam_qc_stats](../bam_qc_stats/README.md) +- [sequence_qc](https://msk-access.gitbook.io/sequence-qc/) +- [Biometrics](https://msk-access.gitbook.io/biometrics/) + +### Usage + +```bash +usage: qc_duplex_bam.cwl [-h] --reference REFERENCE --pool_a_target_intervals + POOL_A_TARGET_INTERVALS --pool_a_bait_intervals + POOL_A_BAIT_INTERVALS --pool_b_target_intervals + POOL_B_TARGET_INTERVALS --pool_b_bait_intervals + POOL_B_BAIT_INTERVALS --noise_sites_bed + NOISE_SITES_BED [--plot] [--json] + [--sequence_qc_min_basq SEQUENCE_QC_MIN_BASQ] + [--sequence_qc_min_mapq SEQUENCE_QC_MIN_MAPQ] + [--sequence_qc_threshold SEQUENCE_QC_THRESHOLD] + [--sequence_qc_truncate SEQUENCE_QC_TRUNCATE] + [--hsmetrics_minimum_mapping_quality HSMETRICS_MINIMUM_MAPPING_QUALITY] + [--hsmetrics_minimum_base_quality HSMETRICS_MINIMUM_BASE_QUALITY] + [--hsmetrics_coverage_cap HSMETRICS_COVERAGE_CAP] + [--prefix PREFIX] [--major_threshold MAJOR_THRESHOLD] + --vcf_file VCF_FILE [--sample_sex SAMPLE_SEX] + [--sample_group SAMPLE_GROUP] --maf MAF + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help 
message and exit + --reference REFERENCE + Path to reference fasta, containing all regions in + bed_file + --pool_a_target_intervals POOL_A_TARGET_INTERVALS + --pool_a_bait_intervals POOL_A_BAIT_INTERVALS + --pool_b_target_intervals POOL_B_TARGET_INTERVALS + --pool_b_bait_intervals POOL_B_BAIT_INTERVALS + --noise_sites_bed NOISE_SITES_BED + Path to BED file containing regions over which to + calculate noise [required] + --plot Also output plots of the data. + --json Also output data in JSON format. + --sequence_qc_min_basq SEQUENCE_QC_MIN_BASQ + --sequence_qc_min_mapq SEQUENCE_QC_MIN_MAPQ + --sequence_qc_threshold SEQUENCE_QC_THRESHOLD + --sequence_qc_truncate SEQUENCE_QC_TRUNCATE + --hsmetrics_minimum_mapping_quality HSMETRICS_MINIMUM_MAPPING_QUALITY + --hsmetrics_minimum_base_quality HSMETRICS_MINIMUM_BASE_QUALITY + --hsmetrics_coverage_cap HSMETRICS_COVERAGE_CAP + --prefix PREFIX + --major_threshold MAJOR_THRESHOLD + --vcf_file VCF_FILE + --sample_sex SAMPLE_SEX + --sample_group SAMPLE_GROUP + --maf MAF +``` diff --git a/qc_duplex_bam/qc_duplex_bam.cwl b/qc_duplex_bam/qc_duplex_bam.cwl new file mode 100644 index 0000000..0130c62 --- /dev/null +++ b/qc_duplex_bam/qc_duplex_bam.cwl @@ -0,0 +1,537 @@ +class: Workflow +cwlVersion: v1.0 +id: qc_duplex +label: qc_duplex +$namespaces: + sbg: 'https://www.sevenbridges.com/' +inputs: + - id: reference + type: File + doc: 'Path to reference fasta, containing all regions in bed_file' + secondaryFiles: + - ^.fasta.fai + 'sbg:x': 0 + 'sbg:y': 903.75 + - id: duplex_bam + type: + - File + - type: array + items: File + label: duplex_bam + secondaryFiles: + - ^.bai + 'sbg:x': 0 + 'sbg:y': 2399.5625 + - id: pool_a_target_intervals + type: File + label: pool_a_target_intervals + 'sbg:x': 0 + 'sbg:y': 1331.125 + - id: pool_a_bait_intervals + type: File + label: pool_a_bait_intervals + 'sbg:x': 0 + 'sbg:y': 1437.96875 + - id: pool_b_target_intervals + type: File + label: pool_b_target_intervals + 'sbg:x': 0 + 'sbg:y': 1117.4375 + - 
id: pool_b_bait_intervals + type: File + label: pool_b_bait_intervals + 'sbg:x': 0 + 'sbg:y': 1224.28125 + - id: noise_sites_bed + type: File + label: noise_sites_bed + doc: >- + Path to BED file containing regions over which to calculate noise + [required] + 'sbg:x': 0 + 'sbg:y': 1651.65625 + - id: sample_name + type: + - 'null' + - string + - type: array + items: string + doc: >- + Sample name. If not specified, sample name is automatically figured out + from the BAM file. + 'sbg:x': 0 + 'sbg:y': 690.0625 + - id: plot + type: boolean? + doc: Also output plots of the data. + 'sbg:x': 0 + 'sbg:y': 1544.8125 + - id: json + type: boolean? + doc: Also output data in JSON format. + 'sbg:x': 0 + 'sbg:y': 1972.1875 + - id: sequence_qc_min_basq + type: int? + 'sbg:x': 0 + 'sbg:y': 476.375 + - id: sequence_qc_min_mapq + type: int? + 'sbg:x': 0 + 'sbg:y': 369.53125 + - id: sequence_qc_threshold + type: float? + 'sbg:x': 0 + 'sbg:y': 262.6875 + - id: sequence_qc_truncate + type: int? + 'sbg:x': 0 + 'sbg:y': 155.84375 + - id: hsmetrics_minimum_mapping_quality + type: int? + 'sbg:x': 0 + 'sbg:y': 2079.03125 + - id: hsmetrics_minimum_base_quality + type: int? + 'sbg:x': 0 + 'sbg:y': 2185.875 + - id: hsmetrics_coverage_cap + type: int? + 'sbg:x': 0 + 'sbg:y': 2292.71875 + - id: prefix + type: string? + 'sbg:x': 0 + 'sbg:y': 1010.59375 + - id: major_threshold + type: float? + 'sbg:x': 0 + 'sbg:y': 1758.5 + - id: vcf_file + type: File + 'sbg:x': 0 + 'sbg:y': 49 + - id: sample_sex + type: string? + 'sbg:x': 0 + 'sbg:y': 583.21875 + - id: sample_group + type: string? 
+ 'sbg:x': 0 + 'sbg:y': 796.90625 + - id: maf + type: File + 'sbg:x': 0 + 'sbg:y': 1865.34375 +outputs: + - id: sequence_qc_noise_positions + outputSource: + - calculate_noise/sequence_qc_noise_positions + type: + - File + - type: array + items: File + 'sbg:x': 982.1435546875 + 'sbg:y': 106.84375 + - id: sequence_qc_noise_n + outputSource: + - calculate_noise/sequence_qc_noise_n + type: + - File + - type: array + items: File + 'sbg:x': 982.1435546875 + 'sbg:y': 213.6875 + - id: sequence_qc_noise_del + outputSource: + - calculate_noise/sequence_qc_noise_del + type: + - File + - type: array + items: File + 'sbg:x': 982.1435546875 + 'sbg:y': 320.53125 + - id: sequence_qc_noise_acgt + outputSource: + - calculate_noise/sequence_qc_noise_acgt + type: + - File + - type: array + items: File + 'sbg:x': 982.1435546875 + 'sbg:y': 534.21875 + - id: sequence_qc_figures + outputSource: + - calculate_noise/sequence_qc_figures + type: + - File + - type: array + items: File + 'sbg:x': 982.1435546875 + 'sbg:y': 641.0625 + - id: gatk_collect_alignment_summary_metrics_txt_pool_b + outputSource: + - bam_qc_stats_pool_b/gatk_collect_alignment_summary_metrics_txt + type: + - File + - type: array + items: File + label: gatk_collect_alignment_summary_metrics_txt_pool_b + 'sbg:x': 982.1435546875 + 'sbg:y': 1816.34375 + - id: gatk_collect_hs_metrics_per_base_coverage_txt_pool_b + outputSource: + - bam_qc_stats_pool_b/gatk_collect_hs_metrics_per_base_coverage_txt + type: + - File + - type: array + items: File + label: gatk_collect_hs_metrics_per_base_coverage_txt_pool_b + 'sbg:x': 982.1435546875 + 'sbg:y': 1602.65625 + - id: gatk_collect_hs_metrics_per_target_coverage_txt_pool_b + outputSource: + - bam_qc_stats_pool_b/gatk_collect_hs_metrics_per_target_coverage_txt + type: + - File + - type: array + items: File + label: gatk_collect_hs_metrics_per_target_coverage_txt_pool_b + 'sbg:x': 982.1435546875 + 'sbg:y': 1388.96875 + - id: gatk_collect_hs_metrics_txt_pool_b + outputSource: + - 
bam_qc_stats_pool_b/gatk_collect_hs_metrics_txt + type: + - File + - type: array + items: File + label: gatk_collect_hs_metrics_txt_pool_b + 'sbg:x': 982.1435546875 + 'sbg:y': 1175.28125 + - id: gatk_collect_insert_size_metrics_histogram_pdf_pool_b + outputSource: + - bam_qc_stats_pool_b/gatk_collect_insert_size_metrics_histogram_pdf + type: + - File + - type: array + items: File + label: gatk_collect_insert_size_metrics_histogram_pdf_pool_b + 'sbg:x': 982.1435546875 + 'sbg:y': 961.59375 + - id: gatk_collect_insert_size_metrics_txt_pool_b + outputSource: + - bam_qc_stats_pool_b/gatk_collect_insert_size_metrics_txt + type: + - File + - type: array + items: File + label: gatk_collect_insert_size_metrics_txt_pool_b + 'sbg:x': 982.1435546875 + 'sbg:y': 747.90625 + - id: gatk_collect_alignment_summary_metrics_txt_pool_a + outputSource: + - bam_qc_stats_pool_a/gatk_collect_alignment_summary_metrics_txt + type: + - File + - type: array + items: File + label: gatk_collect_alignment_summary_metrics_txt_pool_a + 'sbg:x': 982.1435546875 + 'sbg:y': 1923.1875 + - id: gatk_collect_hs_metrics_per_base_coverage_txt_pool_a + outputSource: + - bam_qc_stats_pool_a/gatk_collect_hs_metrics_per_base_coverage_txt + type: + - File + - type: array + items: File + label: gatk_collect_hs_metrics_per_base_coverage_txt_pool_a + 'sbg:x': 982.1435546875 + 'sbg:y': 1709.5 + - id: gatk_collect_hs_metrics_per_target_coverage_txt_pool_a + outputSource: + - bam_qc_stats_pool_a/gatk_collect_hs_metrics_per_target_coverage_txt + type: + - File + - type: array + items: File + label: gatk_collect_hs_metrics_per_target_coverage_txt_pool_a + 'sbg:x': 982.1435546875 + 'sbg:y': 1495.8125 + - id: gatk_collect_hs_metrics_txt_pool_a + outputSource: + - bam_qc_stats_pool_a/gatk_collect_hs_metrics_txt + type: + - File + - type: array + items: File + label: gatk_collect_hs_metrics_txt_pool_a + 'sbg:x': 982.1435546875 + 'sbg:y': 1282.125 + - id: gatk_collect_insert_size_metrics_histogram_pdf_pool_a + outputSource: + 
- bam_qc_stats_pool_a/gatk_collect_insert_size_metrics_histogram_pdf + type: + - File + - type: array + items: File + label: gatk_collect_insert_size_metrics_histogram_pdf_pool_a + 'sbg:x': 982.1435546875 + 'sbg:y': 1068.4375 + - id: gatk_collect_insert_size_metrics_txt_pool_a + outputSource: + - bam_qc_stats_pool_a/gatk_collect_insert_size_metrics_txt + type: + - File + - type: array + items: File + label: gatk_collect_insert_size_metrics_txt_pool_a + 'sbg:x': 982.1435546875 + 'sbg:y': 854.75 + - id: sequence_qc_pileup + outputSource: + - calculate_noise/sequence_qc_pileup + type: + - File + - type: array + items: File + 'sbg:x': 982.1435546875 + 'sbg:y': 0 + - id: sequence_qc_noise_by_substitution + outputSource: + - calculate_noise/sequence_qc_noise_by_substitution + type: File + 'sbg:x': 982.1435546875 + 'sbg:y': 427.375 + - id: biometrics_major_plot + outputSource: + - biometrics_major_0_2_13/biometrics_major_plot + type: File? + 'sbg:x': 1495.5341796875 + 'sbg:y': 1331.125 + - id: biometrics_major_json + outputSource: + - biometrics_major_0_2_13/biometrics_major_json + type: File? + 'sbg:x': 1495.5341796875 + 'sbg:y': 1437.96875 + - id: biometrics_major_csv + outputSource: + - biometrics_major_0_2_13/biometrics_major_csv + type: File + 'sbg:x': 1495.5341796875 + 'sbg:y': 1544.8125 + - id: biometrics_extract_pickle + outputSource: + - biometrics_extract_0_2_13/biometrics_extract_pickle + type: File + 'sbg:x': 982.1435546875 + 'sbg:y': 2448.5625 + - id: biometrics_minor_sites_plot + outputSource: + - biometrics_minor_0_2_13/biometrics_minor_sites_plot + type: File? + 'sbg:x': 1495.5341796875 + 'sbg:y': 903.75 + - id: biometrics_minor_plot + outputSource: + - biometrics_minor_0_2_13/biometrics_minor_plot + type: File? + 'sbg:x': 1495.5341796875 + 'sbg:y': 1010.59375 + - id: biometrics_minor_json + outputSource: + - biometrics_minor_0_2_13/biometrics_minor_json + type: File? 
+ 'sbg:x': 1495.5341796875 + 'sbg:y': 1117.4375 + - id: biometrics_minor_csv + outputSource: + - biometrics_minor_0_2_13/biometrics_minor_csv + type: File + 'sbg:x': 1495.5341796875 + 'sbg:y': 1224.28125 + - id: fillout_maf + outputSource: + - getbasecountsmultisample_1_2_5/fillout + type: File + 'sbg:x': 982.1435546875 + 'sbg:y': 2030.03125 +steps: + - id: bam_qc_stats_pool_a + in: + - id: input + source: + - duplex_bam + - id: target_intervals + source: pool_a_target_intervals + - id: bait_intervals + source: pool_a_bait_intervals + - id: reference + source: reference + - id: hsmetrics_minimum_mapping_quality + source: hsmetrics_minimum_mapping_quality + - id: hsmetrics_minimum_base_quality + source: hsmetrics_minimum_base_quality + - id: hsmetrics_coverage_cap + source: hsmetrics_coverage_cap + out: + - id: gatk_collect_insert_size_metrics_histogram_pdf + - id: gatk_collect_insert_size_metrics_txt + - id: gatk_collect_hs_metrics_txt + - id: gatk_collect_hs_metrics_per_base_coverage_txt + - id: gatk_collect_hs_metrics_per_target_coverage_txt + - id: gatk_collect_alignment_summary_metrics_txt + run: ../bam_qc_stats/bam_qc_stats.cwl + label: bam_qc_stats_pool_a + 'sbg:x': 351.4375 + 'sbg:y': 1563.96875 + - id: calculate_noise + in: + - id: reference + source: reference + - id: bam_file + source: duplex_bam + - id: bed_file + source: noise_sites_bed + - id: sample_id + source: sample_name + - id: threshold + source: sequence_qc_threshold + - id: truncate + source: sequence_qc_truncate + - id: min_mapq + source: sequence_qc_min_mapq + - id: min_basq + source: sequence_qc_min_basq + out: + - id: sequence_qc_pileup + - id: sequence_qc_noise_positions + - id: sequence_qc_noise_by_substitution + - id: sequence_qc_noise_acgt + - id: sequence_qc_noise_n + - id: sequence_qc_noise_del + - id: sequence_qc_figures + run: ../command_line_tools/sequence_qc/0.2.3/sequence_qc_0.2.3.cwl + 'sbg:x': 351.4375 + 'sbg:y': 998.4375 + - id: bam_qc_stats_pool_b + in: + - id: input + 
source: + - duplex_bam + - id: target_intervals + source: pool_b_target_intervals + - id: bait_intervals + source: pool_b_bait_intervals + - id: reference + source: reference + - id: hsmetrics_minimum_mapping_quality + source: hsmetrics_minimum_mapping_quality + - id: hsmetrics_minimum_base_quality + source: hsmetrics_minimum_base_quality + - id: hsmetrics_coverage_cap + source: hsmetrics_coverage_cap + out: + - id: gatk_collect_insert_size_metrics_histogram_pdf + - id: gatk_collect_insert_size_metrics_txt + - id: gatk_collect_hs_metrics_txt + - id: gatk_collect_hs_metrics_per_base_coverage_txt + - id: gatk_collect_hs_metrics_per_target_coverage_txt + - id: gatk_collect_alignment_summary_metrics_txt + run: ../bam_qc_stats/bam_qc_stats.cwl + label: bam_qc_stats_pool_b + 'sbg:x': 351.4375 + 'sbg:y': 1373.125 + - id: biometrics_major_0_2_13 + in: + - id: input + linkMerge: merge_nested + source: + - biometrics_extract_0_2_13/biometrics_extract_pickle + - id: major_threshold + source: major_threshold + - id: prefix + source: prefix + - id: plot + source: plot + - id: json + source: json + out: + - id: biometrics_major_csv + - id: biometrics_major_json + - id: biometrics_major_plot + run: ../command_line_tools/biometrics_major/0.2.13/biometrics_major.cwl + 'sbg:x': 982.1435546875 + 'sbg:y': 2313.71875 + - id: biometrics_extract_0_2_13 + in: + - id: sample_bam + source: duplex_bam + - id: sample_sex + source: sample_sex + - id: sample_group + source: sample_group + - id: sample_name + source: sample_name + - id: fafile + source: reference + - id: vcf_file + source: vcf_file + - id: min_coverage + default: 200 + out: + - id: biometrics_extract_pickle + run: ../command_line_tools/biometrics_extract/0.2.13/biometrics_extract.cwl + 'sbg:x': 351.4375 + 'sbg:y': 1189.28125 + - id: biometrics_minor_0_2_13 + in: + - id: input + linkMerge: merge_nested + source: + - biometrics_extract_0_2_13/biometrics_extract_pickle + - id: prefix + source: prefix + - id: plot + source: plot + - 
id: json + source: json + out: + - id: biometrics_minor_csv + - id: biometrics_minor_json + - id: biometrics_minor_plot + - id: biometrics_minor_sites_plot + run: ../command_line_tools/biometrics_minor/0.2.13/biometrics_minor.cwl + 'sbg:x': 982.1435546875 + 'sbg:y': 2157.875 + - id: getbasecountsmultisample_1_2_5 + in: + - id: genotyping_bams + source: + - duplex_bam + - id: genotyping_bams_ids + source: + - sample_name + - id: filter_duplicate + default: 0 + - id: fragment_count + default: 1 + - id: maf + source: maf + - id: output + source: sample_name + valueFrom: $(self + '_duplex_hotspots_fillout.maf') + - id: ref_fasta + source: reference + out: + - id: fillout + run: >- + ../command_line_tools/getbasecountsmultisample/1.2.5/getbasecountsmultisample_1.2.5.cwl + label: getbasecountsmultisample_1.2.5 + 'sbg:x': 351.4375 + 'sbg:y': 814.59375 +requirements: + - class: SubworkflowFeatureRequirement + - class: InlineJavascriptRequirement + # The 'output' step input of getbasecountsmultisample_1_2_5 uses valueFrom, which CWL v1.0 only permits when this requirement is declared. + - class: StepInputExpressionRequirement diff --git a/qc_duplex_bam/qc_duplex_bam__packed.cwl b/qc_duplex_bam/qc_duplex_bam__packed.cwl new file mode 100644 index 0000000..04ed211 --- /dev/null +++ b/qc_duplex_bam/qc_duplex_bam__packed.cwl @@ -0,0 +1,3268 @@ +{ + "$graph": [ + { + "class": "Workflow", + "id": "#bam_qc_stats.cwl", + "label": "bam_qc_stats", + "inputs": [ + { + "id": "#bam_qc_stats.cwl/input", + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "secondaryFiles": [ + "^.bai" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 374.0625 + }, + { + "id": "#bam_qc_stats.cwl/target_intervals", + "type": "File", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 160.3125 + }, + { + "id": "#bam_qc_stats.cwl/bait_intervals", + "type": "File", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 480.9375 + }, + { + "id": "#bam_qc_stats.cwl/reference", + "type": "File", + "secondaryFiles": [ + "^.fasta.fai", + "^.dict" + ], + "https://www.sevenbridges.com/x": 0, + 
"https://www.sevenbridges.com/y": 267.1875 + }, + { + "id": "#bam_qc_stats.cwl/temporary_directory", + "type": [ + "null", + "string" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 53.4375 + }, + { + "id": "#bam_qc_stats.cwl/hsmetrics_minimum_mapping_quality", + "type": [ + "null", + "int" + ], + "label": "hsmetrics_minimum_mapping_quality", + "https://www.sevenbridges.com/x": 1, + "https://www.sevenbridges.com/y": 613 + }, + { + "id": "#bam_qc_stats.cwl/hsmetrics_minimum_base_quality", + "type": [ + "null", + "int" + ], + "label": "hsmetrics_minimum_base_quality", + "https://www.sevenbridges.com/x": 3, + "https://www.sevenbridges.com/y": 743 + }, + { + "id": "#bam_qc_stats.cwl/hsmetrics_coverage_cap", + "type": [ + "null", + "int" + ], + "label": "hsmetrics_coverage_cap", + "https://www.sevenbridges.com/x": 2, + "https://www.sevenbridges.com/y": 872 + } + ], + "outputs": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_histogram_pdf", + "outputSource": [ + "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0/gatk_collect_insert_size_metrics_histogram_pdf" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + "https://www.sevenbridges.com/y": 106.875 + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_txt", + "outputSource": [ + "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0/gatk_collect_insert_size_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + "https://www.sevenbridges.com/y": 0 + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_txt", + "outputSource": [ + "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + 
"https://www.sevenbridges.com/y": 213.75 + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_per_base_coverage_txt", + "outputSource": [ + "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_per_base_coverage_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + "https://www.sevenbridges.com/y": 427.5 + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_per_target_coverage_txt", + "outputSource": [ + "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_per_target_coverage_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + "https://www.sevenbridges.com/y": 320.625 + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_alignment_summary_metrics_txt", + "outputSource": [ + "#bam_qc_stats.cwl/gatk_collect_alignment_summary_metrics_4_1_3_0/gatk_collect_alignment_summary_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + "https://www.sevenbridges.com/y": 534.375 + } + ], + "steps": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_alignment_summary_metrics_4_1_3_0", + "in": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_alignment_summary_metrics_4_1_3_0/input", + "source": "#bam_qc_stats.cwl/input" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_alignment_summary_metrics_4_1_3_0/reference", + "source": "#bam_qc_stats.cwl/reference" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_alignment_summary_metrics_4_1_3_0/temporary_directory", + "source": "#bam_qc_stats.cwl/temporary_directory" + } + ], + "out": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_alignment_summary_metrics_4_1_3_0/gatk_collect_alignment_summary_metrics_txt" + } + ], + "run": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl", + "label": "GATK-CollectAlignmentSummaryMetrics", + 
"https://www.sevenbridges.com/x": 334.2886657714844, + "https://www.sevenbridges.com/y": 560.505126953125 + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0", + "in": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/input", + "source": "#bam_qc_stats.cwl/input" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/bait_intervals", + "source": "#bam_qc_stats.cwl/bait_intervals" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/target_intervals", + "source": "#bam_qc_stats.cwl/target_intervals" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/coverage_cap", + "source": "#bam_qc_stats.cwl/hsmetrics_coverage_cap" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/minimum_base_quality", + "source": "#bam_qc_stats.cwl/hsmetrics_minimum_base_quality" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/minimum_mapping_quality", + "source": "#bam_qc_stats.cwl/hsmetrics_minimum_mapping_quality" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/reference", + "source": "#bam_qc_stats.cwl/reference" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/temporary_directory", + "source": "#bam_qc_stats.cwl/temporary_directory" + } + ], + "out": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_txt" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_per_base_coverage_txt" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_per_target_coverage_txt" + } + ], + "run": "#gatk_collect_hs_metrics_4.1.8.0.cwl", + "label": "GATK-CollectHsMetrics", + "https://www.sevenbridges.com/x": 327.8453674316406, + "https://www.sevenbridges.com/y": 372.8453674316406 + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0", + "in": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0/input", + 
"source": "#bam_qc_stats.cwl/input" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0/histogram_file", + "default": "histogram.pdf" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0/temporary_directory", + "source": "#bam_qc_stats.cwl/temporary_directory" + } + ], + "out": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0/gatk_collect_insert_size_metrics_txt" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0/gatk_collect_insert_size_metrics_histogram_pdf" + } + ], + "run": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl", + "label": "GATK-CollectInsertSizeMetrics", + "https://www.sevenbridges.com/x": 335.57733154296875, + "https://www.sevenbridges.com/y": 194.7628936767578 + } + ], + "requirements": [], + "https://schema.org/author": [ + { + "class": "https://schema.org/Person", + "https://schema.org/email": "mailto:murphyc4@mskcc.org", + "https://schema.org/identifier": "", + "https://schema.org/name": "Charles Murphy" + } + ], + "https://schema.org/citation": "", + "https://schema.org/codeRepository": "https://github.com/msk-access/uncollapsed_bam_generation", + "https://schema.org/contributor": [ + { + "class": "https://schema.org/Person", + "https://schema.org/email": "mailto:shahr2@mskcc.org", + "https://schema.org/identifier": "https://orcid.org/0000-0001-9042-6213", + "https://schema.org/name": "Ronak Shah" + } + ], + "https://schema.org/dateCreated": "2020-09-23", + "https://schema.org/license": "https://spdx.org/licenses/Apache-2.0", + "$namespaces": { + "sbg": "https://www.sevenbridges.com/" + } + }, + { + "class": "CommandLineTool", + "id": "#biometrics_extract.cwl", + "baseCommand": [ + "biometrics", + "extract" + ], + "inputs": [ + { + "id": "#biometrics_extract.cwl/sample_bam", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "--sample-bam" + }, + "doc": "BAM file.", + "secondaryFiles": [ + "^.bai" + ] + }, + { + "id": 
"#biometrics_extract.cwl/sample_sex", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--sample-sex" + }, + "doc": "Expected sample sex (i.e. M or F)." + }, + { + "id": "#biometrics_extract.cwl/sample_group", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--sample-group" + }, + "doc": "The sample group (e.g. the sample patient ID)." + }, + { + "id": "#biometrics_extract.cwl/sample_name", + "type": "string", + "inputBinding": { + "position": 0, + "prefix": "--sample-name" + }, + "doc": "Sample name. If not specified, sample name is automatically figured out from the BAM file." + }, + { + "id": "#biometrics_extract.cwl/fafile", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "--fafile" + }, + "doc": "Path to reference fasta.", + "secondaryFiles": [ + "^.fasta.fai" + ] + }, + { + "id": "#biometrics_extract.cwl/vcf_file", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "--vcf" + }, + "doc": "VCF file containing the SNPs to be queried." + }, + { + "id": "#biometrics_extract.cwl/bed_file", + "type": [ + "null", + "File" + ], + "inputBinding": { + "position": 0, + "prefix": "--bed" + }, + "doc": "BED file containing the intervals to be queried." + }, + { + "id": "#biometrics_extract.cwl/database", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--database" + }, + "doc": "Directory to store the intermediate files after running the extraction step." + }, + { + "default": 1, + "id": "#biometrics_extract.cwl/min_mapping_quality", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--min-mapping-quality" + }, + "doc": "Minimum mapping quality of reads to be used for pileup." 
+ }, + { + "default": 1, + "id": "#biometrics_extract.cwl/min_base_quality", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--min-base-quality" + }, + "doc": "Minimum base quality of reads to be used for pileup." + }, + { + "default": 10, + "id": "#biometrics_extract.cwl/min_coverage", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--min-coverage" + }, + "doc": "Minimum coverage to count a site." + }, + { + "default": 0.1, + "id": "#biometrics_extract.cwl/min_homozygous_thresh", + "type": [ + "null", + "float" + ], + "inputBinding": { + "position": 0, + "prefix": "--min-homozygous-thresh" + }, + "doc": "Minimum threshold to define homozygous." + }, + { + "id": "#biometrics_extract.cwl/default_genotype", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--default-genotype" + }, + "doc": "Default genotype if coverage is too low (options are Het or Hom)." + } + ], + "outputs": [ + { + "id": "#biometrics_extract.cwl/biometrics_extract_pickle", + "type": "File", + "outputBinding": { + "glob": "${\n if (inputs.database) {\n return inputs.database + '/' + inputs.sample_name + '.pickle';\n } else {\n return inputs.sample_name + '.pickle';\n }\n}" + } + } + ], + "requirements": [ + { + "class": "ResourceRequirement", + "ramMin": 16000, + "coresMin": 2 + }, + { + "class": "DockerRequirement", + "dockerPull": "ghcr.io/msk-access/biometrics:0.2.13" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "http://purl.org/dc/terms/contributor": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charlie Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://purl.org/dc/terms/creator": [ + { + "class": 
"http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charlie Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://usefulinc.com/ns/doap#release": [ + { + "class": "http://usefulinc.com/ns/doap#Version", + "http://usefulinc.com/ns/doap#name": "biometrics", + "http://usefulinc.com/ns/doap#revision": "0.2.13" + } + ] + }, + { + "class": "CommandLineTool", + "id": "#biometrics_major.cwl", + "baseCommand": [ + "biometrics", + "major" + ], + "inputs": [ + { + "id": "#biometrics_major.cwl/input", + "type": { + "type": "array", + "items": "File", + "inputBinding": { + "prefix": "--input" + } + }, + "inputBinding": { + "position": 0 + }, + "doc": "Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once." + }, + { + "id": "#biometrics_major.cwl/database", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--database" + }, + "doc": "Directory to store the intermediate files after running the extraction step." + }, + { + "default": 0.6, + "id": "#biometrics_major.cwl/major_threshold", + "type": [ + "null", + "float" + ], + "inputBinding": { + "position": 0, + "prefix": "--major-threshold" + }, + "doc": "Major contamination threshold for bad sample." + }, + { + "id": "#biometrics_major.cwl/prefix", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--prefix" + }, + "doc": "Output file prefix." 
+ }, + { + "id": "#biometrics_major.cwl/plot", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--plot" + }, + "doc": "Also output plots of the data." + }, + { + "id": "#biometrics_major.cwl/json", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--json" + }, + "doc": "Also output data in JSON format." + }, + { + "id": "#biometrics_major.cwl/no_db_comparison", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--no-db-compare" + }, + "doc": "Do not compare the sample(s) you provided to all samples in the database, only compare them with each other." + } + ], + "outputs": [ + { + "id": "#biometrics_major.cwl/biometrics_major_csv", + "type": "File", + "outputBinding": { + "glob": "${\n if (inputs.prefix) {\n return inputs.prefix + '_major_contamination.csv'\n } else {\n return 'major_contamination.csv'\n }\n}" + } + }, + { + "id": "#biometrics_major.cwl/biometrics_major_json", + "type": [ + "null", + "File" + ], + "outputBinding": { + "glob": "${\n if (inputs.prefix) {\n return inputs.prefix + '_major_contamination.json'\n } else {\n return 'major_contamination.json'\n }\n}" + } + }, + { + "id": "#biometrics_major.cwl/biometrics_major_plot", + "type": [ + "null", + "File" + ], + "outputBinding": { + "glob": "${\n return 'major_contamination.html'\n}" + } + } + ], + "requirements": [ + { + "class": "ResourceRequirement", + "ramMin": 16000, + "coresMin": 2 + }, + { + "class": "DockerRequirement", + "dockerPull": "ghcr.io/msk-access/biometrics:0.2.13" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "http://purl.org/dc/terms/contributor": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charlie Murphy" + } + ], + 
"http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://purl.org/dc/terms/creator": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charlie Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://usefulinc.com/ns/doap#release": [ + { + "class": "http://usefulinc.com/ns/doap#Version", + "http://usefulinc.com/ns/doap#name": "biometrics", + "http://usefulinc.com/ns/doap#revision": "0.2.13" + } + ] + }, + { + "class": "CommandLineTool", + "id": "#biometrics_minor.cwl", + "baseCommand": [ + "biometrics", + "minor" + ], + "inputs": [ + { + "id": "#biometrics_minor.cwl/input", + "type": { + "type": "array", + "items": "File", + "inputBinding": { + "prefix": "--input" + } + }, + "inputBinding": { + "position": 0 + }, + "doc": "Can be one of three types: (1) path to a CSV file containing sample information (one per line). For example: sample_name,sample_bam,sample_type,sample_sex,sample_group. (2) Path to a '*.pk' file that was produced by the 'extract' tool. (3) Name of the sample to analyze; this assumes there is a file named '{sample_name}.pk' in your database directory. Can be specified more than once." + }, + { + "id": "#biometrics_minor.cwl/database", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--database" + }, + "doc": "Directory to store the intermediate files after running the extraction step." + }, + { + "default": 0.002, + "id": "#biometrics_minor.cwl/minor_threshold", + "type": [ + "null", + "float" + ], + "inputBinding": { + "position": 0, + "prefix": "--minor-threshold" + }, + "doc": "Minor contamination threshold for bad sample." 
+ }, + { + "id": "#biometrics_minor.cwl/prefix", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--prefix" + }, + "doc": "Output file prefix." + }, + { + "id": "#biometrics_minor.cwl/plot", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--plot" + }, + "doc": "Also output plots of the data." + }, + { + "id": "#biometrics_minor.cwl/json", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--json" + }, + "doc": "Also output data in JSON format." + }, + { + "id": "#biometrics_minor.cwl/no_db_comparison", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--no-db-compare" + }, + "doc": "Do not compare the sample(s) you provided to all samples in the database, only compare them with each other." + } + ], + "outputs": [ + { + "id": "#biometrics_minor.cwl/biometrics_minor_csv", + "type": "File", + "outputBinding": { + "glob": "${\n if (inputs.prefix) {\n return inputs.prefix + '_minor_contamination.csv'\n } else {\n return 'minor_contamination.csv'\n }\n}" + } + }, + { + "id": "#biometrics_minor.cwl/biometrics_minor_json", + "type": [ + "null", + "File" + ], + "outputBinding": { + "glob": "${\n if (inputs.prefix) {\n return inputs.prefix + '_minor_contamination.json'\n } else {\n return 'minor_contamination.json'\n }\n}" + } + }, + { + "id": "#biometrics_minor.cwl/biometrics_minor_plot", + "type": [ + "null", + "File" + ], + "outputBinding": { + "glob": "${\n return 'minor_contamination.html'\n}" + } + }, + { + "id": "#biometrics_minor.cwl/biometrics_minor_sites_plot", + "type": [ + "null", + "File" + ], + "outputBinding": { + "glob": "${\n return 'minor_contamination_sites.html'\n}" + } + } + ], + "requirements": [ + { + "class": "ResourceRequirement", + "ramMin": 16000, + "coresMin": 2 + }, + { + "class": "DockerRequirement", + "dockerPull": "ghcr.io/msk-access/biometrics:0.2.13" + }, + { + "class": 
"InlineJavascriptRequirement" + } + ], + "http://purl.org/dc/terms/contributor": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charlie Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://purl.org/dc/terms/creator": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charlie Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://usefulinc.com/ns/doap#release": [ + { + "class": "http://usefulinc.com/ns/doap#Version", + "http://usefulinc.com/ns/doap#name": "biometrics", + "http://usefulinc.com/ns/doap#revision": "0.2.13" + } + ] + }, + { + "class": "CommandLineTool", + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl", + "baseCommand": [ + "gatk", + "CollectAlignmentSummaryMetrics" + ], + "inputs": [ + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/memory_per_job", + "type": [ + "null", + "int" + ], + "doc": "Memory per job in megabytes" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/memory_overhead", + "type": [ + "null", + "int" + ], + "doc": "Memory overhead per job in megabytes" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/number_of_threads", + "type": [ + "null", + "int" + ] + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/input", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "-I" + }, + "doc": "Input file (bam or sam). Required." 
+ }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/output_file_name", + "type": [ + "null", + "string" + ], + "doc": "File to write the output to. Required." + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/reference", + "type": [ + "null", + "File" + ], + "inputBinding": { + "position": 0, + "prefix": "-R" + }, + "doc": "Reference sequence file. Note that while this argument is not required, without it only a small subset of the metrics will be calculated. Note also that if a reference sequence is provided, it must be accompanied by a sequence dictionary. Default value: null.", + "secondaryFiles": [ + "^.fasta.fai", + "^.dict" + ] + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/adaptor_sequence", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--ADAPTER_SEQUENCE" + }, + "doc": "List of adapter sequences to use when processing the alignment metrics. This argument may be specified 0 or more times. Default value: [AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG, AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCTGCTTG, AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNNNATCTCGTATGCCGTCTTCTGCTTG]." + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/metrics_acciumulation_level", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--METRIC_ACCUMULATION_LEVEL" + }, + "doc": "The level(s) at which to accumulate metrics. Default value: [ALL_READS]. This option can be set to 'null' to clear the default value. Possible values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be specified 0 or more times. This option can be set to 'null' to clear the default list." 
+ }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/expected_pair_orientations", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--EXPECTED_PAIR_ORIENTATIONS" + }, + "doc": "Paired-end reads that do not have this expected orientation will be considered chimeric. This argument may be specified 0 or more times. Default value: [FR]. Possible values: {FR, RF, TANDEM}" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/is_bisulfite_sequenced", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--IS_BISULFITE_SEQUENCED" + }, + "doc": "Whether the SAM or BAM file consists of bisulfite sequenced reads. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/max_insert_size", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--MAX_INSERT_SIZE" + }, + "doc": "Paired-end reads above this insert size will be considered chimeric along with inter-chromosomal pairs. Default value: 100000." + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/validation_stringency", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--VALIDATION_STRINGENCY" + }, + "doc": "Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded. Default value: STRICT. This option can be set to 'null' to clear the default value. Possible values: {STRICT,LENIENT, SILENT}" + }, + { + "default": true, + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/assume_sorted", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--ASSUME_SORTED" + }, + "doc": "If true (default), then the sort order in the header file will be ignored. 
Default value: true. This option can be set to 'null' to clear the default value. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/stop_after", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--STOP_AFTER" + }, + "doc": "Stop after processing N reads, mainly for debugging. Default value: 0. This option can be set to 'null' to clear the default value." + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/create_index", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CREATE_INDEX" + }, + "doc": "Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/create_md5_file", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CREATE_MD5_FILE" + }, + "doc": "Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/use_jdk_deflater", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--USE_JDK_DEFLATER" + }, + "doc": "Use the JDK Deflater instead of the Intel Deflater for writing compressed output" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/use_jdk_inflater", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--USE_JDK_INFLATER" + }, + "doc": "Use the JDK Inflater instead of the Intel Inflater for reading compressed input" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/temporary_directory", + "type": [ + "null", + "string" + ], + "doc": "Default value: null. This option may be specified 0 or more times." 
+ } + ], + "outputs": [ + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/gatk_collect_alignment_summary_metrics_txt", + "type": "File", + "outputBinding": { + "glob": "${\n if (inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_alignment_summary_metrics.txt')\n }\n}" + } + } + ], + "label": "GATK-CollectAlignmentSummaryMetrics", + "arguments": [ + { + "position": 0, + "prefix": "--java-options", + "valueFrom": "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n return \"-Xmx15G\"\n }\n else {\n return \"-Xmx15G\"\n }\n}" + }, + { + "position": 0, + "prefix": "--TMP_DIR", + "valueFrom": "${\n if(inputs.temporary_directory)\n return inputs.temporary_directory;\n return runtime.tmpdir\n}" + }, + { + "position": 0, + "prefix": "-O", + "valueFrom": "${\n if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_alignment_summary_metrics.txt')\n }\n}" + } + ], + "requirements": [ + { + "class": "ResourceRequirement", + "ramMin": 32000, + "coresMin": 1 + }, + { + "class": "DockerRequirement", + "dockerPull": "ghcr.io/msk-access/gatk:4.1.8.0" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "http://purl.org/dc/terms/contributor": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": 
"http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://purl.org/dc/terms/creator": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://usefulinc.com/ns/doap#release": [ + { + "class": "http://usefulinc.com/ns/doap#Version", + "http://usefulinc.com/ns/doap#name": "gatk4", + "http://usefulinc.com/ns/doap#revision": "4.1.8.0" + } + ] + }, + { + "class": "CommandLineTool", + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl", + "baseCommand": [ + "gatk", + "CollectHsMetrics" + ], + "inputs": [ + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/input", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "-I" + }, + "doc": "An aligned SAM or BAM file. Required." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/bait_intervals", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "--BAIT_INTERVALS" + }, + "doc": "An interval list file that contains the locations of the baits used. This argument must be specified at least once. Required." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/target_intervals", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "--TARGET_INTERVALS" + }, + "doc": "An interval list file that contains the locations of the targets. This argument must be specified at least once. Required." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/output_file_name", + "type": [ + "null", + "string" + ], + "doc": "The output file to write the metrics to. Required." 
+ }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/per_base_coverage", + "type": [ + "null", + "string" + ], + "doc": "An optional file to output per base coverage information to. The per-base file contains one line per target base and can grow very large. It is not recommended for use with large target sets. Default value: null." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/per_target_coverage", + "type": [ + "null", + "string" + ], + "doc": "An optional file to output per target coverage information to. Default value: null." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/theoretical_sensitivity_output", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--THEORETICAL_SENSITIVITY_OUTPUT" + }, + "doc": "Output for Theoretical Sensitivity metrics where the allele fractions are provided by the ALLELE_FRACTION argument. Default value: null." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/allele_fraction", + "type": [ + "null", + "float" + ], + "inputBinding": { + "position": 0, + "prefix": "--ALLELE_FRACTION" + }, + "doc": "Allele fraction for which to calculate theoretical sensitivity. This argument may be specified 0 or more times. Default value: [0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.3, 0.5]." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/bait_set_name", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--BAIT_SET_NAME" + }, + "doc": "Bait set name. If not provided it is inferred from the filename of the bait intervals. Default value: null." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/clip_overlapping_reads", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CLIP_OVERLAPPING_READS" + }, + "doc": "True if we are to clip overlapping reads, false otherwise. Default value: true. 
Possible values: {true, false}" + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/coverage_cap", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--COVERAGE_CAP" + }, + "doc": "Parameter to set a max coverage limit for Theoretical Sensitivity calculations. Default is 200. Default value: 200." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/include_indels", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--INCLUDE_INDELS" + }, + "doc": "If true count inserted bases as on target and deleted bases as covered by a read. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/minimum_base_quality", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--MINIMUM_BASE_QUALITY" + }, + "doc": "Minimum base quality for a base to contribute coverage. Default value: 20." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/minimum_mapping_quality", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--MINIMUM_MAPPING_QUALITY" + }, + "doc": "Minimum mapping quality for a read to contribute coverage. Default value: 20." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/near_distance", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--NEAR_DISTANCE" + }, + "doc": "The maximum distance between a read and the nearest probe/bait/amplicon for the read to be considered 'near probe' and included in percent selected. Default value: 250." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/sample_size", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--SAMPLE_SIZE" + }, + "doc": "Sample Size used for Theoretical Het Sensitivity sampling. Default is 10000. Default value: 10000." 
+ }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/reference", + "type": [ + "null", + "File" + ], + "inputBinding": { + "position": 0, + "prefix": "-R" + }, + "doc": "Reference sequence file. Note that while this argument is not required, without it only a small subset of the metrics will be calculated. Note also that if a reference sequence is provided, it must be accompanied by a sequence dictionary. Default value: null.", + "secondaryFiles": [ + "^.fasta.fai", + "^.dict" + ] + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/metrics_acciumulation_level", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--METRIC_ACCUMULATION_LEVEL" + }, + "doc": "The level(s) at which to accumulate metrics. Default value: [ALL_READS]. This option can be set to 'null' to clear the default value. Possible values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be specified 0 or more times. This option can be set to 'null' to clear the default list." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/validation_stringency", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--VALIDATION_STRINGENCY" + }, + "doc": "Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded. Default value: STRICT. This option can be set to 'null' to clear the default value. Possible values: {STRICT,LENIENT, SILENT}" + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/create_index", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CREATE_INDEX" + }, + "doc": "Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: false. 
Possible values: {true, false}" + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/create_md5_file", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CREATE_MD5_FILE" + }, + "doc": "Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/memory_per_job", + "type": [ + "null", + "int" + ], + "doc": "Memory per job in megabytes" + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/memory_overhead", + "type": [ + "null", + "int" + ], + "doc": "Memory overhead per job in megabytes" + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/number_of_threads", + "type": [ + "null", + "int" + ] + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/temporary_directory", + "type": [ + "null", + "string" + ], + "doc": "Default value: null. This option may be specified 0 or more times." + } + ], + "outputs": [ + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/gatk_collect_hs_metrics_txt", + "type": "File", + "outputBinding": { + "glob": "${\n if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_hs_metrics.txt')\n }\n}" + } + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/gatk_collect_hs_metrics_per_base_coverage_txt", + "type": "File", + "outputBinding": { + "glob": "${\n if(inputs.per_base_coverage){\n return inputs.per_base_coverage\n } else {\n return inputs.input.basename.replace(/.bam/, '_per_base_coverage.txt')\n }\n}" + } + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/gatk_collect_hs_metrics_per_target_coverage_txt", + "type": "File", + "outputBinding": { + "glob": "${\n if(inputs.per_target_coverage){\n return inputs.per_target_coverage\n } else {\n return inputs.input.basename.replace(/.bam/, '_per_target_coverage.txt')\n }\n}" + } + } + ], + "label": "GATK-CollectHsMetrics", + "arguments": [ + { + "position": 0, + 
"prefix": "--java-options", + "valueFrom": "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n return \"-Xmx15G\"\n }\n else {\n return \"-Xmx15G\"\n }\n}" + }, + { + "position": 0, + "prefix": "--TMP_DIR", + "valueFrom": "${\n if(inputs.temporary_directory)\n return inputs.temporary_directory;\n return runtime.tmpdir\n}" + }, + { + "position": 0, + "prefix": "-O", + "valueFrom": "${\n if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_hs_metrics.txt')\n }\n}" + }, + { + "position": 0, + "prefix": "--PER_TARGET_COVERAGE", + "valueFrom": "${\n if(inputs.per_target_coverage){\n return inputs.per_target_coverage\n } else {\n return inputs.input.basename.replace(/.bam/, '_per_target_coverage.txt')\n }\n}" + }, + { + "position": 0, + "prefix": "--PER_BASE_COVERAGE", + "valueFrom": "${\n if(inputs.per_base_coverage){\n return inputs.per_base_coverage\n } else {\n return inputs.input.basename.replace(/.bam/, '_per_base_coverage.txt')\n }\n}" + } + ], + "requirements": [ + { + "class": "ResourceRequirement", + "ramMin": 32000, + "coresMin": 1 + }, + { + "class": "DockerRequirement", + "dockerPull": "ghcr.io/msk-access/gatk:4.1.8.0" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "http://purl.org/dc/terms/contributor": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": 
"http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://purl.org/dc/terms/creator": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://usefulinc.com/ns/doap#release": [ + { + "class": "http://usefulinc.com/ns/doap#Version", + "http://usefulinc.com/ns/doap#name": "gatk4", + "http://usefulinc.com/ns/doap#revision": "4.1.8.0" + } + ] + }, + { + "class": "CommandLineTool", + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl", + "baseCommand": [ + "gatk", + "CollectInsertSizeMetrics" + ], + "inputs": [ + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/memory_per_job", + "type": [ + "null", + "int" + ], + "doc": "Memory per job in megabytes" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/memory_overhead", + "type": [ + "null", + "int" + ], + "doc": "Memory overhead per job in megabytes" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/number_of_threads", + "type": [ + "null", + "int" + ] + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/input", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "-I" + }, + "doc": "Input file (bam or sam). Required." + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/output_file_name", + "type": [ + "null", + "string" + ], + "doc": "File to write the output to. Required." 
+ }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/histogram_file", + "type": [ + "null", + "string" + ], + "doc": "File to write insert size Histogram chart to. Required." + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/deviations", + "type": [ + "null", + "float" + ], + "inputBinding": { + "position": 0, + "prefix": "--DEVIATIONS" + }, + "doc": "Generate mean, sd and plots by trimming the data down to MEDIAN + DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION. This is done because insert size data typically includes enough anomalous values from chimeras and other artifacts to make the mean and sd grossly misleading regarding the real distribution. Default value: 10.0. This option can be set to 'null' to clear the default value." + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/histogram_width", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--HISTOGRAM_WIDTH" + }, + "doc": "Explicitly sets the Histogram width, overriding automatic truncation of Histogram tail. Also, when calculating mean and standard deviation, only bins <= Histogram_WIDTH will be included. Default value: null." + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/minimum_pct", + "type": [ + "null", + "float" + ], + "inputBinding": { + "position": 0, + "prefix": "--MINIMUM_PCT" + }, + "doc": "When generating the Histogram, discard any data categories (out of FR, TANDEM, RF) that have fewer than this percentage of overall reads. (Range: 0 to 1). Default value: 0.05. This option can be set to 'null' to clear the default value." + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/metrics_acciumulation_level", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--METRIC_ACCUMULATION_LEVEL" + }, + "doc": "The level(s) at which to accumulate metrics. Default value: [ALL_READS]. This option can be set to 'null' to clear the default value. 
Possible values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be specified 0 or more times. This option can be set to 'null' to clear the default list." + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/include_duplicates", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--INCLUDE_DUPLICATES" + }, + "doc": "If true, also include reads marked as duplicates in the insert size histogram. Default value: false. This option can be set to 'null' to clear the default value. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/validation_stringency", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--VALIDATION_STRINGENCY" + }, + "doc": "Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded. Default value: STRICT. This option can be set to 'null' to clear the default value. Possible values: {STRICT,LENIENT, SILENT}" + }, + { + "default": true, + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/assume_sorted", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--ASSUME_SORTED" + }, + "doc": "If true (default), then the sort order in the header file will be ignored. Default value: true. This option can be set to 'null' to clear the default value. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/stop_after", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--STOP_AFTER" + }, + "doc": "Stop after processing N reads, mainly for debugging. Default value: 0. This option can be set to 'null' to clear the default value." 
+ }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/create_index", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CREATE_INDEX" + }, + "doc": "Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/create_md5_file", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CREATE_MD5_FILE" + }, + "doc": "Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/use_jdk_deflater", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--USE_JDK_DEFLATER" + }, + "doc": "Use the JDK Deflater instead of the Intel Deflater for writing compressed output" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/use_jdk_inflater", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--USE_JDK_INFLATER" + }, + "doc": "Use the JDK Inflater instead of the Intel Inflater for reading compressed input" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/temporary_directory", + "type": [ + "null", + "string" + ], + "doc": "Default value: null. This option may be specified 0 or more times." 
+ } + ], + "outputs": [ + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/gatk_collect_insert_size_metrics_txt", + "type": "File", + "outputBinding": { + "glob": "${\n if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_insert_size_metrics.txt')\n }\n}" + } + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/gatk_collect_insert_size_metrics_histogram_pdf", + "type": "File", + "outputBinding": { + "glob": "${\n if(inputs.histogram_file){\n return inputs.histogram_file\n } else {\n return inputs.input.basename.replace(/.bam/, '_histogram.pdf')\n }\n}" + } + } + ], + "label": "GATK-CollectInsertSizeMetrics", + "arguments": [ + { + "position": 0, + "prefix": "--java-options", + "valueFrom": "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n return \"-Xmx15G\"\n }\n else {\n return \"-Xmx15G\"\n }\n}" + }, + { + "position": 0, + "prefix": "--TMP_DIR", + "valueFrom": "${\n if(inputs.temporary_directory)\n return inputs.temporary_directory;\n return runtime.tmpdir\n}" + }, + { + "position": 2, + "prefix": "-O", + "valueFrom": "${\n if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_insert_size_metrics.txt')\n }\n}" + }, + { + "position": 2, + "prefix": "-H", + "valueFrom": "${\n if(inputs.histogram_file){\n return inputs.histogram_file\n } else {\n return 
inputs.input.basename.replace(/.bam/, '_histogram.pdf')\n }\n}" + } + ], + "requirements": [ + { + "class": "ResourceRequirement", + "ramMin": 32000, + "coresMin": 1 + }, + { + "class": "DockerRequirement", + "dockerPull": "ghcr.io/msk-access/gatk:4.1.8.0" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "http://purl.org/dc/terms/contributor": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://purl.org/dc/terms/creator": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://usefulinc.com/ns/doap#release": [ + { + "class": "http://usefulinc.com/ns/doap#Version", + "http://usefulinc.com/ns/doap#name": "gatk4", + "http://usefulinc.com/ns/doap#revision": "4.1.8.0" + } + ] + }, + { + "class": "CommandLineTool", + "id": "#getbasecountsmultisample_1.2.5.cwl", + "baseCommand": [ + "GetBaseCountsMultiSample" + ], + "inputs": [ + { + "id": "#getbasecountsmultisample_1.2.5.cwl/memory_per_job", + "type": [ + "null", + "int" + ], + "doc": "Memory per job in megabytes" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/memory_overhead", + "type": [ + "null", + "int" + ], + "doc": "Memory overhead per job in megabytes" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/number_of_threads", + "type": [ + "null", + "int" + ] + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/genotyping_bams", + "type": [ + "File", + { + "type": 
"array", + "items": "File" + } + ], + "doc": "Input bam file" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/genotyping_bams_ids", + "type": [ + "string", + { + "type": "array", + "items": "string" + } + ], + "doc": "Input bam, sample identifier to be used for \"Tumor Sample Barcode\" for maf or Sample name in the header for vcf" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/filter_duplicate", + "type": "int", + "inputBinding": { + "position": 0, + "prefix": "--filter_duplicate" + }, + "doc": "Whether to filter reads that are marked as duplicate. 0=off, 1=on. Default 1" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/fragment_count", + "type": "int", + "inputBinding": { + "position": 0, + "prefix": "--fragment_count" + }, + "doc": "Whether to output fragment read counts DPF/RDF/ADF. 0=off, 1=on. Default 0" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/maf", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "--maf" + }, + "doc": "Input variant file in TCGA maf format. --maf or --vcf need to be specified at least once. But --maf and --vcf are mutually exclusive" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/maq", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--maq" + }, + "doc": "Mapping quality threshold. 
Default 20" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/omaf", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--omaf" + }, + "doc": "Output the result in maf format" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/output", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--output", + "valueFrom": "${\n if (inputs.output) {\n return inputs.output\n } else if (inputs.genotyping_bams.length) {\n return inputs.maf.basename.replace('.maf', '_fillout.maf')\n } else {\n return inputs.genotyping_bams.basename.replace('.bam', '_fillout.maf')\n }\n}" + }, + "doc": "Filename for output of raw fillout data in MAF/VCF format" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/ref_fasta", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "--fasta" + }, + "doc": "Input reference sequence file" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/vcf", + "type": [ + "null", + "File" + ], + "inputBinding": { + "position": 0, + "prefix": "--vcf" + }, + "doc": "Input variant file in vcf-like format(the first 5 columns are used). --maf or --vcf need to be specified at least once. But --maf and --vcf are mutually exclusive" + }, + { + "id": "#getbasecountsmultisample_1.2.5.cwl/generic_counting", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--generic_counting" + }, + "doc": "Use the newly implemented generic counting algorithm. Works better for complex variants. 
You may get different allele count result from the default counting algorithm" + } + ], + "outputs": [ + { + "id": "#getbasecountsmultisample_1.2.5.cwl/fillout", + "type": "File", + "outputBinding": { + "glob": "${\n if (inputs.output) {\n return inputs.output\n } else if (inputs.genotyping_bams.length) {\n return inputs.maf.basename.replace('.maf', '_fillout.maf')\n } else {\n return inputs.genotyping_bams.basename.replace('.bam', '_fillout.maf')\n }\n}" + } + } + ], + "label": "getbasecountsmultisample_1.2.5", + "arguments": [ + { + "position": 0, + "prefix": "", + "shellQuote": false, + "valueFrom": "$('--bam_fof bam_fof.tsv')\n" + }, + { + "position": 0, + "prefix": "--thread", + "valueFrom": "$(runtime.cores)" + } + ], + "requirements": [ + { + "class": "ShellCommandRequirement" + }, + { + "class": "ResourceRequirement", + "ramMin": 16000, + "coresMin": 2 + }, + { + "class": "DockerRequirement", + "dockerPull": "ghcr.io/msk-access/gbcms:1.2.5" + }, + { + "class": "InitialWorkDirRequirement", + "listing": [ + { + "entryname": "bam_fof.tsv", + "entry": "${\n if (typeof(inputs.genotyping_bams_ids) == 'object') {\n return inputs.genotyping_bams_ids.map(function(sid, i) {\n return sid + \"\\t\" +\n inputs.genotyping_bams[i].path\n }).join(\"\\n\")\n } else {\n return inputs.genotyping_bams_ids + \"\\t\" + inputs.genotyping_bams.path + \"\\n\"\n }\n}", + "writable": false + } + ] + }, + { + "class": "InlineJavascriptRequirement" + }, + { + "class": "StepInputExpressionRequirement" + } + ], + "http://purl.org/dc/terms/contributor": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:shahr2@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Ronak Shah" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://purl.org/dc/terms/creator": [ + { + "class": 
"http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:johnsoni@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Ian Johnson" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://usefulinc.com/ns/doap#release": [ + { + "class": "http://usefulinc.com/ns/doap#Version", + "http://usefulinc.com/ns/doap#name": "GetBaseCountsMultiSample", + "http://usefulinc.com/ns/doap#revision": "1.2.5" + } + ] + }, + { + "class": "CommandLineTool", + "id": "#sequence_qc_0.2.3.cwl", + "baseCommand": [ + "calculate_noise" + ], + "inputs": [ + { + "id": "#sequence_qc_0.2.3.cwl/reference", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "--ref_fasta" + }, + "secondaryFiles": [ + "^.fasta.fai" + ], + "doc": "Path to reference fasta, containing all regions in bed_file" + }, + { + "id": "#sequence_qc_0.2.3.cwl/bam_file", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "--bam_file" + }, + "secondaryFiles": [ + "^.bai" + ], + "doc": "Path to BAM file for calculating noise [required]" + }, + { + "id": "#sequence_qc_0.2.3.cwl/bed_file", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "--bed_file" + }, + "doc": "Path to BED file containing regions over which to calculate noise [required]" + }, + { + "id": "#sequence_qc_0.2.3.cwl/sample_id", + "type": "string", + "inputBinding": { + "position": 0, + "prefix": "--sample_id" + }, + "doc": "Prefix to include in all output file names" + }, + { + "id": "#sequence_qc_0.2.3.cwl/threshold", + "type": [ + "null", + "float" + ], + "inputBinding": { + "position": 0, + "prefix": "--threshold" + }, + "doc": "Alt allele frequency past which to ignore positions from the calculation." 
+ }, + { + "id": "#sequence_qc_0.2.3.cwl/truncate", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--truncate" + }, + "doc": "Whether to exclude trailing bases from reads that only partially overlap the bed file (0 or 1)" + }, + { + "id": "#sequence_qc_0.2.3.cwl/min_mapq", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--min_mapq" + }, + "doc": "Exclude reads with a lower mapping quality" + }, + { + "id": "#sequence_qc_0.2.3.cwl/min_basq", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--min_basq" + }, + "doc": "Exclude bases with a lower base quality" + } + ], + "outputs": [ + { + "id": "#sequence_qc_0.2.3.cwl/sequence_qc_pileup", + "type": "File", + "outputBinding": { + "glob": "${\n return inputs.sample_id + '_pileup.tsv'\n}" + } + }, + { + "id": "#sequence_qc_0.2.3.cwl/sequence_qc_noise_positions", + "type": "File", + "outputBinding": { + "glob": "${\n return inputs.sample_id + '_noise_positions.tsv'\n}" + } + }, + { + "id": "#sequence_qc_0.2.3.cwl/sequence_qc_noise_by_substitution", + "type": "File", + "outputBinding": { + "glob": "${\n return inputs.sample_id + '_noise_by_substitution.tsv'\n}" + } + }, + { + "id": "#sequence_qc_0.2.3.cwl/sequence_qc_noise_acgt", + "type": "File", + "outputBinding": { + "glob": "${\n return inputs.sample_id + '_noise_acgt.tsv'\n}" + } + }, + { + "id": "#sequence_qc_0.2.3.cwl/sequence_qc_noise_n", + "type": "File", + "outputBinding": { + "glob": "${\n return inputs.sample_id + '_noise_n.tsv'\n}" + } + }, + { + "id": "#sequence_qc_0.2.3.cwl/sequence_qc_noise_del", + "type": "File", + "outputBinding": { + "glob": "${\n return inputs.sample_id + '_noise_del.tsv'\n}" + } + }, + { + "id": "#sequence_qc_0.2.3.cwl/sequence_qc_figures", + "type": "File", + "outputBinding": { + "glob": "${\n return inputs.sample_id + '_noise.html'\n}" + } + } + ], + "requirements": [ + { + "class": "ResourceRequirement", + "ramMin": 8000, + 
"coresMin": 1 + }, + { + "class": "DockerRequirement", + "dockerPull": "ghcr.io/msk-access/sequence_qc:0.2.3" + }, + { + "class": "InlineJavascriptRequirement" + }, + { + "class": "EnvVarRequirement", + "envDef": [ + { + "envValue": "en_US.utf-8", + "envName": "LANG" + }, + { + "envValue": "en_US.utf-8", + "envName": "LC_ALL" + } + ] + } + ], + "http://purl.org/dc/terms/contributor": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charlie Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://purl.org/dc/terms/creator": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charlie Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://usefulinc.com/ns/doap#release": [ + { + "class": "http://usefulinc.com/ns/doap#Version", + "http://usefulinc.com/ns/doap#name": "sequence_qc", + "http://usefulinc.com/ns/doap#revision": "0.2.3" + } + ] + }, + { + "class": "Workflow", + "id": "#main", + "label": "qc_duplex", + "inputs": [ + { + "id": "#reference", + "type": "File", + "doc": "Path to reference fasta, containing all regions in bed_file", + "secondaryFiles": [ + "^.fasta.fai" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 903.75 + }, + { + "id": "#duplex_bam", + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "duplex_bam", + "secondaryFiles": [ + "^.bai" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 2399.5625 + }, + { + "id": "#pool_a_target_intervals", + "type": 
"File", + "label": "pool_a_target_intervals", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 1331.125 + }, + { + "id": "#pool_a_bait_intervals", + "type": "File", + "label": "pool_a_bait_intervals", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 1437.96875 + }, + { + "id": "#pool_b_target_intervals", + "type": "File", + "label": "pool_b_target_intervals", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 1117.4375 + }, + { + "id": "#pool_b_bait_intervals", + "type": "File", + "label": "pool_b_bait_intervals", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 1224.28125 + }, + { + "id": "#noise_sites_bed", + "type": "File", + "label": "noise_sites_bed", + "doc": "Path to BED file containing regions over which to calculate noise [required]", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 1651.65625 + }, + { + "id": "#sample_name", + "type": [ + "null", + "string", + { + "type": "array", + "items": "string" + } + ], + "doc": "Sample name. 
If not specified, sample name is automatically figured out from the BAM file.", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 690.0625 + }, + { + "id": "#plot", + "type": [ + "null", + "boolean" + ], + "doc": "Also output plots of the data.", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 1544.8125 + }, + { + "id": "#json", + "type": [ + "null", + "boolean" + ], + "doc": "Also output data in JSON format.", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 1972.1875 + }, + { + "id": "#sequence_qc_min_basq", + "type": [ + "null", + "int" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 476.375 + }, + { + "id": "#sequence_qc_min_mapq", + "type": [ + "null", + "int" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 369.53125 + }, + { + "id": "#sequence_qc_threshold", + "type": [ + "null", + "float" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 262.6875 + }, + { + "id": "#sequence_qc_truncate", + "type": [ + "null", + "int" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 155.84375 + }, + { + "id": "#hsmetrics_minimum_mapping_quality", + "type": [ + "null", + "int" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 2079.03125 + }, + { + "id": "#hsmetrics_minimum_base_quality", + "type": [ + "null", + "int" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 2185.875 + }, + { + "id": "#hsmetrics_coverage_cap", + "type": [ + "null", + "int" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 2292.71875 + }, + { + "id": "#prefix", + "type": [ + "null", + "string" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 1010.59375 + }, + { + "id": "#major_threshold", + "type": [ + "null", + "float" + ], + "https://www.sevenbridges.com/x": 0, + 
"https://www.sevenbridges.com/y": 1758.5 + }, + { + "id": "#vcf_file", + "type": "File", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 49 + }, + { + "id": "#sample_sex", + "type": [ + "null", + "string" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 583.21875 + }, + { + "id": "#sample_group", + "type": [ + "null", + "string" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 796.90625 + }, + { + "id": "#maf", + "type": "File", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 1865.34375 + } + ], + "outputs": [ + { + "id": "#sequence_qc_noise_positions", + "outputSource": [ + "#calculate_noise/sequence_qc_noise_positions" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 106.84375 + }, + { + "id": "#sequence_qc_noise_n", + "outputSource": [ + "#calculate_noise/sequence_qc_noise_n" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 213.6875 + }, + { + "id": "#sequence_qc_noise_del", + "outputSource": [ + "#calculate_noise/sequence_qc_noise_del" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 320.53125 + }, + { + "id": "#sequence_qc_noise_acgt", + "outputSource": [ + "#calculate_noise/sequence_qc_noise_acgt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 534.21875 + }, + { + "id": "#sequence_qc_figures", + "outputSource": [ + "#calculate_noise/sequence_qc_figures" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 982.1435546875, + 
"https://www.sevenbridges.com/y": 641.0625 + }, + { + "id": "#gatk_collect_alignment_summary_metrics_txt_pool_b", + "outputSource": [ + "#bam_qc_stats_pool_b/gatk_collect_alignment_summary_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_alignment_summary_metrics_txt_pool_b", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 1816.34375 + }, + { + "id": "#gatk_collect_hs_metrics_per_base_coverage_txt_pool_b", + "outputSource": [ + "#bam_qc_stats_pool_b/gatk_collect_hs_metrics_per_base_coverage_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_hs_metrics_per_base_coverage_txt_pool_b", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 1602.65625 + }, + { + "id": "#gatk_collect_hs_metrics_per_target_coverage_txt_pool_b", + "outputSource": [ + "#bam_qc_stats_pool_b/gatk_collect_hs_metrics_per_target_coverage_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_hs_metrics_per_target_coverage_txt_pool_b", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 1388.96875 + }, + { + "id": "#gatk_collect_hs_metrics_txt_pool_b", + "outputSource": [ + "#bam_qc_stats_pool_b/gatk_collect_hs_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_hs_metrics_txt_pool_b", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 1175.28125 + }, + { + "id": "#gatk_collect_insert_size_metrics_histogram_pdf_pool_b", + "outputSource": [ + "#bam_qc_stats_pool_b/gatk_collect_insert_size_metrics_histogram_pdf" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_insert_size_metrics_histogram_pdf_pool_b", + "https://www.sevenbridges.com/x": 982.1435546875, + 
"https://www.sevenbridges.com/y": 961.59375 + }, + { + "id": "#gatk_collect_insert_size_metrics_txt_pool_b", + "outputSource": [ + "#bam_qc_stats_pool_b/gatk_collect_insert_size_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_insert_size_metrics_txt_pool_b", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 747.90625 + }, + { + "id": "#gatk_collect_alignment_summary_metrics_txt_pool_a", + "outputSource": [ + "#bam_qc_stats_pool_a/gatk_collect_alignment_summary_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_alignment_summary_metrics_txt_pool_a", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 1923.1875 + }, + { + "id": "#gatk_collect_hs_metrics_per_base_coverage_txt_pool_a", + "outputSource": [ + "#bam_qc_stats_pool_a/gatk_collect_hs_metrics_per_base_coverage_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_hs_metrics_per_base_coverage_txt_pool_a", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 1709.5 + }, + { + "id": "#gatk_collect_hs_metrics_per_target_coverage_txt_pool_a", + "outputSource": [ + "#bam_qc_stats_pool_a/gatk_collect_hs_metrics_per_target_coverage_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_hs_metrics_per_target_coverage_txt_pool_a", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 1495.8125 + }, + { + "id": "#gatk_collect_hs_metrics_txt_pool_a", + "outputSource": [ + "#bam_qc_stats_pool_a/gatk_collect_hs_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_hs_metrics_txt_pool_a", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 1282.125 + }, + { 
+ "id": "#gatk_collect_insert_size_metrics_histogram_pdf_pool_a", + "outputSource": [ + "#bam_qc_stats_pool_a/gatk_collect_insert_size_metrics_histogram_pdf" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_insert_size_metrics_histogram_pdf_pool_a", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 1068.4375 + }, + { + "id": "#gatk_collect_insert_size_metrics_txt_pool_a", + "outputSource": [ + "#bam_qc_stats_pool_a/gatk_collect_insert_size_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_insert_size_metrics_txt_pool_a", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 854.75 + }, + { + "id": "#sequence_qc_pileup", + "outputSource": [ + "#calculate_noise/sequence_qc_pileup" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 0 + }, + { + "id": "#sequence_qc_noise_by_substitution", + "outputSource": [ + "#calculate_noise/sequence_qc_noise_by_substitution" + ], + "type": "File", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 427.375 + }, + { + "id": "#biometrics_major_plot", + "outputSource": [ + "#biometrics_major_0_2_13/biometrics_major_plot" + ], + "type": [ + "null", + "File" + ], + "https://www.sevenbridges.com/x": 1495.5341796875, + "https://www.sevenbridges.com/y": 1331.125 + }, + { + "id": "#biometrics_major_json", + "outputSource": [ + "#biometrics_major_0_2_13/biometrics_major_json" + ], + "type": [ + "null", + "File" + ], + "https://www.sevenbridges.com/x": 1495.5341796875, + "https://www.sevenbridges.com/y": 1437.96875 + }, + { + "id": "#biometrics_major_csv", + "outputSource": [ + "#biometrics_major_0_2_13/biometrics_major_csv" + ], + "type": "File", + "https://www.sevenbridges.com/x": 1495.5341796875, + 
"https://www.sevenbridges.com/y": 1544.8125 + }, + { + "id": "#biometrics_extract_pickle", + "outputSource": [ + "#biometrics_extract_0_2_13/biometrics_extract_pickle" + ], + "type": "File", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 2448.5625 + }, + { + "id": "#biometrics_minor_sites_plot", + "outputSource": [ + "#biometrics_minor_0_2_13/biometrics_minor_sites_plot" + ], + "type": [ + "null", + "File" + ], + "https://www.sevenbridges.com/x": 1495.5341796875, + "https://www.sevenbridges.com/y": 903.75 + }, + { + "id": "#biometrics_minor_plot", + "outputSource": [ + "#biometrics_minor_0_2_13/biometrics_minor_plot" + ], + "type": [ + "null", + "File" + ], + "https://www.sevenbridges.com/x": 1495.5341796875, + "https://www.sevenbridges.com/y": 1010.59375 + }, + { + "id": "#biometrics_minor_json", + "outputSource": [ + "#biometrics_minor_0_2_13/biometrics_minor_json" + ], + "type": [ + "null", + "File" + ], + "https://www.sevenbridges.com/x": 1495.5341796875, + "https://www.sevenbridges.com/y": 1117.4375 + }, + { + "id": "#biometrics_minor_csv", + "outputSource": [ + "#biometrics_minor_0_2_13/biometrics_minor_csv" + ], + "type": "File", + "https://www.sevenbridges.com/x": 1495.5341796875, + "https://www.sevenbridges.com/y": 1224.28125 + }, + { + "id": "#fillout_maf", + "outputSource": [ + "#getbasecountsmultisample_1_2_5/fillout" + ], + "type": "File", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 2030.03125 + } + ], + "steps": [ + { + "id": "#bam_qc_stats_pool_a", + "in": [ + { + "id": "#bam_qc_stats_pool_a/input", + "source": [ + "#duplex_bam" + ] + }, + { + "id": "#bam_qc_stats_pool_a/target_intervals", + "source": "#pool_a_target_intervals" + }, + { + "id": "#bam_qc_stats_pool_a/bait_intervals", + "source": "#pool_a_bait_intervals" + }, + { + "id": "#bam_qc_stats_pool_a/reference", + "source": "#reference" + }, + { + "id": "#bam_qc_stats_pool_a/hsmetrics_minimum_mapping_quality", 
+ "source": "#hsmetrics_minimum_mapping_quality" + }, + { + "id": "#bam_qc_stats_pool_a/hsmetrics_minimum_base_quality", + "source": "#hsmetrics_minimum_base_quality" + }, + { + "id": "#bam_qc_stats_pool_a/hsmetrics_coverage_cap", + "source": "#hsmetrics_coverage_cap" + } + ], + "out": [ + { + "id": "#bam_qc_stats_pool_a/gatk_collect_insert_size_metrics_histogram_pdf" + }, + { + "id": "#bam_qc_stats_pool_a/gatk_collect_insert_size_metrics_txt" + }, + { + "id": "#bam_qc_stats_pool_a/gatk_collect_hs_metrics_txt" + }, + { + "id": "#bam_qc_stats_pool_a/gatk_collect_hs_metrics_per_base_coverage_txt" + }, + { + "id": "#bam_qc_stats_pool_a/gatk_collect_hs_metrics_per_target_coverage_txt" + }, + { + "id": "#bam_qc_stats_pool_a/gatk_collect_alignment_summary_metrics_txt" + } + ], + "run": "#bam_qc_stats.cwl", + "label": "bam_qc_stats_pool_a", + "https://www.sevenbridges.com/x": 351.4375, + "https://www.sevenbridges.com/y": 1563.96875 + }, + { + "id": "#calculate_noise", + "in": [ + { + "id": "#calculate_noise/reference", + "source": "#reference" + }, + { + "id": "#calculate_noise/bam_file", + "source": "#duplex_bam" + }, + { + "id": "#calculate_noise/bed_file", + "source": "#noise_sites_bed" + }, + { + "id": "#calculate_noise/sample_id", + "source": "#sample_name" + }, + { + "id": "#calculate_noise/threshold", + "source": "#sequence_qc_threshold" + }, + { + "id": "#calculate_noise/truncate", + "source": "#sequence_qc_truncate" + }, + { + "id": "#calculate_noise/min_mapq", + "source": "#sequence_qc_min_mapq" + }, + { + "id": "#calculate_noise/min_basq", + "source": "#sequence_qc_min_basq" + } + ], + "out": [ + { + "id": "#calculate_noise/sequence_qc_pileup" + }, + { + "id": "#calculate_noise/sequence_qc_noise_positions" + }, + { + "id": "#calculate_noise/sequence_qc_noise_by_substitution" + }, + { + "id": "#calculate_noise/sequence_qc_noise_acgt" + }, + { + "id": "#calculate_noise/sequence_qc_noise_n" + }, + { + "id": "#calculate_noise/sequence_qc_noise_del" + }, + { + "id": 
"#calculate_noise/sequence_qc_figures" + } + ], + "run": "#sequence_qc_0.2.3.cwl", + "https://www.sevenbridges.com/x": 351.4375, + "https://www.sevenbridges.com/y": 998.4375 + }, + { + "id": "#bam_qc_stats_pool_b", + "in": [ + { + "id": "#bam_qc_stats_pool_b/input", + "source": [ + "#duplex_bam" + ] + }, + { + "id": "#bam_qc_stats_pool_b/target_intervals", + "source": "#pool_b_target_intervals" + }, + { + "id": "#bam_qc_stats_pool_b/bait_intervals", + "source": "#pool_b_bait_intervals" + }, + { + "id": "#bam_qc_stats_pool_b/reference", + "source": "#reference" + }, + { + "id": "#bam_qc_stats_pool_b/hsmetrics_minimum_mapping_quality", + "source": "#hsmetrics_minimum_mapping_quality" + }, + { + "id": "#bam_qc_stats_pool_b/hsmetrics_minimum_base_quality", + "source": "#hsmetrics_minimum_base_quality" + }, + { + "id": "#bam_qc_stats_pool_b/hsmetrics_coverage_cap", + "source": "#hsmetrics_coverage_cap" + } + ], + "out": [ + { + "id": "#bam_qc_stats_pool_b/gatk_collect_insert_size_metrics_histogram_pdf" + }, + { + "id": "#bam_qc_stats_pool_b/gatk_collect_insert_size_metrics_txt" + }, + { + "id": "#bam_qc_stats_pool_b/gatk_collect_hs_metrics_txt" + }, + { + "id": "#bam_qc_stats_pool_b/gatk_collect_hs_metrics_per_base_coverage_txt" + }, + { + "id": "#bam_qc_stats_pool_b/gatk_collect_hs_metrics_per_target_coverage_txt" + }, + { + "id": "#bam_qc_stats_pool_b/gatk_collect_alignment_summary_metrics_txt" + } + ], + "run": "#bam_qc_stats.cwl", + "label": "bam_qc_stats_pool_b", + "https://www.sevenbridges.com/x": 351.4375, + "https://www.sevenbridges.com/y": 1373.125 + }, + { + "id": "#biometrics_major_0_2_13", + "in": [ + { + "id": "#biometrics_major_0_2_13/input", + "linkMerge": "merge_nested", + "source": [ + "#biometrics_extract_0_2_13/biometrics_extract_pickle" + ] + }, + { + "id": "#biometrics_major_0_2_13/major_threshold", + "source": "#major_threshold" + }, + { + "id": "#biometrics_major_0_2_13/prefix", + "source": "#prefix" + }, + { + "id": 
"#biometrics_major_0_2_13/plot", + "source": "#plot" + }, + { + "id": "#biometrics_major_0_2_13/json", + "source": "#json" + } + ], + "out": [ + { + "id": "#biometrics_major_0_2_13/biometrics_major_csv" + }, + { + "id": "#biometrics_major_0_2_13/biometrics_major_json" + }, + { + "id": "#biometrics_major_0_2_13/biometrics_major_plot" + } + ], + "run": "#biometrics_major.cwl", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 2313.71875 + }, + { + "id": "#biometrics_extract_0_2_13", + "in": [ + { + "id": "#biometrics_extract_0_2_13/sample_bam", + "source": "#duplex_bam" + }, + { + "id": "#biometrics_extract_0_2_13/sample_sex", + "source": "#sample_sex" + }, + { + "id": "#biometrics_extract_0_2_13/sample_group", + "source": "#sample_group" + }, + { + "id": "#biometrics_extract_0_2_13/sample_name", + "source": "#sample_name" + }, + { + "id": "#biometrics_extract_0_2_13/fafile", + "source": "#reference" + }, + { + "id": "#biometrics_extract_0_2_13/vcf_file", + "source": "#vcf_file" + }, + { + "id": "#biometrics_extract_0_2_13/min_coverage", + "default": 200 + } + ], + "out": [ + { + "id": "#biometrics_extract_0_2_13/biometrics_extract_pickle" + } + ], + "run": "#biometrics_extract.cwl", + "https://www.sevenbridges.com/x": 351.4375, + "https://www.sevenbridges.com/y": 1189.28125 + }, + { + "id": "#biometrics_minor_0_2_13", + "in": [ + { + "id": "#biometrics_minor_0_2_13/input", + "linkMerge": "merge_nested", + "source": [ + "#biometrics_extract_0_2_13/biometrics_extract_pickle" + ] + }, + { + "id": "#biometrics_minor_0_2_13/prefix", + "source": "#prefix" + }, + { + "id": "#biometrics_minor_0_2_13/plot", + "source": "#plot" + }, + { + "id": "#biometrics_minor_0_2_13/json", + "source": "#json" + } + ], + "out": [ + { + "id": "#biometrics_minor_0_2_13/biometrics_minor_csv" + }, + { + "id": "#biometrics_minor_0_2_13/biometrics_minor_json" + }, + { + "id": "#biometrics_minor_0_2_13/biometrics_minor_plot" + }, + { + "id": 
"#biometrics_minor_0_2_13/biometrics_minor_sites_plot" + } + ], + "run": "#biometrics_minor.cwl", + "https://www.sevenbridges.com/x": 982.1435546875, + "https://www.sevenbridges.com/y": 2157.875 + }, + { + "id": "#getbasecountsmultisample_1_2_5", + "in": [ + { + "id": "#getbasecountsmultisample_1_2_5/genotyping_bams", + "source": [ + "#duplex_bam" + ] + }, + { + "id": "#getbasecountsmultisample_1_2_5/genotyping_bams_ids", + "source": [ + "#sample_name" + ] + }, + { + "id": "#getbasecountsmultisample_1_2_5/filter_duplicate", + "default": 0 + }, + { + "id": "#getbasecountsmultisample_1_2_5/fragment_count", + "default": 1 + }, + { + "id": "#getbasecountsmultisample_1_2_5/maf", + "source": "#maf" + }, + { + "id": "#getbasecountsmultisample_1_2_5/output", + "source": "#sample_name", + "valueFrom": "$(self + '_duplex_hotspots_fillout.maf')" + }, + { + "id": "#getbasecountsmultisample_1_2_5/ref_fasta", + "source": "#reference" + } + ], + "out": [ + { + "id": "#getbasecountsmultisample_1_2_5/fillout" + } + ], + "run": "#getbasecountsmultisample_1.2.5.cwl", + "label": "getbasecountsmultisample_1.2.5", + "https://www.sevenbridges.com/x": 351.4375, + "https://www.sevenbridges.com/y": 814.59375 + } + ], + "requirements": [ + { + "class": "SubworkflowFeatureRequirement" + }, + { + "class": "InlineJavascriptRequirement" + } + ] + } + ], + "cwlVersion": "v1.0", + "$schemas": [ + "http://schema.org/version/latest/schemaorg-current-http.rdf" + ] +} \ No newline at end of file diff --git a/qc_simplex_bam/README.md b/qc_simplex_bam/README.md new file mode 100644 index 0000000..80cc2bd --- /dev/null +++ b/qc_simplex_bam/README.md @@ -0,0 +1,41 @@ +### Introduction +The sub-workflow calculates quality control metrics for simplex BAMs. The main outputs are the following: + +1. Targeted capture metrics. +2. Insert size metrics. +3. Alignment metrics. + +**Note:** This sub-workflow was originally designed for MSK-ACCESS data. 
Hence, in addition to the collapsed BAM, it expects two sets of bait/target regions (referred to as pool A and pool B for MSK-ACCESS). + +### Tools used: + +- [bam_qc_stats](../bam_qc_stats/README.md) + +### Usage + +```bash +usage: qc_simplex_bam.cwl [-h] --reference REFERENCE --simplex_bam SIMPLEX_BAM + --pool_b_target_intervals POOL_B_TARGET_INTERVALS + --pool_b_bait_intervals POOL_B_BAIT_INTERVALS + --pool_a_bait_intervals POOL_A_BAIT_INTERVALS + --pool_a_target_intervals POOL_A_TARGET_INTERVALS + [--hsmetrics_minimum_mapping_quality HSMETRICS_MINIMUM_MAPPING_QUALITY] + [--hsmetrics_minimum_base_quality HSMETRICS_MINIMUM_BASE_QUALITY] + [--hsmetrics_coverage_cap HSMETRICS_COVERAGE_CAP] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --reference REFERENCE + --simplex_bam SIMPLEX_BAM + --pool_b_target_intervals POOL_B_TARGET_INTERVALS + --pool_b_bait_intervals POOL_B_BAIT_INTERVALS + --pool_a_bait_intervals POOL_A_BAIT_INTERVALS + --pool_a_target_intervals POOL_A_TARGET_INTERVALS + --hsmetrics_minimum_mapping_quality HSMETRICS_MINIMUM_MAPPING_QUALITY + --hsmetrics_minimum_base_quality HSMETRICS_MINIMUM_BASE_QUALITY + --hsmetrics_coverage_cap HSMETRICS_COVERAGE_CAP +``` diff --git a/qc_simplex_bam/qc_simplex_bam.cwl b/qc_simplex_bam/qc_simplex_bam.cwl new file mode 100644 index 0000000..e7fcf3b --- /dev/null +++ b/qc_simplex_bam/qc_simplex_bam.cwl @@ -0,0 +1,197 @@ +class: Workflow +cwlVersion: v1.0 +id: qc_simplex_bam +label: qc_simplex_bam +$namespaces: + sbg: 'https://www.sevenbridges.com/' +inputs: + - id: reference + type: File + secondaryFiles: + - ^.fasta.fai + - ^.dict + 'sbg:x': -573 + 'sbg:y': 247.2935333251953 + - id: simplex_bam + type: File + label: simplex_bam + secondaryFiles: + - ^.bai + 'sbg:x': -570.2189331054688 + 'sbg:y': 376.736328125 + - id: pool_b_target_intervals + type: File + label: pool_b_target_intervals + 'sbg:x': -583.1691284179688 + 'sbg:y': 
-23.069652557373047 + - id: pool_b_bait_intervals + type: File + label: pool_b_bait_intervals + 'sbg:x': -579.8407592773438 + 'sbg:y': 105.95523071289062 + - id: pool_a_bait_intervals + type: File + label: pool_a_bait_intervals + 'sbg:x': -583.9046020507812 + 'sbg:y': -163.9043731689453 + - id: pool_a_target_intervals + type: File + label: pool_a_target_intervals + 'sbg:x': -581.4170532226562 + 'sbg:y': -288.2825012207031 + - id: hsmetrics_minimum_mapping_quality + type: int? + 'sbg:x': -585.7700805664062 + 'sbg:y': -414.1761779785156 + - id: hsmetrics_minimum_base_quality + type: int? + 'sbg:x': -590.94140625 + 'sbg:y': -539.5800170898438 + - id: hsmetrics_coverage_cap + type: int? + 'sbg:x': -595.156005859375 + 'sbg:y': -670.54931640625 +outputs: + - id: gatk_collect_alignment_summary_metrics_txt_pool_b + outputSource: + - bam_qc_stats_pool_b/gatk_collect_alignment_summary_metrics_txt + type: File + label: gatk_collect_alignment_summary_metrics_txt_pool_b + 'sbg:x': 429.216064453125 + 'sbg:y': 559.75537109375 + - id: gatk_collect_hs_metrics_per_base_coverage_txt_pool_b + outputSource: + - bam_qc_stats_pool_b/gatk_collect_hs_metrics_per_base_coverage_txt + type: File + label: gatk_collect_hs_metrics_per_base_coverage_txt_pool_b + 'sbg:x': 420.07769775390625 + 'sbg:y': 442.26190185546875 + - id: gatk_collect_hs_metrics_per_target_coverage_txt_pool_b + outputSource: + - bam_qc_stats_pool_b/gatk_collect_hs_metrics_per_target_coverage_txt + type: File + label: gatk_collect_hs_metrics_per_target_coverage_txt_pool_b + 'sbg:x': 427.91058349609375 + 'sbg:y': 323.46295166015625 + - id: gatk_collect_hs_metrics_txt_pool_b + outputSource: + - bam_qc_stats_pool_b/gatk_collect_hs_metrics_txt + type: File + label: gatk_collect_hs_metrics_txt_pool_b + 'sbg:x': 427.91058349609375 + 'sbg:y': 204.66400146484375 + - id: gatk_collect_insert_size_metrics_histogram_pdf_pool_b + outputSource: + - bam_qc_stats_pool_b/gatk_collect_insert_size_metrics_histogram_pdf + type: File + label: 
gatk_collect_insert_size_metrics_histogram_pdf_pool_b + 'sbg:x': 422.68865966796875 + 'sbg:y': 80.64311218261719 + - id: gatk_collect_insert_size_metrics_txt_pool_b + outputSource: + - bam_qc_stats_pool_b/gatk_collect_insert_size_metrics_txt + type: File + label: gatk_collect_insert_size_metrics_txt_pool_b + 'sbg:x': 430.52154541015625 + 'sbg:y': -34.2393913269043 + - id: gatk_collect_alignment_summary_metrics_txt_pool_a + outputSource: + - bam_qc_stats_pool_a/gatk_collect_alignment_summary_metrics_txt + type: File + label: gatk_collect_alignment_summary_metrics_txt_pool_a + 'sbg:x': 420.07769775390625 + 'sbg:y': -155.64930725097656 + - id: gatk_collect_hs_metrics_per_base_coverage_txt_pool_a + outputSource: + - bam_qc_stats_pool_a/gatk_collect_hs_metrics_per_base_coverage_txt + type: File + label: gatk_collect_hs_metrics_per_base_coverage_txt_pool_a + 'sbg:x': 417.46673583984375 + 'sbg:y': -274.4482727050781 + - id: gatk_collect_hs_metrics_per_target_coverage_txt_pool_a + outputSource: + - bam_qc_stats_pool_a/gatk_collect_hs_metrics_per_target_coverage_txt + type: File + label: gatk_collect_hs_metrics_per_target_coverage_txt_pool_a + 'sbg:x': 414.85577392578125 + 'sbg:y': -389.3307800292969 + - id: gatk_collect_hs_metrics_txt_pool_a + outputSource: + - bam_qc_stats_pool_a/gatk_collect_hs_metrics_txt + type: File + label: gatk_collect_hs_metrics_txt_pool_a + 'sbg:x': 409.9451599121094 + 'sbg:y': -498.08355712890625 + - id: gatk_collect_insert_size_metrics_histogram_pdf_pool_a + outputSource: + - bam_qc_stats_pool_a/gatk_collect_insert_size_metrics_histogram_pdf + type: File + label: gatk_collect_insert_size_metrics_histogram_pdf_pool_a + 'sbg:x': 410.9393005371094 + 'sbg:y': -621.7067260742188 + - id: gatk_collect_insert_size_metrics_txt_pool_a + outputSource: + - bam_qc_stats_pool_a/gatk_collect_insert_size_metrics_txt + type: File + label: gatk_collect_insert_size_metrics_txt_pool_a + 'sbg:x': 400.4954528808594 + 'sbg:y': -773.1427612304688 +steps: + - id: 
bam_qc_stats_pool_a + in: + - id: input + source: + - simplex_bam + - id: target_intervals + source: pool_a_target_intervals + - id: bait_intervals + source: pool_a_bait_intervals + - id: reference + source: reference + - id: hsmetrics_minimum_mapping_quality + source: hsmetrics_minimum_mapping_quality + - id: hsmetrics_minimum_base_quality + source: hsmetrics_minimum_base_quality + - id: hsmetrics_coverage_cap + source: hsmetrics_coverage_cap + out: + - id: gatk_collect_insert_size_metrics_histogram_pdf + - id: gatk_collect_insert_size_metrics_txt + - id: gatk_collect_hs_metrics_txt + - id: gatk_collect_hs_metrics_per_base_coverage_txt + - id: gatk_collect_hs_metrics_per_target_coverage_txt + - id: gatk_collect_alignment_summary_metrics_txt + run: ../bam_qc_stats/bam_qc_stats.cwl + label: bam_qc_stats_pool_a + 'sbg:x': -114.38903045654297 + 'sbg:y': -295.4621276855469 + - id: bam_qc_stats_pool_b + in: + - id: input + source: + - simplex_bam + - id: target_intervals + source: pool_b_target_intervals + - id: bait_intervals + source: pool_b_bait_intervals + - id: reference + source: reference + - id: hsmetrics_minimum_mapping_quality + source: hsmetrics_minimum_mapping_quality + - id: hsmetrics_minimum_base_quality + source: hsmetrics_minimum_base_quality + - id: hsmetrics_coverage_cap + source: hsmetrics_coverage_cap + out: + - id: gatk_collect_insert_size_metrics_histogram_pdf + - id: gatk_collect_insert_size_metrics_txt + - id: gatk_collect_hs_metrics_txt + - id: gatk_collect_hs_metrics_per_base_coverage_txt + - id: gatk_collect_hs_metrics_per_target_coverage_txt + - id: gatk_collect_alignment_summary_metrics_txt + run: ../bam_qc_stats/bam_qc_stats.cwl + label: bam_qc_stats_pool_b + 'sbg:x': -116.60113525390625 + 'sbg:y': 139.5 +requirements: + - class: SubworkflowFeatureRequirement diff --git a/qc_simplex_bam/qc_simplex_bam__packed.cwl b/qc_simplex_bam/qc_simplex_bam__packed.cwl new file mode 100644 index 0000000..dc775bd --- /dev/null +++ 
b/qc_simplex_bam/qc_simplex_bam__packed.cwl @@ -0,0 +1,1609 @@ +{ + "$graph": [ + { + "class": "Workflow", + "id": "#bam_qc_stats.cwl", + "label": "bam_qc_stats", + "inputs": [ + { + "id": "#bam_qc_stats.cwl/input", + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "secondaryFiles": [ + "^.bai" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 374.0625 + }, + { + "id": "#bam_qc_stats.cwl/target_intervals", + "type": "File", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 160.3125 + }, + { + "id": "#bam_qc_stats.cwl/bait_intervals", + "type": "File", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 480.9375 + }, + { + "id": "#bam_qc_stats.cwl/reference", + "type": "File", + "secondaryFiles": [ + "^.fasta.fai", + "^.dict" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 267.1875 + }, + { + "id": "#bam_qc_stats.cwl/temporary_directory", + "type": [ + "null", + "string" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 53.4375 + }, + { + "id": "#bam_qc_stats.cwl/hsmetrics_minimum_mapping_quality", + "type": [ + "null", + "int" + ], + "label": "hsmetrics_minimum_mapping_quality", + "https://www.sevenbridges.com/x": 1, + "https://www.sevenbridges.com/y": 613 + }, + { + "id": "#bam_qc_stats.cwl/hsmetrics_minimum_base_quality", + "type": [ + "null", + "int" + ], + "label": "hsmetrics_minimum_base_quality", + "https://www.sevenbridges.com/x": 3, + "https://www.sevenbridges.com/y": 743 + }, + { + "id": "#bam_qc_stats.cwl/hsmetrics_coverage_cap", + "type": [ + "null", + "int" + ], + "label": "hsmetrics_coverage_cap", + "https://www.sevenbridges.com/x": 2, + "https://www.sevenbridges.com/y": 872 + } + ], + "outputs": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_histogram_pdf", + "outputSource": [ + 
"#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0/gatk_collect_insert_size_metrics_histogram_pdf" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + "https://www.sevenbridges.com/y": 106.875 + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_txt", + "outputSource": [ + "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0/gatk_collect_insert_size_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + "https://www.sevenbridges.com/y": 0 + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_txt", + "outputSource": [ + "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + "https://www.sevenbridges.com/y": 213.75 + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_per_base_coverage_txt", + "outputSource": [ + "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_per_base_coverage_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + "https://www.sevenbridges.com/y": 427.5 + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_per_target_coverage_txt", + "outputSource": [ + "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_per_target_coverage_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + "https://www.sevenbridges.com/y": 320.625 + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_alignment_summary_metrics_txt", + "outputSource": [ + "#bam_qc_stats.cwl/gatk_collect_alignment_summary_metrics_4_1_3_0/gatk_collect_alignment_summary_metrics_txt" + ], + "type": [ + "File", + { + 
"type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + "https://www.sevenbridges.com/y": 534.375 + } + ], + "steps": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_alignment_summary_metrics_4_1_3_0", + "in": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_alignment_summary_metrics_4_1_3_0/input", + "source": "#bam_qc_stats.cwl/input" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_alignment_summary_metrics_4_1_3_0/reference", + "source": "#bam_qc_stats.cwl/reference" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_alignment_summary_metrics_4_1_3_0/temporary_directory", + "source": "#bam_qc_stats.cwl/temporary_directory" + } + ], + "out": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_alignment_summary_metrics_4_1_3_0/gatk_collect_alignment_summary_metrics_txt" + } + ], + "run": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl", + "label": "GATK-CollectAlignmentSummaryMetrics", + "https://www.sevenbridges.com/x": 334.2886657714844, + "https://www.sevenbridges.com/y": 560.505126953125 + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0", + "in": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/input", + "source": "#bam_qc_stats.cwl/input" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/bait_intervals", + "source": "#bam_qc_stats.cwl/bait_intervals" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/target_intervals", + "source": "#bam_qc_stats.cwl/target_intervals" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/coverage_cap", + "source": "#bam_qc_stats.cwl/hsmetrics_coverage_cap" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/minimum_base_quality", + "source": "#bam_qc_stats.cwl/hsmetrics_minimum_base_quality" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/minimum_mapping_quality", + "source": "#bam_qc_stats.cwl/hsmetrics_minimum_mapping_quality" + }, + { + "id": 
"#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/reference", + "source": "#bam_qc_stats.cwl/reference" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/temporary_directory", + "source": "#bam_qc_stats.cwl/temporary_directory" + } + ], + "out": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_txt" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_per_base_coverage_txt" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_per_target_coverage_txt" + } + ], + "run": "#gatk_collect_hs_metrics_4.1.8.0.cwl", + "label": "GATK-CollectHsMetrics", + "https://www.sevenbridges.com/x": 327.8453674316406, + "https://www.sevenbridges.com/y": 372.8453674316406 + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0", + "in": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0/input", + "source": "#bam_qc_stats.cwl/input" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0/histogram_file", + "default": "histogram.pdf" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0/temporary_directory", + "source": "#bam_qc_stats.cwl/temporary_directory" + } + ], + "out": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0/gatk_collect_insert_size_metrics_txt" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0/gatk_collect_insert_size_metrics_histogram_pdf" + } + ], + "run": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl", + "label": "GATK-CollectInsertSizeMetrics", + "https://www.sevenbridges.com/x": 335.57733154296875, + "https://www.sevenbridges.com/y": 194.7628936767578 + } + ], + "requirements": [], + "https://schema.org/author": [ + { + "class": "https://schema.org/Person", + "https://schema.org/email": "mailto:murphyc4@mskcc.org", + "https://schema.org/identifier": "", + "https://schema.org/name": "Charles 
Murphy" + } + ], + "https://schema.org/citation": "", + "https://schema.org/codeRepository": "https://github.com/msk-access/uncollapsed_bam_generation", + "https://schema.org/contributor": [ + { + "class": "https://schema.org/Person", + "https://schema.org/email": "mailto:shahr2@mskcc.org", + "https://schema.org/identifier": "https://orcid.org/0000-0001-9042-6213", + "https://schema.org/name": "Ronak Shah" + } + ], + "https://schema.org/dateCreated": "2020-09-23", + "https://schema.org/license": "https://spdx.org/licenses/Apache-2.0", + "$namespaces": { + "sbg": "https://www.sevenbridges.com/" + } + }, + { + "class": "CommandLineTool", + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl", + "baseCommand": [ + "gatk", + "CollectAlignmentSummaryMetrics" + ], + "inputs": [ + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/memory_per_job", + "type": [ + "null", + "int" + ], + "doc": "Memory per job in megabytes" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/memory_overhead", + "type": [ + "null", + "int" + ], + "doc": "Memory overhead per job in megabytes" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/number_of_threads", + "type": [ + "null", + "int" + ] + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/input", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "-I" + }, + "doc": "Input file (bam or sam). Required." + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/output_file_name", + "type": [ + "null", + "string" + ], + "doc": "File to write the output to. Required." + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/reference", + "type": [ + "null", + "File" + ], + "inputBinding": { + "position": 0, + "prefix": "-R" + }, + "doc": "Reference sequence file. Note that while this argument is not required, without it only a small subset of the metrics will be calculated. 
Note also that if a reference sequence is provided, it must be accompanied by a sequence dictionary. Default value: null.", + "secondaryFiles": [ + "^.fasta.fai", + "^.dict" + ] + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/adaptor_sequence", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--ADAPTER_SEQUENCE" + }, + "doc": "List of adapter sequences to use when processing the alignment metrics. This argument may be specified 0 or more times. Default value: [AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG, AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCTGCTTG, AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNNNATCTCGTATGCCGTCTTCTGCTTG]." + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/metrics_acciumulation_level", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--METRIC_ACCUMULATION_LEVEL" + }, + "doc": "The level(s) at which to accumulate metrics. Default value: [ALL_READS]. This option can be set to 'null' to clear the default value. Possible values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be specified 0 or more times. This option can be set to 'null' to clear the default list." + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/expected_pair_orientations", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--EXPECTED_PAIR_ORIENTATIONS" + }, + "doc": "Paired-end reads that do not have this expected orientation will be considered chimeric. This argument may be specified 0 or more times. Default value: [FR]. 
Possible values: {FR, RF, TANDEM}" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/is_bisulfite_sequenced", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--IS_BISULFITE_SEQUENCED" + }, + "doc": "Whether the SAM or BAM file consists of bisulfite sequenced reads. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/max_insert_size", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--MAX_INSERT_SIZE" + }, + "doc": "Paired-end reads above this insert size will be considered chimeric along with inter-chromosomal pairs. Default value: 100000." + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/validation_stringency", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--VALIDATION_STRINGENCY" + }, + "doc": "Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded. Default value: STRICT. This option can be set to 'null' to clear the default value. Possible values: {STRICT,LENIENT, SILENT}" + }, + { + "default": true, + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/assume_sorted", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--ASSUME_SORTED" + }, + "doc": "If true (default), then the sort order in the header file will be ignored. Default value: true. This option can be set to 'null' to clear the default value. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/stop_after", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--STOP_AFTER" + }, + "doc": "Stop after processing N reads, mainly for debugging. Default value: 0. 
This option can be set to 'null' to clear the default value." + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/create_index", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CREATE_INDEX" + }, + "doc": "Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/create_md5_file", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CREATE_MD5_FILE" + }, + "doc": "Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/use_jdk_deflater", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--USE_JDK_DEFLATER" + }, + "doc": "Use the JDK Deflater instead of the Intel Deflater for writing compressed output" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/use_jdk_inflater", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--USE_JDK_INFLATER" + }, + "doc": "Use the JDK Inflater instead of the Intel Inflater for reading compressed input" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/temporary_directory", + "type": [ + "null", + "string" + ], + "doc": "Default value: null. This option may be specified 0 or more times." 
+ } + ], + "outputs": [ + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/gatk_collect_alignment_summary_metrics_txt", + "type": "File", + "outputBinding": { + "glob": "${\n if (inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_alignment_summary_metrics.txt')\n }\n}" + } + } + ], + "label": "GATK-CollectAlignmentSummaryMetrics", + "arguments": [ + { + "position": 0, + "prefix": "--java-options", + "valueFrom": "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n return \"-Xmx15G\"\n }\n else {\n return \"-Xmx15G\"\n }\n}" + }, + { + "position": 0, + "prefix": "--TMP_DIR", + "valueFrom": "${\n if(inputs.temporary_directory)\n return inputs.temporary_directory;\n return runtime.tmpdir\n}" + }, + { + "position": 0, + "prefix": "-O", + "valueFrom": "${\n if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_alignment_summary_metrics.txt')\n }\n}" + } + ], + "requirements": [ + { + "class": "ResourceRequirement", + "ramMin": 32000, + "coresMin": 1 + }, + { + "class": "DockerRequirement", + "dockerPull": "ghcr.io/msk-access/gatk:4.1.8.0" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "http://purl.org/dc/terms/contributor": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": 
"http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://purl.org/dc/terms/creator": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://usefulinc.com/ns/doap#release": [ + { + "class": "http://usefulinc.com/ns/doap#Version", + "http://usefulinc.com/ns/doap#name": "gatk4", + "http://usefulinc.com/ns/doap#revision": "4.1.8.0" + } + ] + }, + { + "class": "CommandLineTool", + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl", + "baseCommand": [ + "gatk", + "CollectHsMetrics" + ], + "inputs": [ + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/input", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "-I" + }, + "doc": "An aligned SAM or BAM file. Required." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/bait_intervals", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "--BAIT_INTERVALS" + }, + "doc": "An interval list file that contains the locations of the baits used. This argument must be specified at least once. Required." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/target_intervals", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "--TARGET_INTERVALS" + }, + "doc": "An interval list file that contains the locations of the targets. This argument must be specified at least once. Required." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/output_file_name", + "type": [ + "null", + "string" + ], + "doc": "The output file to write the metrics to. Required." 
+ }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/per_base_coverage", + "type": [ + "null", + "string" + ], + "doc": "An optional file to output per base coverage information to. The per-base file contains one line per target base and can grow very large. It is not recommended for use with large target sets. Default value: null." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/per_target_coverage", + "type": [ + "null", + "string" + ], + "doc": "An optional file to output per target coverage information to. Default value: null." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/theoretical_sensitivity_output", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--THEORETICAL_SENSITIVITY_OUTPUT" + }, + "doc": "Output for Theoretical Sensitivity metrics where the allele fractions are provided by the ALLELE_FRACTION argument. Default value: null." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/allele_fraction", + "type": [ + "null", + "float" + ], + "inputBinding": { + "position": 0, + "prefix": "--ALLELE_FRACTION" + }, + "doc": "Allele fraction for which to calculate theoretical sensitivity. This argument may be specified 0 or more times. Default value: [0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.3, 0.5]." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/bait_set_name", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--BAIT_SET_NAME" + }, + "doc": "Bait set name. If not provided it is inferred from the filename of the bait intervals. Default value: null." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/clip_overlapping_reads", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CLIP_OVERLAPPING_READS" + }, + "doc": "True if we are to clip overlapping reads, false otherwise. Default value: true. 
Possible values: {true, false}" + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/coverage_cap", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--COVERAGE_CAP" + }, + "doc": "Parameter to set a max coverage limit for Theoretical Sensitivity calculations. Default is 200. Default value: 200." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/include_indels", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--INCLUDE_INDELS" + }, + "doc": "If true count inserted bases as on target and deleted bases as covered by a read. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/minimum_base_quality", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--MINIMUM_BASE_QUALITY" + }, + "doc": "Minimum base quality for a base to contribute coverage. Default value: 20." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/minimum_mapping_quality", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--MINIMUM_MAPPING_QUALITY" + }, + "doc": "Minimum mapping quality for a read to contribute coverage. Default value: 20." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/near_distance", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--NEAR_DISTANCE" + }, + "doc": "The maximum distance between a read and the nearest probe/bait/amplicon for the read to be considered 'near probe' and included in percent selected. Default value: 250." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/sample_size", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--SAMPLE_SIZE" + }, + "doc": "Sample Size used for Theoretical Het Sensitivity sampling. Default is 10000. Default value: 10000." 
+ }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/reference", + "type": [ + "null", + "File" + ], + "inputBinding": { + "position": 0, + "prefix": "-R" + }, + "doc": "Reference sequence file. Note that while this argument is not required, without it only a small subset of the metrics will be calculated. Note also that if a reference sequence is provided, it must be accompanied by a sequence dictionary. Default value: null.", + "secondaryFiles": [ + "^.fasta.fai", + "^.dict" + ] + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/metrics_acciumulation_level", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--METRIC_ACCUMULATION_LEVEL" + }, + "doc": "The level(s) at which to accumulate metrics. Default value: [ALL_READS]. This option can be set to 'null' to clear the default value. Possible values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be specified 0 or more times. This option can be set to 'null' to clear the default list." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/validation_stringency", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--VALIDATION_STRINGENCY" + }, + "doc": "Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded. Default value: STRICT. This option can be set to 'null' to clear the default value. Possible values: {STRICT,LENIENT, SILENT}" + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/create_index", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CREATE_INDEX" + }, + "doc": "Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: false. 
Possible values: {true, false}" + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/create_md5_file", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CREATE_MD5_FILE" + }, + "doc": "Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/memory_per_job", + "type": [ + "null", + "int" + ], + "doc": "Memory per job in megabytes" + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/memory_overhead", + "type": [ + "null", + "int" + ], + "doc": "Memory overhead per job in megabytes" + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/number_of_threads", + "type": [ + "null", + "int" + ] + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/temporary_directory", + "type": [ + "null", + "string" + ], + "doc": "Default value: null. This option may be specified 0 or more times." + } + ], + "outputs": [ + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/gatk_collect_hs_metrics_txt", + "type": "File", + "outputBinding": { + "glob": "${\n if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_hs_metrics.txt')\n }\n}" + } + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/gatk_collect_hs_metrics_per_base_coverage_txt", + "type": "File", + "outputBinding": { + "glob": "${\n if(inputs.per_base_coverage){\n return inputs.per_base_coverage\n } else {\n return inputs.input.basename.replace(/.bam/, '_per_base_coverage.txt')\n }\n}" + } + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/gatk_collect_hs_metrics_per_target_coverage_txt", + "type": "File", + "outputBinding": { + "glob": "${\n if(inputs.per_target_coverage){\n return inputs.per_target_coverage\n } else {\n return inputs.input.basename.replace(/.bam/, '_per_target_coverage.txt')\n }\n}" + } + } + ], + "label": "GATK-CollectHsMetrics", + "arguments": [ + { + "position": 0, + 
"prefix": "--java-options", + "valueFrom": "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n return \"-Xmx15G\"\n }\n else {\n return \"-Xmx15G\"\n }\n}" + }, + { + "position": 0, + "prefix": "--TMP_DIR", + "valueFrom": "${\n if(inputs.temporary_directory)\n return inputs.temporary_directory;\n return runtime.tmpdir\n}" + }, + { + "position": 0, + "prefix": "-O", + "valueFrom": "${\n if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_hs_metrics.txt')\n }\n}" + }, + { + "position": 0, + "prefix": "--PER_TARGET_COVERAGE", + "valueFrom": "${\n if(inputs.per_target_coverage){\n return inputs.per_target_coverage\n } else {\n return inputs.input.basename.replace(/.bam/, '_per_target_coverage.txt')\n }\n}" + }, + { + "position": 0, + "prefix": "--PER_BASE_COVERAGE", + "valueFrom": "${\n if(inputs.per_base_coverage){\n return inputs.per_base_coverage\n } else {\n return inputs.input.basename.replace(/.bam/, '_per_base_coverage.txt')\n }\n}" + } + ], + "requirements": [ + { + "class": "ResourceRequirement", + "ramMin": 32000, + "coresMin": 1 + }, + { + "class": "DockerRequirement", + "dockerPull": "ghcr.io/msk-access/gatk:4.1.8.0" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "http://purl.org/dc/terms/contributor": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": 
"http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://purl.org/dc/terms/creator": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://usefulinc.com/ns/doap#release": [ + { + "class": "http://usefulinc.com/ns/doap#Version", + "http://usefulinc.com/ns/doap#name": "gatk4", + "http://usefulinc.com/ns/doap#revision": "4.1.8.0" + } + ] + }, + { + "class": "CommandLineTool", + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl", + "baseCommand": [ + "gatk", + "CollectInsertSizeMetrics" + ], + "inputs": [ + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/memory_per_job", + "type": [ + "null", + "int" + ], + "doc": "Memory per job in megabytes" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/memory_overhead", + "type": [ + "null", + "int" + ], + "doc": "Memory overhead per job in megabytes" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/number_of_threads", + "type": [ + "null", + "int" + ] + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/input", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "-I" + }, + "doc": "Input file (bam or sam). Required." + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/output_file_name", + "type": [ + "null", + "string" + ], + "doc": "File to write the output to. Required." 
+ }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/histogram_file", + "type": [ + "null", + "string" + ], + "doc": "File to write insert size Histogram chart to. Required." + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/deviations", + "type": [ + "null", + "float" + ], + "inputBinding": { + "position": 0, + "prefix": "--DEVIATIONS" + }, + "doc": "Generate mean, sd and plots by trimming the data down to MEDIAN + DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION. This is done because insert size data typically includes enough anomalous values from chimeras and other artifacts to make the mean and sd grossly misleading regarding the real distribution. Default value: 10.0. This option can be set to 'null' to clear the default value." + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/histogram_width", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--HISTOGRAM_WIDTH" + }, + "doc": "Explicitly sets the Histogram width, overriding automatic truncation of Histogram tail. Also, when calculating mean and standard deviation, only bins <= Histogram_WIDTH will be included. Default value: null." + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/minimum_pct", + "type": [ + "null", + "float" + ], + "inputBinding": { + "position": 0, + "prefix": "--MINIMUM_PCT" + }, + "doc": "When generating the Histogram, discard any data categories (out of FR, TANDEM, RF) that have fewer than this percentage of overall reads. (Range: 0 to 1). Default value: 0.05. This option can be set to 'null' to clear the default value." + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/metrics_acciumulation_level", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--METRIC_ACCUMULATION_LEVEL" + }, + "doc": "The level(s) at which to accumulate metrics. Default value: [ALL_READS]. This option can be set to 'null' to clear the default value. 
Possible values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be specified 0 or more times. This option can be set to 'null' to clear the default list." + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/include_duplicates", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--INCLUDE_DUPLICATES" + }, + "doc": "If true, also include reads marked as duplicates in the insert size histogram. Default value: false. This option can be set to 'null' to clear the default value. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/validation_stringency", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--VALIDATION_STRINGENCY" + }, + "doc": "Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded. Default value: STRICT. This option can be set to 'null' to clear the default value. Possible values: {STRICT,LENIENT, SILENT}" + }, + { + "default": true, + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/assume_sorted", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--ASSUME_SORTED" + }, + "doc": "If true (default), then the sort order in the header file will be ignored. Default value: true. This option can be set to 'null' to clear the default value. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/stop_after", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--STOP_AFTER" + }, + "doc": "Stop after processing N reads, mainly for debugging. Default value: 0. This option can be set to 'null' to clear the default value." 
+ }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/create_index", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CREATE_INDEX" + }, + "doc": "Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/create_md5_file", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CREATE_MD5_FILE" + }, + "doc": "Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/use_jdk_deflater", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--USE_JDK_DEFLATER" + }, + "doc": "Use the JDK Deflater instead of the Intel Deflater for writing compressed output" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/use_jdk_inflater", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--USE_JDK_INFLATER" + }, + "doc": "Use the JDK Inflater instead of the Intel Inflater for reading compressed input" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/temporary_directory", + "type": [ + "null", + "string" + ], + "doc": "Default value: null. This option may be specified 0 or more times." 
+ } + ], + "outputs": [ + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/gatk_collect_insert_size_metrics_txt", + "type": "File", + "outputBinding": { + "glob": "${\n if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_insert_size_metrics.txt')\n }\n}" + } + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/gatk_collect_insert_size_metrics_histogram_pdf", + "type": "File", + "outputBinding": { + "glob": "${\n if(inputs.histogram_file){\n return inputs.histogram_file\n } else {\n return inputs.input.basename.replace(/.bam/, '_histogram.pdf')\n }\n}" + } + } + ], + "label": "GATK-CollectInsertSizeMetrics", + "arguments": [ + { + "position": 0, + "prefix": "--java-options", + "valueFrom": "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n return \"-Xmx15G\"\n }\n else {\n return \"-Xmx15G\"\n }\n}" + }, + { + "position": 0, + "prefix": "--TMP_DIR", + "valueFrom": "${\n if(inputs.temporary_directory)\n return inputs.temporary_directory;\n return runtime.tmpdir\n}" + }, + { + "position": 2, + "prefix": "-O", + "valueFrom": "${\n if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_insert_size_metrics.txt')\n }\n}" + }, + { + "position": 2, + "prefix": "-H", + "valueFrom": "${\n if(inputs.histogram_file){\n return inputs.histogram_file\n } else {\n return 
inputs.input.basename.replace(/.bam/, '_histogram.pdf')\n }\n}" + } + ], + "requirements": [ + { + "class": "ResourceRequirement", + "ramMin": 32000, + "coresMin": 1 + }, + { + "class": "DockerRequirement", + "dockerPull": "ghcr.io/msk-access/gatk:4.1.8.0" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "http://purl.org/dc/terms/contributor": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://purl.org/dc/terms/creator": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://usefulinc.com/ns/doap#release": [ + { + "class": "http://usefulinc.com/ns/doap#Version", + "http://usefulinc.com/ns/doap#name": "gatk4", + "http://usefulinc.com/ns/doap#revision": "4.1.8.0" + } + ] + }, + { + "class": "Workflow", + "id": "#main", + "label": "qc_simplex_bam", + "inputs": [ + { + "id": "#reference", + "type": "File", + "secondaryFiles": [ + "^.fasta.fai", + "^.dict" + ], + "https://www.sevenbridges.com/x": -573, + "https://www.sevenbridges.com/y": 247.2935333251953 + }, + { + "id": "#simplex_bam", + "type": "File", + "label": "simplex_bam", + "secondaryFiles": [ + "^.bai" + ], + "https://www.sevenbridges.com/x": -570.2189331054688, + "https://www.sevenbridges.com/y": 376.736328125 + }, + { + "id": "#pool_b_target_intervals", + "type": "File", + "label": "pool_b_target_intervals", + "https://www.sevenbridges.com/x": -583.1691284179688, + 
"https://www.sevenbridges.com/y": -23.069652557373047 + }, + { + "id": "#pool_b_bait_intervals", + "type": "File", + "label": "pool_b_bait_intervals", + "https://www.sevenbridges.com/x": -579.8407592773438, + "https://www.sevenbridges.com/y": 105.95523071289062 + }, + { + "id": "#pool_a_bait_intervals", + "type": "File", + "label": "pool_a_bait_intervals", + "https://www.sevenbridges.com/x": -583.9046020507812, + "https://www.sevenbridges.com/y": -163.9043731689453 + }, + { + "id": "#pool_a_target_intervals", + "type": "File", + "label": "pool_a_target_intervals", + "https://www.sevenbridges.com/x": -581.4170532226562, + "https://www.sevenbridges.com/y": -288.2825012207031 + }, + { + "id": "#hsmetrics_minimum_mapping_quality", + "type": [ + "null", + "int" + ], + "https://www.sevenbridges.com/x": -585.7700805664062, + "https://www.sevenbridges.com/y": -414.1761779785156 + }, + { + "id": "#hsmetrics_minimum_base_quality", + "type": [ + "null", + "int" + ], + "https://www.sevenbridges.com/x": -590.94140625, + "https://www.sevenbridges.com/y": -539.5800170898438 + }, + { + "id": "#hsmetrics_coverage_cap", + "type": [ + "null", + "int" + ], + "https://www.sevenbridges.com/x": -595.156005859375, + "https://www.sevenbridges.com/y": -670.54931640625 + } + ], + "outputs": [ + { + "id": "#gatk_collect_alignment_summary_metrics_txt_pool_b", + "outputSource": [ + "#bam_qc_stats_pool_b/gatk_collect_alignment_summary_metrics_txt" + ], + "type": "File", + "label": "gatk_collect_alignment_summary_metrics_txt_pool_b", + "https://www.sevenbridges.com/x": 429.216064453125, + "https://www.sevenbridges.com/y": 559.75537109375 + }, + { + "id": "#gatk_collect_hs_metrics_per_base_coverage_txt_pool_b", + "outputSource": [ + "#bam_qc_stats_pool_b/gatk_collect_hs_metrics_per_base_coverage_txt" + ], + "type": "File", + "label": "gatk_collect_hs_metrics_per_base_coverage_txt_pool_b", + "https://www.sevenbridges.com/x": 420.07769775390625, + "https://www.sevenbridges.com/y": 442.26190185546875 
+ }, + { + "id": "#gatk_collect_hs_metrics_per_target_coverage_txt_pool_b", + "outputSource": [ + "#bam_qc_stats_pool_b/gatk_collect_hs_metrics_per_target_coverage_txt" + ], + "type": "File", + "label": "gatk_collect_hs_metrics_per_target_coverage_txt_pool_b", + "https://www.sevenbridges.com/x": 427.91058349609375, + "https://www.sevenbridges.com/y": 323.46295166015625 + }, + { + "id": "#gatk_collect_hs_metrics_txt_pool_b", + "outputSource": [ + "#bam_qc_stats_pool_b/gatk_collect_hs_metrics_txt" + ], + "type": "File", + "label": "gatk_collect_hs_metrics_txt_pool_b", + "https://www.sevenbridges.com/x": 427.91058349609375, + "https://www.sevenbridges.com/y": 204.66400146484375 + }, + { + "id": "#gatk_collect_insert_size_metrics_histogram_pdf_pool_b", + "outputSource": [ + "#bam_qc_stats_pool_b/gatk_collect_insert_size_metrics_histogram_pdf" + ], + "type": "File", + "label": "gatk_collect_insert_size_metrics_histogram_pdf_pool_b", + "https://www.sevenbridges.com/x": 422.68865966796875, + "https://www.sevenbridges.com/y": 80.64311218261719 + }, + { + "id": "#gatk_collect_insert_size_metrics_txt_pool_b", + "outputSource": [ + "#bam_qc_stats_pool_b/gatk_collect_insert_size_metrics_txt" + ], + "type": "File", + "label": "gatk_collect_insert_size_metrics_txt_pool_b", + "https://www.sevenbridges.com/x": 430.52154541015625, + "https://www.sevenbridges.com/y": -34.2393913269043 + }, + { + "id": "#gatk_collect_alignment_summary_metrics_txt_pool_a", + "outputSource": [ + "#bam_qc_stats_pool_a/gatk_collect_alignment_summary_metrics_txt" + ], + "type": "File", + "label": "gatk_collect_alignment_summary_metrics_txt_pool_a", + "https://www.sevenbridges.com/x": 420.07769775390625, + "https://www.sevenbridges.com/y": -155.64930725097656 + }, + { + "id": "#gatk_collect_hs_metrics_per_base_coverage_txt_pool_a", + "outputSource": [ + "#bam_qc_stats_pool_a/gatk_collect_hs_metrics_per_base_coverage_txt" + ], + "type": "File", + "label": 
"gatk_collect_hs_metrics_per_base_coverage_txt_pool_a", + "https://www.sevenbridges.com/x": 417.46673583984375, + "https://www.sevenbridges.com/y": -274.4482727050781 + }, + { + "id": "#gatk_collect_hs_metrics_per_target_coverage_txt_pool_a", + "outputSource": [ + "#bam_qc_stats_pool_a/gatk_collect_hs_metrics_per_target_coverage_txt" + ], + "type": "File", + "label": "gatk_collect_hs_metrics_per_target_coverage_txt_pool_a", + "https://www.sevenbridges.com/x": 414.85577392578125, + "https://www.sevenbridges.com/y": -389.3307800292969 + }, + { + "id": "#gatk_collect_hs_metrics_txt_pool_a", + "outputSource": [ + "#bam_qc_stats_pool_a/gatk_collect_hs_metrics_txt" + ], + "type": "File", + "label": "gatk_collect_hs_metrics_txt_pool_a", + "https://www.sevenbridges.com/x": 409.9451599121094, + "https://www.sevenbridges.com/y": -498.08355712890625 + }, + { + "id": "#gatk_collect_insert_size_metrics_histogram_pdf_pool_a", + "outputSource": [ + "#bam_qc_stats_pool_a/gatk_collect_insert_size_metrics_histogram_pdf" + ], + "type": "File", + "label": "gatk_collect_insert_size_metrics_histogram_pdf_pool_a", + "https://www.sevenbridges.com/x": 410.9393005371094, + "https://www.sevenbridges.com/y": -621.7067260742188 + }, + { + "id": "#gatk_collect_insert_size_metrics_txt_pool_a", + "outputSource": [ + "#bam_qc_stats_pool_a/gatk_collect_insert_size_metrics_txt" + ], + "type": "File", + "label": "gatk_collect_insert_size_metrics_txt_pool_a", + "https://www.sevenbridges.com/x": 400.4954528808594, + "https://www.sevenbridges.com/y": -773.1427612304688 + } + ], + "steps": [ + { + "id": "#bam_qc_stats_pool_a", + "in": [ + { + "id": "#bam_qc_stats_pool_a/input", + "source": [ + "#simplex_bam" + ] + }, + { + "id": "#bam_qc_stats_pool_a/target_intervals", + "source": "#pool_a_target_intervals" + }, + { + "id": "#bam_qc_stats_pool_a/bait_intervals", + "source": "#pool_a_bait_intervals" + }, + { + "id": "#bam_qc_stats_pool_a/reference", + "source": "#reference" + }, + { + "id": 
"#bam_qc_stats_pool_a/hsmetrics_minimum_mapping_quality", + "source": "#hsmetrics_minimum_mapping_quality" + }, + { + "id": "#bam_qc_stats_pool_a/hsmetrics_minimum_base_quality", + "source": "#hsmetrics_minimum_base_quality" + }, + { + "id": "#bam_qc_stats_pool_a/hsmetrics_coverage_cap", + "source": "#hsmetrics_coverage_cap" + } + ], + "out": [ + { + "id": "#bam_qc_stats_pool_a/gatk_collect_insert_size_metrics_histogram_pdf" + }, + { + "id": "#bam_qc_stats_pool_a/gatk_collect_insert_size_metrics_txt" + }, + { + "id": "#bam_qc_stats_pool_a/gatk_collect_hs_metrics_txt" + }, + { + "id": "#bam_qc_stats_pool_a/gatk_collect_hs_metrics_per_base_coverage_txt" + }, + { + "id": "#bam_qc_stats_pool_a/gatk_collect_hs_metrics_per_target_coverage_txt" + }, + { + "id": "#bam_qc_stats_pool_a/gatk_collect_alignment_summary_metrics_txt" + } + ], + "run": "#bam_qc_stats.cwl", + "label": "bam_qc_stats_pool_a", + "https://www.sevenbridges.com/x": -114.38903045654297, + "https://www.sevenbridges.com/y": -295.4621276855469 + }, + { + "id": "#bam_qc_stats_pool_b", + "in": [ + { + "id": "#bam_qc_stats_pool_b/input", + "source": [ + "#simplex_bam" + ] + }, + { + "id": "#bam_qc_stats_pool_b/target_intervals", + "source": "#pool_b_target_intervals" + }, + { + "id": "#bam_qc_stats_pool_b/bait_intervals", + "source": "#pool_b_bait_intervals" + }, + { + "id": "#bam_qc_stats_pool_b/reference", + "source": "#reference" + }, + { + "id": "#bam_qc_stats_pool_b/hsmetrics_minimum_mapping_quality", + "source": "#hsmetrics_minimum_mapping_quality" + }, + { + "id": "#bam_qc_stats_pool_b/hsmetrics_minimum_base_quality", + "source": "#hsmetrics_minimum_base_quality" + }, + { + "id": "#bam_qc_stats_pool_b/hsmetrics_coverage_cap", + "source": "#hsmetrics_coverage_cap" + } + ], + "out": [ + { + "id": "#bam_qc_stats_pool_b/gatk_collect_insert_size_metrics_histogram_pdf" + }, + { + "id": "#bam_qc_stats_pool_b/gatk_collect_insert_size_metrics_txt" + }, + { + "id": 
"#bam_qc_stats_pool_b/gatk_collect_hs_metrics_txt" + }, + { + "id": "#bam_qc_stats_pool_b/gatk_collect_hs_metrics_per_base_coverage_txt" + }, + { + "id": "#bam_qc_stats_pool_b/gatk_collect_hs_metrics_per_target_coverage_txt" + }, + { + "id": "#bam_qc_stats_pool_b/gatk_collect_alignment_summary_metrics_txt" + } + ], + "run": "#bam_qc_stats.cwl", + "label": "bam_qc_stats_pool_b", + "https://www.sevenbridges.com/x": -116.60113525390625, + "https://www.sevenbridges.com/y": 139.5 + } + ], + "requirements": [ + { + "class": "SubworkflowFeatureRequirement" + } + ] + } + ], + "cwlVersion": "v1.0", + "$schemas": [ + "http://schema.org/version/latest/schemaorg-current-http.rdf" + ] +} \ No newline at end of file diff --git a/qc_uncollapsed_bam/README.md b/qc_uncollapsed_bam/README.md new file mode 100644 index 0000000..a109cf1 --- /dev/null +++ b/qc_uncollapsed_bam/README.md @@ -0,0 +1,43 @@ +### Introduction +The sub-workflow calculates quality control metrics for uncollapsed BAMs. The main outputs are the following: + +1. Targeted capture metrics. +2. Insert size metrics. +3. Alignment metrics. +4. Mean base quality by cycle. + +**Note:** This sub-workflow was originally designed for MSK-ACCESS data. Hence, in addition to the uncollapsed BAM, it expects two sets of bait/target regions (referred to as pool A and pool B for MSK-ACCESS). 
+ +### Tools used: + +- [GATK-MeanQualityByCycle](../command_line_tools/gatk_mean_quality_by_cycle/README.md) +- [bam_qc_stats](../bam_qc_stats/README.md) + +### Usage + +```bash +usage: qc_uncollapsed_bam.cwl [-h] --reference REFERENCE + --pool_b_target_intervals + POOL_B_TARGET_INTERVALS --pool_b_bait_intervals + POOL_B_BAIT_INTERVALS --pool_a_bait_intervals + POOL_A_BAIT_INTERVALS --pool_a_target_intervals + POOL_A_TARGET_INTERVALS + [--hsmetrics_minimum_mapping_quality HSMETRICS_MINIMUM_MAPPING_QUALITY] + [--hsmetrics_minimum_base_quality HSMETRICS_MINIMUM_BASE_QUALITY] + [--hsmetrics_coverage_cap HSMETRICS_COVERAGE_CAP] + [job_order] + +positional arguments: + job_order Job input json file + +optional arguments: + -h, --help show this help message and exit + --reference REFERENCE + --pool_b_target_intervals POOL_B_TARGET_INTERVALS + --pool_b_bait_intervals POOL_B_BAIT_INTERVALS + --pool_a_bait_intervals POOL_A_BAIT_INTERVALS + --pool_a_target_intervals POOL_A_TARGET_INTERVALS + --hsmetrics_minimum_mapping_quality HSMETRICS_MINIMUM_MAPPING_QUALITY + --hsmetrics_minimum_base_quality HSMETRICS_MINIMUM_BASE_QUALITY + --hsmetrics_coverage_cap HSMETRICS_COVERAGE_CAP +``` diff --git a/qc_uncollapsed_bam/qc_uncollapsed_bam.cwl b/qc_uncollapsed_bam/qc_uncollapsed_bam.cwl new file mode 100644 index 0000000..ae8b634 --- /dev/null +++ b/qc_uncollapsed_bam/qc_uncollapsed_bam.cwl @@ -0,0 +1,294 @@ +class: Workflow +cwlVersion: v1.0 +id: qc_uncollapsed_bam +label: qc_uncollapsed_bam +$namespaces: + sbg: 'https://www.sevenbridges.com/' +inputs: + - id: reference + type: File + secondaryFiles: + - ^.fasta.fai + - ^.dict + 'sbg:x': 0 + 'sbg:y': 267.2265625 + - id: uncollapsed_bam_base_recal + type: + - File + - type: array + items: File + label: uncollapsed_bam_base_recal + secondaryFiles: + - ^.bai + 'sbg:x': 0 + 'sbg:y': 160.3359375 + - id: pool_b_target_intervals + type: File + label: pool_b_target_intervals + 'sbg:x': 0 + 'sbg:y': 374.1171875 + - id: pool_b_bait_intervals + 
type: File + label: pool_b_bait_intervals + 'sbg:x': 0 + 'sbg:y': 481.0078125 + - id: pool_a_bait_intervals + type: File + label: pool_a_bait_intervals + 'sbg:x': 0 + 'sbg:y': 694.7890625 + - id: pool_a_target_intervals + type: File + label: pool_a_target_intervals + 'sbg:x': 0 + 'sbg:y': 587.8984375 + - id: hsmetrics_minimum_mapping_quality + type: int? + 'sbg:x': 0 + 'sbg:y': 801.6796875 + - id: hsmetrics_minimum_base_quality + type: int? + 'sbg:x': 0 + 'sbg:y': 908.5703125 + - id: hsmetrics_coverage_cap + type: int? + 'sbg:x': 0 + 'sbg:y': 1015.4609375 +outputs: + - id: gatk_collect_alignment_summary_metrics_txt_pool_b + outputSource: + - bam_qc_stats_pool_b/gatk_collect_alignment_summary_metrics_txt + type: + - File + - type: array + items: File + label: gatk_collect_alignment_summary_metrics_txt_pool_b + 'sbg:x': 1369.4512939453125 + 'sbg:y': 1068.90625 + - id: gatk_collect_hs_metrics_per_base_coverage_txt_pool_b + outputSource: + - bam_qc_stats_pool_b/gatk_collect_hs_metrics_per_base_coverage_txt + type: + - File + - type: array + items: File + label: gatk_collect_hs_metrics_per_base_coverage_txt_pool_b + 'sbg:x': 1369.4512939453125 + 'sbg:y': 855.125 + - id: gatk_collect_hs_metrics_per_target_coverage_txt_pool_b + outputSource: + - bam_qc_stats_pool_b/gatk_collect_hs_metrics_per_target_coverage_txt + type: + - File + - type: array + items: File + label: gatk_collect_hs_metrics_per_target_coverage_txt_pool_b + 'sbg:x': 1369.4512939453125 + 'sbg:y': 641.34375 + - id: gatk_collect_hs_metrics_txt_pool_b + outputSource: + - bam_qc_stats_pool_b/gatk_collect_hs_metrics_txt + type: + - File + - type: array + items: File + label: gatk_collect_hs_metrics_txt_pool_b + 'sbg:x': 1369.4512939453125 + 'sbg:y': 427.5625 + - id: gatk_collect_insert_size_metrics_histogram_pdf_pool_b + outputSource: + - bam_qc_stats_pool_b/gatk_collect_insert_size_metrics_histogram_pdf + type: + - File + - type: array + items: File + label: gatk_collect_insert_size_metrics_histogram_pdf_pool_b 
+ 'sbg:x': 1369.4512939453125 + 'sbg:y': 213.78125 + - id: gatk_collect_insert_size_metrics_txt_pool_b + outputSource: + - bam_qc_stats_pool_b/gatk_collect_insert_size_metrics_txt + type: + - File + - type: array + items: File + label: gatk_collect_insert_size_metrics_txt_pool_b + 'sbg:x': 1369.4512939453125 + 'sbg:y': 0 + - id: gatk_collect_alignment_summary_metrics_txt_pool_a + outputSource: + - bam_qc_stats_pool_a/gatk_collect_alignment_summary_metrics_txt + type: + - File + - type: array + items: File + label: gatk_collect_alignment_summary_metrics_txt_pool_a + 'sbg:x': 1369.4512939453125 + 'sbg:y': 1175.796875 + - id: gatk_collect_hs_metrics_per_base_coverage_txt_pool_a + outputSource: + - bam_qc_stats_pool_a/gatk_collect_hs_metrics_per_base_coverage_txt + type: + - File + - type: array + items: File + label: gatk_collect_hs_metrics_per_base_coverage_txt_pool_a + 'sbg:x': 1369.4512939453125 + 'sbg:y': 962.015625 + - id: gatk_collect_hs_metrics_per_target_coverage_txt_pool_a + outputSource: + - bam_qc_stats_pool_a/gatk_collect_hs_metrics_per_target_coverage_txt + type: + - File + - type: array + items: File + label: gatk_collect_hs_metrics_per_target_coverage_txt_pool_a + 'sbg:x': 1369.4512939453125 + 'sbg:y': 748.234375 + - id: gatk_collect_hs_metrics_txt_pool_a + outputSource: + - bam_qc_stats_pool_a/gatk_collect_hs_metrics_txt + type: + - File + - type: array + items: File + label: gatk_collect_hs_metrics_txt_pool_a + 'sbg:x': 1369.4512939453125 + 'sbg:y': 534.453125 + - id: gatk_collect_insert_size_metrics_histogram_pdf_pool_a + outputSource: + - bam_qc_stats_pool_a/gatk_collect_insert_size_metrics_histogram_pdf + type: + - File + - type: array + items: File + label: gatk_collect_insert_size_metrics_histogram_pdf_pool_a + 'sbg:x': 1369.4512939453125 + 'sbg:y': 320.671875 + - id: gatk_collect_insert_size_metrics_txt_pool_a + outputSource: + - bam_qc_stats_pool_a/gatk_collect_insert_size_metrics_txt + type: + - File + - type: array + items: File + label: 
gatk_collect_insert_size_metrics_txt_pool_a + 'sbg:x': 1369.4512939453125 + 'sbg:y': 106.890625 + - id: gatk_mean_quality_by_cycle_output_base_recal + outputSource: + - gatk_mean_quality_by_cycle_4_1_8_1/gatk_mean_quality_by_cycle_output + type: + - File + - type: array + items: File + label: gatk_mean_quality_by_cycle_output_base_recal + 'sbg:x': 738.7452392578125 + 'sbg:y': 343.5625 + - id: gatk_mean_quality_by_cycle_chart_output_base_recal + outputSource: + - >- + gatk_mean_quality_by_cycle_4_1_8_1/gatk_mean_quality_by_cycle_chart_output + type: + - File + - type: array + items: File + label: gatk_mean_quality_by_cycle_chart_output_base_recal + 'sbg:x': 738.7452392578125 + 'sbg:y': 450.453125 +steps: + - id: bam_qc_stats_pool_a + in: + - id: input + source: + - gatk_revert_sam_4_1_8_0/gatk_revert_sam_output + - id: target_intervals + source: pool_a_target_intervals + - id: bait_intervals + source: pool_a_bait_intervals + - id: reference + source: reference + - id: hsmetrics_minimum_mapping_quality + source: hsmetrics_minimum_mapping_quality + - id: hsmetrics_minimum_base_quality + source: hsmetrics_minimum_base_quality + - id: hsmetrics_coverage_cap + source: hsmetrics_coverage_cap + out: + - id: gatk_collect_insert_size_metrics_histogram_pdf + - id: gatk_collect_insert_size_metrics_txt + - id: gatk_collect_hs_metrics_txt + - id: gatk_collect_hs_metrics_per_base_coverage_txt + - id: gatk_collect_hs_metrics_per_target_coverage_txt + - id: gatk_collect_alignment_summary_metrics_txt + run: ../bam_qc_stats/bam_qc_stats.cwl + label: bam_qc_stats_pool_a + 'sbg:x': 738.7452392578125 + 'sbg:y': 790.234375 + - id: bam_qc_stats_pool_b + in: + - id: input + source: + - gatk_revert_sam_4_1_8_0/gatk_revert_sam_output + - id: target_intervals + source: pool_b_target_intervals + - id: bait_intervals + source: pool_b_bait_intervals + - id: reference + source: reference + - id: hsmetrics_minimum_mapping_quality + source: hsmetrics_minimum_mapping_quality + - id: 
hsmetrics_minimum_base_quality + source: hsmetrics_minimum_base_quality + - id: hsmetrics_coverage_cap + source: hsmetrics_coverage_cap + out: + - id: gatk_collect_insert_size_metrics_histogram_pdf + - id: gatk_collect_insert_size_metrics_txt + - id: gatk_collect_hs_metrics_txt + - id: gatk_collect_hs_metrics_per_base_coverage_txt + - id: gatk_collect_hs_metrics_per_target_coverage_txt + - id: gatk_collect_alignment_summary_metrics_txt + run: ../bam_qc_stats/bam_qc_stats.cwl + label: bam_qc_stats_pool_b + 'sbg:x': 738.7452392578125 + 'sbg:y': 599.34375 + - id: gatk_mean_quality_by_cycle_4_1_8_1 + in: + - id: input + source: uncollapsed_bam_base_recal + - id: reference + source: reference + out: + - id: gatk_mean_quality_by_cycle_output + - id: gatk_mean_quality_by_cycle_chart_output + run: >- + ../command_line_tools/gatk_mean_quality_by_cycle/4.1.8.0/gatk_mean_quality_by_cycle_4.1.8.0.cwl + label: GATK-MeanQualityByCycle_base_recal + 'sbg:x': 351.4375 + 'sbg:y': 701.7890625 + - id: gatk_revert_sam_4_1_8_0 + in: + - id: input + source: uncollapsed_bam_base_recal + - id: remove_alignment_information + default: 'false' + - id: remove_duplicate_information + default: 'true' + - id: restore_hardclips + default: 'false' + - id: restore_original_qualities + default: 'false' + - id: sort_order + default: unsorted + - id: validation_stringency + default: SILENT + out: + - id: gatk_revert_sam_output + - id: gatk_revert_sam_output_map + run: ../command_line_tools/gatk_revert_sam/4.1.8.0/gatk_revert_sam_4.1.8.0.cwl + label: GATK-RevertSam + 'sbg:x': 351.4375 + 'sbg:y': 580.8984375 +requirements: + - class: SubworkflowFeatureRequirement diff --git a/qc_uncollapsed_bam/qc_uncollapsed_bam__packed.cwl b/qc_uncollapsed_bam/qc_uncollapsed_bam__packed.cwl new file mode 100644 index 0000000..045e548 --- /dev/null +++ b/qc_uncollapsed_bam/qc_uncollapsed_bam__packed.cwl @@ -0,0 +1,2401 @@ +{ + "$graph": [ + { + "class": "Workflow", + "id": "#bam_qc_stats.cwl", + "label": "bam_qc_stats", 
+ "inputs": [ + { + "id": "#bam_qc_stats.cwl/input", + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "secondaryFiles": [ + "^.bai" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 374.0625 + }, + { + "id": "#bam_qc_stats.cwl/target_intervals", + "type": "File", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 160.3125 + }, + { + "id": "#bam_qc_stats.cwl/bait_intervals", + "type": "File", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 480.9375 + }, + { + "id": "#bam_qc_stats.cwl/reference", + "type": "File", + "secondaryFiles": [ + "^.fasta.fai", + "^.dict" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 267.1875 + }, + { + "id": "#bam_qc_stats.cwl/temporary_directory", + "type": [ + "null", + "string" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 53.4375 + }, + { + "id": "#bam_qc_stats.cwl/hsmetrics_minimum_mapping_quality", + "type": [ + "null", + "int" + ], + "label": "hsmetrics_minimum_mapping_quality", + "https://www.sevenbridges.com/x": 1, + "https://www.sevenbridges.com/y": 613 + }, + { + "id": "#bam_qc_stats.cwl/hsmetrics_minimum_base_quality", + "type": [ + "null", + "int" + ], + "label": "hsmetrics_minimum_base_quality", + "https://www.sevenbridges.com/x": 3, + "https://www.sevenbridges.com/y": 743 + }, + { + "id": "#bam_qc_stats.cwl/hsmetrics_coverage_cap", + "type": [ + "null", + "int" + ], + "label": "hsmetrics_coverage_cap", + "https://www.sevenbridges.com/x": 2, + "https://www.sevenbridges.com/y": 872 + } + ], + "outputs": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_histogram_pdf", + "outputSource": [ + "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0/gatk_collect_insert_size_metrics_histogram_pdf" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + 
"https://www.sevenbridges.com/y": 106.875 + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_txt", + "outputSource": [ + "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0/gatk_collect_insert_size_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + "https://www.sevenbridges.com/y": 0 + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_txt", + "outputSource": [ + "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + "https://www.sevenbridges.com/y": 213.75 + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_per_base_coverage_txt", + "outputSource": [ + "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_per_base_coverage_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + "https://www.sevenbridges.com/y": 427.5 + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_per_target_coverage_txt", + "outputSource": [ + "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_per_target_coverage_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + "https://www.sevenbridges.com/y": 320.625 + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_alignment_summary_metrics_txt", + "outputSource": [ + "#bam_qc_stats.cwl/gatk_collect_alignment_summary_metrics_4_1_3_0/gatk_collect_alignment_summary_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "https://www.sevenbridges.com/x": 700.636962890625, + "https://www.sevenbridges.com/y": 534.375 + } + ], + "steps": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_alignment_summary_metrics_4_1_3_0", + 
"in": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_alignment_summary_metrics_4_1_3_0/input", + "source": "#bam_qc_stats.cwl/input" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_alignment_summary_metrics_4_1_3_0/reference", + "source": "#bam_qc_stats.cwl/reference" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_alignment_summary_metrics_4_1_3_0/temporary_directory", + "source": "#bam_qc_stats.cwl/temporary_directory" + } + ], + "out": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_alignment_summary_metrics_4_1_3_0/gatk_collect_alignment_summary_metrics_txt" + } + ], + "run": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl", + "label": "GATK-CollectAlignmentSummaryMetrics", + "https://www.sevenbridges.com/x": 334.2886657714844, + "https://www.sevenbridges.com/y": 560.505126953125 + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0", + "in": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/input", + "source": "#bam_qc_stats.cwl/input" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/bait_intervals", + "source": "#bam_qc_stats.cwl/bait_intervals" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/target_intervals", + "source": "#bam_qc_stats.cwl/target_intervals" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/coverage_cap", + "source": "#bam_qc_stats.cwl/hsmetrics_coverage_cap" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/minimum_base_quality", + "source": "#bam_qc_stats.cwl/hsmetrics_minimum_base_quality" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/minimum_mapping_quality", + "source": "#bam_qc_stats.cwl/hsmetrics_minimum_mapping_quality" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/reference", + "source": "#bam_qc_stats.cwl/reference" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/temporary_directory", + "source": "#bam_qc_stats.cwl/temporary_directory" + } + ], + "out": [ + { + 
"id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_txt" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_per_base_coverage_txt" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_hs_metrics_4_1_8_0/gatk_collect_hs_metrics_per_target_coverage_txt" + } + ], + "run": "#gatk_collect_hs_metrics_4.1.8.0.cwl", + "label": "GATK-CollectHsMetrics", + "https://www.sevenbridges.com/x": 327.8453674316406, + "https://www.sevenbridges.com/y": 372.8453674316406 + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0", + "in": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0/input", + "source": "#bam_qc_stats.cwl/input" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0/histogram_file", + "default": "histogram.pdf" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0/temporary_directory", + "source": "#bam_qc_stats.cwl/temporary_directory" + } + ], + "out": [ + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0/gatk_collect_insert_size_metrics_txt" + }, + { + "id": "#bam_qc_stats.cwl/gatk_collect_insert_size_metrics_4_1_8_0/gatk_collect_insert_size_metrics_histogram_pdf" + } + ], + "run": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl", + "label": "GATK-CollectInsertSizeMetrics", + "https://www.sevenbridges.com/x": 335.57733154296875, + "https://www.sevenbridges.com/y": 194.7628936767578 + } + ], + "requirements": [], + "https://schema.org/author": [ + { + "class": "https://schema.org/Person", + "https://schema.org/email": "mailto:murphyc4@mskcc.org", + "https://schema.org/identifier": "", + "https://schema.org/name": "Charles Murphy" + } + ], + "https://schema.org/citation": "", + "https://schema.org/codeRepository": "https://github.com/msk-access/uncollapsed_bam_generation", + "https://schema.org/contributor": [ + { + "class": "https://schema.org/Person", + "https://schema.org/email": 
"mailto:shahr2@mskcc.org", + "https://schema.org/identifier": "https://orcid.org/0000-0001-9042-6213", + "https://schema.org/name": "Ronak Shah" + } + ], + "https://schema.org/dateCreated": "2020-09-23", + "https://schema.org/license": "https://spdx.org/licenses/Apache-2.0", + "$namespaces": { + "sbg": "https://www.sevenbridges.com/" + } + }, + { + "class": "CommandLineTool", + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl", + "baseCommand": [ + "gatk", + "CollectAlignmentSummaryMetrics" + ], + "inputs": [ + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/memory_per_job", + "type": [ + "null", + "int" + ], + "doc": "Memory per job in megabytes" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/memory_overhead", + "type": [ + "null", + "int" + ], + "doc": "Memory overhead per job in megabytes" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/number_of_threads", + "type": [ + "null", + "int" + ] + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/input", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "-I" + }, + "doc": "Input file (bam or sam). Required." + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/output_file_name", + "type": [ + "null", + "string" + ], + "doc": "File to write the output to. Required." + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/reference", + "type": [ + "null", + "File" + ], + "inputBinding": { + "position": 0, + "prefix": "-R" + }, + "doc": "Reference sequence file. Note that while this argument is not required, without it only a small subset of the metrics will be calculated. Note also that if a reference sequence is provided, it must be accompanied by a sequence dictionary. 
Default value: null.", + "secondaryFiles": [ + "^.fasta.fai", + "^.dict" + ] + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/adaptor_sequence", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--ADAPTER_SEQUENCE" + }, + "doc": "List of adapter sequences to use when processing the alignment metrics. This argument may be specified 0 or more times. Default value: [AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG, AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCTGCTTG, AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT, AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNNNATCTCGTATGCCGTCTTCTGCTTG]." + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/metrics_acciumulation_level", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--METRIC_ACCUMULATION_LEVEL" + }, + "doc": "The level(s) at which to accumulate metrics. Default value: [ALL_READS]. This option can be set to 'null' to clear the default value. Possible values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be specified 0 or more times. This option can be set to 'null' to clear the default list." + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/expected_pair_orientations", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--EXPECTED_PAIR_ORIENTATIONS" + }, + "doc": "Paired-end reads that do not have this expected orientation will be considered chimeric. This argument may be specified 0 or more times. Default value: [FR]. 
Possible values: {FR, RF, TANDEM}" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/is_bisulfite_sequenced", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--IS_BISULFITE_SEQUENCED" + }, + "doc": "Whether the SAM or BAM file consists of bisulfite sequenced reads. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/max_insert_size", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--MAX_INSERT_SIZE" + }, + "doc": "Paired-end reads above this insert size will be considered chimeric along with inter-chromosomal pairs. Default value: 100000." + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/validation_stringency", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--VALIDATION_STRINGENCY" + }, + "doc": "Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded. Default value: STRICT. This option can be set to 'null' to clear the default value. Possible values: {STRICT,LENIENT, SILENT}" + }, + { + "default": true, + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/assume_sorted", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--ASSUME_SORTED" + }, + "doc": "If true (default), then the sort order in the header file will be ignored. Default value: true. This option can be set to 'null' to clear the default value. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/stop_after", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--STOP_AFTER" + }, + "doc": "Stop after processing N reads, mainly for debugging. Default value: 0. 
This option can be set to 'null' to clear the default value." + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/create_index", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CREATE_INDEX" + }, + "doc": "Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/create_md5_file", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CREATE_MD5_FILE" + }, + "doc": "Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/use_jdk_deflater", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--USE_JDK_DEFLATER" + }, + "doc": "Use the JDK Deflater instead of the Intel Deflater for writing compressed output" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/use_jdk_inflater", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--USE_JDK_INFLATER" + }, + "doc": "Use the JDK Inflater instead of the Intel Inflater for reading compressed input" + }, + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/temporary_directory", + "type": [ + "null", + "string" + ], + "doc": "Default value: null. This option may be specified 0 or more times." 
+ } + ], + "outputs": [ + { + "id": "#gatk_collect_alignment_summary_metrics_4.1.8.0.cwl/gatk_collect_alignment_summary_metrics_txt", + "type": "File", + "outputBinding": { + "glob": "${\n if (inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/[.]bam$/, '_alignment_summary_metrics.txt')\n }\n}" + } + } + ], + "label": "GATK-CollectAlignmentSummaryMetrics", + "arguments": [ + { + "position": 0, + "prefix": "--java-options", + "valueFrom": "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n return \"-Xmx15G\"\n }\n else {\n return \"-Xmx15G\"\n }\n}" + }, + { + "position": 0, + "prefix": "--TMP_DIR", + "valueFrom": "${\n if(inputs.temporary_directory)\n return inputs.temporary_directory;\n return runtime.tmpdir\n}" + }, + { + "position": 0, + "prefix": "-O", + "valueFrom": "${\n if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/[.]bam$/, '_alignment_summary_metrics.txt')\n }\n}" + } + ], + "requirements": [ + { + "class": "ResourceRequirement", + "ramMin": 32000, + "coresMin": 1 + }, + { + "class": "DockerRequirement", + "dockerPull": "ghcr.io/msk-access/gatk:4.1.8.0" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "http://purl.org/dc/terms/contributor": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": 
"http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://purl.org/dc/terms/creator": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://usefulinc.com/ns/doap#release": [ + { + "class": "http://usefulinc.com/ns/doap#Version", + "http://usefulinc.com/ns/doap#name": "gatk4", + "http://usefulinc.com/ns/doap#revision": "4.1.8.0" + } + ] + }, + { + "class": "CommandLineTool", + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl", + "baseCommand": [ + "gatk", + "CollectHsMetrics" + ], + "inputs": [ + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/input", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "-I" + }, + "doc": "An aligned SAM or BAM file. Required." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/bait_intervals", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "--BAIT_INTERVALS" + }, + "doc": "An interval list file that contains the locations of the baits used. This argument must be specified at least once. Required." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/target_intervals", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "--TARGET_INTERVALS" + }, + "doc": "An interval list file that contains the locations of the targets. This argument must be specified at least once. Required." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/output_file_name", + "type": [ + "null", + "string" + ], + "doc": "The output file to write the metrics to. Required." 
+ }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/per_base_coverage", + "type": [ + "null", + "string" + ], + "doc": "An optional file to output per base coverage information to. The per-base file contains one line per target base and can grow very large. It is not recommended for use with large target sets. Default value: null." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/per_target_coverage", + "type": [ + "null", + "string" + ], + "doc": "An optional file to output per target coverage information to. Default value: null." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/theoretical_sensitivity_output", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--THEORETICAL_SENSITIVITY_OUTPUT" + }, + "doc": "Output for Theoretical Sensitivity metrics where the allele fractions are provided by the ALLELE_FRACTION argument. Default value: null." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/allele_fraction", + "type": [ + "null", + "float" + ], + "inputBinding": { + "position": 0, + "prefix": "--ALLELE_FRACTION" + }, + "doc": "Allele fraction for which to calculate theoretical sensitivity. This argument may be specified 0 or more times. Default value: [0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.3, 0.5]." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/bait_set_name", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--BAIT_SET_NAME" + }, + "doc": "Bait set name. If not provided it is inferred from the filename of the bait intervals. Default value: null." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/clip_overlapping_reads", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CLIP_OVERLAPPING_READS" + }, + "doc": "True if we are to clip overlapping reads, false otherwise. Default value: true. 
Possible values: {true, false}" + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/coverage_cap", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--COVERAGE_CAP" + }, + "doc": "Parameter to set a max coverage limit for Theoretical Sensitivity calculations. Default is 200. Default value: 200." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/include_indels", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--INCLUDE_INDELS" + }, + "doc": "If true count inserted bases as on target and deleted bases as covered by a read. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/minimum_base_quality", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--MINIMUM_BASE_QUALITY" + }, + "doc": "Minimum base quality for a base to contribute coverage. Default value: 20." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/minimum_mapping_quality", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--MINIMUM_MAPPING_QUALITY" + }, + "doc": "Minimum mapping quality for a read to contribute coverage. Default value: 20." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/near_distance", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--NEAR_DISTANCE" + }, + "doc": "The maximum distance between a read and the nearest probe/bait/amplicon for the read to be considered 'near probe' and included in percent selected. Default value: 250." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/sample_size", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--SAMPLE_SIZE" + }, + "doc": "Sample Size used for Theoretical Het Sensitivity sampling. Default is 10000. Default value: 10000." 
+ }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/reference", + "type": [ + "null", + "File" + ], + "inputBinding": { + "position": 0, + "prefix": "-R" + }, + "doc": "Reference sequence file. Note that while this argument is not required, without it only a small subset of the metrics will be calculated. Note also that if a reference sequence is provided, it must be accompanied by a sequence dictionary. Default value: null.", + "secondaryFiles": [ + "^.fasta.fai", + "^.dict" + ] + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/metrics_acciumulation_level", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--METRIC_ACCUMULATION_LEVEL" + }, + "doc": "The level(s) at which to accumulate metrics. Default value: [ALL_READS]. This option can be set to 'null' to clear the default value. Possible values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be specified 0 or more times. This option can be set to 'null' to clear the default list." + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/validation_stringency", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--VALIDATION_STRINGENCY" + }, + "doc": "Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded. Default value: STRICT. This option can be set to 'null' to clear the default value. Possible values: {STRICT,LENIENT, SILENT}" + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/create_index", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CREATE_INDEX" + }, + "doc": "Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: false. 
Possible values: {true, false}" + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/create_md5_file", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CREATE_MD5_FILE" + }, + "doc": "Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/memory_per_job", + "type": [ + "null", + "int" + ], + "doc": "Memory per job in megabytes" + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/memory_overhead", + "type": [ + "null", + "int" + ], + "doc": "Memory overhead per job in megabytes" + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/number_of_threads", + "type": [ + "null", + "int" + ] + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/temporary_directory", + "type": [ + "null", + "string" + ], + "doc": "Default value: null. This option may be specified 0 or more times." + } + ], + "outputs": [ + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/gatk_collect_hs_metrics_txt", + "type": "File", + "outputBinding": { + "glob": "${\n if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/[.]bam$/, '_hs_metrics.txt')\n }\n}" + } + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/gatk_collect_hs_metrics_per_base_coverage_txt", + "type": "File", + "outputBinding": { + "glob": "${\n if(inputs.per_base_coverage){\n return inputs.per_base_coverage\n } else {\n return inputs.input.basename.replace(/[.]bam$/, '_per_base_coverage.txt')\n }\n}" + } + }, + { + "id": "#gatk_collect_hs_metrics_4.1.8.0.cwl/gatk_collect_hs_metrics_per_target_coverage_txt", + "type": "File", + "outputBinding": { + "glob": "${\n if(inputs.per_target_coverage){\n return inputs.per_target_coverage\n } else {\n return inputs.input.basename.replace(/[.]bam$/, '_per_target_coverage.txt')\n }\n}" + } + } + ], + "label": "GATK-CollectHsMetrics", + "arguments": [ + { + "position": 0, + 
"prefix": "--java-options", + "valueFrom": "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n return \"-Xmx15G\"\n }\n else {\n return \"-Xmx15G\"\n }\n}" + }, + { + "position": 0, + "prefix": "--TMP_DIR", + "valueFrom": "${\n if(inputs.temporary_directory)\n return inputs.temporary_directory;\n return runtime.tmpdir\n}" + }, + { + "position": 0, + "prefix": "-O", + "valueFrom": "${\n if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/[.]bam$/, '_hs_metrics.txt')\n }\n}" + }, + { + "position": 0, + "prefix": "--PER_TARGET_COVERAGE", + "valueFrom": "${\n if(inputs.per_target_coverage){\n return inputs.per_target_coverage\n } else {\n return inputs.input.basename.replace(/[.]bam$/, '_per_target_coverage.txt')\n }\n}" + }, + { + "position": 0, + "prefix": "--PER_BASE_COVERAGE", + "valueFrom": "${\n if(inputs.per_base_coverage){\n return inputs.per_base_coverage\n } else {\n return inputs.input.basename.replace(/[.]bam$/, '_per_base_coverage.txt')\n }\n}" + } + ], + "requirements": [ + { + "class": "ResourceRequirement", + "ramMin": 32000, + "coresMin": 1 + }, + { + "class": "DockerRequirement", + "dockerPull": "ghcr.io/msk-access/gatk:4.1.8.0" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "http://purl.org/dc/terms/contributor": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": 
"http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://purl.org/dc/terms/creator": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://usefulinc.com/ns/doap#release": [ + { + "class": "http://usefulinc.com/ns/doap#Version", + "http://usefulinc.com/ns/doap#name": "gatk4", + "http://usefulinc.com/ns/doap#revision": "4.1.8.0" + } + ] + }, + { + "class": "CommandLineTool", + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl", + "baseCommand": [ + "gatk", + "CollectInsertSizeMetrics" + ], + "inputs": [ + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/memory_per_job", + "type": [ + "null", + "int" + ], + "doc": "Memory per job in megabytes" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/memory_overhead", + "type": [ + "null", + "int" + ], + "doc": "Memory overhead per job in megabytes" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/number_of_threads", + "type": [ + "null", + "int" + ] + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/input", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "-I" + }, + "doc": "Input file (bam or sam). Required." + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/output_file_name", + "type": [ + "null", + "string" + ], + "doc": "File to write the output to. Required." 
+ }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/histogram_file", + "type": [ + "null", + "string" + ], + "doc": "File to write insert size Histogram chart to. Required." + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/deviations", + "type": [ + "null", + "float" + ], + "inputBinding": { + "position": 0, + "prefix": "--DEVIATIONS" + }, + "doc": "Generate mean, sd and plots by trimming the data down to MEDIAN + DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION. This is done because insert size data typically includes enough anomalous values from chimeras and other artifacts to make the mean and sd grossly misleading regarding the real distribution. Default value: 10.0. This option can be set to 'null' to clear the default value." + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/histogram_width", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--HISTOGRAM_WIDTH" + }, + "doc": "Explicitly sets the Histogram width, overriding automatic truncation of Histogram tail. Also, when calculating mean and standard deviation, only bins <= Histogram_WIDTH will be included. Default value: null." + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/minimum_pct", + "type": [ + "null", + "float" + ], + "inputBinding": { + "position": 0, + "prefix": "--MINIMUM_PCT" + }, + "doc": "When generating the Histogram, discard any data categories (out of FR, TANDEM, RF) that have fewer than this percentage of overall reads. (Range: 0 to 1). Default value: 0.05. This option can be set to 'null' to clear the default value." + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/metrics_acciumulation_level", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--METRIC_ACCUMULATION_LEVEL" + }, + "doc": "The level(s) at which to accumulate metrics. Default value: [ALL_READS]. This option can be set to 'null' to clear the default value. 
Possible values: {ALL_READS, SAMPLE, LIBRARY, READ_GROUP} This option may be specified 0 or more times. This option can be set to 'null' to clear the default list." + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/include_duplicates", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--INCLUDE_DUPLICATES" + }, + "doc": "If true, also include reads marked as duplicates in the insert size histogram. Default value: false. This option can be set to 'null' to clear the default value. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/validation_stringency", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--VALIDATION_STRINGENCY" + }, + "doc": "Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded. Default value: STRICT. This option can be set to 'null' to clear the default value. Possible values: {STRICT,LENIENT, SILENT}" + }, + { + "default": true, + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/assume_sorted", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--ASSUME_SORTED" + }, + "doc": "If true (default), then the sort order in the header file will be ignored. Default value: true. This option can be set to 'null' to clear the default value. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/stop_after", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--STOP_AFTER" + }, + "doc": "Stop after processing N reads, mainly for debugging. Default value: 0. This option can be set to 'null' to clear the default value." 
+ }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/create_index", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CREATE_INDEX" + }, + "doc": "Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/create_md5_file", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CREATE_MD5_FILE" + }, + "doc": "Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/use_jdk_deflater", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--USE_JDK_DEFLATER" + }, + "doc": "Use the JDK Deflater instead of the Intel Deflater for writing compressed output" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/use_jdk_inflater", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--USE_JDK_INFLATER" + }, + "doc": "Use the JDK Inflater instead of the Intel Inflater for reading compressed input" + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/temporary_directory", + "type": [ + "null", + "string" + ], + "doc": "Default value: null. This option may be specified 0 or more times." 
+ } + ], + "outputs": [ + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/gatk_collect_insert_size_metrics_txt", + "type": "File", + "outputBinding": { + "glob": "${\n if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_insert_size_metrics.txt')\n }\n}" + } + }, + { + "id": "#gatk_collect_insert_size_metrics_4.1.8.0.cwl/gatk_collect_insert_size_metrics_histogram_pdf", + "type": "File", + "outputBinding": { + "glob": "${\n if(inputs.histogram_file){\n return inputs.histogram_file\n } else {\n return inputs.input.basename.replace(/.bam/, '_histogram.pdf')\n }\n}" + } + } + ], + "label": "GATK-CollectInsertSizeMetrics", + "arguments": [ + { + "position": 0, + "prefix": "--java-options", + "valueFrom": "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n return \"-Xmx15G\"\n }\n else {\n return \"-Xmx15G\"\n }\n}" + }, + { + "position": 0, + "prefix": "--TMP_DIR", + "valueFrom": "${\n if(inputs.temporary_directory)\n return inputs.temporary_directory;\n return runtime.tmpdir\n}" + }, + { + "position": 2, + "prefix": "-O", + "valueFrom": "${\n if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_insert_size_metrics.txt')\n }\n}" + }, + { + "position": 2, + "prefix": "-H", + "valueFrom": "${\n if(inputs.histogram_file){\n return inputs.histogram_file\n } else {\n return 
inputs.input.basename.replace(/.bam/, '_histogram.pdf')\n }\n}" + } + ], + "requirements": [ + { + "class": "ResourceRequirement", + "ramMin": 32000, + "coresMin": 1 + }, + { + "class": "DockerRequirement", + "dockerPull": "ghcr.io/msk-access/gatk:4.1.8.0" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "http://purl.org/dc/terms/contributor": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://purl.org/dc/terms/creator": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://usefulinc.com/ns/doap#release": [ + { + "class": "http://usefulinc.com/ns/doap#Version", + "http://usefulinc.com/ns/doap#name": "gatk4", + "http://usefulinc.com/ns/doap#revision": "4.1.8.0" + } + ] + }, + { + "class": "CommandLineTool", + "id": "#gatk_mean_quality_by_cycle_4.1.8.0.cwl", + "baseCommand": [ + "gatk", + "MeanQualityByCycle" + ], + "inputs": [ + { + "id": "#gatk_mean_quality_by_cycle_4.1.8.0.cwl/input", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "-I" + }, + "doc": "An aligned SAM or BAM file. Required." + }, + { + "id": "#gatk_mean_quality_by_cycle_4.1.8.0.cwl/output_file_name", + "type": [ + "null", + "string" + ], + "doc": "The output file to write the metrics to." 
+ }, + { + "id": "#gatk_mean_quality_by_cycle_4.1.8.0.cwl/chart_output", + "type": [ + "null", + "string" + ], + "doc": "A file (with .pdf extension) to write the chart to." + }, + { + "id": "#gatk_mean_quality_by_cycle_4.1.8.0.cwl/assume_sorted", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 1, + "prefix": "--ASSUME_SORTED" + }, + "doc": "If true (default), then the sort order in the header file will be ignored.\n" + }, + { + "id": "#gatk_mean_quality_by_cycle_4.1.8.0.cwl/pf_reads_only", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 1, + "prefix": "--PF_READS_ONLY" + }, + "doc": "If set to true calculate mean quality over PF reads only. Default value: false. Possible values: {true, false}\n" + }, + { + "id": "#gatk_mean_quality_by_cycle_4.1.8.0.cwl/reference", + "type": [ + "null", + "File" + ], + "inputBinding": { + "position": 0, + "prefix": "-R" + }, + "doc": "Reference sequence file. Note that while this argument is not required, without it only a small subset of the metrics will be calculated. Note also that if a reference sequence is provided, it must be accompanied by a sequence dictionary. Default value: null.", + "secondaryFiles": [ + "^.fasta.fai", + "^.dict" + ] + }, + { + "id": "#gatk_mean_quality_by_cycle_4.1.8.0.cwl/validation_stringency", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--VALIDATION_STRINGENCY" + }, + "doc": "Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded. Default value: STRICT. This option can be set to 'null' to clear the default value. 
Possible values: {STRICT,LENIENT, SILENT}" + }, + { + "id": "#gatk_mean_quality_by_cycle_4.1.8.0.cwl/create_index", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CREATE_INDEX" + }, + "doc": "Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_mean_quality_by_cycle_4.1.8.0.cwl/create_md5_file", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CREATE_MD5_FILE" + }, + "doc": "Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_mean_quality_by_cycle_4.1.8.0.cwl/memory_per_job", + "type": [ + "null", + "int" + ], + "doc": "Memory per job in megabytes" + }, + { + "id": "#gatk_mean_quality_by_cycle_4.1.8.0.cwl/memory_overhead", + "type": [ + "null", + "int" + ], + "doc": "Memory overhead per job in megabytes" + }, + { + "id": "#gatk_mean_quality_by_cycle_4.1.8.0.cwl/number_of_threads", + "type": [ + "null", + "int" + ] + }, + { + "id": "#gatk_mean_quality_by_cycle_4.1.8.0.cwl/temporary_directory", + "type": [ + "null", + "string" + ], + "doc": "Directory with space available to be used by this program for temporary storage of working files." 
+ } + ], + "outputs": [ + { + "id": "#gatk_mean_quality_by_cycle_4.1.8.0.cwl/gatk_mean_quality_by_cycle_output", + "type": "File", + "outputBinding": { + "glob": "${\n if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_mean_quality_by_cycle.txt')\n }\n}" + } + }, + { + "id": "#gatk_mean_quality_by_cycle_4.1.8.0.cwl/gatk_mean_quality_by_cycle_chart_output", + "type": "File", + "outputBinding": { + "glob": "${\n if(inputs.chart_output){\n return inputs.chart_output\n } else {\n return inputs.input.basename.replace(/.bam/, '_mean_quality_by_cycle.pdf')\n }\n}" + } + } + ], + "label": "GATK-MeanQualityByCycle", + "arguments": [ + { + "position": 0, + "prefix": "--java-options", + "valueFrom": "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n return \"-Xmx14G\"\n }\n else {\n return \"-Xmx14G\"\n }\n}" + }, + { + "position": 0, + "prefix": "--TMP_DIR", + "valueFrom": "${\n if(inputs.temporary_directory) {\n return inputs.temporary_directory;\n }\n return runtime.tmpdir;\n}" + }, + { + "position": 0, + "prefix": "-O", + "valueFrom": "${\n if(inputs.output_file_name){\n return inputs.output_file_name\n } else {\n return inputs.input.basename.replace(/.bam/, '_mean_quality_by_cycle.txt')\n }\n}" + }, + { + "position": 0, + "prefix": "--CHART_OUTPUT", + "valueFrom": "${\n if(inputs.chart_output){\n return inputs.chart_output\n } else {\n return 
inputs.input.basename.replace(/.bam/, '_mean_quality_by_cycle.pdf')\n }\n}" + } + ], + "requirements": [ + { + "class": "ResourceRequirement", + "ramMin": 16000, + "coresMin": 2 + }, + { + "class": "DockerRequirement", + "dockerPull": "ghcr.io/msk-access/gatk:4.1.8.0" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "http://purl.org/dc/terms/contributor": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://purl.org/dc/terms/creator": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://usefulinc.com/ns/doap#release": [ + { + "class": "http://usefulinc.com/ns/doap#Version", + "http://usefulinc.com/ns/doap#name": "gatk4", + "http://usefulinc.com/ns/doap#revision": "4.1.8.0" + } + ] + }, + { + "class": "CommandLineTool", + "id": "#gatk_revert_sam_4.1.8.0.cwl", + "baseCommand": [ + "gatk", + "RevertSam" + ], + "inputs": [ + { + "id": "#gatk_revert_sam_4.1.8.0.cwl/input", + "type": "File", + "inputBinding": { + "position": 0, + "prefix": "-I" + }, + "doc": "An aligned SAM or BAM file. Required." + }, + { + "id": "#gatk_revert_sam_4.1.8.0.cwl/output", + "type": [ + "null", + "string" + ], + "doc": "The output SAM/BAM file to create, or an output directory if OUTPUT_BY_READGROUP is true. Required. 
Cannot be used in conjunction with argument(s) OUTPUT_MAP (OM)" + }, + { + "id": "#gatk_revert_sam_4.1.8.0.cwl/output_map", + "type": [ + "null", + "string" + ], + "doc": "Tab separated file with two columns, READ_GROUP_ID and OUTPUT, providing file mapping only used if OUTPUT_BY_READGROUP is true. Required. Cannot be used in conjunction with argument(s) OUTPUT (O)" + }, + { + "id": "#gatk_revert_sam_4.1.8.0.cwl/attribute_to_clear", + "type": [ + "null", + { + "type": "array", + "items": "string", + "inputBinding": { + "position": 0, + "prefix": "--ATTRIBUTE_TO_CLEAR" + } + } + ], + "doc": "When removing alignment information, the set of optional tags to remove. This may be specified 0 or more times. Default value: [NM, UQ, PG, MD, MQ, SA, MC, AS]." + }, + { + "id": "#gatk_revert_sam_4.1.8.0.cwl/max_discard_fraction", + "type": [ + "null", + "float" + ], + "inputBinding": { + "position": 0, + "prefix": "--MAX_DISCARD_FRACTION" + }, + "doc": "If SANITIZE=true and higher than MAX_DISCARD_FRACTION reads are discarded due to sanitization then the program will exit with an Exception instead of exiting cleanly. Output BAM will still be valid. Default value: 0.01." + }, + { + "id": "#gatk_revert_sam_4.1.8.0.cwl/library_name", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--LIBRARY_NAME" + }, + "doc": "The library name to use in the reverted output file. This will override the existing sample alias in the file and is used only if all the read groups in the input file have the same library name. Default value: null." + }, + { + "id": "#gatk_revert_sam_4.1.8.0.cwl/max_records_in_ram", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--MAX_RECORDS_IN_RAM" + }, + "doc": "When writing files that need to be sorted, this will specify the number of records stored in RAM before spilling to disk. 
Increasing this number reduces the number of file handles needed to sort the file, and increases the amount of RAM needed. Default value: 500000." + }, + { + "id": "#gatk_revert_sam_4.1.8.0.cwl/output_by_readgroup", + "type": [ + "null", + "string" + ], + "default": "false", + "inputBinding": { + "position": 0, + "prefix": "--OUTPUT_BY_READGROUP" + }, + "doc": "When true, outputs each read group in a separate file. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_revert_sam_4.1.8.0.cwl/output_by_readgroup_file_format", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--OUTPUT_BY_READGROUP_FILE_FORMAT" + }, + "doc": "When using OUTPUT_BY_READGROUP, the output file format can be set to a certain format. Default value: dynamic. sam (Generate SAM files.) bam (Generate BAM files.) cram (Generate CRAM files.) dynamic (Generate files based on the extension of INPUT.)" + }, + { + "id": "#gatk_revert_sam_4.1.8.0.cwl/remove_alignment_information", + "type": [ + "null", + "string" + ], + "default": "true", + "inputBinding": { + "position": 0, + "prefix": "--REMOVE_ALIGNMENT_INFORMATION" + }, + "doc": "Remove all alignment information from the file. Default value: true. Possible values: {true, false}" + }, + { + "id": "#gatk_revert_sam_4.1.8.0.cwl/remove_duplicate_information", + "type": [ + "null", + "string" + ], + "default": "true", + "inputBinding": { + "position": 1, + "prefix": "--REMOVE_DUPLICATE_INFORMATION" + }, + "doc": "Remove duplicate read flags from all reads. Note that if this is false and\nREMOVE_ALIGNMENT_INFORMATION==true, the output may have the unusual but sometimes\ndesirable trait of having unmapped reads that are marked as duplicates. Default value:\ntrue. 
Possible values: {true, false}\n" + }, + { + "id": "#gatk_revert_sam_4.1.8.0.cwl/restore_hardclips", + "type": [ + "null", + "string" + ], + "default": "true", + "inputBinding": { + "position": 0, + "prefix": "--RESTORE_HARDCLIPS" + }, + "doc": "When true, restores reads and qualities of records with hard-clips containing XB and XQ tags. Default value: true. Possible values: {true, false}" + }, + { + "id": "#gatk_revert_sam_4.1.8.0.cwl/restore_original_qualities", + "type": [ + "null", + "string" + ], + "default": "true", + "inputBinding": { + "position": 1, + "prefix": "--RESTORE_ORIGINAL_QUALITIES" + }, + "doc": "True to restore original qualities from the OQ field to the QUAL field if available. Default value: true. Possible values: {true, false}\n" + }, + { + "id": "#gatk_revert_sam_4.1.8.0.cwl/sample_alias", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 1, + "prefix": "--SAMPLE_ALIAS" + }, + "doc": "The sample alias to use in the reverted output file. This will override the existing\nsample alias in the file and is used only if all the read groups in the input file have\nthe same sample alias. Default value: null.\n" + }, + { + "id": "#gatk_revert_sam_4.1.8.0.cwl/sanitize", + "type": [ + "null", + "string" + ], + "default": "false", + "inputBinding": { + "position": 1, + "prefix": "--SANITIZE" + }, + "doc": "WARNING: This option is potentially destructive. If enabled will discard reads in order to\nproduce a consistent output BAM. Reads discarded include (but are not limited to) paired\nreads with missing mates, duplicated records, records with mismatches in length of bases\nand qualities. This option can only be enabled if the output sort order is queryname and\nwill always cause sorting to occur. Default value: false. 
Possible values: {true, false}\n" + }, + { + "id": "#gatk_revert_sam_4.1.8.0.cwl/sort_order", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 1, + "prefix": "--SORT_ORDER" + }, + "doc": "The sort order to create the reverted output file with. Default value: queryname. Possible values: {unsorted, queryname, coordinate, duplicate, unknown}\n" + }, + { + "id": "#gatk_revert_sam_4.1.8.0.cwl/reference", + "type": [ + "null", + "File" + ], + "inputBinding": { + "position": 0, + "prefix": "-R" + }, + "doc": "Reference sequence file. Note that while this argument is not required, without it only a small subset of the metrics will be calculated. Note also that if a reference sequence is provided, it must be accompanied by a sequence dictionary. Default value: null.", + "secondaryFiles": [ + "^.fasta.fai", + "^.dict" + ] + }, + { + "id": "#gatk_revert_sam_4.1.8.0.cwl/validation_stringency", + "type": [ + "null", + "string" + ], + "inputBinding": { + "position": 0, + "prefix": "--VALIDATION_STRINGENCY" + }, + "doc": "Validation stringency for all SAM files read by this program. Setting stringency to SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded. Default value: STRICT. This option can be set to 'null' to clear the default value. Possible values: {STRICT,LENIENT, SILENT}" + }, + { + "id": "#gatk_revert_sam_4.1.8.0.cwl/compression_level", + "type": [ + "null", + "int" + ], + "inputBinding": { + "position": 0, + "prefix": "--COMPRESSION_LEVEL" + }, + "doc": "Compression level for all compressed files created (e.g. BAM and VCF). Default value: 2." + }, + { + "id": "#gatk_revert_sam_4.1.8.0.cwl/create_index", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CREATE_INDEX" + }, + "doc": "Whether to create a BAM index when writing a coordinate-sorted BAM file. Default value: false. 
Possible values: {true, false}" + }, + { + "id": "#gatk_revert_sam_4.1.8.0.cwl/create_md5_file", + "type": [ + "null", + "boolean" + ], + "inputBinding": { + "position": 0, + "prefix": "--CREATE_MD5_FILE" + }, + "doc": "Whether to create an MD5 digest for any BAM or FASTQ files created. Default value: false. Possible values: {true, false}" + }, + { + "id": "#gatk_revert_sam_4.1.8.0.cwl/memory_per_job", + "type": [ + "null", + "int" + ], + "doc": "Memory per job in megabytes" + }, + { + "id": "#gatk_revert_sam_4.1.8.0.cwl/memory_overhead", + "type": [ + "null", + "int" + ], + "doc": "Memory overhead per job in megabytes" + }, + { + "id": "#gatk_revert_sam_4.1.8.0.cwl/number_of_threads", + "type": [ + "null", + "int" + ] + }, + { + "id": "#gatk_revert_sam_4.1.8.0.cwl/temporary_directory", + "type": [ + "null", + "string" + ], + "doc": "Default value: null. This option may be specified 0 or more times." + } + ], + "outputs": [ + { + "id": "#gatk_revert_sam_4.1.8.0.cwl/gatk_revert_sam_output", + "type": "File", + "outputBinding": { + "glob": "${\n if(inputs.output){\n return inputs.output\n } else {\n return inputs.input.basename.replace(/.bam|.sam/, '_revertsam.bam')\n }\n}" + } + }, + { + "id": "#gatk_revert_sam_4.1.8.0.cwl/gatk_revert_sam_output_map", + "type": [ + "null", + "File" + ], + "outputBinding": { + "glob": "${\n if(inputs.output_map){\n return inputs.output_map\n } else {\n return inputs.input.basename.replace(/.bam|.sam/, '_revertsam.tsv')\n }\n}" + } + } + ], + "label": "GATK-RevertSam", + "arguments": [ + { + "position": 0, + "prefix": "--java-options", + "valueFrom": "${\n if(inputs.memory_per_job && inputs.memory_overhead) {\n if(inputs.memory_per_job % 1000 == 0) {\n return \"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if (inputs.memory_per_job && !inputs.memory_overhead){\n if(inputs.memory_per_job % 1000 == 0) {\n return 
\"-Xmx\" + (inputs.memory_per_job/1000).toString() + \"G\"\n }\n else {\n return \"-Xmx\" + Math.floor((inputs.memory_per_job/1000)).toString() + \"G\"\n }\n }\n else if(!inputs.memory_per_job && inputs.memory_overhead){\n return \"-Xmx15G\"\n }\n else {\n return \"-Xmx15G\"\n }\n}" + }, + { + "position": 0, + "prefix": "--TMP_DIR", + "valueFrom": "${\n if(inputs.temporary_directory)\n return inputs.temporary_directory;\n return runtime.tmpdir\n}" + }, + { + "position": 0, + "prefix": "-O", + "valueFrom": "${\n if(inputs.output){\n return inputs.output;\n } else if (inputs.output_map) {\n return null;\n } else {\n return inputs.input.basename.replace(/.bam|.sam/, '_revertsam.bam');\n }\n}" + }, + { + "position": 0, + "prefix": "-OM", + "valueFrom": "${\n if(inputs.output_map){\n return inputs.output_map;\n } else {\n return null;\n }\n}" + } + ], + "requirements": [ + { + "class": "ResourceRequirement", + "ramMin": 17000, + "coresMin": 2 + }, + { + "class": "DockerRequirement", + "dockerPull": "ghcr.io/msk-access/gatk:4.1.8.0" + }, + { + "class": "InlineJavascriptRequirement" + } + ], + "http://purl.org/dc/terms/contributor": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://purl.org/dc/terms/creator": [ + { + "class": "http://xmlns.com/foaf/0.1/Organization", + "http://xmlns.com/foaf/0.1/member": [ + { + "class": "http://xmlns.com/foaf/0.1/Person", + "http://xmlns.com/foaf/0.1/mbox": "mailto:murphyc4@mskcc.org", + "http://xmlns.com/foaf/0.1/name": "Charles Murphy" + } + ], + "http://xmlns.com/foaf/0.1/name": "Memorial Sloan Kettering Cancer Center" + } + ], + "http://usefulinc.com/ns/doap#release": [ + { + "class": 
"http://usefulinc.com/ns/doap#Version", + "http://usefulinc.com/ns/doap#name": "gatk4", + "http://usefulinc.com/ns/doap#revision": "4.1.8.0" + } + ] + }, + { + "class": "Workflow", + "id": "#main", + "label": "qc_uncollapsed_bam", + "inputs": [ + { + "id": "#reference", + "type": "File", + "secondaryFiles": [ + "^.fasta.fai", + "^.dict" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 267.2265625 + }, + { + "id": "#uncollapsed_bam_base_recal", + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "uncollapsed_bam_base_recal", + "secondaryFiles": [ + "^.bai" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 160.3359375 + }, + { + "id": "#pool_b_target_intervals", + "type": "File", + "label": "pool_b_target_intervals", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 374.1171875 + }, + { + "id": "#pool_b_bait_intervals", + "type": "File", + "label": "pool_b_bait_intervals", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 481.0078125 + }, + { + "id": "#pool_a_bait_intervals", + "type": "File", + "label": "pool_a_bait_intervals", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 694.7890625 + }, + { + "id": "#pool_a_target_intervals", + "type": "File", + "label": "pool_a_target_intervals", + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 587.8984375 + }, + { + "id": "#hsmetrics_minimum_mapping_quality", + "type": [ + "null", + "int" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 801.6796875 + }, + { + "id": "#hsmetrics_minimum_base_quality", + "type": [ + "null", + "int" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 908.5703125 + }, + { + "id": "#hsmetrics_coverage_cap", + "type": [ + "null", + "int" + ], + "https://www.sevenbridges.com/x": 0, + "https://www.sevenbridges.com/y": 1015.4609375 + } + ], + "outputs": 
[ + { + "id": "#gatk_collect_alignment_summary_metrics_txt_pool_b", + "outputSource": [ + "#bam_qc_stats_pool_b/gatk_collect_alignment_summary_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_alignment_summary_metrics_txt_pool_b", + "https://www.sevenbridges.com/x": 1369.4512939453125, + "https://www.sevenbridges.com/y": 1068.90625 + }, + { + "id": "#gatk_collect_hs_metrics_per_base_coverage_txt_pool_b", + "outputSource": [ + "#bam_qc_stats_pool_b/gatk_collect_hs_metrics_per_base_coverage_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_hs_metrics_per_base_coverage_txt_pool_b", + "https://www.sevenbridges.com/x": 1369.4512939453125, + "https://www.sevenbridges.com/y": 855.125 + }, + { + "id": "#gatk_collect_hs_metrics_per_target_coverage_txt_pool_b", + "outputSource": [ + "#bam_qc_stats_pool_b/gatk_collect_hs_metrics_per_target_coverage_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_hs_metrics_per_target_coverage_txt_pool_b", + "https://www.sevenbridges.com/x": 1369.4512939453125, + "https://www.sevenbridges.com/y": 641.34375 + }, + { + "id": "#gatk_collect_hs_metrics_txt_pool_b", + "outputSource": [ + "#bam_qc_stats_pool_b/gatk_collect_hs_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_hs_metrics_txt_pool_b", + "https://www.sevenbridges.com/x": 1369.4512939453125, + "https://www.sevenbridges.com/y": 427.5625 + }, + { + "id": "#gatk_collect_insert_size_metrics_histogram_pdf_pool_b", + "outputSource": [ + "#bam_qc_stats_pool_b/gatk_collect_insert_size_metrics_histogram_pdf" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_insert_size_metrics_histogram_pdf_pool_b", + "https://www.sevenbridges.com/x": 1369.4512939453125, + "https://www.sevenbridges.com/y": 213.78125 + 
}, + { + "id": "#gatk_collect_insert_size_metrics_txt_pool_b", + "outputSource": [ + "#bam_qc_stats_pool_b/gatk_collect_insert_size_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_insert_size_metrics_txt_pool_b", + "https://www.sevenbridges.com/x": 1369.4512939453125, + "https://www.sevenbridges.com/y": 0 + }, + { + "id": "#gatk_collect_alignment_summary_metrics_txt_pool_a", + "outputSource": [ + "#bam_qc_stats_pool_a/gatk_collect_alignment_summary_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_alignment_summary_metrics_txt_pool_a", + "https://www.sevenbridges.com/x": 1369.4512939453125, + "https://www.sevenbridges.com/y": 1175.796875 + }, + { + "id": "#gatk_collect_hs_metrics_per_base_coverage_txt_pool_a", + "outputSource": [ + "#bam_qc_stats_pool_a/gatk_collect_hs_metrics_per_base_coverage_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_hs_metrics_per_base_coverage_txt_pool_a", + "https://www.sevenbridges.com/x": 1369.4512939453125, + "https://www.sevenbridges.com/y": 962.015625 + }, + { + "id": "#gatk_collect_hs_metrics_per_target_coverage_txt_pool_a", + "outputSource": [ + "#bam_qc_stats_pool_a/gatk_collect_hs_metrics_per_target_coverage_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_hs_metrics_per_target_coverage_txt_pool_a", + "https://www.sevenbridges.com/x": 1369.4512939453125, + "https://www.sevenbridges.com/y": 748.234375 + }, + { + "id": "#gatk_collect_hs_metrics_txt_pool_a", + "outputSource": [ + "#bam_qc_stats_pool_a/gatk_collect_hs_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_hs_metrics_txt_pool_a", + "https://www.sevenbridges.com/x": 1369.4512939453125, + "https://www.sevenbridges.com/y": 534.453125 + }, + { + "id": 
"#gatk_collect_insert_size_metrics_histogram_pdf_pool_a", + "outputSource": [ + "#bam_qc_stats_pool_a/gatk_collect_insert_size_metrics_histogram_pdf" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_insert_size_metrics_histogram_pdf_pool_a", + "https://www.sevenbridges.com/x": 1369.4512939453125, + "https://www.sevenbridges.com/y": 320.671875 + }, + { + "id": "#gatk_collect_insert_size_metrics_txt_pool_a", + "outputSource": [ + "#bam_qc_stats_pool_a/gatk_collect_insert_size_metrics_txt" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_collect_insert_size_metrics_txt_pool_a", + "https://www.sevenbridges.com/x": 1369.4512939453125, + "https://www.sevenbridges.com/y": 106.890625 + }, + { + "id": "#gatk_mean_quality_by_cycle_output_base_recal", + "outputSource": [ + "#gatk_mean_quality_by_cycle_4_1_8_1/gatk_mean_quality_by_cycle_output" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_mean_quality_by_cycle_output_base_recal", + "https://www.sevenbridges.com/x": 738.7452392578125, + "https://www.sevenbridges.com/y": 343.5625 + }, + { + "id": "#gatk_mean_quality_by_cycle_chart_output_base_recal", + "outputSource": [ + "#gatk_mean_quality_by_cycle_4_1_8_1/gatk_mean_quality_by_cycle_chart_output" + ], + "type": [ + "File", + { + "type": "array", + "items": "File" + } + ], + "label": "gatk_mean_quality_by_cycle_chart_output_base_recal", + "https://www.sevenbridges.com/x": 738.7452392578125, + "https://www.sevenbridges.com/y": 450.453125 + } + ], + "steps": [ + { + "id": "#bam_qc_stats_pool_a", + "in": [ + { + "id": "#bam_qc_stats_pool_a/input", + "source": [ + "#gatk_revert_sam_4_1_8_0/gatk_revert_sam_output" + ] + }, + { + "id": "#bam_qc_stats_pool_a/target_intervals", + "source": "#pool_a_target_intervals" + }, + { + "id": "#bam_qc_stats_pool_a/bait_intervals", + "source": "#pool_a_bait_intervals" + }, + { + "id": 
"#bam_qc_stats_pool_a/reference", + "source": "#reference" + }, + { + "id": "#bam_qc_stats_pool_a/hsmetrics_minimum_mapping_quality", + "source": "#hsmetrics_minimum_mapping_quality" + }, + { + "id": "#bam_qc_stats_pool_a/hsmetrics_minimum_base_quality", + "source": "#hsmetrics_minimum_base_quality" + }, + { + "id": "#bam_qc_stats_pool_a/hsmetrics_coverage_cap", + "source": "#hsmetrics_coverage_cap" + } + ], + "out": [ + { + "id": "#bam_qc_stats_pool_a/gatk_collect_insert_size_metrics_histogram_pdf" + }, + { + "id": "#bam_qc_stats_pool_a/gatk_collect_insert_size_metrics_txt" + }, + { + "id": "#bam_qc_stats_pool_a/gatk_collect_hs_metrics_txt" + }, + { + "id": "#bam_qc_stats_pool_a/gatk_collect_hs_metrics_per_base_coverage_txt" + }, + { + "id": "#bam_qc_stats_pool_a/gatk_collect_hs_metrics_per_target_coverage_txt" + }, + { + "id": "#bam_qc_stats_pool_a/gatk_collect_alignment_summary_metrics_txt" + } + ], + "run": "#bam_qc_stats.cwl", + "label": "bam_qc_stats_pool_a", + "https://www.sevenbridges.com/x": 738.7452392578125, + "https://www.sevenbridges.com/y": 790.234375 + }, + { + "id": "#bam_qc_stats_pool_b", + "in": [ + { + "id": "#bam_qc_stats_pool_b/input", + "source": [ + "#gatk_revert_sam_4_1_8_0/gatk_revert_sam_output" + ] + }, + { + "id": "#bam_qc_stats_pool_b/target_intervals", + "source": "#pool_b_target_intervals" + }, + { + "id": "#bam_qc_stats_pool_b/bait_intervals", + "source": "#pool_b_bait_intervals" + }, + { + "id": "#bam_qc_stats_pool_b/reference", + "source": "#reference" + }, + { + "id": "#bam_qc_stats_pool_b/hsmetrics_minimum_mapping_quality", + "source": "#hsmetrics_minimum_mapping_quality" + }, + { + "id": "#bam_qc_stats_pool_b/hsmetrics_minimum_base_quality", + "source": "#hsmetrics_minimum_base_quality" + }, + { + "id": "#bam_qc_stats_pool_b/hsmetrics_coverage_cap", + "source": "#hsmetrics_coverage_cap" + } + ], + "out": [ + { + "id": "#bam_qc_stats_pool_b/gatk_collect_insert_size_metrics_histogram_pdf" + }, + { + "id": 
"#bam_qc_stats_pool_b/gatk_collect_insert_size_metrics_txt" + }, + { + "id": "#bam_qc_stats_pool_b/gatk_collect_hs_metrics_txt" + }, + { + "id": "#bam_qc_stats_pool_b/gatk_collect_hs_metrics_per_base_coverage_txt" + }, + { + "id": "#bam_qc_stats_pool_b/gatk_collect_hs_metrics_per_target_coverage_txt" + }, + { + "id": "#bam_qc_stats_pool_b/gatk_collect_alignment_summary_metrics_txt" + } + ], + "run": "#bam_qc_stats.cwl", + "label": "bam_qc_stats_pool_b", + "https://www.sevenbridges.com/x": 738.7452392578125, + "https://www.sevenbridges.com/y": 599.34375 + }, + { + "id": "#gatk_mean_quality_by_cycle_4_1_8_1", + "in": [ + { + "id": "#gatk_mean_quality_by_cycle_4_1_8_1/input", + "source": "#uncollapsed_bam_base_recal" + }, + { + "id": "#gatk_mean_quality_by_cycle_4_1_8_1/reference", + "source": "#reference" + } + ], + "out": [ + { + "id": "#gatk_mean_quality_by_cycle_4_1_8_1/gatk_mean_quality_by_cycle_output" + }, + { + "id": "#gatk_mean_quality_by_cycle_4_1_8_1/gatk_mean_quality_by_cycle_chart_output" + } + ], + "run": "#gatk_mean_quality_by_cycle_4.1.8.0.cwl", + "label": "GATK-MeanQualityByCycle_base_recal", + "https://www.sevenbridges.com/x": 351.4375, + "https://www.sevenbridges.com/y": 701.7890625 + }, + { + "id": "#gatk_revert_sam_4_1_8_0", + "in": [ + { + "id": "#gatk_revert_sam_4_1_8_0/input", + "source": "#uncollapsed_bam_base_recal" + }, + { + "id": "#gatk_revert_sam_4_1_8_0/remove_alignment_information", + "default": "false" + }, + { + "id": "#gatk_revert_sam_4_1_8_0/remove_duplicate_information", + "default": "true" + }, + { + "id": "#gatk_revert_sam_4_1_8_0/restore_hardclips", + "default": "false" + }, + { + "id": "#gatk_revert_sam_4_1_8_0/restore_original_qualities", + "default": "false" + }, + { + "id": "#gatk_revert_sam_4_1_8_0/sort_order", + "default": "unsorted" + }, + { + "id": "#gatk_revert_sam_4_1_8_0/validation_stringency", + "default": "SILENT" + } + ], + "out": [ + { + "id": "#gatk_revert_sam_4_1_8_0/gatk_revert_sam_output" + }, + { + "id": 
"#gatk_revert_sam_4_1_8_0/gatk_revert_sam_output_map" + } + ], + "run": "#gatk_revert_sam_4.1.8.0.cwl", + "label": "GATK-RevertSam", + "https://www.sevenbridges.com/x": 351.4375, + "https://www.sevenbridges.com/y": 580.8984375 + } + ], + "requirements": [ + { + "class": "SubworkflowFeatureRequirement" + } + ] + } + ], + "cwlVersion": "v1.0", + "$schemas": [ + "http://schema.org/version/latest/schemaorg-current-http.rdf" + ] +} \ No newline at end of file