Skip to content

Commit

Permalink
Merge pull request #159 from eastgenomics/IN-384_v2.6.0
Browse files Browse the repository at this point in the history
IN-384_v2.6.0 - Adding variant counts as DNAnexus file details
  • Loading branch information
rklocke authored Dec 18, 2023
2 parents 4b914db + 9cdfcf6 commit b1daf06
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 12 deletions.
10 changes: 10 additions & 0 deletions Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,16 @@ This is the source code for an app that runs on the DNAnexus Platform.
For more information about how to run or modify it, see
https://documentation.dnanexus.com/.


## File details
The app also adds `details` metadata, containing variant counts, to the output xlsx report file in DNAnexus. Example file details when `-isummary=Dias`, `-iclinical_indication=R208.1_Inherited breast cancer and ovarian cancer_P` and an `-ifilter` are provided, with `-ikeep_filtered=True`:
```
"clinical_indication": "R208.1_Inherited breast cancer and ovarian cancer_P",
"included": 10,
"excluded": 255
```
Note: if `-isummary` is not Dias, `-ikeep_filtered=False` and `-iclinical_indication` is not provided, then the only detail added to the file would be `"included": 10`. If, in addition, no filtering is performed, then only `"variants": 265` would be added as details.

#### This app was made by EMEE GLH

[bcftools]: https://samtools.github.io/bcftools/bcftools.html#filter
Expand Down
4 changes: 2 additions & 2 deletions dxapp.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
"title": "eggd_generate_variant_workbook",
"summary": "Create Excel workbook from VEP annotated vcf",
"dxapi": "1.0.0",
"version": "2.5.0",
"whatsNew": "* v2.0.0 Rewrite of previous app to generate xlsx file from a VEP annotated VCF(s); * v2.0.1 Bug fix to correctly treat CHROM as string values; * v2.0.2 Bug fix for ACMG report template structure; * v2.0.3 Bug fixes for issues with hyperlinks, changed app name to eggd_generate_variant_workbook; * v2.1.0 Handle VCFs from GATK gCNV and Illumina TSO500, readability tweaks to variant sheets; * v2.1.1 Bug fix for typing of numeric values in hyperlinks; * v2.2.0 Added ability to pass in non VCF files (tsvs/csvs and images) to additional sheets, optional adding of links to DECIPHER with --decipher; * v2.3.0 Added conditional colouring of cells in variant sheets, new 'basic' summary sheet; * v2.4.0 Added handling for duplicate annotation in VEP fields (i.e. cosmic, CGC, etc..); v2.5.0 Better parsing of CombinedVariantOutput files as additional files",
"version": "2.6.0",
"whatsNew": "* v2.0.0 Rewrite of previous app to generate xlsx file from a VEP annotated VCF(s); * v2.0.1 Bug fix to correctly treat CHROM as string values; * v2.0.2 Bug fix for ACMG report template structure; * v2.0.3 Bug fixes for issues with hyperlinks, changed app name to eggd_generate_variant_workbook; * v2.1.0 Handle VCFs from GATK gCNV and Illumina TSO500, readability tweaks to variant sheets; * v2.1.1 Bug fix for typing of numeric values in hyperlinks; * v2.2.0 Added ability to pass in non VCF files (tsvs/csvs and images) to additional sheets, optional adding of links to DECIPHER with --decipher; * v2.3.0 Added conditional colouring of cells in variant sheets, new 'basic' summary sheet; * v2.4.0 Added handling for duplicate annotation in VEP fields (i.e. cosmic, CGC, etc..); * v2.5.0 Better parsing of CombinedVariantOutput files as additional files; * v2.6.0 Add variant counts as DNAnexus file details to the .xlsx workbook",
"authorizedUsers": [
"org-emee_1"
],
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
colour==0.1.5
openpyxl==3.0.9
openpyxl==3.1.2
pandas==1.3.5
et-xmlfile==1.1.0
filetype==1.1.0
Expand Down
40 changes: 33 additions & 7 deletions resources/home/dnanexus/generate_workbook/utils/excel.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from collections import defaultdict
import json
import operator
import os
from pathlib import Path
import re
from string import ascii_uppercase as uppercase
Expand Down Expand Up @@ -181,7 +183,7 @@ def helios_summary(self) -> None:

self.summary.cell(9, 1).value = "Variant totals"

to_bold.extend(["A1", "A2", "A4", "A5", "A6", "A9"])
to_bold.extend(["A1", "A2", "A4", "A5", "A6", "A9"])

# get sample name from vcf, should only be one but handle everything
# list-wise just in case
Expand Down Expand Up @@ -228,7 +230,7 @@ def helios_summary(self) -> None:
# not 4 cols => didn't parse out just sample values in
# utils.parse_metrics => skip
continue

# specific metrics lines we want to parse out
idxs = []
idxs.append(df[0].eq('Metric (UOM)').idxmax())
Expand All @@ -249,7 +251,7 @@ def helios_summary(self) -> None:
self.summary.cell(row_count, 2).value = lsl
self.summary.cell(row_count, 3).value = usl
self.summary.cell(row_count, 4).value = sample

# perform colouring like in self.colour_metrics(), lazily
# catch anything in case of weird values to not break
try:
Expand Down Expand Up @@ -278,10 +280,10 @@ def helios_summary(self) -> None:
"WARNING: error in colouring metrics values in "
f"summary sheet: {err}.\nContinuing without colouring"
)

to_bold.append(f"A{row_count}")
row_count += 1

# do the colouring
for colour, idxs in colouring.items():
for idx in idxs:
Expand All @@ -301,7 +303,7 @@ def helios_summary(self) -> None:
start_color='b30000'
)
row_count += 2

# Parsing of TMB/MSI/Gene Amplifications into summary
for _, df in self.additional_files.items():
if df.empty:
Expand All @@ -327,7 +329,7 @@ def helios_summary(self) -> None:
for ref in list(set(self.refs)):
self.summary.cell(row_count, 2).value = ref
row_count += 1

row_count += 2

if self.args.human_filter:
Expand Down Expand Up @@ -374,6 +376,7 @@ def dias_summary(self) -> None:
- sample ID, panel(s), run IDs etc.
- formatted tables for them to fill in reporting
"""
details_dict = defaultdict()
# write titles for summary values
self.summary.cell(1, 1).value = "Sample ID:"
self.summary.cell(1, 5).value = "Clinical Indication(s):"
Expand All @@ -394,6 +397,13 @@ def dias_summary(self) -> None:
self.summary.cell(1, 6).value = self.args.clinical_indication
self.summary.cell(2, 6).value = self.args.panel

# If clinical indication given as arg, add this to our dict
# Write out the dias clinical indication info to JSON file
if self.args.clinical_indication:
details_dict['clinical_indication'] = self.args.clinical_indication
with open('details.json', 'w', encoding='utf8') as details_json:
json.dump(details_dict, details_json)

# write total rows in each sheet
count = 34

Expand All @@ -406,6 +416,7 @@ def dias_summary(self) -> None:
to_bold.append(f"A{count}")
count += 1


count += 5

# write genome reference(s) parsed from vcf header
Expand Down Expand Up @@ -724,6 +735,16 @@ def write_variants(self) -> None:
"this may take a few minutes..."
)

# If details.json already exists (dias summary page written and
# clinical indication is given as arg) then
# open it and read it in so we can add var counts to the dict
# otherwise just make a new empty dict
if os.path.isfile('details.json'):
with open('details.json', 'r', encoding='utf8') as details_json:
details_dict = json.load(details_json)
else:
details_dict = defaultdict()

with self.writer:
# add variants
for sheet, vcf in zip(self.args.sheets, self.vcfs):
Expand All @@ -732,6 +753,7 @@ def write_variants(self) -> None:
f"\nWriting {len(vcf)} rows to {sheet} sheet "
f"({sheet_no}/{len(self.args.sheets)})"
)
details_dict[sheet] = len(vcf)

# timing how long it takes to write because its slow
start = timer()
Expand Down Expand Up @@ -764,6 +786,10 @@ def write_variants(self) -> None:
self.set_types(curr_worksheet)
self.workbook.save(self.args.output)

# Write out dict to file
with open('details.json', 'w', encoding='utf8') as details_json:
json.dump(details_dict, details_json)


def write_additional_files(self) -> None:
"""
Expand Down
5 changes: 3 additions & 2 deletions src/code.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ set -exo pipefail

_dias_report_setup () {
# function to handle parsing values and reading
# manifest / g2t etc. for Dias sampels
# manifest / g2t etc. for Dias samples
mark-section "Getting output name for Dias"

project_id=$DX_PROJECT_CONTEXT_ID
Expand Down Expand Up @@ -122,7 +122,8 @@ main() {
fi

mark-section "Uploading output"
output_xlsx=$(dx upload /home/dnanexus/out/xlsx_reports/* --brief)
JSON_DETAILS=$(cat details.json)
output_xlsx=$(dx upload /home/dnanexus/out/xlsx_reports/* --brief --details "$JSON_DETAILS")
dx-jobutil-add-output xlsx_report "$output_xlsx" --class=file

if [ "$keep_tmp" == true ]; then
Expand Down

0 comments on commit b1daf06

Please sign in to comment.