Skip to content

Commit

Permalink
IN-500 correct AF and order
Browse files — browse the repository at this point in the history
  • Loading branch information
Aisha-D committed Oct 18, 2024
1 parent 8c5c452 commit b1f3201
Showing 1 changed file with 20 additions and 10 deletions.
30 changes: 20 additions & 10 deletions resources/home/dnanexus/generate_workbook/utils/vcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def process(self) -> None:
self.add_additional_columns()

if self.args.af_format == "percent":
self.percent_af()
self.percent_af(self.vcfs)

if self.args.report_text:
self.make_report_text(self.vcfs)
Expand All @@ -191,7 +191,7 @@ def process(self) -> None:
self.drop_columns()

if self.args.reorder:
self.order_columns()
self.order_columns(self.vcfs)

self.vcfs = self.rename_columns(self.vcfs)

Expand Down Expand Up @@ -440,15 +440,19 @@ def read_additional_files(self):
# to get things like split INFO columns and hyperlinks

file_df = splitColumns().split(file_df)
file_df = self.make_report_text([file_df])[0]
if self.args.af_format == "percent":
file_df = self.percent_af([file_df])[0]
if self.args.report_text:
file_df = self.make_report_text([file_df])[0]

file_df = self.format_strings([file_df])[0]
file_df = self.add_hyperlinks([file_df])[0]
file_df = self.percent_af([file_df])[0]

if self.args.exclude or self.args.include:
self.drop_columns([file_df])

self.order_columns([file_df])
if self.args.reorder:
file_df = self.order_columns([file_df])[0]

file_df.columns = self.strip_csq_prefix(file_df)
file_df = self.rename_columns([file_df])[0]
Expand Down Expand Up @@ -735,7 +739,7 @@ def drop_columns(self, vcfs=None) -> None:
vcfs[idx].drop(to_drop, axis=1, inplace=True, errors='ignore')


def order_columns(self) -> None:
def order_columns(self, vcfs) -> None:
"""
Reorder columns by specified order from `--reorder` argument, any not
specified will retain original order after reorder columns
Expand All @@ -746,7 +750,7 @@ def order_columns(self) -> None:
Raised when columns specified with --reorder are not
present in one or more of the dataframes
"""
for idx, vcf in enumerate(self.vcfs):
for idx, vcf in enumerate(vcfs):
vcf_columns = list(vcf.columns)

# check columns given are present in vcf
Expand All @@ -765,7 +769,9 @@ def order_columns(self) -> None:
[vcf_columns.remove(x) for x in self.args.reorder]
column_order = self.args.reorder + vcf_columns

self.vcfs[idx] = vcf[column_order]
vcfs[idx] = vcf[column_order]

return vcfs


def add_additional_columns(self) -> None:
Expand Down Expand Up @@ -953,18 +959,22 @@ def add_raw_change(self) -> None:
'{0[CHROM]}:g.{0[POS]}{0[REF]}>{0[ALT]}'.format, axis=1)


def percent_af(self):
def percent_af(self, vcfs):
"""
Finds the column with "AF" and will convert the number format
to percent
"""
# find the sheets and apply to all sheets
for vcf in self.vcfs:
for idx, vcf in enumerate(vcfs):
if 'AF' not in vcf.columns:
continue
vcf['AF'] = vcf['AF'].astype(np.float16)
vcf['AF'] = vcf['AF'].map(lambda n: '{:,.1%}'.format(n))

vcfs[idx] = vcf

return vcfs


def make_report_text(self, vcfs):
"""
Expand Down

0 comments on commit b1f3201

Please sign in to comment.