Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Behavior to allow different percentile threshold strategies for candidates #1185

Open
wants to merge 12 commits into
base: staging
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 75 additions & 66 deletions pvactools/lib/aggregate_all_epitopes.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ def execute(self):
'allele_expr_threshold': self.allele_expr_threshold,
'maximum_transcript_support_level': self.maximum_transcript_support_level,
'percentile_threshold': self.percentile_threshold,
'percentile_threshold_strategy': self.percentile_threshold_strategy,
'use_allele_specific_binding_thresholds': self.use_allele_specific_binding_thresholds,
'mt_top_score_metric': self.mt_top_score_metric,
'wt_top_score_metric': self.wt_top_score_metric,
Expand Down Expand Up @@ -278,6 +279,7 @@ def __init__(
expn_val=1,
maximum_transcript_support_level=1,
percentile_threshold=None,
percentile_threshold_strategy='conservative',
allele_specific_binding_thresholds=False,
top_score_metric="median",
allele_specific_anchors=False,
Expand All @@ -291,6 +293,7 @@ def __init__(
self.binding_threshold = binding_threshold
self.use_allele_specific_binding_thresholds = allele_specific_binding_thresholds
self.percentile_threshold = percentile_threshold
self.percentile_threshold_strategy = percentile_threshold_strategy
self.aggregate_inclusion_binding_threshold = aggregate_inclusion_binding_threshold
self.aggregate_inclusion_count_limit = aggregate_inclusion_count_limit
self.allele_expr_threshold = trna_vaf * expn_val * 10
Expand Down Expand Up @@ -440,6 +443,17 @@ def get_tier(self, mutation, vaf_clonal):
binding_threshold = self.allele_specific_binding_thresholds[mutation['HLA Allele']]
else:
binding_threshold = self.binding_threshold

ic50_pass = mutation["{} MT IC50 Score".format(self.mt_top_score_metric)] < binding_threshold
percentile_pass = (
self.percentile_threshold is None or
mutation["{} MT Percentile".format(self.mt_top_score_metric)] < self.percentile_threshold
)
binding_pass = (
(ic50_pass and percentile_pass)
if self.percentile_threshold_strategy == 'conservative'
else (ic50_pass or percentile_pass)
)

anchor_residue_pass = self.is_anchor_residue_pass(mutation)

Expand All @@ -462,40 +476,28 @@ def get_tier(self, mutation, vaf_clonal):
vaf_clonal_pass = False

#writing these out as explicitly as possible for ease of understanding
if (mutation["{} MT IC50 Score".format(self.mt_top_score_metric)] < binding_threshold and
if (binding_pass and
allele_expr_pass and
vaf_clonal_pass and
tsl_pass and
anchor_residue_pass):
if self.percentile_threshold:
if mutation["{} MT Percentile".format(self.mt_top_score_metric)] < self.percentile_threshold:
return "Pass"
else:
return "Pass"
return "Pass"

#anchor residues
if (mutation["{} MT IC50 Score".format(self.mt_top_score_metric)] < binding_threshold and
if (binding_pass and
allele_expr_pass and
vaf_clonal_pass and
tsl_pass and
not anchor_residue_pass):
if self.percentile_threshold:
if mutation["{} MT Percentile".format(self.mt_top_score_metric)] < self.percentile_threshold:
return "Anchor"
else:
return "Anchor"
return "Anchor"

#not in founding clone
if (mutation["{} MT IC50 Score".format(self.mt_top_score_metric)] < binding_threshold and
if (binding_pass and
allele_expr_pass and
not vaf_clonal_pass and
tsl_pass and
anchor_residue_pass):
if self.percentile_threshold:
if mutation["{} MT Percentile".format(self.mt_top_score_metric)] < self.percentile_threshold:
return "Subclonal"
else:
return "Subclonal"
return "Subclonal"

#relax expression. Include sites that have reasonable vaf but zero overall gene expression
lowexpr=False
Expand All @@ -507,16 +509,12 @@ def get_tier(self, mutation, vaf_clonal):
lowexpr=True

#if low expression is the only strike against it, it gets lowexpr label (multiple strikes will pass through to poor)
if (mutation["{} MT IC50 Score".format(self.mt_top_score_metric)] < binding_threshold and
if (binding_pass and
lowexpr and
vaf_clonal_pass and
tsl_pass and
anchor_residue_pass):
if self.percentile_threshold:
if mutation["{} MT Percentile".format(self.mt_top_score_metric)] < self.percentile_threshold:
return "LowExpr"
else:
return "LowExpr"
return "LowExpr"

#zero expression
if (mutation["Gene Expression"] == 0 or mutation["Tumor RNA VAF"] == 0) and not lowexpr:
Expand Down Expand Up @@ -834,6 +832,7 @@ def __init__(self,
output_file,
binding_threshold=500,
percentile_threshold=None,
percentile_threshold_strategy='conservative',
allele_specific_binding_thresholds=False,
top_score_metric="median",
aggregate_inclusion_binding_threshold=5000,
Expand All @@ -843,6 +842,7 @@ def __init__(self,
self.output_file = output_file
self.binding_threshold = binding_threshold
self.percentile_threshold = percentile_threshold
self.percentile_threshold_strategy = percentile_threshold_strategy
self.use_allele_specific_binding_thresholds = allele_specific_binding_thresholds
self.aggregate_inclusion_binding_threshold = aggregate_inclusion_binding_threshold
self.aggregate_inclusion_count_limit = aggregate_inclusion_count_limit
Expand Down Expand Up @@ -963,6 +963,7 @@ def __init__(
output_file,
binding_threshold=500,
percentile_threshold=None,
percentile_threshold_strategy='conservative',
allele_specific_binding_thresholds=False,
top_score_metric="median",
read_support=5,
Expand All @@ -976,6 +977,7 @@ def __init__(
output_file,
binding_threshold=binding_threshold,
percentile_threshold=percentile_threshold,
percentile_threshold_strategy = percentile_threshold_strategy,
allele_specific_binding_thresholds=allele_specific_binding_thresholds,
top_score_metric=top_score_metric,
aggregate_inclusion_binding_threshold=aggregate_inclusion_binding_threshold,
Expand Down Expand Up @@ -1014,6 +1016,17 @@ def get_tier(self, mutation, vaf_clonal):
binding_threshold = self.allele_specific_binding_thresholds[mutation['HLA Allele']]
else:
binding_threshold = self.binding_threshold

ic50_pass = mutation["{} IC50 Score".format(self.top_score_metric)] < binding_threshold
percentile_pass = (
self.percentile_threshold is None or
mutation["{} Percentile".format(self.top_score_metric)] < self.percentile_threshold
)
binding_pass = (
(ic50_pass and percentile_pass)
if self.percentile_threshold_strategy == 'conservative'
else (ic50_pass or percentile_pass)
)

low_read_support = False
if mutation['Read Support'] != 'NA' and mutation['Read Support'] < self.read_support:
Expand All @@ -1023,34 +1036,22 @@ def get_tier(self, mutation, vaf_clonal):
if mutation['Expression'] != 'NA' and mutation['Expression'] < self.expn_val:
low_expr = True

if (mutation["{} IC50 Score".format(self.top_score_metric)] < binding_threshold and
if (binding_pass and
not low_read_support and
not low_expr):
if self.percentile_threshold:
if mutation["{} Percentile".format(self.top_score_metric)] < self.percentile_threshold:
return "Pass"
else:
return "Pass"
return "Pass"

#low read support
if (mutation["{} IC50 Score".format(self.top_score_metric)] < binding_threshold and
if (binding_pass and
low_read_support and
not low_expr):
if self.percentile_threshold:
if mutation["{} MT IC50 Percentile".format(self.mt_top_score_metric)] < self.percentile_threshold:
return "LowReadSupport"
else:
return "LowReadSupport"
return "LowReadSupport"

#low expression
if (mutation["{} IC50 Score".format(self.top_score_metric)] < binding_threshold and
if (binding_pass and
not low_read_support and
low_expr):
if self.percentile_threshold:
if mutation["{} MT IC50 Percentile".format(self.mt_top_score_metric)] < self.percentile_threshold:
return "LowExpr"
else:
return "LowExpr"
return "LowExpr"

return "Poor"

Expand Down Expand Up @@ -1098,13 +1099,20 @@ def get_tier(self, mutation, vaf_clonal):
binding_threshold = self.allele_specific_binding_thresholds[mutation['HLA Allele']]
else:
binding_threshold = self.binding_threshold

ic50_pass = mutation["{} IC50 Score".format(self.top_score_metric)] < binding_threshold
percentile_pass = (
self.percentile_threshold is None or
mutation["{} Percentile".format(self.top_score_metric)] < self.percentile_threshold
)
binding_pass = (
(ic50_pass and percentile_pass)
if self.percentile_threshold_strategy == 'conservative'
else (ic50_pass or percentile_pass)
)

if mutation["{} IC50 Score".format(self.top_score_metric)] < binding_threshold:
if self.percentile_threshold:
if mutation["{} Percentile".format(self.top_score_metric)] < self.percentile_threshold:
return "Pass"
else:
return "Pass"
if binding_pass:
return "Pass"

return "Poor"

Expand All @@ -1117,6 +1125,7 @@ def __init__(
tumor_purity=None,
binding_threshold=500,
percentile_threshold=None,
percentile_threshold_strategy='conservative',
allele_specific_binding_thresholds=False,
aggregate_inclusion_binding_threshold=5000,
aggregate_inclusion_count_limit=15,
Expand All @@ -1132,6 +1141,7 @@ def __init__(
output_file,
binding_threshold=binding_threshold,
percentile_threshold=percentile_threshold,
percentile_threshold_strategy = percentile_threshold_strategy,
allele_specific_binding_thresholds=allele_specific_binding_thresholds,
aggregate_inclusion_binding_threshold=aggregate_inclusion_binding_threshold,
aggregate_inclusion_count_limit=aggregate_inclusion_count_limit,
Expand Down Expand Up @@ -1164,6 +1174,17 @@ def get_tier(self, mutation, vaf_clonal):
binding_threshold = self.allele_specific_binding_thresholds[mutation['HLA Allele']]
else:
binding_threshold = self.binding_threshold

ic50_pass = mutation["{} IC50 Score".format(self.top_score_metric)] < binding_threshold
percentile_pass = (
self.percentile_threshold is None or
mutation["{} Percentile".format(self.top_score_metric)] < self.percentile_threshold
)
binding_pass = (
(ic50_pass and percentile_pass)
if self.percentile_threshold_strategy == 'conservative'
else (ic50_pass or percentile_pass)
)

tsl_pass = True
if mutation["Transcript Support Level"] == "Not Supported":
Expand All @@ -1184,26 +1205,18 @@ def get_tier(self, mutation, vaf_clonal):
vaf_clonal_pass = False

#writing these out as explicitly as possible for ease of understanding
if (mutation["{} IC50 Score".format(self.top_score_metric)] < binding_threshold and
if (binding_pass and
allele_expr_pass and
vaf_clonal_pass and
tsl_pass):
if self.percentile_threshold:
if mutation["{} Percentile".format(self.top_score_metric)] < self.percentile_threshold:
return "Pass"
else:
return "Pass"
return "Pass"

#not in founding clone
if (mutation["{} IC50 Score".format(self.top_score_metric)] < binding_threshold and
if (binding_pass and
allele_expr_pass and
not vaf_clonal_pass and
tsl_pass):
if self.percentile_threshold:
if mutation["{} Percentile".format(self.top_score_metric)] < self.percentile_threshold:
return "Subclonal"
else:
return "Subclonal"
return "Subclonal"

#relax expression. Include sites that have reasonable vaf but zero overall gene expression
lowexpr=False
Expand All @@ -1215,15 +1228,11 @@ def get_tier(self, mutation, vaf_clonal):
lowexpr=True

#if low expression is the only strike against it, it gets lowexpr label (multiple strikes will pass through to poor)
if (mutation["{} IC50 Score".format(self.top_score_metric)] < binding_threshold and
if (binding_pass and
lowexpr and
vaf_clonal_pass and
tsl_pass):
if self.percentile_threshold:
if mutation["{} Percentile".format(self.top_score_metric)] < self.percentile_threshold:
return "LowExpr"
else:
return "LowExpr"
return "LowExpr"

#zero expression
if (mutation["Gene Expression"] == 0 or mutation["Tumor RNA VAF"] == 0) and not lowexpr:
Expand Down
20 changes: 13 additions & 7 deletions pvactools/lib/allele_specific_binding_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from pvactools.lib.prediction_class import PredictionClass

class AlleleSpecificBindingFilter:
def __init__(self, input_file, output_file, default_threshold, minimum_fold_change, top_score_metric, exclude_nas, percentile_threshold, file_type='pVACseq'):
def __init__(self, input_file, output_file, default_threshold, minimum_fold_change, top_score_metric, exclude_nas, percentile_threshold, file_type='pVACseq', percentile_threshold_strategy="conservative"):
self.input_file = input_file
self.output_file = output_file
self.default_threshold = default_threshold
Expand All @@ -12,6 +12,7 @@ def __init__(self, input_file, output_file, default_threshold, minimum_fold_chan
self.exclude_nas = exclude_nas
self.percentile_threshold = percentile_threshold
self.file_type = file_type
self.percentile_threshold_strategy = percentile_threshold_strategy

def execute(self):
with open(self.input_file, 'r') as input_fh:
Expand Down Expand Up @@ -45,18 +46,23 @@ def execute(self):
fold_change = sys.maxsize if entry['Corresponding Fold Change'] == 'NA' else float(entry['Corresponding Fold Change'])
percentile_column = 'Best MT Percentile'

if self.percentile_threshold is not None:
if float(entry[percentile_column]) > self.percentile_threshold:
continue

threshold = PredictionClass.cutoff_for_allele(entry['HLA Allele'])
threshold = self.default_threshold if threshold is None else float(threshold)

if score == 'NA':
if self.exclude_nas:
continue
elif float(score) > threshold:
continue
else:
if self.percentile_threshold is not None:
if self.percentile_threshold_strategy == 'conservative':
if float(score) > threshold or float(entry[percentile_column]) > self.percentile_threshold:
continue
else:
if float(score) > threshold and float(entry[percentile_column]) > self.percentile_threshold:
continue
else:
if float(score) > threshold:
continue

if self.minimum_fold_change is not None and fold_change < self.minimum_fold_change:
continue
Expand Down
Loading