diff --git a/pvactools/lib/aggregate_all_epitopes.py b/pvactools/lib/aggregate_all_epitopes.py index ac111244..8957cb49 100644 --- a/pvactools/lib/aggregate_all_epitopes.py +++ b/pvactools/lib/aggregate_all_epitopes.py @@ -234,6 +234,7 @@ def execute(self): 'allele_expr_threshold': self.allele_expr_threshold, 'maximum_transcript_support_level': self.maximum_transcript_support_level, 'percentile_threshold': self.percentile_threshold, + 'percentile_threshold_strategy': self.percentile_threshold_strategy, 'use_allele_specific_binding_thresholds': self.use_allele_specific_binding_thresholds, 'mt_top_score_metric': self.mt_top_score_metric, 'wt_top_score_metric': self.wt_top_score_metric, @@ -278,6 +279,7 @@ def __init__( expn_val=1, maximum_transcript_support_level=1, percentile_threshold=None, + percentile_threshold_strategy='conservative', allele_specific_binding_thresholds=False, top_score_metric="median", allele_specific_anchors=False, @@ -291,6 +293,7 @@ def __init__( self.binding_threshold = binding_threshold self.use_allele_specific_binding_thresholds = allele_specific_binding_thresholds self.percentile_threshold = percentile_threshold + self.percentile_threshold_strategy = percentile_threshold_strategy self.aggregate_inclusion_binding_threshold = aggregate_inclusion_binding_threshold self.aggregate_inclusion_count_limit = aggregate_inclusion_count_limit self.allele_expr_threshold = trna_vaf * expn_val * 10 @@ -440,6 +443,17 @@ def get_tier(self, mutation, vaf_clonal): binding_threshold = self.allele_specific_binding_thresholds[mutation['HLA Allele']] else: binding_threshold = self.binding_threshold + + ic50_pass = mutation["{} MT IC50 Score".format(self.mt_top_score_metric)] < binding_threshold + percentile_pass = ( + self.percentile_threshold is None or + mutation["{} MT Percentile".format(self.mt_top_score_metric)] < self.percentile_threshold + ) + binding_pass = ( + (ic50_pass and percentile_pass) + if self.percentile_threshold_strategy == 'conservative' + else (ic50_pass or percentile_pass) + ) anchor_residue_pass = self.is_anchor_residue_pass(mutation) @@ -462,40 +476,28 @@ def get_tier(self, mutation, vaf_clonal): vaf_clonal_pass = False #writing these out as explicitly as possible for ease of understanding - if (mutation["{} MT IC50 Score".format(self.mt_top_score_metric)] < binding_threshold and + if (binding_pass and allele_expr_pass and vaf_clonal_pass and tsl_pass and anchor_residue_pass): - if self.percentile_threshold: - if mutation["{} MT Percentile".format(self.mt_top_score_metric)] < self.percentile_threshold: - return "Pass" - else: - return "Pass" + return "Pass" #anchor residues - if (mutation["{} MT IC50 Score".format(self.mt_top_score_metric)] < binding_threshold and + if (binding_pass and allele_expr_pass and vaf_clonal_pass and tsl_pass and not anchor_residue_pass): - if self.percentile_threshold: - if mutation["{} MT Percentile".format(self.mt_top_score_metric)] < self.percentile_threshold: - return "Anchor" - else: - return "Anchor" + return "Anchor" #not in founding clone - if (mutation["{} MT IC50 Score".format(self.mt_top_score_metric)] < binding_threshold and + if (binding_pass and allele_expr_pass and not vaf_clonal_pass and tsl_pass and anchor_residue_pass): - if self.percentile_threshold: - if mutation["{} MT Percentile".format(self.mt_top_score_metric)] < self.percentile_threshold: - return "Subclonal" - else: - return "Subclonal" + return "Subclonal" #relax expression. Include sites that have reasonable vaf but zero overall gene expression lowexpr=False @@ -507,16 +509,12 @@ def get_tier(self, mutation, vaf_clonal): lowexpr=True #if low expression is the only strike against it, it gets lowexpr label (multiple strikes will pass through to poor) - if (mutation["{} MT IC50 Score".format(self.mt_top_score_metric)] < binding_threshold and + if (binding_pass and lowexpr and vaf_clonal_pass and tsl_pass and anchor_residue_pass): - if self.percentile_threshold: - if mutation["{} MT Percentile".format(self.mt_top_score_metric)] < self.percentile_threshold: - return "LowExpr" - else: - return "LowExpr" + return "LowExpr" #zero expression if (mutation["Gene Expression"] == 0 or mutation["Tumor RNA VAF"] == 0) and not lowexpr: @@ -834,6 +832,7 @@ def __init__(self, output_file, binding_threshold=500, percentile_threshold=None, + percentile_threshold_strategy='conservative', allele_specific_binding_thresholds=False, top_score_metric="median", aggregate_inclusion_binding_threshold=5000, @@ -843,6 +842,7 @@ def __init__(self, self.output_file = output_file self.binding_threshold = binding_threshold self.percentile_threshold = percentile_threshold + self.percentile_threshold_strategy = percentile_threshold_strategy self.use_allele_specific_binding_thresholds = allele_specific_binding_thresholds self.aggregate_inclusion_binding_threshold = aggregate_inclusion_binding_threshold self.aggregate_inclusion_count_limit = aggregate_inclusion_count_limit @@ -963,6 +963,7 @@ def __init__( output_file, binding_threshold=500, percentile_threshold=None, + percentile_threshold_strategy='conservative', allele_specific_binding_thresholds=False, top_score_metric="median", read_support=5, @@ -976,6 +977,7 @@ def __init__( output_file, binding_threshold=binding_threshold, percentile_threshold=percentile_threshold, + percentile_threshold_strategy = percentile_threshold_strategy, allele_specific_binding_thresholds=allele_specific_binding_thresholds, top_score_metric=top_score_metric, aggregate_inclusion_binding_threshold=aggregate_inclusion_binding_threshold, @@ -1014,6 +1016,17 @@ def get_tier(self, mutation, vaf_clonal): binding_threshold = self.allele_specific_binding_thresholds[mutation['HLA Allele']] else: binding_threshold = self.binding_threshold + + ic50_pass = mutation["{} IC50 Score".format(self.top_score_metric)] < binding_threshold + percentile_pass = ( + self.percentile_threshold is None or + mutation["{} Percentile".format(self.top_score_metric)] < self.percentile_threshold + ) + binding_pass = ( + (ic50_pass and percentile_pass) + if self.percentile_threshold_strategy == 'conservative' + else (ic50_pass or percentile_pass) + ) low_read_support = False if mutation['Read Support'] != 'NA' and mutation['Read Support'] < self.read_support: @@ -1023,34 +1036,22 @@ def get_tier(self, mutation, vaf_clonal): if mutation['Expression'] != 'NA' and mutation['Expression'] < self.expn_val: low_expr = True - if (mutation["{} IC50 Score".format(self.top_score_metric)] < binding_threshold and + if (binding_pass and not low_read_support and not low_expr): - if self.percentile_threshold: - if mutation["{} Percentile".format(self.top_score_metric)] < self.percentile_threshold: - return "Pass" - else: - return "Pass" + return "Pass" #low read support - if (mutation["{} IC50 Score".format(self.top_score_metric)] < binding_threshold and + if (binding_pass and low_read_support and not low_expr): - if self.percentile_threshold: - if mutation["{} MT IC50 Percentile".format(self.mt_top_score_metric)] < self.percentile_threshold: - return "LowReadSupport" - else: - return "LowReadSupport" + return "LowReadSupport" #low expression - if (mutation["{} IC50 Score".format(self.top_score_metric)] < binding_threshold and + if (binding_pass and not low_read_support and low_expr): - if self.percentile_threshold: - if mutation["{} MT IC50 Percentile".format(self.mt_top_score_metric)] < self.percentile_threshold: - return "LowExpr" - else: - return "LowExpr" + return "LowExpr" return "Poor" @@ -1098,13 +1099,20 @@ def get_tier(self, mutation, vaf_clonal): binding_threshold = self.allele_specific_binding_thresholds[mutation['HLA Allele']] else: binding_threshold = self.binding_threshold + + ic50_pass = mutation["{} IC50 Score".format(self.top_score_metric)] < binding_threshold + percentile_pass = ( + self.percentile_threshold is None or + mutation["{} Percentile".format(self.top_score_metric)] < self.percentile_threshold + ) + binding_pass = ( + (ic50_pass and percentile_pass) + if self.percentile_threshold_strategy == 'conservative' + else (ic50_pass or percentile_pass) + ) - if mutation["{} IC50 Score".format(self.top_score_metric)] < binding_threshold: - if self.percentile_threshold: - if mutation["{} Percentile".format(self.top_score_metric)] < self.percentile_threshold: - return "Pass" - else: - return "Pass" + if binding_pass: + return "Pass" return "Poor" @@ -1117,6 +1125,7 @@ def __init__( tumor_purity=None, binding_threshold=500, percentile_threshold=None, + percentile_threshold_strategy='conservative', allele_specific_binding_thresholds=False, aggregate_inclusion_binding_threshold=5000, aggregate_inclusion_count_limit=15, @@ -1132,6 +1141,7 @@ def __init__( output_file, binding_threshold=binding_threshold, percentile_threshold=percentile_threshold, + percentile_threshold_strategy = percentile_threshold_strategy, allele_specific_binding_thresholds=allele_specific_binding_thresholds, aggregate_inclusion_binding_threshold=aggregate_inclusion_binding_threshold, aggregate_inclusion_count_limit=aggregate_inclusion_count_limit, @@ -1164,6 +1174,17 @@ def get_tier(self, mutation, vaf_clonal): binding_threshold = self.allele_specific_binding_thresholds[mutation['HLA Allele']] else: binding_threshold = self.binding_threshold + + ic50_pass = mutation["{} IC50 Score".format(self.top_score_metric)] < binding_threshold + percentile_pass = ( + self.percentile_threshold is None or + mutation["{} Percentile".format(self.top_score_metric)] < self.percentile_threshold + ) + binding_pass = ( + (ic50_pass and percentile_pass) + if self.percentile_threshold_strategy == 'conservative' + else (ic50_pass or percentile_pass) + ) tsl_pass = True if mutation["Transcript Support Level"] == "Not Supported": @@ -1184,26 +1205,18 @@ def get_tier(self, mutation, vaf_clonal): vaf_clonal_pass = False #writing these out as explicitly as possible for ease of understanding - if (mutation["{} IC50 Score".format(self.top_score_metric)] < binding_threshold and + if (binding_pass and allele_expr_pass and vaf_clonal_pass and tsl_pass): - if self.percentile_threshold: - if mutation["{} Percentile".format(self.top_score_metric)] < self.percentile_threshold: - return "Pass" - else: - return "Pass" + return "Pass" #not in founding clone - if (mutation["{} IC50 Score".format(self.top_score_metric)] < binding_threshold and + if (binding_pass and allele_expr_pass and not vaf_clonal_pass and tsl_pass): - if self.percentile_threshold: - if mutation["{} Percentile".format(self.top_score_metric)] < self.percentile_threshold: - return "Subclonal" - else: - return "Subclonal" + return "Subclonal" #relax expression. Include sites that have reasonable vaf but zero overall gene expression lowexpr=False @@ -1215,15 +1228,11 @@ def get_tier(self, mutation, vaf_clonal): lowexpr=True #if low expression is the only strike against it, it gets lowexpr label (multiple strikes will pass through to poor) - if (mutation["{} IC50 Score".format(self.top_score_metric)] < binding_threshold and + if (binding_pass and lowexpr and vaf_clonal_pass and tsl_pass): - if self.percentile_threshold: - if mutation["{} Percentile".format(self.top_score_metric)] < self.percentile_threshold: - return "LowExpr" - else: - return "LowExpr" + return "LowExpr" #zero expression if (mutation["Gene Expression"] == 0 or mutation["Tumor RNA VAF"] == 0) and not lowexpr: diff --git a/pvactools/lib/allele_specific_binding_filter.py b/pvactools/lib/allele_specific_binding_filter.py index 2168d540..f9263ebf 100644 --- a/pvactools/lib/allele_specific_binding_filter.py +++ b/pvactools/lib/allele_specific_binding_filter.py @@ -3,7 +3,7 @@ from pvactools.lib.prediction_class import PredictionClass class AlleleSpecificBindingFilter: - def __init__(self, input_file, output_file, default_threshold, minimum_fold_change, top_score_metric, exclude_nas, percentile_threshold, file_type='pVACseq'): + def __init__(self, input_file, output_file, default_threshold, minimum_fold_change, top_score_metric, exclude_nas, percentile_threshold, file_type='pVACseq', percentile_threshold_strategy="conservative"): self.input_file = input_file self.output_file = output_file self.default_threshold = default_threshold @@ -12,6 +12,7 @@ def __init__(self, input_file, output_file, default_threshold, minimum_fold_chan self.exclude_nas = exclude_nas self.percentile_threshold = percentile_threshold self.file_type = file_type + self.percentile_threshold_strategy = percentile_threshold_strategy def execute(self): with open(self.input_file, 'r') as input_fh: @@ -45,18 +46,23 @@ def execute(self): fold_change = sys.maxsize if entry['Corresponding Fold Change'] == 'NA' else float(entry['Corresponding Fold Change']) percentile_column = 'Best MT Percentile' - if self.percentile_threshold is not None: - if float(entry[percentile_column]) > self.percentile_threshold: - continue - threshold = PredictionClass.cutoff_for_allele(entry['HLA Allele']) threshold = self.default_threshold if threshold is None else float(threshold) if score == 'NA': if self.exclude_nas: continue - elif float(score) > threshold: - continue + else: + if self.percentile_threshold is not None: + if self.percentile_threshold_strategy == 'conservative': + if float(score) > threshold or float(entry[percentile_column]) > self.percentile_threshold: + continue + else: + if float(score) > threshold and float(entry[percentile_column]) > self.percentile_threshold: + continue + else: + if float(score) > threshold: + continue if self.minimum_fold_change is not None and fold_change < self.minimum_fold_change: continue diff --git a/pvactools/lib/binding_filter.py b/pvactools/lib/binding_filter.py index 183af292..eadad92c 100644 --- a/pvactools/lib/binding_filter.py +++ b/pvactools/lib/binding_filter.py @@ -7,11 +7,12 @@ from pvactools.lib.run_utils import * class BindingFilter: - def __init__(self, input_file, output_file, binding_threshold, minimum_fold_change, top_score_metric, exclude_nas, allele_specific_cutoffs, percentile_threshold, file_type='pVACseq'): + def __init__(self, input_file, output_file, binding_threshold, minimum_fold_change, top_score_metric, exclude_nas, allele_specific_cutoffs, percentile_threshold, percentile_threshold_strategy='conservative', file_type='pVACseq'): self.input_file = input_file self.output_file = output_file self.binding_threshold = binding_threshold self.percentile_threshold = percentile_threshold + self.percentile_threshold_strategy = percentile_threshold_strategy self.minimum_fold_change = minimum_fold_change self.top_score_metric = top_score_metric self.exclude_nas = exclude_nas @@ -22,7 +23,7 @@ def execute(self): filter_criteria = [] if self.allele_specific_cutoffs: - AlleleSpecificBindingFilter(self.input_file, self.output_file, self.binding_threshold, self.minimum_fold_change, self.top_score_metric, self.exclude_nas, self.percentile_threshold, self.file_type).execute() + AlleleSpecificBindingFilter(self.input_file, self.output_file, self.binding_threshold, self.minimum_fold_change, self.top_score_metric, self.exclude_nas, self.percentile_threshold, self.file_type, self.percentile_threshold_strategy).execute() else: if self.file_type == 'pVACbind' or self.file_type == 'pVACfuse' or self.file_type == 'pVACsplice': if self.top_score_metric == 'median': @@ -48,8 +49,7 @@ def execute(self): elif self.top_score_metric == 'lowest': column = 'Corresponding Fold Change' filter_criteria.append(FilterCriterion(column, '>=', self.minimum_fold_change, exclude_nas=self.exclude_nas)) - - Filter(self.input_file, self.output_file, filter_criteria).execute() + Filter(self.input_file, self.output_file, filter_criteria, [], "AND" if self.percentile_threshold_strategy == 'conservative' else "OR").execute() @classmethod def parser(cls, tool): @@ -78,6 +78,13 @@ def parser(cls, tool): help="Report only epitopes where the mutant allele " +"has a percentile rank below this value." ) + parser.add_argument( + '--percentile-threshold-strategy', + choices=['conservative', 'exploratory'], + help="Specify the candidate inclusion strategy. The 'conservative' option requires a candidate to pass BOTH the binding threshold and percentile threshold (default)." + + " The 'exploratory' option requires a candidate to pass EITHER the binding threshold or the percentile threshold.", + default="conservative", + ) if tool == 'pvacseq': parser.add_argument( '-c', '--minimum-fold-change', type=int, diff --git a/pvactools/lib/filter.py b/pvactools/lib/filter.py index 24e1b8b3..be0b81fa 100644 --- a/pvactools/lib/filter.py +++ b/pvactools/lib/filter.py @@ -5,11 +5,12 @@ pd.options.mode.chained_assignment = None class Filter: - def __init__(self, input_file, output_file, filter_criteria, int_filter_columns=[]): + def __init__(self, input_file, output_file, filter_criteria, int_filter_columns=[], filter_strategy="AND"): self.input_file = input_file self.output_file = output_file self.filter_criteria = filter_criteria self.int_filter_columns = int_filter_columns + self.filter_strategy = filter_strategy def execute(self): with open(self.input_file, 'r') as read_fh, open(self.output_file, 'w') as write_fh: @@ -18,18 +19,21 @@ def execute(self): writer.writeheader() for line in reader: to_filter = False - for criterion in self.filter_criteria: - value = line[criterion.column] - if value == 'inf': - value = sys.maxsize - if value == 'NA': - if criterion.exclude_nas: - to_filter = True - elif criterion.skip_value is not None and value == criterion.skip_value: - pass - else: - if not eval("{} {} {}".format(value, criterion.operator, criterion.threshold)): - to_filter = True + + process_criterion = lambda criterion: ( + (line[criterion.column] == 'NA' and criterion.exclude_nas) or + (line[criterion.column] != 'NA' and criterion.skip_value != line[criterion.column] and not eval("{} {} {}".format( + line[criterion.column] if line[criterion.column] != 'inf' else sys.maxsize, + criterion.operator, + criterion.threshold + ))) + ) + + if self.filter_strategy == "AND": + to_filter = any(process_criterion(criterion) for criterion in self.filter_criteria) + else: + to_filter = all(process_criterion(criterion) for criterion in self.filter_criteria) + if not to_filter: writer.writerow(line) diff --git a/pvactools/lib/post_processor.py b/pvactools/lib/post_processor.py index e8e1cb20..dd5222d3 100644 --- a/pvactools/lib/post_processor.py +++ b/pvactools/lib/post_processor.py @@ -88,6 +88,7 @@ def aggregate_all_epitopes(self): tumor_purity=self.tumor_purity, binding_threshold=self.binding_threshold, percentile_threshold=self.percentile_threshold, + percentile_threshold_strategy=self.percentile_threshold_strategy, allele_specific_binding_thresholds=self.allele_specific_binding_thresholds, trna_vaf=self.trna_vaf, trna_cov=self.trna_cov, @@ -106,6 +107,7 @@ def aggregate_all_epitopes(self): binding_threshold=self.binding_threshold, allele_specific_binding_thresholds=self.allele_specific_binding_thresholds, percentile_threshold=self.percentile_threshold, + percentile_threshold_strategy=self.percentile_threshold_strategy, top_score_metric=self.top_score_metric, read_support=self.read_support, expn_val=self.expn_val, @@ -119,6 +121,7 @@ def aggregate_all_epitopes(self): binding_threshold=self.binding_threshold, allele_specific_binding_thresholds=self.allele_specific_binding_thresholds, percentile_threshold=self.percentile_threshold, + percentile_threshold_strategy=self.percentile_threshold_strategy, top_score_metric=self.top_score_metric, aggregate_inclusion_binding_threshold=self.aggregate_inclusion_binding_threshold, aggregate_inclusion_count_limit=self.aggregate_inclusion_count_limit, @@ -130,6 +133,7 @@ def aggregate_all_epitopes(self): tumor_purity=self.tumor_purity, binding_threshold=self.binding_threshold, percentile_threshold=self.percentile_threshold, + percentile_threshold_strategy=self.percentile_threshold_strategy, allele_specific_binding_thresholds=self.allele_specific_binding_thresholds, aggregate_inclusion_binding_threshold=self.aggregate_inclusion_binding_threshold, aggregate_inclusion_count_limit=self.aggregate_inclusion_count_limit, @@ -162,6 +166,7 @@ def execute_binding_filter(self): self.exclude_NAs, self.allele_specific_binding_thresholds, self.percentile_threshold, + self.percentile_threshold_strategy, self.file_type, ).execute() print("Completed") diff --git a/pvactools/lib/run_argument_parser.py b/pvactools/lib/run_argument_parser.py index 9e5e513f..9456d448 100644 --- a/pvactools/lib/run_argument_parser.py +++ b/pvactools/lib/run_argument_parser.py @@ -71,6 +71,13 @@ def __init__(self, tool_name, input_file_help): help="Report only epitopes where the mutant allele " +"has a percentile rank below this value." ) + parser.add_argument( + '--percentile-threshold-strategy', + choices=['conservative', 'exploratory'], + help="Specify the candidate inclusion strategy. The 'conservative' option requires a candidate to pass BOTH the binding threshold and percentile threshold (default)." + + " The 'exploratory' option requires a candidate to pass EITHER the binding threshold or the percentile threshold.", + default="conservative", + ) parser.add_argument( '--allele-specific-binding-thresholds', help="Use allele-specific binding thresholds. To print the allele-specific binding thresholds run `%s allele_specific_cutoffs`. " % tool_name diff --git a/pvactools/tools/pvacbind/binding_filter.py b/pvactools/tools/pvacbind/binding_filter.py index e1dafe10..6cc3394c 100644 --- a/pvactools/tools/pvacbind/binding_filter.py +++ b/pvactools/tools/pvacbind/binding_filter.py @@ -9,7 +9,7 @@ def main(args_input = sys.argv[1:]): parser = define_parser() args = parser.parse_args(args_input) - BindingFilter(args.input_file, args.output_file, args.binding_threshold, None, args.top_score_metric, args.exclude_NAs, args.allele_specific_binding_thresholds, args.percentile_threshold, 'pVACbind').execute() + BindingFilter(args.input_file, args.output_file, args.binding_threshold, None, args.top_score_metric, args.exclude_NAs, args.allele_specific_binding_thresholds, args.percentile_threshold, args.percentile_threshold_strategy, 'pVACbind').execute() if __name__ == "__main__": main() diff --git a/pvactools/tools/pvacbind/generate_aggregated_report.py b/pvactools/tools/pvacbind/generate_aggregated_report.py index a7e45687..7a8d35a4 100644 --- a/pvactools/tools/pvacbind/generate_aggregated_report.py +++ b/pvactools/tools/pvacbind/generate_aggregated_report.py @@ -38,6 +38,13 @@ def define_parser(): + "has percentile scores below this value and in the \"Relaxed\" tier " + "when the mutant allele has percentile scores below double this value.", ) + parser.add_argument( + '--percentile-threshold-strategy', + choices=['conservative', 'exploratory'], + help="Specify the candidate inclusion strategy. The 'conservative' option requires a candidate to pass BOTH the binding threshold and percentile threshold (default)." + + " The 'exploratory' option requires a candidate to pass EITHER the binding threshold or the percentile threshold.", + default="conservative", + ) parser.add_argument( '--aggregate-inclusion-binding-threshold', type=int, help="Threshold for including epitopes when creating the aggregate report", @@ -72,6 +79,7 @@ def main(args_input = sys.argv[1:]): binding_threshold=args.binding_threshold, allele_specific_binding_thresholds=args.allele_specific_binding_thresholds, percentile_threshold=args.percentile_threshold, + percentile_threshold_strategy=args.percentile_threshold_strategy, top_score_metric=args.top_score_metric, aggregate_inclusion_binding_threshold=args.aggregate_inclusion_binding_threshold, aggregate_inclusion_count_limit=args.aggregate_inclusion_count_limit, diff --git a/pvactools/tools/pvacbind/run.py b/pvactools/tools/pvacbind/run.py index ad21b0f5..7fb6b14b 100644 --- a/pvactools/tools/pvacbind/run.py +++ b/pvactools/tools/pvacbind/run.py @@ -69,6 +69,7 @@ def main(args_input = sys.argv[1:]): 'top_score_metric' : args.top_score_metric, 'binding_threshold' : args.binding_threshold, 'percentile_threshold' : args.percentile_threshold, + 'percentile_threshold_strategy': args.percentile_threshold_strategy, 'allele_specific_binding_thresholds': args.allele_specific_binding_thresholds, 'net_chop_fasta' : args.input_file, 'net_chop_method' : args.net_chop_method, diff --git a/pvactools/tools/pvacfuse/binding_filter.py b/pvactools/tools/pvacfuse/binding_filter.py index 6ce7b473..cbb52f18 100644 --- a/pvactools/tools/pvacfuse/binding_filter.py +++ b/pvactools/tools/pvacfuse/binding_filter.py @@ -9,7 +9,7 @@ def main(args_input = sys.argv[1:]): parser = define_parser() args = parser.parse_args(args_input) - BindingFilter(args.input_file, args.output_file, args.binding_threshold, None, args.top_score_metric, args.exclude_NAs, args.allele_specific_binding_thresholds, args.percentile_threshold, 'pVACfuse').execute() + BindingFilter(args.input_file, args.output_file, args.binding_threshold, None, args.top_score_metric, args.exclude_NAs, args.allele_specific_binding_thresholds, args.percentile_threshold, args.percentile_threshold_strategy, 'pVACfuse').execute() if __name__ == "__main__": main() diff --git a/pvactools/tools/pvacfuse/generate_aggregated_report.py b/pvactools/tools/pvacfuse/generate_aggregated_report.py index dc4a103d..d37bc065 100644 --- a/pvactools/tools/pvacfuse/generate_aggregated_report.py +++ b/pvactools/tools/pvacfuse/generate_aggregated_report.py @@ -38,6 +38,13 @@ def define_parser(): + "has percentile scores below this value and in the \"Relaxed\" tier " + "when the mutant allele has percentile scores below double this value.", ) + parser.add_argument( + '--percentile-threshold-strategy', + choices=['conservative', 'exploratory'], + help="Specify the candidate inclusion strategy. The 'conservative' option requires a candidate to pass BOTH the binding threshold and percentile threshold (default)." + + " The 'exploratory' option requires a candidate to pass EITHER the binding threshold or the percentile threshold.", + default="conservative", + ) parser.add_argument( '--aggregate-inclusion-binding-threshold', type=int, help="Threshold for including epitopes when creating the aggregate report", @@ -82,6 +89,7 @@ def main(args_input = sys.argv[1:]): binding_threshold=args.binding_threshold, allele_specific_binding_thresholds=args.allele_specific_binding_thresholds, percentile_threshold=args.percentile_threshold, + percentile_threshold_strategy=args.percentile_threshold_strategy, top_score_metric=args.top_score_metric, read_support=args.read_support, expn_val=args.expn_val, diff --git a/pvactools/tools/pvacfuse/run.py b/pvactools/tools/pvacfuse/run.py index dcde1933..ad540bdf 100644 --- a/pvactools/tools/pvacfuse/run.py +++ b/pvactools/tools/pvacfuse/run.py @@ -141,6 +141,7 @@ def main(args_input = sys.argv[1:]): 'top_score_metric' : args.top_score_metric, 'binding_threshold' : args.binding_threshold, 'percentile_threshold' : args.percentile_threshold, + 'percentile_threshold_strategy': args.percentile_threshold_strategy, 'allele_specific_binding_thresholds': args.allele_specific_binding_thresholds, 'net_chop_method' : args.net_chop_method, 'net_chop_threshold' : args.net_chop_threshold, diff --git a/pvactools/tools/pvacseq/binding_filter.py b/pvactools/tools/pvacseq/binding_filter.py index 5da66e8c..9fc0edda 100644 --- a/pvactools/tools/pvacseq/binding_filter.py +++ b/pvactools/tools/pvacseq/binding_filter.py @@ -9,7 +9,7 @@ def main(args_input = sys.argv[1:]): parser = define_parser() args = parser.parse_args(args_input) - BindingFilter(args.input_file, args.output_file, args.binding_threshold, args.minimum_fold_change, args.top_score_metric, args.exclude_NAs, args.allele_specific_binding_thresholds, args.percentile_threshold).execute() + BindingFilter(args.input_file, args.output_file, args.binding_threshold, args.minimum_fold_change, args.top_score_metric, args.exclude_NAs, args.allele_specific_binding_thresholds, args.percentile_threshold, args.percentile_threshold_strategy).execute() if __name__ == "__main__": main() diff --git a/pvactools/tools/pvacseq/generate_aggregated_report.py b/pvactools/tools/pvacseq/generate_aggregated_report.py index 5b870395..49893e5f 100644 --- a/pvactools/tools/pvacseq/generate_aggregated_report.py +++ b/pvactools/tools/pvacseq/generate_aggregated_report.py @@ -43,6 +43,13 @@ def define_parser(): + "has percentile scores below this value and in the \"Relaxed\" tier " + "when the mutant allele has percentile scores below double this value.", ) + parser.add_argument( + '--percentile-threshold-strategy', + choices=['conservative', 'exploratory'], + help="Specify the candidate inclusion strategy. The 'conservative' option requires a candidate to pass BOTH the binding threshold and percentile threshold (default)." + + " The 'exploratory' option requires a candidate to pass EITHER the binding threshold or the percentile threshold.", + default="conservative", + ) parser.add_argument( '--aggregate-inclusion-binding-threshold', type=int, help="Binding threshold for including epitopes when creating the aggregate report", @@ -119,6 +126,7 @@ def main(args_input = sys.argv[1:]): binding_threshold=args.binding_threshold, allele_specific_binding_thresholds=args.allele_specific_binding_thresholds, percentile_threshold=args.percentile_threshold, + percentile_threshold_strategy=args.percentile_threshold_strategy, trna_vaf=args.trna_vaf, trna_cov=args.trna_cov, expn_val=args.expn_val, diff --git a/pvactools/tools/pvacseq/run.py b/pvactools/tools/pvacseq/run.py index 518c4adf..27f13307 100644 --- a/pvactools/tools/pvacseq/run.py +++ b/pvactools/tools/pvacseq/run.py @@ -85,6 +85,7 @@ def main(args_input = sys.argv[1:]): 'top_score_metric' : args.top_score_metric, 'binding_threshold' : args.binding_threshold, 'percentile_threshold' : args.percentile_threshold, + 'percentile_threshold_strategy': args.percentile_threshold_strategy, 'allele_specific_binding_thresholds': args.allele_specific_binding_thresholds, 'minimum_fold_change' : args.minimum_fold_change, 'net_chop_method' : args.net_chop_method, diff --git a/pvactools/tools/pvacsplice/binding_filter.py b/pvactools/tools/pvacsplice/binding_filter.py index baa2fa23..f75bf6f3 100644 --- a/pvactools/tools/pvacsplice/binding_filter.py +++ b/pvactools/tools/pvacsplice/binding_filter.py @@ -9,7 +9,7 @@ def main(args_input = sys.argv[1:]): parser = define_parser() args = parser.parse_args(args_input) - BindingFilter(args.input_file, args.output_file, args.binding_threshold, None, args.top_score_metric, args.exclude_NAs, args.allele_specific_binding_thresholds, args.percentile_threshold, file_type='pVACsplice').execute() + BindingFilter(args.input_file, args.output_file, args.binding_threshold, None, args.top_score_metric, args.exclude_NAs, args.allele_specific_binding_thresholds, args.percentile_threshold, args.percentile_threshold_strategy, 'pVACsplice').execute() if __name__ == "__main__": main() diff --git a/pvactools/tools/pvacsplice/generate_aggregated_report.py b/pvactools/tools/pvacsplice/generate_aggregated_report.py index 0a788e36..5d3b69b8 100644 --- a/pvactools/tools/pvacsplice/generate_aggregated_report.py +++ b/pvactools/tools/pvacsplice/generate_aggregated_report.py @@ -43,6 +43,13 @@ def define_parser(): + "has percentile scores below this value and in the \"Relaxed\" tier " + "when the mutant allele has percentile scores below double this value.", ) + parser.add_argument( + '--percentile-threshold-strategy', + choices=['conservative', 'exploratory'], + help="Specify the candidate inclusion strategy. The 'conservative' option requires a candidate to pass BOTH the binding threshold and percentile threshold (default)." + + " The 'exploratory' option requires a candidate to pass EITHER the binding threshold or the percentile threshold.", + default="conservative", + ) parser.add_argument( '--aggregate-inclusion-binding-threshold', type=int, help="Threshold for including epitopes when creating the aggregate report", @@ -95,6 +102,7 @@ def main(args_input = sys.argv[1:]): binding_threshold=args.binding_threshold, allele_specific_binding_thresholds=args.allele_specific_binding_thresholds, percentile_threshold=args.percentile_threshold, + percentile_threshold_strategy=args.percentile_threshold_strategy, trna_vaf=args.trna_vaf, trna_cov=args.trna_cov, expn_val=args.expn_val, diff --git a/pvactools/tools/pvacsplice/run.py b/pvactools/tools/pvacsplice/run.py index dead2562..eca886f5 100644 --- a/pvactools/tools/pvacsplice/run.py +++ b/pvactools/tools/pvacsplice/run.py @@ -153,6 +153,7 @@ def main(args_input = sys.argv[1:]): 'top_score_metric' : args.top_score_metric, 'binding_threshold' : args.binding_threshold, 'percentile_threshold' : args.percentile_threshold, + 'percentile_threshold_strategy': args.percentile_threshold_strategy, 'allele_specific_binding_thresholds': args.allele_specific_binding_thresholds, 'aggregate_inclusion_binding_threshold' : args.aggregate_inclusion_binding_threshold, 'aggregate_inclusion_count_limit': args.aggregate_inclusion_count_limit, diff --git a/pvactools/tools/pvacvector/run.py b/pvactools/tools/pvacvector/run.py index a90a9391..64ab63f2 100644 --- a/pvactools/tools/pvacvector/run.py +++ b/pvactools/tools/pvacvector/run.py @@ -140,10 +140,18 @@ def find_min_scores(parsed_output_files, current_output_dir, args, min_scores, m threshold = float(args.binding_threshold) if threshold is None else float(threshold) else: threshold = float(args.binding_threshold) - if score < threshold: - junctions_with_good_binders.add(index) - if args.percentile_threshold is not None and percentile < args.percentile_threshold: - junctions_with_good_binders.add(index) + if args.percentile_threshold is None: + if score < threshold: + junctions_with_good_binders.add(index) + else: + if args.percentile_threshold_strategy == 'conservative': + #a conservative strategy would invalidate junctions that fail the binding threshold or the percentile threshold + if score < threshold or percentile < args.percentile_threshold: + junctions_with_good_binders.add(index) + elif args.percentile_threshold_strategy == 'exploratory': + #a exploratory strategy is more relaxed and only invalidates a junction if it fails both the binding and percentile thresholds + if score < threshold and percentile < args.percentile_threshold: + junctions_with_good_binders.add(index) if index not in junction_min_scores: #"initialize" with the first score encountered @@ -524,7 +532,7 @@ def main(args_input=sys.argv[1:]): 'A vaccine design using the parameters specified could not be found. Some options that you may want to consider:\n' + '1) decreasing the acceptable junction binding score to allow more possible connections (-b parameter)\n' + '2) using the "median" binding score instead of the "best" binding score for each junction, (best may be too conservative, -m parameter)\n' + - '3) if running with a percentile threshold set, either remove this parameter or reduce the acceptable threshold to allow more possible connections (--percentile-threshold parameter)\n' + + '3) if running with a percentile threshold set, either remove this parameter, reduce the acceptable threshold to allow more possible connections (--percentile-threshold parameter) or use the \'exploratory\' strategry (--percentile-threshold-strategy parameter)\n' + '4) increase the number of peptides that can be excluded from the vector (--allow-n-peptide-exclusion parameter)' ) diff --git a/pvactools/tools/pvacview/anchor_and_helper_functions.R b/pvactools/tools/pvacview/anchor_and_helper_functions.R index 5aa920b6..e95c8efc 100644 --- a/pvactools/tools/pvacview/anchor_and_helper_functions.R +++ b/pvactools/tools/pvacview/anchor_and_helper_functions.R @@ -254,7 +254,7 @@ calculate_mutation_info <- function(metrics_data_row) { return(diff_positions) } ##Generate Tiering for given variant with specific cutoffs -tier <- function(variant_info, anchor_contribution, dna_cutoff, allele_expr_cutoff, mutation_pos_list, hla_allele, tsl, meta_data, anchor_mode, use_allele_specific_binding_thresholds, binding_threshold, percentile_threshold) { +tier <- function(variant_info, anchor_contribution, dna_cutoff, allele_expr_cutoff, mutation_pos_list, hla_allele, tsl, meta_data, anchor_mode, use_allele_specific_binding_thresholds, binding_threshold, percentile_threshold, percentile_threshold_strategy) { mt_binding <- as.numeric(variant_info["IC50 MT"]) wt_binding <- as.numeric(variant_info["IC50 WT"]) mt_percent <- as.numeric(variant_info["%ile MT"]) @@ -264,16 +264,13 @@ tier <- function(variant_info, anchor_contribution, dna_cutoff, allele_expr_cuto rna_vaf <- as.numeric(variant_info["RNA VAF"]) rna_depth <- as.numeric(variant_info["RNA Depth"]) allele_expr <- as.numeric(variant_info["Allele Expr"]) - if (use_allele_specific_binding_thresholds && hla_allele %in% names(meta_data[["allele_specific_binding_thresholds"]][hla_allele])) { - binding_threshold <- as.numeric(meta_data[["allele_specific_binding_thresholds"]][hla_allele]) - } - trna_vaf <- as.numeric(meta_data["trna_vaf"]) - trna_cov <- as.numeric(meta_data["trna_cov"]) percentile_filter <- FALSE if (!is.null(percentile_threshold)) { percentile_threshold <- as.numeric(percentile_threshold) percentile_filter <- TRUE } + trna_vaf <- as.numeric(meta_data["trna_vaf"]) + trna_cov <- as.numeric(meta_data["trna_cov"]) tsl_max <- as.numeric(meta_data["maximum_transcript_support_level"]) mutation_pos_list <- mutation_pos_list[["Pos"]] if (anchor_mode == "default") { @@ -284,6 +281,20 @@ tier <- function(variant_info, anchor_contribution, dna_cutoff, allele_expr_cuto anchor_list <- c(1, 2, nchar(variant_info[["Best Peptide"]]), nchar(variant_info[["Best Peptide"]]) - 1) } } + if (use_allele_specific_binding_thresholds && hla_allele %in% names(meta_data[["allele_specific_binding_thresholds"]][hla_allele])) { + binding_threshold <- as.numeric(meta_data[["allele_specific_binding_thresholds"]][hla_allele]) + } + ic50_pass <- (mt_binding < binding_threshold) + percentile_pass <- TRUE + if (percentile_filter && mt_percent > percentile_threshold) { + percentile_pass <- FALSE + } + binding_pass <- TRUE + if (percentile_threshold_strategy == 'conservative') { + binding_pass <- (ic50_pass && percentile_pass) + } else { + binding_pass <- (ic50_pass || percentile_pass) + } anchor_residue_pass <- TRUE # if all of mutated positions in anchors if (grepl("-", mutation_pos_list, fixed = TRUE)) { @@ -325,33 +336,15 @@ tier <- function(variant_info, anchor_contribution, dna_cutoff, allele_expr_cuto vaf_clonal_pass <- FALSE } ## Assign Tiering - if ((mt_binding < binding_threshold) && allele_expr_pass && vaf_clonal_pass && tsl_pass && anchor_residue_pass) { - if (percentile_filter) { - if (mt_percent <= percentile_threshold) { - return("Pass") - } - }else { - return("Pass") - } + if (binding_pass && allele_expr_pass && vaf_clonal_pass && tsl_pass && anchor_residue_pass) { + return("Pass") } - - if ((mt_binding < binding_threshold) && allele_expr_pass && vaf_clonal_pass && tsl_pass && !anchor_residue_pass) { - if (percentile_filter) { - if (mt_percent <= percentile_threshold) { - return("Anchor") - } - }else { - return("Anchor") - } + + if (binding_pass && allele_expr_pass && vaf_clonal_pass && tsl_pass && !anchor_residue_pass) { + return("Anchor") } - if ((mt_binding < binding_threshold) && allele_expr_pass && !vaf_clonal_pass && tsl_pass && anchor_residue_pass) { - if (percentile_filter) { - if (mt_percent <= percentile_threshold) { - return("Subclonal") - } - }else { - return("Subclonal") - } + if (binding_pass && allele_expr_pass && !vaf_clonal_pass && tsl_pass && anchor_residue_pass) { + return("Subclonal") } lowexpr <- FALSE if (!is.na(rna_vaf) && !is.na(gene_expr) && !is.na(rna_depth)) { @@ -359,14 +352,8 @@ tier <- function(variant_info, anchor_contribution, dna_cutoff, allele_expr_cuto lowexpr <- TRUE } } - if ((mt_binding < binding_threshold) && lowexpr && vaf_clonal_pass && tsl_pass && anchor_residue_pass) { - if (percentile_filter) { - if (mt_percent <= percentile_threshold) { - return("LowExpr") - } - }else { - return("LowExpr") - } + if (binding_pass && lowexpr && vaf_clonal_pass && tsl_pass && anchor_residue_pass) { + return("LowExpr") } if (!is.na(allele_expr) && ((gene_expr == 0) || (rna_vaf == 0)) && !lowexpr) { return("NoExpr") @@ -374,7 +361,7 @@ tier <- function(variant_info, anchor_contribution, dna_cutoff, allele_expr_cuto return("Poor") } #Determine the Tier Count for given variant with specific cutoffs -tier_numbers <- function(variant_info, anchor_contribution, dna_cutoff, allele_expr_cutoff, mutation_pos_list, hla_allele, tsl, meta_data, anchor_mode, allele_specific_binding_thresholds, use_allele_specific_binding_thresholds, binding_threshold, percentile_threshold) { +tier_numbers <- function(variant_info, anchor_contribution, dna_cutoff, allele_expr_cutoff, mutation_pos_list, hla_allele, tsl, meta_data, anchor_mode, allele_specific_binding_thresholds, use_allele_specific_binding_thresholds, binding_threshold, percentile_threshold, percentile_threshold_strategy) { mt_binding <- as.numeric(variant_info["IC50 MT"]) wt_binding <- as.numeric(variant_info["IC50 WT"]) mt_percent <- as.numeric(variant_info["%ile MT"]) @@ -384,9 +371,6 @@ tier_numbers <- function(variant_info, anchor_contribution, dna_cutoff, allele_e rna_vaf <- as.numeric(variant_info["RNA VAF"]) rna_depth <- as.numeric(variant_info["RNA Depth"]) allele_expr <- as.numeric(variant_info["Allele Expr"]) - if (use_allele_specific_binding_thresholds && hla_allele %in% names(meta_data[["allele_specific_binding_thresholds"]][hla_allele])) { - binding_threshold <- as.numeric(meta_data[["allele_specific_binding_thresholds"]][hla_allele]) - } trna_vaf <- as.numeric(meta_data["trna_vaf"]) trna_cov <- as.numeric(meta_data["trna_cov"]) percentile_filter <- FALSE @@ -395,6 +379,20 @@ tier_numbers <- function(variant_info, anchor_contribution, dna_cutoff, allele_e percentile_filter <- TRUE } tsl_max <- as.numeric(meta_data["maximum_transcript_support_level"]) + if (use_allele_specific_binding_thresholds && hla_allele %in% names(meta_data[["allele_specific_binding_thresholds"]][hla_allele])) { + binding_threshold <- as.numeric(meta_data[["allele_specific_binding_thresholds"]][hla_allele]) + } + ic50_pass <- (mt_binding < binding_threshold) + percentile_pass <- TRUE + if (percentile_filter && mt_percent > percentile_threshold) { + percentile_pass <- FALSE + } + binding_pass <- TRUE + if (percentile_threshold_strategy == 'conservative') { + binding_pass <- (ic50_pass && percentile_pass) + } else { + binding_pass <- (ic50_pass || percentile_pass) + } mutation_pos_list <- mutation_pos_list[["Pos"]] if (anchor_mode == "default") { anchor_list <- c(1, 2, nchar(variant_info[["Best Peptide"]]), nchar(variant_info[["Best Peptide"]]) - 1) @@ -447,33 +445,15 @@ tier_numbers <- function(variant_info, anchor_contribution, dna_cutoff, allele_e vaf_clonal_pass <- FALSE } ## Pass - if ((mt_binding < binding_threshold) && allele_expr_pass && vaf_clonal_pass && tsl_pass && anchor_residue_pass) { - if (percentile_filter) { - if (mt_percent <= percentile_threshold) { - return(1) - } - }else { - return(1) - } + if (binding_pass && allele_expr_pass && vaf_clonal_pass && tsl_pass && anchor_residue_pass) { + return(1) } ## Anchor - if ((mt_binding < binding_threshold) && allele_expr_pass && vaf_clonal_pass && tsl_pass && !anchor_residue_pass) { - if (percentile_filter) { - if (mt_percent <= percentile_threshold) { - return(5) - } - }else { - return(5) - } + if (binding_pass && allele_expr_pass && vaf_clonal_pass && tsl_pass && !anchor_residue_pass) { + return(5) } - if ((mt_binding < binding_threshold) && allele_expr_pass && !vaf_clonal_pass && tsl_pass && anchor_residue_pass) { - if (percentile_filter) { - if (mt_percent <= percentile_threshold) { - return(6) - } - }else { - return(6) - } + if (binding_pass && allele_expr_pass && !vaf_clonal_pass && tsl_pass && anchor_residue_pass) { + return(6) } lowexpr <- FALSE if (!is.na(rna_vaf) && !is.na(gene_expr) && !is.na(rna_depth)) { @@ -481,21 +461,11 @@ tier_numbers <- function(variant_info, anchor_contribution, dna_cutoff, allele_e lowexpr <- TRUE } } - if ((mt_binding < binding_threshold) && (lowexpr) && vaf_clonal_pass && tsl_pass && anchor_residue_pass) { - if (percentile_filter) { - if (mt_percent <= percentile_threshold) { - if (allele_expr > 0) { - return(7) - }else if ((gene_expr == 0) && (rna_depth > trna_cov) && (rna_vaf > trna_vaf)) { - return(8) - } - } - }else { - if (allele_expr > 0) { - return(7) - }else if ((gene_expr == 0) && (rna_depth > trna_cov) && (rna_vaf > trna_vaf)) { - return(8) - } + if (binding_pass && lowexpr && vaf_clonal_pass && tsl_pass && anchor_residue_pass) { + if (allele_expr > 0) { + return(7) + }else if ((gene_expr == 0) && (rna_depth > trna_cov) && (rna_vaf > trna_vaf)) { + return(8) } } if (!is.na(allele_expr) && ((gene_expr == 0) || (rna_vaf == 0)) && !lowexpr) { diff --git a/pvactools/tools/pvacview/server.R b/pvactools/tools/pvacview/server.R index b642b71e..4535ad54 100644 --- a/pvactools/tools/pvacview/server.R +++ b/pvactools/tools/pvacview/server.R @@ -93,6 +93,7 @@ server <- shinyServer(function(input, output, session) { use_allele_specific_binding_thresholds = NULL, aggregate_inclusion_binding_threshold = NULL, percentile_threshold = NULL, + percentile_threshold_strategy = NULL, allele_specific_binding_thresholds = NULL, allele_expr = NULL, anchor_mode = NULL, @@ -128,6 +129,7 @@ server <- shinyServer(function(input, output, session) { df$allele_specific_binding_thresholds <- df$metricsData$`allele_specific_binding_thresholds` df$aggregate_inclusion_binding_threshold <- df$metricsData$`aggregate_inclusion_binding_threshold` df$percentile_threshold <- df$metricsData$`percentile_threshold` + df$percentile_threshold_strategy <- df$metricsData$`percentile_threshold_strategy` df$dna_cutoff <- df$metricsData$vaf_clonal df$allele_expr <- df$metricsData$allele_expr_threshold df$anchor_mode <- ifelse(df$metricsData$`allele_specific_anchors`, "allele-specific", "default") @@ -154,7 +156,7 @@ server <- shinyServer(function(input, output, session) { df$comments <- data.frame(matrix("No comments", nrow = nrow(df$mainTable)), ncol = 1) } df$mainTable <- df$mainTable[, columns_needed] - df$mainTable$`Tier Count` <- apply(df$mainTable, 1, function(x) tier_numbers(x, df$anchor_contribution, df$dna_cutoff, df$allele_expr, x["Pos"], x["Allele"], x["TSL"], df$metricsData[1:15], df$anchor_mode, df$allele_specific_binding_thresholds, df$use_allele_specific_binding_thresholds, df$binding_threshold, df$percentile_threshold)) + df$mainTable$`Tier Count` <- apply(df$mainTable, 1, function(x) tier_numbers(x, df$anchor_contribution, df$dna_cutoff, df$allele_expr, x["Pos"], x["Allele"], x["TSL"], df$metricsData[1:15], df$anchor_mode, df$allele_specific_binding_thresholds, df$use_allele_specific_binding_thresholds, df$binding_threshold, df$percentile_threshold, df$percentile_threshold_strategy)) df$mainTable$`Gene of Interest` <- apply(df$mainTable, 1, function(x) {any(x["Gene"] == df$gene_list)}) rownames(df$comments) <- df$mainTable$ID df$mainTable$`Scaled BA` <- apply(df$mainTable, 1, function(x) scale_binding_affinity(df$allele_specific_binding_thresholds, df$use_allele_specific_binding_thresholds, df$binding_threshold, x["Allele"], x["IC50 MT"])) @@ -218,6 +220,7 @@ server <- shinyServer(function(input, output, session) { df$use_allele_specific_binding_thresholds <- df$metricsData$`use_allele_specific_binding_thresholds` df$aggregate_inclusion_binding_threshold <- df$metricsData$`aggregate_inclusion_binding_threshold` df$percentile_threshold <- df$metricsData$`percentile_threshold` + df$percentile_threshold_strategy <- df$metricsData$`percentile_threshold_strategy` df$dna_cutoff <- df$metricsData$vaf_clonal df$allele_expr <- df$metricsData$allele_expr_threshold df$anchor_mode <- ifelse(df$metricsData$`allele_specific_anchors`, "allele-specific", "default") @@ -245,7 +248,7 @@ server <- shinyServer(function(input, output, session) { df$comments <- data.frame(matrix("No comments", nrow = nrow(df$mainTable)), ncol = 1) } df$mainTable <- df$mainTable[, columns_needed] - df$mainTable$`Tier Count` <- apply(df$mainTable, 1, function(x) tier_numbers(x, df$anchor_contribution, df$dna_cutoff, df$allele_expr, x["Pos"], x["Allele"], x["TSL"], df$metricsData[1:15], df$anchor_mode, df$allele_specific_binding_thresholds, df$use_allele_specific_binding_thresholds, df$binding_threshold, df$percentile_threshold)) + df$mainTable$`Tier Count` <- apply(df$mainTable, 1, function(x) tier_numbers(x, df$anchor_contribution, df$dna_cutoff, df$allele_expr, x["Pos"], x["Allele"], x["TSL"], df$metricsData[1:15], df$anchor_mode, df$allele_specific_binding_thresholds, df$use_allele_specific_binding_thresholds, df$binding_threshold, df$percentile_threshold, df$percentile_threshold_strategy)) df$mainTable$`Gene of Interest` <- apply(df$mainTable, 1, function(x) {any(x["Gene"] == df$gene_list)}) if ("Comments" %in% colnames(df$mainTable)) { df$comments <- data.frame(data = df$mainTable$`Comments`, nrow = nrow(df$mainTable), ncol = 1) @@ -340,6 +343,15 @@ server <- shinyServer(function(input, output, session) { current_percentile <- df$percentile_threshold numericInput("percentile_threshold", "Percentile Threshold", current_percentile, min = 0, max = 100, step = 0.01, width = 500) }) + output$percentile_threshold_strategy_ui <- renderUI({ + current_percentile_threshold_strategy <- df$percentile_threshold_strategy + radioButtons( + "percentile_threshold_strategy", + "Specify the candidate binding score and percentile inclusion strategy. The 'conservative' option requires a candidate to pass BOTH the binding threshold and percentile threshold (default). The 'exploratory' option requires a candidate to pass EITHER the binding threshold or the percentile threshold.", + c("conservative", "exploratory"), + selected = current_percentile_threshold_strategy + ) + }) output$dna_cutoff_ui <- renderUI({ current_dna_cutoff <- df$dna_cutoff numericInput("dna_cutoff", "Clonal DNA VAF (Anything lower than 1/2 of chosen VAF level will be considered subclonal)", current_dna_cutoff, min = 0, max = 1, step = 0.01, width = 500) @@ -358,6 +370,7 @@ server <- shinyServer(function(input, output, session) { } else { df$percentile_threshold <- input$percentile_threshold } + df$percentile_threshold_strategy <- input$percentile_threshold_strategy df$dna_cutoff <- input$dna_cutoff df$allele_expr <- input$allele_expr df$allele_specific_anchors <- input$use_anchor @@ -368,8 +381,8 @@ server <- shinyServer(function(input, output, session) { }else { df$anchor_mode <- "default" } - df$mainTable$`Tier` <- apply(df$mainTable, 1, function(x) tier(x, df$anchor_contribution, input$dna_cutoff, input$allele_expr, x["Pos"], x["Allele"], x["TSL"], df$metricsData[1:15], df$anchor_mode, df$use_allele_specific_binding_thresholds, df$binding_threshold, df$percentile_threshold)) - df$mainTable$`Tier Count` <- apply(df$mainTable, 1, function(x) tier_numbers(x, df$anchor_contribution, input$dna_cutoff, input$allele_expr, x["Pos"], x["Allele"], x["TSL"], df$metricsData[1:15], df$anchor_mode, df$allele_specific_binding_thresholds, df$use_allele_specific_binding_thresholds, df$binding_threshold, df$percentile_threshold)) + df$mainTable$`Tier` <- apply(df$mainTable, 1, function(x) tier(x, df$anchor_contribution, input$dna_cutoff, input$allele_expr, x["Pos"], x["Allele"], x["TSL"], df$metricsData[1:15], df$anchor_mode, df$use_allele_specific_binding_thresholds, df$binding_threshold, df$percentile_threshold, df$percentile_threshold_strategy)) + df$mainTable$`Tier Count` <- apply(df$mainTable, 1, function(x) tier_numbers(x, df$anchor_contribution, input$dna_cutoff, input$allele_expr, x["Pos"], x["Allele"], x["TSL"], df$metricsData[1:15], df$anchor_mode, df$allele_specific_binding_thresholds, df$use_allele_specific_binding_thresholds, df$binding_threshold, df$percentile_threshold, df$percentile_threshold_strategy)) df$mainTable$`Scaled BA` <- apply(df$mainTable, 1, function(x) scale_binding_affinity(df$allele_specific_binding_thresholds, df$use_allele_specific_binding_thresholds, df$binding_threshold, x["Allele"], x["IC50 MT"])) df$mainTable$`Scaled percentile` <- apply(df$mainTable, 1, function(x) {ifelse((is.null(df$percentile_threshold) || is.na(df$percentile_threshold)), as.numeric(x["%ile MT"]), as.numeric(x["%ile MT"]) / (df$percentile_threshold))}) if (is.null(df$percentile_threshold) || is.na(df$percentile_threshold)) { @@ -396,13 +409,14 @@ server <- shinyServer(function(input, output, session) { df$allele_specific_binding_thresholds <- df$metricsData$`allele_specific_binding_thresholds` df$use_allele_specific_binding_thresholds <- df$metricsData$`use_allele_specific_binding_thresholds` df$percentile_threshold <- df$metricsData$`percentile_threshold` + df$percentile_threshold_strategy <- df$metricsData$`percentile_threshold_strategy` df$dna_cutoff <- df$metricsData$`vaf_clonal` df$allele_expr <- df$metricsData$`allele_expr` df$anchor_mode <- ifelse(df$metricsData$`allele_specific_anchors`, "allele-specific", "default") df$allele_specific_anchors <- df$metricsData$`allele_specific_anchors` df$anchor_contribution <- df$metricsData$`anchor_contribution_threshold` - df$mainTable$`Tier` <- apply(df$mainTable, 1, function(x) tier(x, df$anchor_contribution, df$dna_cutoff, df$allele_expr, x["Pos"], x["Allele"], x["TSL"], df$metricsData[1:15], df$anchor_mode, df$use_allele_specific_binding_thresholds, df$binding_threshold, df$percentile_threshold)) - df$mainTable$`Tier Count` <- apply(df$mainTable, 1, function(x) tier_numbers(x, df$anchor_contribution, df$dna_cutoff, df$allele_expr, x["Pos"], x["Allele"], x["TSL"], df$metricsData[1:15], df$anchor_mode, df$allele_specific_binding_thresholds, df$use_allele_specific_binding_thresholds, df$binding_threshold, df$percentile_threshold)) + df$mainTable$`Tier` <- apply(df$mainTable, 1, function(x) tier(x, df$anchor_contribution, df$dna_cutoff, df$allele_expr, x["Pos"], x["Allele"], x["TSL"], df$metricsData[1:15], df$anchor_mode, df$use_allele_specific_binding_thresholds, df$binding_threshold, df$percentile_threshold, df$percentile_threshold_strategy)) + df$mainTable$`Tier Count` <- apply(df$mainTable, 1, function(x) tier_numbers(x, df$anchor_contribution, df$dna_cutoff, df$allele_expr, x["Pos"], x["Allele"], x["TSL"], df$metricsData[1:15], df$anchor_mode, df$allele_specific_binding_thresholds, df$use_allele_specific_binding_thresholds, df$binding_threshold, df$percentile_threshold, df$percentile_threshold_strategy)) df$mainTable$`Scaled BA` <- apply(df$mainTable, 1, function(x) scale_binding_affinity(df$allele_specific_binding_thresholds, df$use_allele_specific_binding_thresholds, df$binding_threshold, x["Allele"], x["IC50 MT"])) df$mainTable$`Scaled percentile` <- apply(df$mainTable, 1, function(x) {ifelse(is.null(df$percentile_threshold), as.numeric(x["%ile MT"]), as.numeric(x["%ile MT"]) / (df$percentile_threshold))}) if (is.null(df$percentile_threshold)) { @@ -439,7 +453,7 @@ server <- shinyServer(function(input, output, session) { data <- data.frame( "Parameter" = c("Tumor Purity", "VAF Clonal", "VAF Subclonal", "Allele Expression for Passing Variants", "Binding Threshold", "Binding Threshold for Inclusion into Metrics File", "Maximum TSL", - "Percentile Threshold", "Allele Specific Binding Thresholds", + "Percentile Threshold", "Percentile Threshold Strategy", "Allele Specific Binding Thresholds", "MT Top Score Metric", "WT Top Score Metric", "Allele Specific Anchors Used", "Anchor Contribution Threshold"), "Value" = c(if (is.null(df$metricsData$tumor_purity)) {"NULL"}else {df$metricsData$tumor_purity}, @@ -447,6 +461,7 @@ server <- shinyServer(function(input, output, session) { df$metricsData$binding_threshold, df$metricsData$`aggregate_inclusion_binding_threshold`, df$metricsData$maximum_transcript_support_level, if (is.null(df$metricsData$percentile_threshold)) {"NULL"}else { df$metricsData$percentile_threshold}, + df$metricsData$percentile_threshold_strategy, df$metricsData$use_allele_specific_binding_thresholds, df$metricsData$mt_top_score_metric, df$metricsData$wt_top_score_metric, df$metricsData$allele_specific_anchors, df$metricsData$anchor_contribution_threshold) @@ -475,7 +490,7 @@ server <- shinyServer(function(input, output, session) { data <- data.frame( "Parameter" = c("VAF Clonal", "VAF Subclonal", "Allele Expression for Passing Variants", "Binding Threshold", "Binding Threshold for Inclusion into Metrics File", "Maximum TSL", - "Percentile Threshold", "Allele Specific Binding Thresholds", + "Percentile Threshold", "Percentile Threshold Strategy", "Allele Specific Binding Thresholds", "MT Top Score Metric", "WT Top Score Metric", "Allele Specific Anchors Used", "Anchor Contribution Threshold"), "Value" = c( @@ -486,6 +501,7 @@ server <- shinyServer(function(input, output, session) { df$metricsData$`aggregate_inclusion_binding_threshold`, df$metricsData$maximum_transcript_support_level, if (is.null(df$percentile_threshold) || is.na(df$percentile_threshold)) {"NULL"}else { df$percentile_threshold}, + df$percentile_threshold_strategy, df$use_allele_specific_binding_thresholds, df$metricsData$mt_top_score_metric, df$metricsData$wt_top_score_metric, diff --git a/pvactools/tools/pvacview/ui.R b/pvactools/tools/pvacview/ui.R index e2730359..e6c67b71 100644 --- a/pvactools/tools/pvacview/ui.R +++ b/pvactools/tools/pvacview/ui.R @@ -98,6 +98,7 @@ explore_tab <- tabItem( uiOutput("binding_threshold_ui"), uiOutput("allele_specific_binding_ui"), uiOutput("percentile_threshold_ui"), + uiOutput("percentile_threshold_strategy_ui"), uiOutput("dna_cutoff_ui"), uiOutput("allele_expr_ui"), h5("For further explanations on these inputs, please refer to the ", tags$a(href = "https://pvactools.readthedocs.io/en/latest/pvacview/pvacseq_module/pvacseq_upload.html#visualize-and-explore", "pVACview documentation.", target = "_blank")), diff --git a/tests/test_data/aggregate_all_epitopes/HCC1395.output.metrics.json b/tests/test_data/aggregate_all_epitopes/HCC1395.output.metrics.json index 25d0b803..54dca258 100644 --- a/tests/test_data/aggregate_all_epitopes/HCC1395.output.metrics.json +++ b/tests/test_data/aggregate_all_epitopes/HCC1395.output.metrics.json @@ -10,6 +10,7 @@ "allele_expr_threshold": 2.5, "maximum_transcript_support_level": 1, "percentile_threshold": null, + "percentile_threshold_strategy": "conservative", "use_allele_specific_binding_thresholds": false, "mt_top_score_metric": "Median", "wt_top_score_metric": "Median", diff --git a/tests/test_data/aggregate_all_epitopes/output.allele_specific_binding_thresholds.metrics.json b/tests/test_data/aggregate_all_epitopes/output.allele_specific_binding_thresholds.metrics.json index 7aaed642..2ed818d6 100644 --- a/tests/test_data/aggregate_all_epitopes/output.allele_specific_binding_thresholds.metrics.json +++ b/tests/test_data/aggregate_all_epitopes/output.allele_specific_binding_thresholds.metrics.json @@ -10,6 +10,7 @@ "allele_expr_threshold": 2.5, "maximum_transcript_support_level": 1, "percentile_threshold": null, + "percentile_threshold_strategy": "conservative", "use_allele_specific_binding_thresholds": true, "mt_top_score_metric": "Median", "wt_top_score_metric": "Median", diff --git a/tests/test_data/aggregate_all_epitopes/output.lowest_top_score_metric.metrics.json b/tests/test_data/aggregate_all_epitopes/output.lowest_top_score_metric.metrics.json index c5e5fc40..856bc24e 100644 --- a/tests/test_data/aggregate_all_epitopes/output.lowest_top_score_metric.metrics.json +++ b/tests/test_data/aggregate_all_epitopes/output.lowest_top_score_metric.metrics.json @@ -10,6 +10,7 @@ "allele_expr_threshold": 2.5, "maximum_transcript_support_level": 1, "percentile_threshold": null, + "percentile_threshold_strategy": "conservative", "use_allele_specific_binding_thresholds": false, "mt_top_score_metric": "Best", "wt_top_score_metric": "Corresponding", diff --git a/tests/test_data/aggregate_all_epitopes/output.metrics.json b/tests/test_data/aggregate_all_epitopes/output.metrics.json index 1600d58c..fca1f0cf 100644 --- a/tests/test_data/aggregate_all_epitopes/output.metrics.json +++ b/tests/test_data/aggregate_all_epitopes/output.metrics.json @@ -10,6 +10,7 @@ "allele_expr_threshold": 2.5, "maximum_transcript_support_level": 1, "percentile_threshold": null, + "percentile_threshold_strategy": "conservative", "use_allele_specific_binding_thresholds": false, "mt_top_score_metric": "Median", "wt_top_score_metric": "Median", diff --git a/tests/test_data/aggregate_all_epitopes/output.na_mutation_position.metrics.json b/tests/test_data/aggregate_all_epitopes/output.na_mutation_position.metrics.json index 4b6f4a1f..8f21a9dd 100644 --- a/tests/test_data/aggregate_all_epitopes/output.na_mutation_position.metrics.json +++ b/tests/test_data/aggregate_all_epitopes/output.na_mutation_position.metrics.json @@ -10,6 +10,7 @@ "allele_expr_threshold": 2.5, "maximum_transcript_support_level": 1, "percentile_threshold": null, + "percentile_threshold_strategy": "conservative", "use_allele_specific_binding_thresholds": false, "mt_top_score_metric": "Median", "wt_top_score_metric": "Median", diff --git a/tests/test_data/pvacseq/MHC_Class_I/Test.all_epitopes.aggregated.metrics.json b/tests/test_data/pvacseq/MHC_Class_I/Test.all_epitopes.aggregated.metrics.json index da507f59..7f68f217 100644 --- a/tests/test_data/pvacseq/MHC_Class_I/Test.all_epitopes.aggregated.metrics.json +++ b/tests/test_data/pvacseq/MHC_Class_I/Test.all_epitopes.aggregated.metrics.json @@ -10,6 +10,7 @@ "allele_expr_threshold": 2.5, "maximum_transcript_support_level": 1, "percentile_threshold": null, + "percentile_threshold_strategy": "conservative", "use_allele_specific_binding_thresholds": false, "mt_top_score_metric": "Best", "wt_top_score_metric": "Corresponding", diff --git a/tests/test_data/pvacseq/MHC_Class_II/Test.all_epitopes.aggregated.metrics.json b/tests/test_data/pvacseq/MHC_Class_II/Test.all_epitopes.aggregated.metrics.json index 7c42aa77..7a0be83e 100644 --- a/tests/test_data/pvacseq/MHC_Class_II/Test.all_epitopes.aggregated.metrics.json +++ b/tests/test_data/pvacseq/MHC_Class_II/Test.all_epitopes.aggregated.metrics.json @@ -10,6 +10,7 @@ "allele_expr_threshold": 2.5, "maximum_transcript_support_level": 1, "percentile_threshold": null, + "percentile_threshold_strategy": "conservative", "use_allele_specific_binding_thresholds": false, "mt_top_score_metric": "Best", "wt_top_score_metric": "Corresponding", diff --git a/tests/test_filter.py b/tests/test_filter.py index dae128a4..058a332d 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -183,3 +183,59 @@ def test_inf(self): os.path.join(self.test_data_path, "output.inf.tsv"), False )) + + def test_conservative(self): + output_file = tempfile.NamedTemporaryFile() + self.assertFalse(Filter( + os.path.join( + self.test_data_path, + 'Test.combined.parsed.tsv' + ), + output_file.name, + [FilterCriterion( + "Median MT IC50 Score", + "<", + "500", + exclude_nas=False + ), FilterCriterion( + "Corresponding Fold Change", + "<", + "16000", + exclude_nas=False + )], + [], + "AND" + ).execute()) + self.assertTrue(cmp( + output_file.name, + os.path.join(self.test_data_path, "Test.filtered.lt.tsv"), + False + )) + + def test_exploratory(self): + output_file = tempfile.NamedTemporaryFile() + self.assertFalse(Filter( + os.path.join( + self.test_data_path, + 'Test.combined.parsed.tsv' + ), + output_file.name, + [FilterCriterion( + "Median MT IC50 Score", + "<", + "500", + exclude_nas=False + ), FilterCriterion( + "Corresponding Fold Change", + ">", + "16000", + exclude_nas=False + )], + [], + "OR" + ).execute()) + self.assertTrue(cmp( + output_file.name, + os.path.join(self.test_data_path, "Test.filtered.lt.tsv"), + False + ))