Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added argument for specifying NetMHCIIpan/NetMHCIIpanEL version #1181

Open
wants to merge 7 commits into
base: staging
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions pvactools/lib/netmhc_pan_version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import argparse


class NetMHCPanVersion:
    """Print the NetMHCIIpan / NetMHCIIpanEL versions a user may request.

    The instance stores the parsed ``--list`` flag; ``print_valid_versions``
    only writes output when that flag is truthy.
    """

    # Human-readable version labels; the 4.0 entry carries a note because the
    # standalone IEDB install does not support it.
    valid_versions = ["4.3", "4.2", "4.1", "4.0 (Not supported by standalone IEDB)"]

    def __init__(self, list):
        # The parameter name shadows the builtin ``list`` but is kept as-is so
        # existing positional/keyword callers keep working.
        self.list = list

    def print_valid_versions(self):
        """Print a header and one version per line when listing was requested."""
        if self.list:
            print("Valid NetMHCIIpan and NetMHCIIpanEL Versions")
            print('\n'.join(self.valid_versions))

    @classmethod
    def parser(cls, tool="pvacseq"):
        """Build the argparse parser for ``<tool> valid_netmhcpan_versions``.

        tool: name of the pVACtools tool, used only for the program name
        shown in usage/help text.

        Returns an ``argparse.ArgumentParser`` exposing ``-l/--list``.
        """
        parser = argparse.ArgumentParser(
            "%s valid_netmhcpan_versions" % tool,
            description="Show a list of valid versions of NetMHCIIpan and NetMHCIIpanEL that can be used.",
            formatter_class=argparse.ArgumentDefaultsHelpFormatter
        )
        parser.add_argument(
            '-l', '--list',
            help="List the valid NetMHCIIpan and NetMHCIIpanEL versions.",
            # A store_true flag must default to a falsy value. The previous
            # default was the *string* 'None', which is truthy and made the
            # flag a no-op (the list was always printed).
            default=False,
            action='store_true'
        )
        return parser
43 changes: 31 additions & 12 deletions pvactools/lib/output_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,11 @@ def get_scores(self, line, method):
return {'score': float(line['BigMHC_IM'])}
elif method.lower() == 'netmhcpan_el':
return {'score': float(line['score'])}
elif method.lower() == 'netmhciipan_el':
return {'score': float(line['score'])}
elif 'netmhciipan_el' in method.lower():
try:
return {'score': float(line['score'])}
except:
return {'ic50': float(line['ic50'])}
ldhtnp marked this conversation as resolved.
Show resolved Hide resolved
else:
return {'ic50': float(line['ic50'])}

Expand Down Expand Up @@ -596,7 +599,7 @@ def output_headers(self):
headers.append("%s MT Score" % pretty_method)
continue

if method in ['netmhcpan_el', 'netmhciipan_el']:
if 'netmhciipan_el' in method or 'netmhcpan_el' in method:
headers.append("%s WT Score" % pretty_method)
headers.append("%s MT Score" % pretty_method)
else:
Expand All @@ -620,9 +623,12 @@ def flurry_headers(self, headers):

def prediction_methods(self):
    """Return the sorted, de-duplicated method names found in ``self.input_iedb_files``.

    Each file's basename is expected to start with ``<sample_name>.<method>``,
    where ``<method>`` is a run of word characters optionally followed by a
    ``-<digits>.<digits>`` version suffix (e.g. ``netmhciipan_el-4.3``).

    Raises:
        ValueError: if a basename does not start with that prefix, instead of
            failing later with an opaque ``AttributeError`` on ``None``.
    """
    # Compiled once per call; sample_name is escaped because it is
    # user-supplied text, not a regex.
    pattern = re.compile(rf"{re.escape(self.sample_name)}\.(\w+(?:-\d+\.\d+)?)")
    methods = set()

    for input_iedb_file in self.input_iedb_files:
        filename = os.path.basename(input_iedb_file)
        match = pattern.match(filename)
        if match is None:
            raise ValueError(
                "Unable to determine the prediction method from file name '%s' "
                "(expected it to start with '%s.<method>')" % (filename, self.sample_name)
            )
        methods.add(match.group(1))

    return sorted(methods)
Expand Down Expand Up @@ -761,6 +767,7 @@ def execute(self):


class DefaultOutputParser(OutputParser):

def parse_iedb_file(self, tsv_entries):
with open(self.key_file, 'r') as key_file_reader:
protein_identifiers_from_label = yaml.load(key_file_reader, Loader=yaml.FullLoader)
Expand All @@ -769,8 +776,12 @@ def parse_iedb_file(self, tsv_entries):
for input_iedb_file in self.input_iedb_files:
with open(input_iedb_file, 'r') as reader:
iedb_tsv_reader = csv.DictReader(reader, delimiter='\t')
# we remove "sample_name." prefix from filename and then first part before a dot is the method name
method = (os.path.basename(input_iedb_file)[len(self.sample_name)+1:]).split('.', 1)[0]
filename = os.path.basename(input_iedb_file)

pattern = re.compile(rf"{re.escape(self.sample_name)}\.(\w+(?:-\d+\.\d+)?)")
match = pattern.match(filename)
method = match.group(1)

for line in iedb_tsv_reader:
if "Warning: Potential DNA sequence(s)" in line['allele']:
continue
Expand Down Expand Up @@ -831,8 +842,12 @@ def parse_iedb_file(self):
for input_iedb_file in self.input_iedb_files:
with open(input_iedb_file, 'r') as reader:
iedb_tsv_reader = csv.DictReader(reader, delimiter='\t')
# we remove "sample_name." prefix from filename and then first part before a dot is the method name
method = (os.path.basename(input_iedb_file)[len(self.sample_name)+1:]).split('.', 1)[0]
filename = os.path.basename(input_iedb_file)

pattern = re.compile(rf"{re.escape(self.sample_name)}\.(\w+(?:-\d+\.\d+)?)")
match = pattern.match(filename)
method = match.group(1)

for line in iedb_tsv_reader:
if "Warning: Potential DNA sequence(s)" in line['allele']:
continue
Expand Down Expand Up @@ -946,7 +961,7 @@ def output_headers(self):
headers.append("%s Score" % pretty_method)
continue

if method in ['netmhcpan_el', 'netmhciipan_el']:
if 'netmhciipan_el' in method or 'netmhcpan_el' in method:
headers.append("%s Score" % pretty_method)
else:
headers.append("%s IC50 Score" % pretty_method)
Expand Down Expand Up @@ -1017,8 +1032,12 @@ def parse_iedb_file(self):
# input iedb file
with open(input_iedb_file, 'r') as reader:
iedb_tsv_reader = csv.DictReader(reader, delimiter='\t')
# we remove "sample_name." prefix from filename and then first part before a dot is the method name
method = (os.path.basename(input_iedb_file)[len(self.sample_name)+1:]).split('.', 1)[0]
filename = os.path.basename(input_iedb_file)

pattern = re.compile(rf"{re.escape(self.sample_name)}\.(\w+(?:-\d+\.\d+)?)")
match = pattern.match(filename)
method = match.group(1)

# header: allele, seq_num, start, end, length, peptide, ic50, percentile_rank
for line in iedb_tsv_reader:
if "Warning: Potential DNA sequence(s)" in line['allele']:
Expand Down
15 changes: 13 additions & 2 deletions pvactools/lib/prediction_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -653,9 +653,13 @@ def url(self):
def parse_iedb_allele_file(self):
#Ultimately we probably want this method to call out to IEDB but their command is currently broken
#curl --data "method=ann&species=human" http://tools-api.iedb.org/tools_api/mhci/
file_name = next(
(name for name in ["netmhciipan", "netmhciipan_el"] if name in self.iedb_prediction_method),
self.iedb_prediction_method
)
base_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
iedb_alleles_dir = os.path.join(base_dir, 'tools', 'pvacseq', 'iedb_alleles', 'class_ii')
iedb_alleles_file_name = os.path.join(iedb_alleles_dir, "%s.tsv" % self.iedb_prediction_method)
iedb_alleles_file_name = os.path.join(iedb_alleles_dir, "%s.tsv" % file_name)
alleles = []
with open(iedb_alleles_file_name) as iedb_alleles_file:
for row in iedb_alleles_file:
Expand All @@ -675,14 +679,21 @@ def iedb_executable_params(self, iedb_executable_path, method, allele, input_fil
allele = allele.replace('-DPB', '/DPB').replace('-DQB', '/DQB')
return [iedb_executable_path, method, allele, input_file, str(epitope_length)]

class NetMHCIIVersion:
    """Class-level holder for the requested NetMHCIIpan/NetMHCIIpanEL version."""

    # None until a caller assigns a version string; the NetMHCIIpan and
    # NetMHCIIpanEL predictor classes read this attribute when building their
    # IEDB method name.
    netmhcii_pan_version = None

class NetMHCIIpan(IEDBMHCII):
    @property
    def iedb_prediction_method(self):
        """Return the IEDB method name for NetMHCIIpan.

        The name is ``'netmhciipan_ba'``, with ``'-<version>'`` appended when
        ``NetMHCIIVersion.netmhcii_pan_version`` is one of 4.0, 4.2 or 4.3.
        Any other value (including None and '4.1') yields the unsuffixed name.
        """
        # The stale ``return 'NetMHCIIpan'`` that preceded this logic made the
        # version handling unreachable and has been removed.
        version = NetMHCIIVersion.netmhcii_pan_version
        if version in ['4.0', '4.2', '4.3']:
            return 'netmhciipan_ba-' + version
        return 'netmhciipan_ba'

class NetMHCIIpanEL(IEDBMHCII):
    @property
    def iedb_prediction_method(self):
        """Return the IEDB method name for NetMHCIIpanEL.

        ``'netmhciipan_el'`` gets a ``'-<version>'`` suffix only when
        ``NetMHCIIVersion.netmhcii_pan_version`` is 4.0, 4.2 or 4.3.
        """
        # NOTE(review): 4.1 is deliberately absent from the suffix list;
        # presumably the unsuffixed name already selects it - confirm.
        requested = NetMHCIIVersion.netmhcii_pan_version
        suffix = ('-' + requested) if requested in ['4.0', '4.2', '4.3'] else ''
        return 'netmhciipan_el' + suffix

class NNalign(IEDBMHCII):
Expand Down
6 changes: 6 additions & 0 deletions pvactools/lib/run_argument_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,12 @@ def __init__(self, tool_name, input_file_help):
default=1,
help="Number of threads to use for parallelizing peptide-MHC binding prediction calls.",
)
parser.add_argument(
"--netmhc_pan_version",
ldhtnp marked this conversation as resolved.
Show resolved Hide resolved
choices=["4.3", "4.2", "4.1", "4.0"],
default=None,
ldhtnp marked this conversation as resolved.
Show resolved Hide resolved
help="Specify the version of NetMHCIIpan or NetMHCIIpanEL to be used during the run.",
)
self.parser = parser

def prediction_args(self):
Expand Down
1 change: 1 addition & 0 deletions pvactools/tools/pvacbind/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
'top_score_filter',
'net_chop',
'netmhc_stab',
'netmhc_pan_version',
'calculate_reference_proteome_similarity',
'generate_aggregated_report',
'identify_problematic_amino_acids',
Expand Down
7 changes: 7 additions & 0 deletions pvactools/tools/pvacbind/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,13 @@ def main():
)
valid_algorithms_parser.set_defaults(func=valid_algorithms)

netmhc_pan_version_parser = subparsers.add_parser(
"valid_netmhcpan_versions",
help="Show a list of valid versions of NetMHCIIpan and NetMHCIIpanEL that can be used.",
add_help=False
)
netmhc_pan_version_parser.set_defaults(func=netmhc_pan_version)

allele_specific_cutoffs_parser = subparsers.add_parser(
"allele_specific_cutoffs",
help="Show the allele specific cutoffs",
Expand Down
15 changes: 15 additions & 0 deletions pvactools/tools/pvacbind/netmhc_pan_version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import sys

from pvactools.lib.netmhc_pan_version import NetMHCPanVersion

def define_parser():
    """Return the ``valid_netmhcpan_versions`` argument parser labelled for pvacbind."""
    tool_name = 'pvacbind'
    return NetMHCPanVersion.parser(tool_name)

def main(args_input = sys.argv[1:]):
    """Parse ``args_input`` and let NetMHCPanVersion print the valid versions.

    args_input: command-line tokens for this sub-command. The default is
    ``sys.argv[1:]`` as evaluated when this function is defined.
    """
    parsed_args = define_parser().parse_args(args_input)

    # The parsed ``list`` flag decides whether anything is printed.
    version_lister = NetMHCPanVersion(parsed_args.list)
    version_lister.print_valid_versions()

if __name__ == "__main__":
main()
1 change: 1 addition & 0 deletions pvactools/tools/pvacfuse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
'top_score_filter',
'net_chop',
'netmhc_stab',
'netmhc_pan_version',
'calculate_reference_proteome_similarity',
'generate_protein_fasta',
"generate_aggregated_report",
Expand Down
7 changes: 7 additions & 0 deletions pvactools/tools/pvacfuse/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,13 @@ def define_parser():
)
valid_algorithms_parser.set_defaults(func=valid_algorithms)

netmhc_pan_version_parser = subparsers.add_parser(
"valid_netmhcpan_versions",
help="Show a list of valid versions of NetMHCIIpan and NetMHCIIpanEL that can be used.",
add_help=False
)
netmhc_pan_version_parser.set_defaults(func=netmhc_pan_version)

identify_problematic_amino_acids_parser = subparsers.add_parser(
"identify_problematic_amino_acids",
help="Mark problematic amino acid positions in each epitope or filter entries that have problematic amino acids.",
Expand Down
15 changes: 15 additions & 0 deletions pvactools/tools/pvacfuse/netmhc_pan_version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import sys

from pvactools.lib.netmhc_pan_version import NetMHCPanVersion

def define_parser():
    """Return the ``valid_netmhcpan_versions`` argument parser labelled for pvacfuse."""
    tool_name = 'pvacfuse'
    return NetMHCPanVersion.parser(tool_name)

def main(args_input = sys.argv[1:]):
    """Parse ``args_input`` and let NetMHCPanVersion print the valid versions.

    args_input: command-line tokens for this sub-command. The default is
    ``sys.argv[1:]`` as evaluated when this function is defined.
    """
    parsed_args = define_parser().parse_args(args_input)

    # The parsed ``list`` flag decides whether anything is printed.
    version_lister = NetMHCPanVersion(parsed_args.list)
    version_lister.print_valid_versions()

if __name__ == "__main__":
main()
1 change: 1 addition & 0 deletions pvactools/tools/pvacseq/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
'top_score_filter',
'net_chop',
'netmhc_stab',
'netmhc_pan_version',
'calculate_reference_proteome_similarity',
'transcript_support_level_filter',
'identify_problematic_amino_acids',
Expand Down
7 changes: 7 additions & 0 deletions pvactools/tools/pvacseq/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,13 @@ def define_parser():
)
valid_algorithms_parser.set_defaults(func=valid_algorithms)

netmhc_pan_version_parser = subparsers.add_parser(
"valid_netmhcpan_versions",
help="Show a list of valid versions of NetMHCIIpan and NetMHCIIpanEL that can be used.",
add_help=False
)
netmhc_pan_version_parser.set_defaults(func=netmhc_pan_version)

allele_specific_cutoffs_parser = subparsers.add_parser(
"allele_specific_cutoffs",
help="Show the allele specific cutoffs.",
Expand Down
15 changes: 15 additions & 0 deletions pvactools/tools/pvacseq/netmhc_pan_version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import sys

from pvactools.lib.netmhc_pan_version import NetMHCPanVersion

def define_parser():
    """Return the ``valid_netmhcpan_versions`` argument parser labelled for pvacseq."""
    tool_name = 'pvacseq'
    return NetMHCPanVersion.parser(tool_name)

def main(args_input = sys.argv[1:]):
    """Parse ``args_input`` and let NetMHCPanVersion print the valid versions.

    args_input: command-line tokens for this sub-command. The default is
    ``sys.argv[1:]`` as evaluated when this function is defined.
    """
    parsed_args = define_parser().parse_args(args_input)

    # The parsed ``list`` flag decides whether anything is printed.
    version_lister = NetMHCPanVersion(parsed_args.list)
    version_lister.print_valid_versions()

if __name__ == "__main__":
main()
8 changes: 8 additions & 0 deletions pvactools/tools/pvacseq/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,14 @@ def main(args_input = sys.argv[1:]):
input_file_type = 'vcf'
base_output_dir = os.path.abspath(args.output_dir)

if (args.netmhc_pan_version == '4.0' and args.iedb_install_directory is not None):
raise Exception("Standalone IEDB does not support version 4.0")
ldhtnp marked this conversation as resolved.
Show resolved Hide resolved

if (args.netmhc_pan_version and ("NetMHCIIpan" in args.prediction_algorithms or "NetMHCIIpanEL" in args.prediction_algorithms)):
NetMHCIIVersion.netmhcii_pan_version = args.netmhc_pan_version
elif (args.netmhc_pan_version and not ("NetMHCIIpan" in args.prediction_algorithms or "NetMHCIIpanEL" in args.prediction_algorithms or "all_class_ii" in args.prediction_algorithms)):
raise Exception("NetMHCIIpan and NetMHCIIpanEL version was specified but neither algorithm was selected")
ldhtnp marked this conversation as resolved.
Show resolved Hide resolved

(class_i_prediction_algorithms, class_ii_prediction_algorithms) = split_algorithms(args.prediction_algorithms)
alleles = combine_class_ii_alleles(args.allele)
(class_i_alleles, class_ii_alleles, species) = split_alleles(alleles)
Expand Down
1 change: 1 addition & 0 deletions pvactools/tools/pvacsplice/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
'identify_problematic_amino_acids',
'net_chop',
'netmhc_stab',
'netmhc_pan_version',
'run',
'top_score_filter',
'transcript_support_level_filter',
Expand Down
7 changes: 7 additions & 0 deletions pvactools/tools/pvacsplice/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,13 @@ def define_parser():
)
valid_alleles_parser.set_defaults(func=valid_alleles)

netmhc_pan_version_parser = subparsers.add_parser(
"valid_netmhcpan_versions",
help="Show a list of valid versions of NetMHCIIpan and NetMHCIIpanEL that can be used.",
add_help=False
)
netmhc_pan_version_parser.set_defaults(func=netmhc_pan_version)

allele_specific_cutoffs_parser = subparsers.add_parser(
"allele_specific_cutoffs",
help="Show the allele specific cutoffs.",
Expand Down
15 changes: 15 additions & 0 deletions pvactools/tools/pvacsplice/netmhc_pan_version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import sys

from pvactools.lib.netmhc_pan_version import NetMHCPanVersion

def define_parser():
    """Return the ``valid_netmhcpan_versions`` argument parser labelled for pvacsplice."""
    tool_name = 'pvacsplice'
    return NetMHCPanVersion.parser(tool_name)

def main(args_input = sys.argv[1:]):
    """Parse ``args_input`` and let NetMHCPanVersion print the valid versions.

    args_input: command-line tokens for this sub-command. The default is
    ``sys.argv[1:]`` as evaluated when this function is defined.
    """
    parsed_args = define_parser().parse_args(args_input)

    # The parsed ``list`` flag decides whether anything is printed.
    version_lister = NetMHCPanVersion(parsed_args.list)
    version_lister.print_valid_versions()

if __name__ == "__main__":
main()
1 change: 1 addition & 0 deletions pvactools/tools/pvacvector/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
'visualize',
'valid_alleles',
'valid_algorithms',
'netmhc_pan_version',
'allele_specific_cutoffs',
'download_example_data',
]
Expand Down
7 changes: 7 additions & 0 deletions pvactools/tools/pvacvector/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@ def define_parser():
)
valid_algorithms_parser.set_defaults(func=valid_algorithms)

netmhc_pan_version_parser = subparsers.add_parser(
"valid_netmhcpan_versions",
help="Show a list of valid versions of NetMHCIIpan and NetMHCIIpanEL that can be used.",
add_help=False
)
netmhc_pan_version_parser.set_defaults(func=netmhc_pan_version)

allele_specific_cutoffs_parser = subparsers.add_parser(
"allele_specific_cutoffs",
help="Show the allele specific cutoffs",
Expand Down
15 changes: 15 additions & 0 deletions pvactools/tools/pvacvector/netmhc_pan_version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import sys

from pvactools.lib.netmhc_pan_version import NetMHCPanVersion

def define_parser():
    """Return the ``valid_netmhcpan_versions`` argument parser labelled for pvacvector."""
    tool_name = 'pvacvector'
    return NetMHCPanVersion.parser(tool_name)

def main(args_input = sys.argv[1:]):
    """Parse ``args_input`` and let NetMHCPanVersion print the valid versions.

    args_input: command-line tokens for this sub-command. The default is
    ``sys.argv[1:]`` as evaluated when this function is defined.
    """
    parsed_args = define_parser().parse_args(args_input)

    # The parsed ``list`` flag decides whether anything is printed.
    version_lister = NetMHCPanVersion(parsed_args.list)
    version_lister.print_valid_versions()

if __name__ == "__main__":
main()
Loading