Skip to content

Commit

Permalink
Merge branch '295-MixMHC2pred-Mouse' into 'develop'
Browse files Browse the repository at this point in the history
Resolve "Support MixMHC2pred for mouse data"

See merge request tron/addannot!256
  • Loading branch information
franla23 committed Oct 26, 2023
2 parents 2a8843f + 446c7d9 commit 5c3ca0a
Show file tree
Hide file tree
Showing 17 changed files with 267 additions and 67 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ NeoFox depends on the following tools:
- BLAST 2.10.1
- netMHCpan 4.1
- netMHCIIpan 4.0
- MixMHCpred 2.2
- MixMHC2pred 2.0.2
- PRIME 2.0
- MixMHCpred 2.2 (optional)
- MixMHC2pred 2.0.2 (optional)
- PRIME 2.0 (optional)

Install from PyPI:
```
Expand Down
Binary file added docs/figures/figure1_v3b.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion docs/source/01_overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ model Neofox's input and output data: neoantigens, patients, MHC alleles and neo

**Figure 1**

![Neofox model](../figures/figure1_v3.png)
![Neofox model](../figures/figure1_v3b.png)

For detailed information about the required input data, output data and usage please refer to the [User guide](03_user_guide.rst).

Expand Down
13 changes: 9 additions & 4 deletions docs/source/02_installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ conda install bioconda::neofox

### Install third-party dependencies

Please, check the licences of third-party dependencies.
**NOTE**: Please, check the licences of third-party dependencies.

#### Install R

Expand Down Expand Up @@ -191,13 +191,18 @@ export NEOFOX_HLA_DATABASE=https://raw.githubusercontent.com/ANHIG/IMGTHLA/Lates

Run the following to configure the NeoFox reference folder:
```
neofox-configure --reference-folder /your/neofox/folder [--install-r-dependencies]
neofox-configure --reference-folder /your/neofox/folder [--install-r-dependencies] [--install-mouse-mixmhc2pred]
```

**NOTE**: when installing from conda `--install-r-dependencies` is not needed.

The above command will install several resources and store in the annotations metadata their version, MD5 checksum and
download timestamp.
The above command will download and transform several resources and store in the annotations metadata their version, MD5 checksum and
download timestamp.


To run NeoFox with MixMHC2pred on mouse data, mouse-specific PWMs are required. For such use cases the reference folder needs to be configured with `--install-mouse-mixmhc2pred`.

Depending on your use case, please check the licences of these third-party resources (see the URLs in `neofox/references/installer.py`).

Unless indicated to the installer by flag `--install-r-dependencies` you will need to install manually some R packages. These packages are the following:
```
Expand Down
8 changes: 4 additions & 4 deletions docs/source/03_02_output_data.md

Large diffs are not rendered by default.

45 changes: 21 additions & 24 deletions neofox/MHC_predictors/MixMHCpred/mixmhc2pred.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@
from neofox.helpers.epitope_helper import EpitopeHelper
from neofox.model.mhc_parser import MhcParser, get_alleles_by_gene

from neofox.references.references import DependenciesConfiguration, MhcDatabase
from neofox.references.references import DependenciesConfiguration, MhcDatabase, \
ReferenceFolder, ORGANISM_HOMO_SAPIENS, \
ORGANISM_MUS_MUSCULUS

from neofox.helpers.runner import Runner

Expand All @@ -47,33 +49,33 @@ class MixMHC2pred:
ANNOTATION_PREFIX_WT = 'MixMHC2pred_WT'

def __init__(self, runner: Runner, configuration: DependenciesConfiguration, mhc_parser: MhcParser,
mhc_database: MhcDatabase):
references: ReferenceFolder):
self.runner = runner
self.configuration = configuration
self.mhc_database = mhc_database
self.mhc_parser = mhc_parser
self.available_alleles = self._load_available_alleles(mhc_database)
self.references = references
self.organism = references.organism
self.available_alleles = self._load_available_alleles()

self.results = None

def _load_available_alleles(self, mhc_database):
def _load_available_alleles(self):
"""
loads file with available HLA II alllels for MixMHC2pred prediction, returns set
loads file with available HLA II alleles for MixMHC2pred prediction, returns list
:return:
"""
if mhc_database.is_homo_sapiens():
if self.organism == ORGANISM_HOMO_SAPIENS:
alleles = pd.read_csv(
self.configuration.mix_mhc2_pred_human_alleles_list, skiprows=2, sep="\t"
)
# run only
else:
# to test if the required PWMdef folder for mouse is downloaded
if self.configuration.mix_mhc2_pred_mouse_alleles_list is not None:
elif self.organism == ORGANISM_MUS_MUSCULUS:
if self.references.mixmhc2pred_alleles_list is not None:
alleles = pd.read_csv(
self.configuration.mix_mhc2_pred_mouse_alleles_list, skiprows=2, sep="\t"
self.references.mixmhc2pred_alleles_list, skiprows=2, sep="\t"
)
else:
logger.warning("The PWMdef folder of mouse has not been downloaded.")
logger.error("The PWMdef for Mouse was not downloaded.")

return list(alleles["AlleleName"])


Expand Down Expand Up @@ -196,15 +198,9 @@ def _parse_mixmhc2pred_output(self, filename: str) -> List[PredictedEpitope]:
return parsed_results

def _mixmhc2prediction(self, isoforms: List[str], potential_ligand_sequences: List[str]) -> List[PredictedEpitope]:
# TODO: define the pwm_path again because the mouse path is only defined by the config
tmptxt = intermediate_files.create_temp_mixmhc2pred(potential_ligand_sequences, prefix="tmp_sequence_")
outtmp = intermediate_files.create_temp_file(prefix="mixmhc2pred", suffix=".txt")

if self.mhc_database.is_homo_sapiens():
pwm_path = os.path.dirname(self.configuration.mix_mhc2_pred_human_alleles_list)
else:
#pwm_path = '/home/nguyenhv/code/MixMHC2pred/2.0/PWMdef/PWMdef_Mouse/' # reference folder
pwm_path = os.path.dirname(self.configuration.mix_mhc2_pred_mouse_alleles_list)
cmd = [
self.configuration.mix_mhc2_pred,
"-a",
Expand All @@ -213,10 +209,12 @@ def _mixmhc2prediction(self, isoforms: List[str], potential_ligand_sequences: Li
tmptxt,
"-o",
outtmp,
"-f",
pwm_path,
"--no_context"
]
if self.organism != ORGANISM_HOMO_SAPIENS:
pwm_dir = self.references.mixmhc2pred_pwm_dir
cmd.extend(["-f", pwm_dir])

self.runner.run_command(cmd)
results = self._parse_mixmhc2pred_output(filename=outtmp)
os.remove(outtmp)
Expand All @@ -236,7 +234,7 @@ def run(self, mhc: List[Mhc2], neoantigen: Neoantigen, uniprot):
neoantigen=neoantigen, lengths=[12, 13, 14, 15, 16, 17, 18, 19, 20, 21], uniprot=uniprot)

if len(potential_ligand_sequences) > 0:
if self.mhc_database.is_homo_sapiens():
if self.organism == ORGANISM_HOMO_SAPIENS:
mhc2_alleles = self.transform_hla_ii_alleles_for_prediction(mhc)
else:
mhc2_alleles = self.transform_h2_alleles_for_prediction(mhc)
Expand All @@ -246,14 +244,13 @@ def run(self, mhc: List[Mhc2], neoantigen: Neoantigen, uniprot):
isoforms=mhc2_alleles, potential_ligand_sequences=potential_ligand_sequences)
else:
logger.warning("None of the MHC II alleles are supported by MixMHC2pred")
print(mhc2_alleles)

def run_peptide(self, peptide: str, isoform: Mhc2Isoform) -> PredictedEpitope:
"""
Performs MixMHC2pred prediction for desired hla allele and writes result to temporary file.
"""
result = None
if self.mhc_database.is_homo_sapiens():
if self.organism == ORGANISM_HOMO_SAPIENS:
isoform_representation = self._get_mixmhc2_isoform_human_representation(isoform)
else:
isoform_representation = self._get_mixmhc2_isoform_mouse_representation(isoform)
Expand Down
2 changes: 1 addition & 1 deletion neofox/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.#


VERSION = "1.1.0b29"
VERSION = "1.1.0"

REFERENCE_FOLDER_ENV = "NEOFOX_REFERENCE_FOLDER"
NEOFOX_BLASTP_ENV = "NEOFOX_BLASTP"
Expand Down
9 changes: 6 additions & 3 deletions neofox/annotator/neoantigen_mhc_binding_annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def __init__(self, references: ReferenceFolder, configuration: DependenciesConfi
self.organism = references.organism
self.uniprot = uniprot
self.proteome_blastp_runner = proteome_blastp_runner
self.references = references

self.mhc_database = references.get_mhc_database()
self.mhc_parser = MhcParser.get_mhc_parser(self.mhc_database)
Expand Down Expand Up @@ -64,7 +65,8 @@ def get_mhc_binding_annotations(self, neoantigen: Neoantigen, patient: Patient):
self.mhc_parser,
neoantigen,
patient,
self.mhc_database
self.mhc_database,
self.references
)

# avoids running MixMHCpred and PRIME for non human organisms
Expand Down Expand Up @@ -159,8 +161,9 @@ def _run_mixmhc2pred(
mhc_parser: MhcParser,
neoantigen: Neoantigen,
patient: Patient,
mhc_database: MhcDatabase
mhc_database: MhcDatabase,
references: ReferenceFolder
):
mixmhc2 = MixMHC2pred(runner, configuration, mhc_parser, mhc_database)
mixmhc2 = MixMHC2pred(runner, configuration, mhc_parser, references)
mixmhc2.run(mhc=patient.mhc2, neoantigen=neoantigen, uniprot=self.uniprot)
return mixmhc2
2 changes: 1 addition & 1 deletion neofox/annotator/neoepitope_mhc_binding_annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def __init__(self, references: ReferenceFolder, configuration: DependenciesConfi
runner=self.runner, configuration=configuration, mhc_parser=self.mhc_parser,
blastp_runner=self.proteome_blastp_runner)
self.mixmhcpred = MixMHCpred(self.runner, self.configuration, self.mhc_parser)
self.mixmhc2pred = MixMHC2pred(self.runner, self.configuration, self.mhc_parser, self.mhc_database)
self.mixmhc2pred = MixMHC2pred(self.runner, self.configuration, self.mhc_parser, references)
self.prime = Prime(self.runner, self.configuration, self.mhc_parser)

def get_mhc_binding_annotations(self, neoepitope: PredictedEpitope) -> PredictedEpitope:
Expand Down
10 changes: 9 additions & 1 deletion neofox/command_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,17 +51,25 @@ def neofox_configure():
action="store_true",
help="install the R dependencies automatically",
)
parser.add_argument(
"--install-mouse-mixmhc2pred",
dest="install_mouse_mixmhc2pred",
action="store_true",
help="get the mouse allele PWMs required to run MixMHC2pred for mouse",
)

args = parser.parse_args()
reference_folder = args.reference_folder
install_r_dependencies = args.install_r_dependencies
install_mouse_mixmhc2pred = args.install_mouse_mixmhc2pred

# makes sure that the output folder exists
os.makedirs(reference_folder, exist_ok=True)

logger.info("Starting the installation of references")
NeofoxReferenceInstaller(
reference_folder=reference_folder, install_r_dependencies=install_r_dependencies
reference_folder=reference_folder, install_r_dependencies=install_r_dependencies,
install_mouse_mixmhc2pred=install_mouse_mixmhc2pred
).install()
logger.info("Finished the installation succesfully!")

Expand Down
52 changes: 44 additions & 8 deletions neofox/references/installer.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
NETMHCPAN_AVAILABLE_ALLELES_MICE_FILE, NETMHC2PAN_AVAILABLE_ALLELES_MICE_FILE, MUS_MUSCULUS_FASTA,
PREFIX_MUS_MUSCULUS, MUS_MUSCULUS_PICKLE, IEDB_FASTA_MUS_MUSCULUS, IEDB_BLAST_PREFIX_HOMO_SAPIENS,
IEDB_BLAST_PREFIX_MUS_MUSCULUS, H2_DATABASE_AVAILABLE_ALLELES_FILE, RESOURCES_VERSIONS,
MIXMHC2PRED_PWM
)
from logzero import logger

Expand All @@ -41,13 +42,16 @@

IEDB_URL = 'http://www.iedb.org/downloader.php?file_name=doc/tcell_full_v3.zip'

MIXMHC2PRED_PWM_MOUSE_URL = "http://ec2-18-188-210-66.us-east-2.compute.amazonaws.com:4000/data/PWMdef/PWMdef_Mouse.zip"


class NeofoxReferenceInstaller(object):
def __init__(self, reference_folder, install_r_dependencies=False):
def __init__(self, reference_folder, install_r_dependencies=False, install_mouse_mixmhc2pred=False):
self.config = DependenciesConfigurationForInstaller()
self.runner = Runner()
self.reference_folder = reference_folder
self.install_r_dependencies = install_r_dependencies
self.install_mouse_mixmhc2pred = install_mouse_mixmhc2pred

def install(self):
# ensures the reference folder exists
Expand All @@ -65,14 +69,20 @@ def install(self):
self._install_r_dependencies()
else:
logger.warning("R dependencies will need to be installed manually")
mixmhc2pred_resources = []
if self.install_mouse_mixmhc2pred:
mixmhc2pred_resources = self._set_mixmhc2pred_pwms()
else:
logger.warning("MixMHC2pred mouse alleles have to be installed manually")
self._save_resources_versions(
iedb_resource=iedb_resource,
hla_resource=hla_resource,
proteome_resources=proteome_resources
proteome_resources=proteome_resources,
mixmhc2pred_resources=mixmhc2pred_resources
)

def _save_resources_versions(
self, iedb_resource, hla_resource, proteome_resources):
self, iedb_resource, hla_resource, proteome_resources, mixmhc2pred_resources):

download_timestamp = datetime.today().strftime('%Y%m%d%H%M%S')
resources_version_file = os.path.join(self.reference_folder, RESOURCES_VERSIONS)
Expand All @@ -81,15 +91,17 @@ def _save_resources_versions(
hla_resource.download_timestamp = download_timestamp
for r in proteome_resources:
r.download_timestamp = download_timestamp
for r in mixmhc2pred_resources:
r.download_timestamp = download_timestamp

resources_version = [
Resource(name="netMHCpan", version="4.1"),
Resource(name="netMHCIIpan", version="4.0"),
Resource(name="mixMHCpred", version="2.1"),
Resource(name="mixMHC2pred", version="1.2"),
Resource(name="mixMHCpred", version="2.2"),
Resource(name="mixMHC2pred", version="2.0.2"),
iedb_resource,
hla_resource
] + proteome_resources
] + proteome_resources + mixmhc2pred_resources

json.dump([r.to_dict() for r in resources_version], open(resources_version_file, "w"), indent=4)

Expand Down Expand Up @@ -242,8 +254,6 @@ def _set_proteome(self):
url=MOUSE_PROTEOME_ISOFORMS, hash=hash_isoforms_mouse),
]

return hash_human, hash_isoforms_human, version_human, hash_mouse, hash_isoforms_mouse, version_mouse

def _prepare_proteome(self, url, url_isoforms, version_url, proteome_file_name, proteome_prefix, proteome_pickle_file_name):
# download proteome
hash = self._download_and_unzip(proteome_file_name, url)
Expand Down Expand Up @@ -347,6 +357,32 @@ def _install_r_dependencies(self):
)
self._run_command(cmd)

def _set_mixmhc2pred_pwms(self):
    """
    Fetches the MixMHC2pred allele PWM definitions for non-human species into the reference folder.
    Only the mouse PWMs are currently supported.
    :return: a single-element list with the Resource describing the downloaded archive (name, URL and MD5 hash)
    """
    logger.info("Installing MixMHC2pred for mouse...")

    source_url = MIXMHC2PRED_PWM_MOUSE_URL

    # destination directory for the PWM definitions inside the reference folder
    pwm_folder = os.path.join(self.reference_folder, MIXMHC2PRED_PWM)
    os.makedirs(pwm_folder, exist_ok=True)

    # the archive keeps the file name it has in the URL
    archive_path = os.path.join(pwm_folder, os.path.basename(source_url))

    # fetch the archive and record its checksum before unpacking
    self._run_command(f"wget {source_url} -O {archive_path}")
    archive_md5 = self._get_md5_hash(archive_path)

    # unpack next to the archive, overwriting files from any previous installation
    self._run_command(f"unzip -o {archive_path} -d {pwm_folder}")

    mouse_pwm_resource = Resource(name="MixMHC2pred_PWM_Mouse", url=source_url, hash=archive_md5)
    return [mouse_pwm_resource]

def _run_command(self, cmd):
logger.info(cmd)
process = subprocess.Popen(
Expand Down
Loading

0 comments on commit 5c3ca0a

Please sign in to comment.