From bd61970a3e7441b6b299e5018cac35aaabd8b8ec Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Sun, 10 Nov 2024 17:17:47 +0100 Subject: [PATCH 1/4] Update fingerprint.py addition of run_bridged_analysis --- prolif/fingerprint.py | 49 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/prolif/fingerprint.py b/prolif/fingerprint.py index 29c059e..b24fd47 100644 --- a/prolif/fingerprint.py +++ b/prolif/fingerprint.py @@ -210,6 +210,7 @@ def __init__( self.count = count self._set_interactions(interactions, parameters) self.vicinity_cutoff = vicinity_cutoff + self.parameters=parameters def _set_interactions(self, interactions, parameters): # read interactions to compute @@ -1062,3 +1063,51 @@ def plot_3d( only_interacting=only_interacting, remove_hydrogens=remove_hydrogens, ) + + def run_bridged_analysis(self, traj, lig, prot, water, **kwargs): + # run analysis twice, once on ligand-water, then on water-prot + ifp_stores = [] + for pair in [(lig, water), (water, prot)]: + fp = Fingerprint( + interactions=["HBDonor", "HBAcceptor"], parameters=self.parameters + ) + fp.run(traj, *pair, **kwargs) + ifp_stores.append(fp.ifp) + + # merge results from the 2 runs on matching water residues + combined = {} + for (frame1, ifp1), ifp2 in zip(ifp_stores[0].items(), ifp_stores[1].values()): + ifp = IFP() + + # For each ligand-water interaction in ifp1 + for (lig_res, water_res), interaction_data_ifp1 in ifp1.items(): + # Find matching water-protein interactions in ifp2 based on water residue + matching_entries = { + (lig_res, prot_res): interaction_data_ifp2 + for (water_res2, prot_res), interaction_data_ifp2 in ifp2.items() + if water_res2 == water_res + } + + # Merge data from ifp1 and ifp2 for each matching pair + for (lig_res, prot_res), interaction_data_ifp2 in matching_entries.items(): + # Combine keys from both ifp1 and ifp2 + all_keys = set(interaction_data_ifp1.keys()).union(interaction_data_ifp2.keys()) + combined_interaction_data = {} + + # Add merged values or single values if unique + for key in all_keys: + values_ifp1 = interaction_data_ifp1.get(key, "") + values_ifp2 = interaction_data_ifp2.get(key, "") + # Combine the values if both are present, otherwise take the single value + combined_interaction_data[key] = f"{values_ifp1};{values_ifp2}" if values_ifp1 and values_ifp2 else values_ifp1 or values_ifp2 + + # Store the combined interaction data + ifp.update({(lig_res, prot_res): combined_interaction_data}) + + combined[frame1] = ifp + + # Add to existing results if any + self.ifp = getattr(self, "ifp", {}) + self.ifp.update(combined) + + return self From df3c1bafe8abe135f13dfe5211b60aa5b76a3c57 Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Fri, 15 Nov 2024 00:24:21 +0100 Subject: [PATCH 2/4] Update fingerprint.py adjusted to have the metadata under the WaterBridge key with addition of _ligand_water and _water_protein additions --- prolif/fingerprint.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/prolif/fingerprint.py b/prolif/fingerprint.py index b24fd47..9aa36bb 100644 --- a/prolif/fingerprint.py +++ b/prolif/fingerprint.py @@ -1090,18 +1090,26 @@ def run_bridged_analysis(self, traj, lig, prot, water, **kwargs): # Merge data from ifp1 and ifp2 for each matching pair for (lig_res, prot_res), interaction_data_ifp2 in matching_entries.items(): - # Combine keys from both ifp1 and ifp2 - all_keys = set(interaction_data_ifp1.keys()).union(interaction_data_ifp2.keys()) combined_interaction_data = {} - # Add merged values or single values if unique - for key in all_keys: - values_ifp1 = interaction_data_ifp1.get(key, "") - values_ifp2 = interaction_data_ifp2.get(key, "") - # Combine the values if both are present, otherwise take the single value - combined_interaction_data[key] = f"{values_ifp1};{values_ifp2}" if values_ifp1 and values_ifp2 else values_ifp1 or values_ifp2 + # Prepare merged interaction data under a single key "WaterBridge" + combined_interaction_data["WaterBridge"] = {} - # Store the combined interaction data + # Collect and merge metadata for `ifp1` (ligand-water) interactions + for interaction_type, metadata_list in interaction_data_ifp1.items(): + for metadata in metadata_list: + for key, value in metadata.items(): + combined_key = f"{key}_ligand_water" + combined_interaction_data["WaterBridge"][combined_key] = value + + # Collect and merge metadata for `ifp2` (water-protein) interactions + for interaction_type, metadata_list in interaction_data_ifp2.items(): + for metadata in metadata_list: + for key, value in metadata.items(): + combined_key = f"{key}_water_protein" + combined_interaction_data["WaterBridge"][combined_key] = value + + # Store the combined interaction data for the (lig_res, prot_res) pair in `ifp` ifp.update({(lig_res, prot_res): combined_interaction_data}) combined[frame1] = ifp From 0583aac3255ff87c9aff1e2168ddc6be54ec3f94 Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Sat, 16 Nov 2024 00:58:05 +0100 Subject: [PATCH 3/4] Update fingerprint.py added suggested adjustments --- prolif/fingerprint.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/prolif/fingerprint.py b/prolif/fingerprint.py index 9aa36bb..cc4018d 100644 --- a/prolif/fingerprint.py +++ b/prolif/fingerprint.py @@ -1066,6 +1066,7 @@ def plot_3d( def run_bridged_analysis(self, traj, lig, prot, water, **kwargs): # run analysis twice, once on ligand-water, then on water-prot + self.ifp = getattr(self, "ifp", {}) ifp_stores = [] for pair in [(lig, water), (water, prot)]: fp = Fingerprint( @@ -1112,10 +1113,11 @@ def run_bridged_analysis(self, traj, lig, prot, water, **kwargs): # Store the combined interaction data for the (lig_res, prot_res) pair in `ifp` ifp.update({(lig_res, prot_res): combined_interaction_data}) - combined[frame1] = ifp + if frame1 not in self.ifp: + self.ifp[frame1] = IFP() + self.ifp[frame1].update(ifp) # Add to existing results if any - self.ifp = getattr(self, "ifp", {}) self.ifp.update(combined) return self From f8bb5afd0b7f4af5e308e0d3c8631970f2560550 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Bouysset?= Date: Sat, 16 Nov 2024 22:26:00 +0000 Subject: [PATCH 4/4] fix and refactor merged data --- prolif/fingerprint.py | 119 +++++++++++++++++++++++++----------------- 1 file changed, 71 insertions(+), 48 deletions(-) diff --git a/prolif/fingerprint.py b/prolif/fingerprint.py index cc4018d..bf952b7 100644 --- a/prolif/fingerprint.py +++ b/prolif/fingerprint.py @@ -34,6 +34,7 @@ import dill import multiprocess as mp import numpy as np +from MDAnalysis.converters.RDKit import set_converter_cache_size from rdkit import Chem from tqdm.auto import tqdm @@ -210,7 +211,7 @@ def __init__( self.count = count self._set_interactions(interactions, parameters) self.vicinity_cutoff = vicinity_cutoff - self.parameters=parameters + self.parameters = parameters def _set_interactions(self, interactions, parameters): # read interactions to compute @@ -1065,59 +1066,81 @@ def plot_3d( ) def run_bridged_analysis(self, traj, lig, prot, water, **kwargs): + """ + TODO + """ + kwargs.pop("n_jobs", None) + set_converter_cache_size(3) + # run analysis twice, once on ligand-water, then on water-prot - self.ifp = getattr(self, "ifp", {}) - ifp_stores = [] + ifp_stores: list[dict[int, IFP]] = [] for pair in [(lig, water), (water, prot)]: fp = Fingerprint( interactions=["HBDonor", "HBAcceptor"], parameters=self.parameters ) - fp.run(traj, *pair, **kwargs) + fp.run(traj, *pair, n_jobs=1, **kwargs) ifp_stores.append(fp.ifp) # merge results from the 2 runs on matching water residues - combined = {} - for (frame1, ifp1), ifp2 in zip(ifp_stores[0].items(), ifp_stores[1].values()): - ifp = IFP() - - # For each ligand-water interaction in ifp1 - for (lig_res, water_res), interaction_data_ifp1 in ifp1.items(): - # Find matching water-protein interactions in ifp2 based on water residue - matching_entries = { - (lig_res, prot_res): interaction_data_ifp2 - for (water_res2, prot_res), interaction_data_ifp2 in ifp2.items() - if water_res2 == water_res - } - - # Merge data from ifp1 and ifp2 for each matching pair - for (lig_res, prot_res), interaction_data_ifp2 in matching_entries.items(): - combined_interaction_data = {} - - # Prepare merged interaction data under a single key "WaterBridge" - combined_interaction_data["WaterBridge"] = {} - - # Collect and merge metadata for `ifp1` (ligand-water) interactions - for interaction_type, metadata_list in interaction_data_ifp1.items(): - for metadata in metadata_list: - for key, value in metadata.items(): - combined_key = f"{key}_ligand_water" - combined_interaction_data["WaterBridge"][combined_key] = value - - # Collect and merge metadata for `ifp2` (water-protein) interactions - for interaction_type, metadata_list in interaction_data_ifp2.items(): - for metadata in metadata_list: - for key, value in metadata.items(): - combined_key = f"{key}_water_protein" - combined_interaction_data["WaterBridge"][combined_key] = value - - # Store the combined interaction data for the (lig_res, prot_res) pair in `ifp` - ifp.update({(lig_res, prot_res): combined_interaction_data}) - - if frame1 not in self.ifp: - self.ifp[frame1] = IFP() - self.ifp[frame1].update(ifp) - - # Add to existing results if any - self.ifp.update(combined) - + self.ifp = getattr(self, "ifp", {}) + for (frame, ifp1), ifp2 in zip(ifp_stores[0].items(), ifp_stores[1].values()): + # for each ligand-water interaction in ifp1 + for data1 in ifp1.interactions(): + # for each water-protein interaction in ifp2 where water1 == water2 + for data2 in [ + d2 for d2 in ifp2.interactions() if d2.ligand == data1.protein + ]: + # construct merged metadata + metadata = ( + { + "indices": { + "ligand": data1.metadata["indices"]["ligand"], + "protein": data2.metadata["indices"]["protein"], + "water": tuple( + set().union( + data1.metadata["indices"]["protein"], + data2.metadata["indices"]["ligand"], + ) + ), + }, + "parent_indices": { + "ligand": data1.metadata["parent_indices"]["ligand"], + "protein": data2.metadata["parent_indices"]["protein"], + "water": tuple( + set().union( + data1.metadata["parent_indices"]["protein"], + data2.metadata["parent_indices"]["ligand"], + ) + ), + }, + "water_residue": data1.protein, + "ligand_role": data1.interaction, + "protein_role": ( # invert role + "HBDonor" + if data2.interaction == "HBAcceptor" + else "HBAcceptor" + ), + **{ + f"{key}{suffix}": data.metadata[key] + for suffix, data in [ + ("_ligand_water", data1), + ("_water_protein", data2), + ] + for key in ["distance", "DHA_angle"] + }, + }, + ) + + # store metadata + if frame not in self.ifp: + ifp = self.ifp[frame] = IFP() + ifp = self.ifp[frame] + if int_data := ifp.get((data1.ligand, data2.protein)): + if "WaterBridge" in int_data: + int_data["WaterBridge"].append(metadata) + else: + int_data["WaterBridge"] = [metadata] + else: + ifp[data1.ligand, data2.protein] = {"WaterBridge": [metadata]} + return self