fix: edge functions adding nodes to graphs with chain selections #131 #132 #134 #135 (#136)

* fix edge functions adding nodes to graphs with chain selections #134 (see the filtering sketch after this list)

* change generator comprehension in coordinate update to list comprehension to allow pickling #135

* [docs] update changelog

* update conversion docstrings #132

* update version to 1.2.1

* prevent execution in docs #131
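To make the chain-selection fix concrete, here is a minimal, self-contained sketch of the pattern applied throughout `graphein/protein/edges/distance.py` below: each edge function now filters its atomic dataframe on `node_id` against `list(G.nodes())` before computing distances, so atoms from unselected chains cannot be reintroduced. The simplified `filter_dataframe` and the toy node ids are illustrative assumptions, not Graphein's actual implementation.

```python
import pandas as pd

def filter_dataframe(df: pd.DataFrame, column: str, values: list, keep: bool) -> pd.DataFrame:
    # Simplified stand-in mirroring the call signature used in the diff:
    # keep (or drop) rows whose `column` value appears in `values`.
    mask = df[column].isin(values)
    return df[mask if keep else ~mask].reset_index(drop=True)

# Toy r-group dataframe spanning chains A and B; the graph was built with a
# chain selection that kept only chain A.
rgroup_df = pd.DataFrame(
    {
        "node_id": ["A:LEU:10", "A:VAL:23", "B:ILE:5"],
        "residue_name": ["LEU", "VAL", "ILE"],
    }
)
selected_nodes = ["A:LEU:10", "A:VAL:23"]  # i.e. list(G.nodes())

# Filtering on node_id *before* computing the distance matrix guarantees that
# interacting-atom indices can only refer to nodes already in the graph.
hydrophobics_df = filter_dataframe(rgroup_df, "node_id", selected_nodes, True)
print(hydrophobics_df["node_id"].tolist())  # ['A:LEU:10', 'A:VAL:23']
```

Without a filter like this, interacting-atom lookups could hit rows belonging to chain B and, as described in #134, add residues from unselected chains back into the graph.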
a-r-j authored Mar 16, 2022
1 parent c0b252f commit 2a3b4df
Showing 11 changed files with 2,609 additions and 2,523 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
@@ -1,8 +1,10 @@
### 1.2.1 - UNRELEASED
### 1.2.1 - 16/3/21

* [Feature] - #124 adds support for vector features associated protein protein geometry. #120 #122
* [Feature] - #124 adds visualisation of vector features in 3D graph plots.
* [Feature] - #121 adds functions for saving graph data to PDB files.
* [Bugfix] - #136 changes generator comprehension when updating coordinates in subgraphs to list comprehension to allow pickling
* [Bugfix] - #136 fixes bug in edge construction functions using chain selections where nodes from unselected chains would be added to the graph.

#### Breaking Changes

3,789 changes: 1,896 additions & 1,893 deletions datasets/pscdb/process_data.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/source/conf.py
@@ -29,7 +29,7 @@
author = "Arian Jamasb"

# The full version, including alpha/beta/rc tags
release = "1.2.0"
release = "1.2.1"


# -- General configuration ---------------------------------------------------
2 changes: 1 addition & 1 deletion graphein/__init__.py
@@ -13,5 +13,5 @@
__author__ = "Arian Jamasb <[email protected]>"


__version__ = "1.2.0" # get_versions()["version"]
__version__ = "1.2.1" # get_versions()["version"]
# del get_versions
10 changes: 9 additions & 1 deletion graphein/ml/conversion.py
@@ -1,3 +1,11 @@
"""Utilities for converting Graphein Networks to Geometric Deep Learning formats.
"""
# %%
# Graphein
# Author: Kexin Huang, Arian Jamasb <[email protected]>
# License: MIT
# Project Website: https://github.com/a-r-j/graphein
# Code Repository: https://github.com/a-r-j/graphein
from __future__ import annotations

from typing import List, Optional
@@ -272,7 +280,7 @@ def convert_nx_to_pyg(self, G: nx.Graph) -> Data:
@staticmethod
def convert_nx_to_nx(G: nx.Graph) -> nx.Graph:
"""
Converts NetworkX graph (``nx.Graph``) to NetworkX graph (``nx.Graph``) object. Redundant - returns itself
Converts NetworkX graph (``nx.Graph``) to NetworkX graph (``nx.Graph``) object. Redundant - returns itself.
:param G: NetworkX Graph
:type G: nx.Graph
43 changes: 32 additions & 11 deletions graphein/protein/edges/distance.py
@@ -8,7 +8,7 @@

import logging
from itertools import combinations
from typing import Dict, List, Optional, Tuple
from typing import Dict, List, Optional, Tuple, Union

import networkx as nx
import numpy as np
@@ -132,6 +132,9 @@ def add_hydrophobic_interactions(
hydrophobics_df = filter_dataframe(
rgroup_df, "residue_name", HYDROPHOBIC_RESIS, True
)
hydrophobics_df = filter_dataframe(
hydrophobics_df, "node_id", list(G.nodes()), True
)
distmat = compute_distmat(hydrophobics_df)
interacting_atoms = get_interacting_atoms(5, distmat)
add_interacting_resis(
@@ -180,6 +183,7 @@ def add_hydrogen_bond_interactions(
# For these atoms, find those that are within 3.5A of one another.
if rgroup_df is None:
rgroup_df = G.graph["rgroup_df"]
rgroup_df = filter_dataframe(rgroup_df, "node_id", list(G.nodes()), True)
HBOND_ATOMS = [
"ND", # histidine and asparagine
"NE", # glutamate, tryptophan, arginine, histidine
@@ -224,6 +228,7 @@ def add_ionic_interactions(
if rgroup_df is None:
rgroup_df = G.graph["rgroup_df"]
ionic_df = filter_dataframe(rgroup_df, "residue_name", IONIC_RESIS, True)
ionic_df = filter_dataframe(rgroup_df, "node_id", list(G.nodes()), True)
distmat = compute_distmat(ionic_df)
interacting_atoms = get_interacting_atoms(6, distmat)
add_interacting_resis(G, interacting_atoms, ionic_df, ["ionic"])
@@ -274,6 +279,9 @@ def add_aromatic_interactions(
dfs = []
for resi in AROMATIC_RESIS:
resi_rings_df = get_ring_atoms(pdb_df, resi)
resi_rings_df = filter_dataframe(
resi_rings_df, "node_id", list(G.nodes()), True
)
resi_centroid_df = get_ring_centroids(resi_rings_df)
dfs.append(resi_centroid_df)
aromatic_df = (
@@ -312,9 +320,12 @@ def add_aromatic_sulphur_interactions(
aromatic_sulphur_df = filter_dataframe(
rgroup_df, "residue_name", RESIDUES, True
)
aromatic_sulphur_df = filter_dataframe(
aromatic_sulphur_df, "node_id", list(G.nodes()), True
)
distmat = compute_distmat(aromatic_sulphur_df)
interacting_atoms = get_interacting_atoms(5.3, distmat)
interacting_atoms = zip(interacting_atoms[0], interacting_atoms[1])
interacting_atoms = list(zip(interacting_atoms[0], interacting_atoms[1]))

for (a1, a2) in interacting_atoms:
resi1 = aromatic_sulphur_df.loc[a1, "node_id"]
@@ -339,9 +350,12 @@ def add_cation_pi_interactions(
cation_pi_df = filter_dataframe(
rgroup_df, "residue_name", CATION_PI_RESIS, True
)
cation_pi_df = filter_dataframe(
cation_pi_df, "node_id", list(G.nodes()), True
)
distmat = compute_distmat(cation_pi_df)
interacting_atoms = get_interacting_atoms(6, distmat)
interacting_atoms = zip(interacting_atoms[0], interacting_atoms[1])
interacting_atoms = list(zip(interacting_atoms[0], interacting_atoms[1]))

for (a1, a2) in interacting_atoms:
resi1 = cation_pi_df.loc[a1, "node_id"]
@@ -411,7 +425,7 @@ def add_delaunay_triangulation(

tri = Delaunay(coords) # this is the triangulation
log.debug(
f"Detected {len(tri.simplices)} simplices in the Delaunay Triangulaton."
f"Detected {len(tri.simplices)} simplices in the Delaunay Triangulation."
)
for simplex in tri.simplices:
nodes = [node_map[s] for s in simplex]
@@ -439,10 +453,14 @@ def add_distance_threshold(
:type threshold: float
:return: Graph with distance-based edges added
"""
dist_mat = compute_distmat(G.graph["pdb_df"])
pdb_df = filter_dataframe(
G.graph["pdb_df"], "node_id", list(G.nodes()), True
)
dist_mat = compute_distmat(pdb_df)
interacting_nodes = get_interacting_atoms(threshold, distmat=dist_mat)
interacting_nodes = zip(interacting_nodes[0], interacting_nodes[1])

log.info(f"Found: {len(list(interacting_nodes))} distance edges")
for a1, a2 in interacting_nodes:
n1 = G.graph["pdb_df"].loc[a1, "node_id"]
n2 = G.graph["pdb_df"].loc[a2, "node_id"]
@@ -456,7 +474,7 @@
abs(n1_position - n2_position) > long_interaction_threshold
)

if condition_1 or (condition_2 and not condition_1):
if condition_1 or condition_2:
if G.has_edge(n1, n2):
G.edges[n1, n2]["kind"].add("distance_threshold")
else:
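An aside on the guard simplification in this hunk: `condition_1 or (condition_2 and not condition_1)` and `condition_1 or condition_2` are logically equivalent (if `condition_1` is true the whole expression is true; otherwise it reduces to `condition_2`). A throwaway check, included only for illustration:

```python
from itertools import product

# Exhaustively compare the old and new guard over all boolean inputs.
for c1, c2 in product((False, True), repeat=2):
    assert (c1 or (c2 and not c1)) == (c1 or c2)
print("old and new guards agree for every case")
```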
@@ -498,7 +516,10 @@ def add_k_nn_edges(
:return: Graph with knn-based edges added
:rtype: nx.Graph
"""
dist_mat = compute_distmat(G.graph["pdb_df"])
pdb_df = filter_dataframe(
G.graph["pdb_df"], "node_id", list(G.nodes()), True
)
dist_mat = compute_distmat(pdb_df)

nn = kneighbors_graph(
X=dist_mat,
@@ -512,8 +533,8 @@
# Create iterable of node indices
outgoing = np.repeat(np.array(range(len(G.graph["pdb_df"]))), k)
incoming = nn.indices
interacting_nodes = zip(outgoing, incoming)

interacting_nodes = list(zip(outgoing, incoming))
log.info(f"Found: {len(interacting_nodes)} KNN edges")
for a1, a2 in interacting_nodes:
# Get nodes IDs from indices
n1 = G.graph["pdb_df"].loc[a1, "node_id"]
@@ -536,7 +557,7 @@ def add_k_nn_edges(

# If not on same chain add edge or
# If on same chain and separation is sufficient add edge
if condition_1 or (condition_2 and not condition_1):
if condition_1 or condition_2:
if G.has_edge(n1, n2):
G.edges[n1, n2]["kind"].add("k_nn")
else:
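The `zip(...)` to `list(zip(...))` changes in this file follow from basic iterator behaviour: a `zip` object has no `len()` and is exhausted after a single pass, so materialising it as a list is what lets `add_k_nn_edges` log the edge count and still iterate over the pairs afterwards. A small illustrative sketch, not Graphein code:

```python
pairs = zip([0, 1, 2], [3, 4, 5])
# len(pairs) would raise TypeError, and len(list(pairs)) would consume the
# iterator, leaving nothing for a subsequent for-loop.

pairs = list(zip([0, 1, 2], [3, 4, 5]))  # materialise once
print(f"Found: {len(pairs)} pairs")      # counting no longer consumes anything
for a1, a2 in pairs:                     # the pairs are still available here
    print(a1, a2)
```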
@@ -639,7 +660,7 @@ def node_coords(G: nx.Graph, n: str) -> Tuple[float, float, float]:

def add_interacting_resis(
G: nx.Graph,
interacting_atoms: np.array,
interacting_atoms: np.ndarray,
dataframe: pd.DataFrame,
kind: List[str],
):
2 changes: 1 addition & 1 deletion graphein/protein/subgraphs.py
@@ -62,7 +62,7 @@ def extract_subgraph_from_node_list(
]
if update_coords:
g.graph["coords"] = np.array(
d["coords"] for d in g.nodes(data=True)
[d["coords"] for _, d in g.nodes(data=True)]
)
if recompute_distmat:
if not filter_dataframe:
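The `subgraphs.py` change above is the pickling fix from the changelog: `np.array()` does not consume a generator expression but wraps the generator in a 0-dimensional object array, and generators cannot be pickled; the fix also unpacks the `(node, data)` pairs yielded by `g.nodes(data=True)`. A minimal reproduction of the pickling failure, independent of Graphein:

```python
import pickle

import numpy as np

coords = [(1.0, 2.0, 3.0), (4.0, 5.0, 6.0)]

# Generator comprehension: numpy stores the generator object itself in a
# 0-d object array, and pickling that array fails.
bad = np.array(c for c in coords)
try:
    pickle.dumps(bad)
except TypeError as err:
    print(err)  # cannot pickle 'generator' object

# List comprehension: an ordinary (2, 3) float array that round-trips fine.
good = np.array([c for c in coords])
assert pickle.loads(pickle.dumps(good)).shape == (2, 3)
```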