Skip to content

Commit

Permalink
fix: improve support for newer dssp versions #403
Browse files Browse the repository at this point in the history
  • Loading branch information
a-r-j committed Jul 24, 2024
1 parent 27463a5 commit 94d832b
Showing 1 changed file with 28 additions and 13 deletions.
41 changes: 28 additions & 13 deletions graphein/protein/features/nodes/dssp.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import networkx as nx
import pandas as pd
from Bio.PDB.DSSP import dssp_dict_from_pdb_file, residue_max_acc
from loguru import logger

from graphein.protein.resi_atoms import STANDARD_AMINO_ACID_MAPPING_1_TO_3
from graphein.protein.utils import save_pdb_df_to_pdb
Expand Down Expand Up @@ -79,12 +80,13 @@ def add_dssp_df(
:param G: Input protein graph
:param G: nx.Graph
:param dssp_config: DSSPConfig object. Specifies which executable to run. Located in graphein.protein.config
:param dssp_config: DSSPConfig object. Specifies which executable to run.
Located in `:obj:graphein.protein.config`.
:type dssp_config: DSSPConfig, optional
:return: Protein graph with DSSP dataframe added
:rtype: nx.Graph
"""

# if dssp_config is None:
config = G.graph["config"]
pdb_code = G.graph["pdb_code"]
path = G.graph["path"]
Expand All @@ -107,14 +109,14 @@ def add_dssp_df(
if os.path.isfile(config.pdb_dir / (pdb_code + ".pdb")):
pdb_file = config.pdb_dir / (pdb_code + ".pdb")

# get dssp version string
dssp_version = re.search(
r"version ([\d\.]+)", os.popen(f"{executable} --version").read()
).group(
1
) # e.g. "4.0.4"
# Check for existence of pdb file. If not, reconstructs it from the raw df.
if pdb_file:
# get dssp version string
dssp_version = re.search(
r"version ([\d\.]+)", os.popen(f"{executable} --version").read()
).group(
1
) # e.g. "4.0.4"
dssp_dict = dssp_dict_from_pdb_file(
pdb_file, DSSP=executable, dssp_version=dssp_version
)
Expand All @@ -124,17 +126,30 @@ def add_dssp_df(
G.graph["raw_pdb_df"], tmpdirname + f"/{pdb_name}.pdb"
)
dssp_dict = dssp_dict_from_pdb_file(
tmpdirname + f"/{pdb_name}.pdb", DSSP=executable
tmpdirname + f"/{pdb_name}.pdb",
DSSP=executable,
dssp_version=dssp_version,
)

# Newer versions of Biopython return a tuple
if isinstance(dssp_dict, tuple):
dssp_dict = dssp_dict[0]

if len(dssp_dict) == 0:
raise ValueError(
"DSSP could not be calculated. Check DSSP version "
f"({dssp_version}) orthat the input PDB file is valid."
)

if config.verbose:
print(f"Using DSSP executable '{executable}'")
logger.debug(f"Using DSSP executable '{executable}'")

dssp_dict = parse_dssp_df(dssp_dict)
# Convert 1 letter aa code to 3 letter
dssp_dict["aa"] = dssp_dict["aa"].map(STANDARD_AMINO_ACID_MAPPING_1_TO_3)

# Resolve UNKs NOTE: the original didn't work if HETATM residues exist in DSSP output
# Resolve UNKs
# NOTE: the original didn't work if HETATM residues exist in DSSP output
_raw_pdb_df = G.graph["raw_pdb_df"].copy().drop_duplicates("node_id")
_dssp_df_unk = dssp_dict.loc[dssp_dict["aa"] == "UNK"][
["chain", "resnum", "icode"]
Expand Down Expand Up @@ -177,7 +192,7 @@ def add_dssp_df(
dssp_dict.set_index("node_id", inplace=True)

if config.verbose:
print(dssp_dict)
logger.debug(dssp_dict)

# Assign DSSP Dict
G.graph["dssp_df"] = dssp_dict
Expand Down Expand Up @@ -241,7 +256,7 @@ def add_dssp_feature(G: nx.Graph, feature: str) -> nx.Graph:
nx.set_node_attributes(G, dict(dssp_df[feature]), feature)

if config.verbose:
print("Added " + feature + " features to graph nodes")
logger.debug("Added " + feature + " features to graph nodes")

return G

Expand Down

0 comments on commit 94d832b

Please sign in to comment.