Skip to content

Commit

Permalink
Documentation and a minor Trajectory improvement
Browse files Browse the repository at this point in the history
  • Loading branch information
AndrewRadev committed Dec 10, 2024
1 parent 9119336 commit c3696e6
Show file tree
Hide file tree
Showing 8 changed files with 95 additions and 22 deletions.
8 changes: 8 additions & 0 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@
]
# autosummary_generate = True # Turn on sphinx.ext.autosummary
# Directories that sphinx-autoapi scans for source code to document. The path
# is relative to the Sphinx source directory (docs/source), so it points at
# the repository's top-level lib/ directory.
autoapi_dirs = ['../../lib']
# Flags controlling what sphinx-autoapi renders for each documented object.
autoapi_options = [
    # Show the children (methods, attributes, ...) of each object.
    'members',
    # Include objects that have no docstring.
    'undoc-members',
    # Include private objects (e.g. `_foo`).
    'private-members',
    # List the base classes below each class signature.
    'show-inheritance',
    # Include special objects (e.g. `__foo__`).
    'special-members',
    # Include objects imported from the same top-level package or module.
    'imported-members',
]

# Standard Sphinx settings: where to look for templates, and which paths to
# ignore when collecting source files.
templates_path = ['_templates']
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
Expand Down
37 changes: 29 additions & 8 deletions lib/normal_modes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
Generating normal modes, reading and writing mode data from/to files.
"""

# Allows us to refer to the current class in a type signature:
from __future__ import annotations

import numpy as np
from pathlib import Path
from prody import parsePDB, writeNMD, EDA, Ensemble
Expand All @@ -18,7 +21,7 @@ def generate_nmd_from_pdb(pdb_path: Path|str, nmd_path: Path|str, mode_count=10)
This can be an expensive process, which is why it's encapsulated in a
function that goes from file to file and doesn't produce an in-memory
structure. To work with the resulting normal modes, use
`NormalModes.from_nmd`. which is a cheap operation.
:func:`NormalModes.from_nmd`, which is a cheap operation.
"""
pdb_path = str(pdb_path)
nmd_path = str(nmd_path)
Expand All @@ -44,21 +47,35 @@ def generate_nmd_from_pdb(pdb_path: Path|str, nmd_path: Path|str, mode_count=10)


class ValidationError(Exception):
    """
    Signals that pieces of normal mode information do not agree with each
    other.

    Example: the number of coordinates given differs from the number of
    vectors in a mode.
    """


class NormalModes:
"""
An object that represents a collection of normal modes
Primarily parsed from an NMD file generated by the
:func:`generate_nmd_from_pdb` function, but can be constructed manually.
"""

@staticmethod
def from_nmd(nmd_path: Path|str):
def from_nmd(nmd_path: Path|str) -> NormalModes:
"""
Create a :class:`NormalModes` instance by parsing the given input file.
A fast operation.
"""
nm = NormalModes()
nm.parse_nmd_file(nmd_path)

return nm

def __init__(self):
"""
NormalModes object with input parameters.
"""
self.coordinates = None
self.atomnames = None
self.resnames = None
Expand All @@ -67,12 +84,13 @@ def __init__(self):

def parse_nmd_file(self, nmd_path):
"""
Parse the NMD file and extract atomnames, resnames, resids,
coordinates, and modes. Structure:
Parse the given NMD file and extract atomnames, resnames, resids,
coordinates, and modes into a :class:`NormalModes` object. Expected
structure::
atomnames CA CA CA ...
resnames SER ARG LEU ...
resids 0 1 2 3 4 5 6 7 8 11 12 ... <- Note: possible to have gaps
resids 0 1 2 3 4 5 6 7 8 11 12 ...
coordinates 54.260 50.940 73.060 ...
mode 1 29.61 -0.008 -0.005 0.012 ...
mode 2 18.28 -0.009 0.004 -0.008 ...
Expand Down Expand Up @@ -174,4 +192,7 @@ def _validate_modes(self):
raise ValidationError(message)

def _group_in_threes(self, flat_coordinates):
    """
    Utility that turns a flat sequence of values into a list of groups of
    three, by delegating to ``util.batched`` with ``n=3``.

    NOTE(review): ``strict=True`` presumably makes ``util.batched`` reject
    input whose length is not a multiple of three -- confirm against
    ``util``.
    """
    triplets = util.batched(flat_coordinates, n=3, strict=True)
    return list(triplets)
10 changes: 7 additions & 3 deletions lib/segmentation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@


class ParsingError(Exception):
    """
    Raised when a problem is encountered while parsing the inputs. The
    original exception that triggered it will be available as
    ``__cause__``.
    """


Expand All @@ -32,13 +36,13 @@ def __init__(self, *paths: list[Path|str]):
@abstractmethod
def parse(self) -> Iterator[Tuple[str, int, str]]:
"""
The return value is a collection of items of the form:
The return value is a collection of items of the form::
("<method name>", <number of domains>, "<chopping>")
They could be collected into a list or yield-ed. The
`write_segmentations` function only expects that this method returns an
iterable object.
:func:`write_segmentations` function only expects that this method
returns an iterable object.
"""
raise NotImplementedError

Expand Down
4 changes: 2 additions & 2 deletions lib/segmentation/chainsaw.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""
Chainsaw predicts domain segmentation based on a deep learning model. It uses
the `stride` tool to determine secondary structure, and produces a TSV file
the ``stride`` tool to determine secondary structure, and produces a TSV file
with a description of the output.
Source: <https://github.com/JudeWells/Chainsaw>
Source: https://github.com/JudeWells/Chainsaw
"""

import csv
Expand Down
11 changes: 6 additions & 5 deletions lib/segmentation/geostas.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
external project itself.
Documentation can be found at:
<http://thegrantlab.org/bio3d/reference/geostas.html>
http://thegrantlab.org/bio3d/reference/geostas.html
"""

import json
Expand Down Expand Up @@ -47,8 +47,9 @@ def parse(self) -> Iterator[Tuple[str, int, str]]:
def _translate_atoms_to_residues(self, atom_data, atom_groups):
"""
The output of GeoStaS is (alpha carbon) atom indices (1-indexed) while
we need residues. We need to translate the sequential atom index into
the residue it corresponds to by using an MDAnalysis universe.
the output of the parser is in residues. This method translates the
sequential atom indices into the residues they correspond to by using
an MDAnalysis universe.
"""
return [
[
Expand All @@ -60,8 +61,8 @@ def _translate_atoms_to_residues(self, atom_data, atom_groups):

def _generate_chopping(self, residue_groups):
"""
Input: [[1, 2, 3], [10, 11, 20, 21], ...]
Output: 1-3,10-11_20,21,...
Input: ``[[1, 2, 3], [10, 11, 20, 21], ...]``
Output: ``1-3,10-11_20,21,...``
"""
# Structure: { group: [(start, end), (start, end), ...] }
groupings = {}
Expand Down
4 changes: 2 additions & 2 deletions lib/segmentation/merizo.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
we're using our own fork to fix an issue with recognizing certain residue codes
like HIP and CYX.
Original source: <https://github.com/psipred/Merizo>
Fork source: <https://github.com/AndrewRadev/Merizo>
* Original source: https://github.com/psipred/Merizo
* Fork source: https://github.com/AndrewRadev/Merizo
"""

import csv
Expand Down
42 changes: 41 additions & 1 deletion lib/trajectory.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

class Trajectory:
"""
A Trajectory object is a wrapper for the MDAnalysis Universe class. It
A Trajectory object is a wrapper for the MDAnalysis ``Universe`` class. It
encapsulates some MDA-specific function calls to make it simpler to use for
the purposes of this codebase.
Expand All @@ -26,6 +26,11 @@ class Trajectory:

@staticmethod
def from_paths(topology_path, trajectory_path=None):
"""
Build a :class:`Trajectory` object from a topology file and a
trajectory file. Mirrors the MDAnalysis Universe construction method.
"""

if trajectory_path:
mda_universe = mda.Universe(topology_path, trajectory_path)
else:
Expand All @@ -35,6 +40,22 @@ def from_paths(topology_path, trajectory_path=None):

@staticmethod
def from_ca_frames(data, topology_attr={}):
"""
A shortcut to create an in-memory MDAnalysis Universe with the given
data as a list of coordinate matrices, each a list of triplets.
Each coordinate is considered to be an alpha carbon, so the number of
residues is set to the number of given atoms. Some additional
(optional) topology attributes you might provide:
* names: atom names that match the coordinates (default to "CA")
* resids: residue ids that correspond to the given atoms
* resnames: residue names that correspond to the given residue ids
All other topology attributes are passed along to the MDAnalysis
Universe method ``add_TopologyAttr``.
"""

frame_count = len(data)
n_atoms = len(data[0])

Expand All @@ -46,6 +67,9 @@ def from_ca_frames(data, topology_attr={}):
trajectory=True,
)

if 'names' not in topology_attr:
topology_attr['names'] = ['CA'] * n_atoms

for key, value in topology_attr.items():
u.add_TopologyAttr(key, value)

Expand All @@ -65,15 +89,31 @@ def frames(self):
return self.mda_universe.trajectory

def __next__(self):
    """
    Delegates to MDAnalysis to shift the internal trajectory of the
    Universe forward.

    Returns whatever ``next()`` produces for
    ``self.mda_universe.trajectory``.

    NOTE(review): presumably that is the MDAnalysis ``Timestep`` of the new
    frame, with ``StopIteration`` propagating once the last frame has been
    passed -- confirm against the MDAnalysis reader in use.
    """
    return next(self.mda_universe.trajectory)

def select_atoms(self, *args, **kwargs):
    """
    Delegates to MDAnalysis to return an AtomGroup.

    Every positional argument (the selection strings) and every keyword
    argument is forwarded unchanged to ``select_atoms`` on the wrapped
    Universe, so selection options such as ``updating=True`` can be used
    through this wrapper as well. Returns whatever the wrapped call
    returns.
    """
    return self.mda_universe.select_atoms(*args, **kwargs)

def write_static(self, path: Path|str, selection='all'):
"""
Write the static coordinates of the current frame into the given file.
It's expected that it's a PDB, but anything that MDAnalysis accepts
will work.
"""
atoms = self.select_atoms(selection)
atoms.write(str(path))

def write_frames(self, path: Path|str, selection='all'):
"""
Write the full trajectory to a file. It's expected that it's a PDB, but
anything that MDAnalysis accepts will work.
"""
atoms = self.select_atoms(selection)
atoms.write(str(path), frames='all')
1 change: 0 additions & 1 deletion tests/segmentation_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,6 @@ def _create_pdb_file(self, name, atom_count):
coordinates = np.zeros((atom_count, 3))

trajectory = Trajectory.from_ca_frames([coordinates], topology_attr={
'names': ['CA'] * atom_count,
'resids': np.arange(atom_count) + 1,
})

Expand Down

0 comments on commit c3696e6

Please sign in to comment.