Documentation and a minor Trajectory improvement

AndrewRadev · Dec 10, 2024 · c3696e6 · c3696e6
1 parent 9119336
commit c3696e6
Show file tree

Hide file tree

Showing 8 changed files with 95 additions and 22 deletions.
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -26,6 +26,14 @@
     ]
 # autosummary_generate = True  # Turn on sphinx.ext.autosummary
 autoapi_dirs = ['../../lib']
+autoapi_options = [
+    'members',
+    'undoc-members',
+    'private-members',
+    'show-inheritance',
+    'special-members',
+    'imported-members',
+]
 
 templates_path = ['_templates']
 exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

diff --git a/lib/normal_modes.py b/lib/normal_modes.py
@@ -2,6 +2,9 @@
 Generating normal modes, reading and writing mode data from/to files.
 """
 
+# Allows us to refer to the current class in a type signature:
+from __future__ import annotations
+
 import numpy as np
 from pathlib import Path
 from prody import parsePDB, writeNMD, EDA, Ensemble
@@ -18,7 +21,7 @@ def generate_nmd_from_pdb(pdb_path: Path|str, nmd_path: Path|str, mode_count=10)
     This can be an expensive process, which is why it's encapsulated in a
     function that goes from file to file and doesn't produce an in-memory
     structure. To work with the resulting normal modes, use
-    `NormalModes.from_nmd`. which is a cheap operation.
+    :meth:`NormalModes.from_nmd`, which is a cheap operation.
     """
     pdb_path = str(pdb_path)
     nmd_path = str(nmd_path)
@@ -44,21 +47,35 @@ def generate_nmd_from_pdb(pdb_path: Path|str, nmd_path: Path|str, mode_count=10)
 
 
 class ValidationError(Exception):
+    """
+    Represents a mismatch in normal mode information
+
+    Example: the number of coordinates given versus the number of vectors in
+    the mode.
+    """
     pass
 
 
 class NormalModes:
+    """
+    An object that represents a collection of normal modes
+
+    Primarily parsed from an NMD file generated by the
+    :func:`generate_nmd_from_pdb` function, but can be constructed manually.
+    """
+
     @staticmethod
-    def from_nmd(nmd_path: Path|str):
+    def from_nmd(nmd_path: Path|str) -> NormalModes:
+        """
+        Create a :class:`NormalModes` instance by parsing the given input file.
+        A fast operation.
+        """
         nm = NormalModes()
         nm.parse_nmd_file(nmd_path)
 
         return nm
 
     def __init__(self):
-        """
-        NormalModes object with input parameters.
-        """
         self.coordinates = None
         self.atomnames = None
         self.resnames = None
@@ -67,12 +84,13 @@ def __init__(self):
 
     def parse_nmd_file(self, nmd_path):
         """
-        Parse the NMD file and extract atomnames, resnames, resids,
-        coordinates, and modes. Structure:
+        Parse the given NMD file and extract atomnames, resnames, resids,
+        coordinates, and modes into a :class:`NormalModes` object. Expected
+        structure::
 
             atomnames CA CA CA ...
             resnames SER ARG LEU ...
-            resids 0 1 2 3 4 5 6 7 8 11 12 ... <- Note: possible to have gaps
+            resids 0 1 2 3 4 5 6 7 8 11 12 ...
             coordinates 54.260 50.940 73.060 ...
             mode 1 29.61 -0.008 -0.005 0.012 ...
             mode 2 18.28 -0.009 0.004 -0.008 ...
@@ -174,4 +192,7 @@ def _validate_modes(self):
                 raise ValidationError(message)
 
     def _group_in_threes(self, flat_coordinates):
+        """
+        A utility function that groups a flat sequence of values into triplets.
+        """
         return list(util.batched(flat_coordinates, n=3, strict=True))
diff --git a/lib/segmentation/__init__.py b/lib/segmentation/__init__.py
@@ -10,6 +10,10 @@
 
 
 class ParsingError(Exception):
+    """
+    Raised when an issue is encountered while parsing the inputs. The original
+    exception that triggered it is available as ``__cause__``.
+    """
     pass
 
 
@@ -32,13 +36,13 @@ def __init__(self, *paths: list[Path|str]):
     @abstractmethod
     def parse(self) -> Iterator[Tuple[str, int, str]]:
         """
-        The return value is a collection of items of the form:
+        The return value is a collection of items of the form::
 
             ("<method name>", <number of domains>, "<chopping>")
 
         They could be collected into a list or yield-ed. The
-        `write_segmentations` function only expects that this method returns an
-        iterable object.
+        :func:`write_segmentations` function only expects that this method
+        returns an iterable object.
         """
         raise NotImplementedError
 

diff --git a/lib/segmentation/chainsaw.py b/lib/segmentation/chainsaw.py
@@ -1,9 +1,9 @@
 """
 Chainsaw predicts domain segmentation based on a deep learning model. It uses
-the `stride` tool to determine secondary structure, and produces a TSV file
+the ``stride`` tool to determine secondary structure, and produces a TSV file
 with a description of the output.
 
-Source: <https://github.com/JudeWells/Chainsaw>
+Source: https://github.com/JudeWells/Chainsaw
 """
 
 import csv

diff --git a/lib/segmentation/geostas.py b/lib/segmentation/geostas.py
@@ -6,7 +6,7 @@
 external project itself.
 
 Documentation can be found at:
-<http://thegrantlab.org/bio3d/reference/geostas.html>
+http://thegrantlab.org/bio3d/reference/geostas.html
 """
 
 import json
@@ -47,8 +47,9 @@ def parse(self) -> Iterator[Tuple[str, int, str]]:
     def _translate_atoms_to_residues(self, atom_data, atom_groups):
         """
         The output of GeoStaS is (alpha carbon) atom indices (1-indexed) while
-        we need residues. We need to translate the sequential atom index into
-        the residue it corresponds to by using an MDAnalysis universe.
+        the output of the parser is in residues. This method translates the
+        sequential atom indices into the residues they correspond to by using
+        an MDAnalysis universe.
         """
         return [
             [
@@ -60,8 +61,8 @@ def _translate_atoms_to_residues(self, atom_data, atom_groups):
 
     def _generate_chopping(self, residue_groups):
         """
-        Input: [[1, 2, 3], [10, 11, 20, 21], ...]
-        Output: 1-3,10-11_20,21,...
+        Input: ``[[1, 2, 3], [10, 11, 20, 21], ...]``
+        Output: ``1-3,10-11_20-21,...``
         """
         # Structure: { group: [(start, end), (start, end), ...] }
         groupings = {}

diff --git a/lib/segmentation/merizo.py b/lib/segmentation/merizo.py
@@ -3,8 +3,8 @@
 we're using our own fork to fix an issue with recognizing certain residue codes
 like HIP and CYX.
 
-Original source: <https://github.com/psipred/Merizo>
-Fork source:     <https://github.com/AndrewRadev/Merizo>
+* Original source: https://github.com/psipred/Merizo
+* Fork source: https://github.com/AndrewRadev/Merizo
 """
 
 import csv

diff --git a/lib/trajectory.py b/lib/trajectory.py
@@ -11,7 +11,7 @@
 
 class Trajectory:
     """
-    A Trajectory object is a wrapper for the MDAnalysis Universe class. It
+    A Trajectory object is a wrapper for the MDAnalysis ``Universe`` class. It
     encapsulates some MDA-specific function calls to make it simpler to use for
     the purposes of this codebase.
 
@@ -26,6 +26,11 @@ class Trajectory:
 
     @staticmethod
     def from_paths(topology_path, trajectory_path=None):
+        """
+        Build a :class:`Trajectory` object from a topology file and an optional
+        trajectory file. Mirrors the MDAnalysis Universe construction method.
+        """
+
         if trajectory_path:
             mda_universe = mda.Universe(topology_path, trajectory_path)
         else:
@@ -35,6 +40,22 @@ def from_paths(topology_path, trajectory_path=None):
 
     @staticmethod
     def from_ca_frames(data, topology_attr={}):
+        """
+        A shortcut to create an in-memory MDAnalysis Universe with the given
+        data as a list of coordinate matrices, each a list of triplets.
+
+        Each coordinate is considered to be an alpha carbon, so the number of
+        residues is set to the number of given atoms. Some additional
+        (optional) topology attributes you might provide:
+
+        * names: atom names that match the coordinates (defaults to "CA")
+        * resids: residue ids that correspond to the given atoms
+        * resnames: residue names that correspond to the given residue ids
+
+        All other topology attributes are passed along to the MDAnalysis
+        Universe method ``add_TopologyAttr``.
+        """
+
         frame_count = len(data)
         n_atoms     = len(data[0])
 
@@ -46,6 +67,9 @@ def from_ca_frames(data, topology_attr={}):
             trajectory=True,
         )
 
+        topology_attr = dict(topology_attr)  # copy: don't mutate the shared `{}` default
+        topology_attr.setdefault('names', ['CA'] * n_atoms)
+
         for key, value in topology_attr.items():
             u.add_TopologyAttr(key, value)
 
@@ -65,15 +89,31 @@ def frames(self):
         return self.mda_universe.trajectory
 
     def __next__(self):
+        """
+        Delegates to MDAnalysis to shift the internal trajectory of the
+        Universe forward.
+        """
         return next(self.mda_universe.trajectory)
 
     def select_atoms(self, *args):
+        """
+        Delegates to MDAnalysis to return an AtomGroup.
+        """
         return self.mda_universe.select_atoms(*args)
 
     def write_static(self, path: Path|str, selection='all'):
+        """
+        Write the static coordinates of the current frame into the given file.
+        It's expected that it's a PDB, but anything that MDAnalysis accepts
+        will work.
+        """
         atoms = self.select_atoms(selection)
         atoms.write(str(path))
 
     def write_frames(self, path: Path|str, selection='all'):
+        """
+        Write the full trajectory to a file. It's expected that it's a PDB, but
+        anything that MDAnalysis accepts will work.
+        """
         atoms = self.select_atoms(selection)
         atoms.write(str(path), frames='all')
diff --git a/tests/segmentation_test.py b/tests/segmentation_test.py
@@ -131,7 +131,6 @@ def _create_pdb_file(self, name, atom_count):
         coordinates = np.zeros((atom_count, 3))
 
         trajectory = Trajectory.from_ca_frames([coordinates], topology_attr={
-            'names': ['CA'] * atom_count,
             'resids': np.arange(atom_count) + 1,
         })