Updated docstrings and harmonized type comparisons

KosinskiLab · Jul 31, 2024 · 928acd7 · 928acd7
1 parent b93a452
commit 928acd7
Show file tree

Hide file tree

Showing 18 changed files with 218 additions and 41 deletions.
diff --git a/tme/backends/_jax_utils.py b/tme/backends/_jax_utils.py
@@ -0,0 +1,186 @@
+""" Utility functions for jax backend.
+
+    Copyright (c) 2023-2024 European Molecular Biology Laboratory
+
+    Author: Valentin Maurer <[email protected]>
+"""
+from typing import Tuple
+from functools import partial
+
+import jax.numpy as jnp
+from jax import pmap, lax
+
+from ..types import BackendArray
+from ..backends import backend as be
+from ..matching_utils import normalize_template as _normalize_template
+
+
+def _correlate(template: BackendArray, ft_target: BackendArray) -> BackendArray:
+    """
+    Computes :py:meth:`tme.matching_exhaustive.cc_setup`.
+    """
+    template_ft = jnp.fft.rfftn(template)
+    template_ft = template_ft.at[:].multiply(ft_target)
+    correlation = jnp.fft.irfftn(template_ft)
+    return correlation
+
+
+def _flc_scoring(
+    template: BackendArray,
+    template_mask: BackendArray,
+    ft_target: BackendArray,
+    ft_target2: BackendArray,
+    n_observations: BackendArray,
+    eps: float,
+    **kwargs,
+) -> BackendArray:
+    """
+    Computes :py:meth:`tme.matching_exhaustive.flc_scoring`.
+    """
+    correlation = _correlate(template=template, ft_target=ft_target)
+    inv_denominator = _reciprocal_target_std(
+        ft_target=ft_target,
+        ft_target2=ft_target2,
+        template_mask=template_mask,
+        eps=eps,
+        n_observations=n_observations,
+    )
+    correlation = correlation.at[:].multiply(inv_denominator)
+    return correlation
+
+
+def _flcSphere_scoring(
+    template: BackendArray,
+    ft_target: BackendArray,
+    inv_denominator: BackendArray,
+    **kwargs,
+) -> BackendArray:
+    """
+    Computes :py:meth:`tme.matching_exhaustive.flc_scoring`.
+    """
+    correlation = _correlate(template=template, ft_target=ft_target)
+    correlation = correlation.at[:].multiply(inv_denominator)
+    return correlation
+
+
+def _reciprocal_target_std(
+    ft_target: BackendArray,
+    ft_target2: BackendArray,
+    template_mask: BackendArray,
+    n_observations: float,
+    eps: float,
+) -> BackendArray:
+    """
+    Computes reciprocal standard deviation of a target given a mask.
+
+    See Also
+    --------
+    :py:meth:`tme.matching_exhaustive.flc_scoring`.
+    """
+    ft_template_mask = jnp.fft.rfftn(template_mask)
+
+    # E(X^2) - E(X)^2
+    exp_sq = jnp.fft.irfftn(ft_target2 * ft_template_mask)
+    exp_sq = exp_sq.at[:].divide(n_observations)
+
+    ft_template_mask = ft_template_mask.at[:].multiply(ft_target)
+    sq_exp = jnp.fft.irfftn(ft_template_mask)
+    sq_exp = sq_exp.at[:].divide(n_observations)
+    sq_exp = sq_exp.at[:].power(2)
+
+    exp_sq = exp_sq.at[:].add(-sq_exp)
+    exp_sq = exp_sq.at[:].max(0)
+    exp_sq = exp_sq.at[:].power(0.5)
+
+    exp_sq = exp_sq.at[:].set(
+        jnp.where(exp_sq <= eps, 0, jnp.reciprocal(exp_sq * n_observations))
+    )
+    return exp_sq
+
+
+def _apply_fourier_filter(arr: BackendArray, arr_filter: BackendArray) -> BackendArray:
+    arr_ft = jnp.fft.rfftn(arr)
+    arr_ft = arr_ft.at[:].multiply(arr_filter)
+    return arr.at[:].set(jnp.fft.irfftn(arr_ft, s=arr.shape))
+
+
+def _identity(arr: BackendArray, arr_filter: BackendArray) -> BackendArray:
+    return arr
+
+
+@partial(
+    pmap,
+    in_axes=(0,) + (None,) * 6,
+    static_broadcasted_argnums=[6, 7],
+)
+def scan(
+    target: BackendArray,
+    template: BackendArray,
+    template_mask: BackendArray,
+    rotations: BackendArray,
+    template_filter: BackendArray,
+    target_filter: BackendArray,
+    fast_shape: Tuple[int],
+    rotate_mask: bool,
+) -> Tuple[BackendArray, BackendArray]:
+    eps = jnp.finfo(template.dtype).resolution
+
+    if hasattr(target_filter, "shape"):
+        target = _apply_fourier_filter(target, target_filter)
+
+    ft_target = jnp.fft.rfftn(target)
+    ft_target2 = jnp.fft.rfftn(jnp.square(target))
+    inv_denominator, target, scoring_func = None, None, _flc_scoring
+    if not rotate_mask:
+        n_observations = jnp.sum(template_mask)
+        inv_denominator = _reciprocal_target_std(
+            ft_target=ft_target,
+            ft_target2=ft_target2,
+            template_mask=be.topleft_pad(template_mask, fast_shape),
+            eps=eps,
+            n_observations=n_observations,
+        )
+        ft_target2, scoring_func = None, _flcSphere_scoring
+
+    _template_filter_func = _identity
+    if template_filter.shape != ():
+        _template_filter_func = _apply_fourier_filter
+
+    def _sample_transform(ret, rotation_matrix):
+        max_scores, rotations, index = ret
+        template_rot, template_mask_rot = be.rigid_transform(
+            arr=template,
+            arr_mask=template_mask,
+            rotation_matrix=rotation_matrix,
+            order=1,  # that's all we get for now
+        )
+
+        n_observations = jnp.sum(template_mask_rot)
+        template_rot = _template_filter_func(template_rot, template_filter)
+        template_rot = _normalize_template(
+            template_rot, template_mask_rot, n_observations
+        )
+        template_rot = be.topleft_pad(template_rot, fast_shape)
+        template_mask_rot = be.topleft_pad(template_mask_rot, fast_shape)
+
+        scores = scoring_func(
+            template=template_rot,
+            template_mask=template_mask_rot,
+            ft_target=ft_target,
+            ft_target2=ft_target2,
+            inv_denominator=inv_denominator,
+            n_observations=n_observations,
+            eps=eps,
+        )
+        max_scores, rotations = be.max_score_over_rotations(
+            scores, max_scores, rotations, index
+        )
+        return (max_scores, rotations, index + 1), None
+
+    score_space = jnp.zeros(fast_shape)
+    rotation_space = jnp.full(shape=fast_shape, dtype=jnp.int32, fill_value=-1)
+    (score_space, rotation_space, _), _ = lax.scan(
+        _sample_transform, (score_space, rotation_space, 0), rotations
+    )
+
+    return score_space, rotation_space
diff --git a/tme/density.py b/tme/density.py
@@ -113,7 +113,7 @@ def __init__(
         self.metadata = metadata
 
     def __repr__(self):
-        response = "Density object at {}\nOrigin: {}, sampling_rate: {}, Shape: {}"
+        response = "Density object at {}\nOrigin: {}, Sampling Rate: {}, Shape: {}"
         return response.format(
             hex(id(self)),
             tuple(np.round(self.origin, 3)),

diff --git a/tme/matching_data.py b/tme/matching_data.py
@@ -98,8 +98,7 @@ def _load_array(arr: NDArray):
         NDArray
             Loaded array.
         """
-
-        if type(arr) == np.memmap:
+        if isinstance(arr, np.memmap):
             return np.memmap(arr.filename, mode="r", shape=arr.shape, dtype=arr.dtype)
         return arr
 
@@ -153,13 +152,13 @@ def subset_array(
         arr_slice = tuple(slice(*pos) for pos in zip(arr_start, arr_stop))
         arr_mesh = self._slice_to_mesh(arr_slice, arr.shape)
 
-        if type(arr) == Density:
+        if isinstance(arr, Density):
             if isinstance(arr.data, np.memmap):
                 arr = Density.from_file(arr.data.filename, subset=arr_slice).data
             else:
                 arr = np.asarray(arr.data[*arr_mesh])
         else:
-            if type(arr) == np.memmap:
+            if isinstance(arr, np.memmap):
                 arr = np.memmap(
                     arr.filename, mode="r", shape=arr.shape, dtype=arr.dtype
                 )

diff --git a/tme/matching_scores.py b/tme/matching_scores.py
@@ -860,8 +860,8 @@ def mcc_scoring(
         tol = 1e3 * eps * be.max(be.abs(temp2), axis=axes, keepdims=True)
 
         temp2[temp2 < tol] = 1
-        be.divide(numerator, temp2, out=temp)
-        be.clip(temp, a_min=-1, a_max=1, out=temp)
+        temp = be.divide(numerator, temp2, out=temp)
+        temp = be.clip(temp, a_min=-1, a_max=1, out=temp)
 
         # Apply overlap ratio threshold
         number_px_threshold = overlap_ratio * be.max(

diff --git a/tme/matching_utils.py b/tme/matching_utils.py
@@ -166,7 +166,7 @@ def memmap_to_array(arr: NDArray) -> NDArray:
     obj:`numpy.ndarray`
         In-memory version of ``arr``.
     """
-    if type(arr) == np.memmap:
+    if isinstance(arr, np.memmap):
         memmap_filepath = arr.filename
         arr = np.array(arr)
         os.remove(memmap_filepath)

diff --git a/tme/memory.py b/tme/memory.py
@@ -14,12 +14,7 @@
 
 class MatchingMemoryUsage(ABC):
     """
-    Base class for estimating the memory usage of template matching.
-
-    This class provides a template for estimating memory usage for
-    different matching methods. Users should subclass it and implement the
-    `base_usage` and `per_fork` methods to specify custom memory usage
-    estimates.
+    Class specification for estimating the memory requirements of template matching.
 
     Parameters
     ----------
@@ -80,7 +75,7 @@ def per_fork(self) -> int:
 
 class CCMemoryUsage(MatchingMemoryUsage):
     """
-    Memory usage estimation for the CC fitter.
+    Memory usage estimation for CC scoring.
 
     See Also
     --------

diff --git a/tme/orientations.py b/tme/orientations.py
@@ -62,10 +62,10 @@ class Orientations:
         Array with additional orientation details (n, ).
     """
 
-    #: Array with translations of each orientation (n x d).
+    #: Array with translations of each orientation (n, d).
     translations: np.ndarray
 
-    #: Array with zyx euler angles of each orientation (n x d).
+    #: Array with zyx euler angles of each orientation (n, d).
     rotations: np.ndarray
 
     #: Array with scores of each orientation (n, ).
@@ -158,7 +158,7 @@ def to_file(self, filename: str, file_format: type = None, **kwargs) -> None:
             the file_format from the typical extension. Supported formats are
 
             +---------------+----------------------------------------------------+
-            | text          | pyTME's standard tab-separated orientations file   |
+            | text          | pytme's standard tab-separated orientations file   |
             +---------------+----------------------------------------------------+
             | relion        | Creates a STAR file of orientations                |
             +---------------+----------------------------------------------------+

diff --git a/tme/parser.py b/tme/parser.py
@@ -137,8 +137,7 @@ def parse_input(self, lines: List[str]) -> Dict:
 
 class PDBParser(Parser):
     """
-    A Parser subclass for converting PDB file data into a dictionary representation.
-    This class is specifically designed to work with PDB file format.
+    Convert PDB file data into a dictionary representation [1]_.
 
     References
     ----------
@@ -228,8 +227,8 @@ def parse_input(self, lines: List[str]) -> Dict:
 
 class MMCIFParser(Parser):
     """
-    A Parser subclass for converting MMCIF file data into a dictionary representation.
-    This implementation heavily relies on the atomium library [1]_.
+    Convert MMCIF file data into a dictionary representation. This implementation
+    heavily relies on the atomium library [1]_.
 
     References
     ----------

diff --git a/tme/preprocessing/_utils.py b/tme/preprocessing/_utils.py
@@ -8,11 +8,10 @@
 from typing import Tuple, List
 
 import numpy as np
-from numpy.typing import NDArray
 
-from ..types import BackendArray
 from ..backends import backend as be
 from ..backends import NumpyFFTWBackend
+from ..types import BackendArray, NDArray
 from ..matching_utils import euler_to_rotationmatrix
 
 

diff --git a/tme/preprocessing/tilt_series.py b/tme/preprocessing/tilt_series.py
@@ -9,12 +9,11 @@
 from dataclasses import dataclass
 
 import numpy as np
-from numpy.typing import NDArray
 
 from .. import Preprocessor
+from ..types import NDArray
 from ..backends import backend as be
 from ..matching_utils import euler_to_rotationmatrix
-
 from ._utils import (
     frequency_grid_at_angle,
     compute_tilt_shape,

diff --git a/tme/preprocessor.py b/tme/preprocessor.py
@@ -1385,7 +1385,7 @@ def _hlpf_fitness(
     orig = int((f_mask.size - 1) / 2)
     dist = np.arange(-orig, orig + 1) * T
     t, c, k = splrep(x=dist, y=f_mask, k=3)
-    i_max = np.ceil(np.divide(f_mask.shape, M))
+    i_max = np.ceil(np.divide(f_mask.shape, M)).astype(int)[0]
     coarse_mask = np.arange(-i_max, i_max + 1) * M
     spline = BSpline(t, c, k)
     coarse_values = spline(coarse_mask)

diff --git a/tme/tests/test_analyzer.py b/tme/tests/test_analyzer.py
@@ -3,7 +3,7 @@
 import pytest
 import numpy as np
 
-from tme.backends import backend
+from tme.backends import backend as be
 from tme.analyzer import (
     MaxScoreOverRotations,
     PeakCaller,
@@ -162,7 +162,7 @@ def test__iter__(self, use_memmap: bool):
         score_analyzer(self.data, rotation_matrix=self.rotation_matrix)
         res = tuple(score_analyzer)
         assert np.allclose(res[0].shape, self.data.shape)
-        assert res[0].dtype == backend._float_dtype
+        assert res[0].dtype == be._float_dtype
         assert res[1].size == self.data.ndim
         assert np.allclose(res[2].shape, self.data.shape)
         assert len(res) == 4

diff --git a/tme/tests/test_density.py b/tme/tests/test_density.py
@@ -1,6 +1,6 @@
+from os import remove
 from tempfile import mkstemp
 from itertools import permutations
-from os import remove
 
 import pytest
 import numpy as np
@@ -74,7 +74,7 @@ def test_repr(self):
         density = Density(data, origin, sampling_rate)
         repr_str = density.__repr__()
 
-        response = "Density object at {}\nOrigin: {}, sampling_rate: {}, Shape: {}"
+        response = "Density object at {}\nOrigin: {}, Sampling Rate: {}, Shape: {}"
         response = response.format(
             hex(id(density)),
             tuple(np.round(density.origin, 3)),