Feature/batched vmap #588

Merged — 79 commits from feature/batched-vmap into main, Oct 16, 2024

Commits (79)
4472425
Start batched vmap
michalk8 Sep 20, 2024
80b9c60
Initial `batched_vmap` impl
michalk8 Oct 10, 2024
7c58b9a
Nicer formatting
michalk8 Oct 10, 2024
b995d15
Fix getting shape
michalk8 Oct 10, 2024
e5cfa1e
Remove private API usage
michalk8 Oct 10, 2024
f8eee7b
Fix new args
michalk8 Oct 10, 2024
51980ef
Add a TODO
michalk8 Oct 10, 2024
7a510ae
Canonicalize axes
michalk8 Oct 10, 2024
1ca05f1
Add `batched_vmap` to docs
michalk8 Oct 10, 2024
e7ad8a4
Removed batched transport functions
michalk8 Oct 10, 2024
acf221b
Remove `_norm_{x,y}` from `CostFn`
michalk8 Oct 10, 2024
07f4734
Implement `apply_lse_kernel`
michalk8 Oct 10, 2024
98f1de9
Implement `apply_kernel`
michalk8 Oct 10, 2024
ba825f9
Implement `apply_cost`
michalk8 Oct 10, 2024
9036329
Remove old functions
michalk8 Oct 10, 2024
62089bc
Make function private
michalk8 Oct 10, 2024
b9bb64a
Refactor `apply_cost` to have consistent shapes
michalk8 Oct 10, 2024
d8b5ea6
Use `_apply_cost_to_vec` in `PointCloud`
michalk8 Oct 10, 2024
12f923c
Remove TODO
michalk8 Oct 10, 2024
a43dcdb
Formatting
michalk8 Oct 10, 2024
e0d75b0
Simplify `_apply_sqeucl_cost`
michalk8 Oct 10, 2024
f5445ec
Fix `RecursionError`
michalk8 Oct 10, 2024
4922ca9
Remove docstring of a private method
michalk8 Oct 10, 2024
799c108
Fix `apply_lse_kernel`
michalk8 Oct 10, 2024
9a5c1ca
Squeeze only 1 axis of the cost
michalk8 Oct 10, 2024
8543538
Add TODO
michalk8 Oct 10, 2024
317eb02
Rename function, make a property
michalk8 Oct 10, 2024
d31fd4d
Remove unused helper function
michalk8 Oct 10, 2024
4b0f150
Compute mean summary online
michalk8 Oct 10, 2024
8843937
Compute mean online
michalk8 Oct 10, 2024
83c9960
Compute max cost matrix
michalk8 Oct 10, 2024
69a9599
Update error message
michalk8 Oct 11, 2024
6667abd
Remove TODO
michalk8 Oct 11, 2024
ac9b928
Flatten out axes
michalk8 Oct 11, 2024
c113946
Fix missing cross terms in the costs
michalk8 Oct 11, 2024
75d9e7a
Fix geom tests
michalk8 Oct 11, 2024
44eb5a8
Fix dtype
michalk8 Oct 11, 2024
cbb4ea0
Start implementing transport functions
michalk8 Oct 11, 2024
e8bb1b5
Implement online transport functions
michalk8 Oct 11, 2024
7d4001e
Fix solver tests
michalk8 Oct 11, 2024
8941224
Fix Bures test
michalk8 Oct 11, 2024
a565e09
Don't use `pairwise` in tests
michalk8 Oct 11, 2024
1533324
Update notebook that uses `norm`
michalk8 Oct 11, 2024
3e7ff8b
Fix bug in `UnbalancedBures`
michalk8 Oct 11, 2024
b815fbc
Rename `pairwise -> __call__`
michalk8 Oct 11, 2024
739afde
Remove old shape code
michalk8 Oct 11, 2024
0d7f6ae
Always instantiate the cost for online
michalk8 Oct 11, 2024
4863fcf
Remove old TODO
michalk8 Oct 11, 2024
4aa4c6b
Extract `_apply_cost_to_vec_fast`
michalk8 Oct 11, 2024
8511073
Update max cost in LRCGeom
michalk8 Oct 11, 2024
47462d2
Fix test, use more `multi_dot`
michalk8 Oct 11, 2024
05630a8
Remove `batch_size` from `LRCGeometry`
michalk8 Oct 11, 2024
0994d7a
Add better warning error
michalk8 Oct 15, 2024
5d88ad4
Reorder properties
michalk8 Oct 15, 2024
f8143fc
Add docs to `batched_vmap`
michalk8 Oct 15, 2024
a82688c
Start adding tests
michalk8 Oct 15, 2024
1d2d12d
Reorder functions in test
michalk8 Oct 15, 2024
44b1126
Fix axes, add a test
michalk8 Oct 15, 2024
889f81f
Update test fn
michalk8 Oct 15, 2024
b16d5a8
Move out assert
michalk8 Oct 15, 2024
c984a43
Don't canonicalize `out_axes`
michalk8 Oct 15, 2024
4426994
Check max traces
michalk8 Oct 15, 2024
5e5125b
Test memory of batched vmap
michalk8 Oct 15, 2024
cb31db7
Install `typing_extensions`
michalk8 Oct 15, 2024
57bf9ca
Merge branch 'main' into feature/batched-vmap
michalk8 Oct 15, 2024
721eca9
Remove `.` from description
michalk8 Oct 15, 2024
f9a41bd
Add more `out_axes` tests
michalk8 Oct 15, 2024
78003d9
Add `in_axes` test
michalk8 Oct 15, 2024
9e1ae03
Fix negative axes
michalk8 Oct 15, 2024
427b5ec
Increase memory limit in the test
michalk8 Oct 16, 2024
fff0ce6
Add in_axes pytree test
michalk8 Oct 16, 2024
f72abf1
Remove old warnings filters
michalk8 Oct 16, 2024
b19ff4b
Update fixtures
michalk8 Oct 16, 2024
462c630
Update SqEucl cost.
michalk8 Oct 16, 2024
babb095
Update docstrings
michalk8 Oct 16, 2024
87df731
Remove unused imports from the docs
michalk8 Oct 16, 2024
07dff82
Revert test pre-commits
michalk8 Oct 16, 2024
5450808
Fix ICNN init notebook
michalk8 Oct 16, 2024
e390e64
Improve error message
michalk8 Oct 16, 2024
10 changes: 7 additions & 3 deletions docs/tutorials/geometry/100_grid.ipynb
@@ -241,11 +241,15 @@
"class MyCost(costs.CostFn):\n",
" \"\"\"An unusual cost function.\"\"\"\n",
"\n",
" def norm(self, x):\n",
" def norm(self, x: jnp.ndarray) -> jnp.ndarray:\n",
" return jnp.sum(x**3 + jnp.cos(x) ** 2, axis=-1)\n",
"\n",
" def pairwise(self, x, y):\n",
" return -jnp.sum(jnp.sin(x + 1) * jnp.sin(y)) * 2"
" def __call__(self, x: jnp.ndarray, y: jnp.ndarray) -> jnp.ndarray:\n",
" return (\n",
" self.norm(x)\n",
" + self.norm(y)\n",
" - jnp.sum(jnp.sin(x + 1) * jnp.sin(y)) * 2\n",
" )"
]
},
{
1 change: 1 addition & 0 deletions docs/utils.rst
@@ -11,3 +11,4 @@ function for :class:`~ott.solvers.linear.sinkhorn.Sinkhorn`.

default_progress_fn
tqdm_progress_fn
+batched_vmap
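
For orientation, `batched_vmap` is the new public utility this PR adds under `ott.utils`. A minimal usage sketch — the exact signature (`batch_size`, plus `in_axes`/`out_axes` mirroring `jax.vmap`) is inferred from the commit history, so treat the details as assumptions:

```python
import jax.numpy as jnp

from ott import utils

x = jnp.ones((8_192, 64))

# Assumed semantics: like jax.vmap over the leading axis, but the mapped
# function is evaluated in chunks of `batch_size` rows to bound peak memory.
row_norms = utils.batched_vmap(
    lambda row: jnp.sum(row ** 2),
    batch_size=1_024,
)(x)

assert row_norms.shape == (8_192,)
```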
11 changes: 3 additions & 8 deletions pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "ott-jax"
-description = "Optimal Transport Tools in JAX."
+description = "Optimal Transport Tools in JAX"
requires-python = ">=3.9"
dynamic = ["version"]
readme = {file = "README.md", content-type = "text/markdown"}
@@ -17,6 +17,7 @@ dependencies = [
"jaxopt>=0.8",
"lineax>=0.0.5",
"numpy>=1.20.0",
"typing_extensions; python_version <= '3.9'",
]
keywords = [
"optimal transport",
@@ -107,7 +108,7 @@ multi_line_output = 3
sections = ["FUTURE", "STDLIB", "THIRDPARTY", "TEST", "NUMERIC", "NEURAL", "PLOTTING", "FIRSTPARTY", "LOCALFOLDER"]
# also contains what we import in notebooks/tests
known_neural = ["flax", "optax", "diffrax", "orbax"]
-known_numeric = ["numpy", "scipy", "jax", "flax", "optax", "jaxopt", "ot", "torch", "torchvision", "pandas", "sklearn", "tslearn"]
+known_numeric = ["numpy", "scipy", "jax", "chex", "flax", "optax", "jaxopt", "ot", "torch", "torchvision", "pandas", "sklearn", "tslearn"]
known_test = ["_pytest", "pytest"]
known_plotting = ["IPython", "matplotlib", "mpl_toolkits", "seaborn"]

@@ -120,12 +121,6 @@ markers = [
"cpu: Mark tests as CPU only.",
"fast: Mark tests as fast.",
]
-filterwarnings = [
-    "ignore:\\n*.*scipy.sparse array",
-    "ignore:jax.random.KeyArray is deprecated:DeprecationWarning",
-    "ignore:.*jax.config:DeprecationWarning",
-    "ignore:jax.core.Shape is deprecated:DeprecationWarning:chex",
-]

[tool.coverage.run]
branch = true
80 changes: 22 additions & 58 deletions src/ott/geometry/costs.py
@@ -14,7 +14,7 @@
import abc
import functools
import math
-from typing import Any, Callable, Dict, Optional, Tuple, Union
+from typing import Any, Callable, Dict, Optional, Tuple

import jax
import jax.numpy as jnp
@@ -39,29 +39,16 @@
"SoftDTW",
]

# TODO(michalk8): norm check
Func = Callable[[jnp.ndarray], float]


@jtu.register_pytree_node_class
class CostFn(abc.ABC):
"""Base class for all costs.

Cost functions evaluate a function on a pair of inputs. For convenience,
that function is split into two norms -- evaluated on each input separately --
followed by a pairwise cost that involves both inputs, as in:

.. math::
c(x, y) = norm(x) + norm(y) + pairwise(x, y)

If the :attr:`norm` function is not implemented, that value is handled as
:math:`0`, and only :func:`pairwise` is used.
"""

# no norm function created by default.
norm: Optional[Callable[[jnp.ndarray], Union[float, jnp.ndarray]]] = None
"""Base class for all costs."""

@abc.abstractmethod
-  def pairwise(self, x: jnp.ndarray, y: jnp.ndarray) -> float:
+  def __call__(self, x: jnp.ndarray, y: jnp.ndarray) -> float:
"""Compute cost between :math:`x` and :math:`y`.

Args:
@@ -99,22 +86,6 @@ def _padder(cls, dim: int) -> jnp.ndarray:
"""
return jnp.zeros((1, dim))

-  def __call__(self, x: jnp.ndarray, y: jnp.ndarray) -> float:
-    """Compute cost between :math:`x` and :math:`y`.
-
-    Args:
-      x: Array.
-      y: Array.
-
-    Returns:
-      The cost, optionally including the :attr:`norms <norm>` of
-      :math:`x`/:math:`y`.
-    """
-    cost = self.pairwise(x, y)
-    if self.norm is None:
-      return cost
-    return cost + self.norm(x) + self.norm(y)

def all_pairs(self, x: jnp.ndarray, y: jnp.ndarray) -> jnp.ndarray:
"""Compute matrix of all pairwise costs, including the :attr:`norms <norm>`.

@@ -127,18 +98,6 @@ def all_pairs(self, x: jnp.ndarray, y: jnp.ndarray) -> jnp.ndarray:
"""
return jax.vmap(lambda x_: jax.vmap(lambda y_: self(x_, y_))(y))(x)

-  def all_pairs_pairwise(self, x: jnp.ndarray, y: jnp.ndarray) -> jnp.ndarray:
-    """Compute matrix of all pairwise costs, excluding the :attr:`norms <norm>`.
-
-    Args:
-      x: Array of shape ``[n, ...]``.
-      y: Array of shape ``[m, ...]``.
-
-    Returns:
-      Array of shape ``[n, m]`` of cost evaluations.
-    """
-    return jax.vmap(lambda x_: jax.vmap(lambda y_: self.pairwise(x_, y_))(y))(x)

def twist_operator(
self, vec: jnp.ndarray, dual_vec: jnp.ndarray, variable: bool
) -> jnp.ndarray:
@@ -200,7 +159,7 @@ def h_legendre(self, z: jnp.ndarray) -> float:
"""Legendre transform of :func:`h` when it is convex."""
raise NotImplementedError("Legendre transform of `h` is not implemented.")

-  def pairwise(self, x: jnp.ndarray, y: jnp.ndarray) -> float:
+  def __call__(self, x: jnp.ndarray, y: jnp.ndarray) -> float:
"""Compute cost as evaluation of :func:`h` on :math:`x-y`."""
return self.h(x - y)
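
With this rename, a translation-invariant cost only needs to implement `h`; `__call__` is derived as `h(x - y)`. A hypothetical subclass, sketched for illustration (`L1Cost` is not part of this diff; the pytree registration mirrors the other costs in this file):

```python
import jax.numpy as jnp
import jax.tree_util as jtu

from ott.geometry import costs


@jtu.register_pytree_node_class
class L1Cost(costs.TICost):
  """Hypothetical TI cost: h(z) = ||z||_1, hence c(x, y) = ||x - y||_1."""

  def h(self, z: jnp.ndarray) -> float:
    return jnp.sum(jnp.abs(z))


cost_fn = L1Cost()
assert cost_fn(jnp.zeros(3), jnp.ones(3)) == 3.0  # __call__ -> h(x - y)
```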

@@ -539,7 +498,7 @@ class Euclidean(CostFn):
because the function is not strictly convex (it is linear on rays).
"""

-  def pairwise(self, x: jnp.ndarray, y: jnp.ndarray) -> float:
+  def __call__(self, x: jnp.ndarray, y: jnp.ndarray) -> float:
"""Compute Euclidean norm using custom jvp implementation.

Here we use a custom jvp implementation for the norm that does not yield
@@ -556,13 +515,14 @@ class SqEuclidean(TICost):
Implemented as a translation invariant cost, :math:`h(z) = \|z\|^2`.
"""

-  def norm(self, x: jnp.ndarray) -> Union[float, jnp.ndarray]:
+  def norm(self, x: jnp.ndarray) -> jnp.ndarray:
"""Compute squared Euclidean norm for vector."""
return jnp.sum(x ** 2, axis=-1)

-  def pairwise(self, x: jnp.ndarray, y: jnp.ndarray) -> float:
+  def __call__(self, x: jnp.ndarray, y: jnp.ndarray) -> float:
    """Compute minus twice the dot-product between vectors."""
-    return -2.0 * jnp.vdot(x, y)
+    cross_term = -2.0 * jnp.vdot(x, y)
+    return self.norm(x) + self.norm(y) + cross_term

def h(self, z: jnp.ndarray) -> float: # noqa: D102
return jnp.sum(z ** 2)
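
Since the norms are now folded directly into `__call__`, evaluating `SqEuclidean` on a pair of points should agree with both `h` and the plain squared distance. A quick consistency check under that reading of the diff:

```python
import jax.numpy as jnp

from ott.geometry import costs

cost_fn = costs.SqEuclidean()
x, y = jnp.arange(3.0), jnp.ones(3)

# norm(x) + norm(y) - 2 * <x, y> == ||x - y||^2 == h(x - y)
assert jnp.allclose(cost_fn(x, y), jnp.sum((x - y) ** 2))
assert jnp.allclose(cost_fn(x, y), cost_fn.h(x - y))

# `all_pairs` (unchanged above) materializes the full [n, m] cost matrix.
cm = cost_fn.all_pairs(x[:, None], y[:, None])
assert cm.shape == (3, 3)
```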
Expand All @@ -588,7 +548,7 @@ def __init__(self, ridge: float = 1e-8):
super().__init__()
self._ridge = ridge

-  def pairwise(self, x: jnp.ndarray, y: jnp.ndarray) -> float:
+  def __call__(self, x: jnp.ndarray, y: jnp.ndarray) -> float:
"""Cosine distance between vectors, denominator regularized with ridge."""
x_norm = jnp.linalg.norm(x, axis=-1)
y_norm = jnp.linalg.norm(y, axis=-1)
@@ -624,7 +584,7 @@ def __init__(self, n: int, ridge: float = 1e-8):
self.n = n
self._ridge = ridge

-  def pairwise(self, x: jnp.ndarray, y: jnp.ndarray):  # noqa: D102
+  def __call__(self, x: jnp.ndarray, y: jnp.ndarray):  # noqa: D102
x_norm = jnp.linalg.norm(x, axis=-1)
y_norm = jnp.linalg.norm(y, axis=-1)
cosine_similarity = jnp.vdot(x, y) / (x_norm * y_norm + self._ridge)
@@ -688,7 +648,7 @@ def norm(self, x: jnp.ndarray) -> jnp.ndarray:
norm += jnp.trace(cov, axis1=-2, axis2=-1)
return norm

-  def pairwise(self, x: jnp.ndarray, y: jnp.ndarray) -> float:
+  def __call__(self, x: jnp.ndarray, y: jnp.ndarray) -> float:
"""Compute - 2 x Bures dot-product."""
mean_x, cov_x = x_to_means_and_covs(x, self._dimension)
mean_y, cov_y = x_to_means_and_covs(y, self._dimension)
Expand All @@ -698,7 +658,10 @@ def pairwise(self, x: jnp.ndarray, y: jnp.ndarray) -> float:
sq__sq_x_y_sq_x = matrix_square_root.sqrtm(
sq_x_y_sq_x, self._dimension, **self._sqrtm_kw
)[0]
-    return -2 * (mean_dot_prod + jnp.trace(sq__sq_x_y_sq_x, axis1=-2, axis2=-1))
+    cross_term = -2.0 * (
+        mean_dot_prod + jnp.trace(sq__sq_x_y_sq_x, axis1=-2, axis2=-1)
+    )
+    return self.norm(x) + self.norm(y) + cross_term

def covariance_fixpoint_iter(
self,
Expand Down Expand Up @@ -883,7 +846,7 @@ def norm(self, x: jnp.ndarray) -> jnp.ndarray:
"""
return self._gamma * x[..., 0]

-  def pairwise(self, x: jnp.ndarray, y: jnp.ndarray) -> float:
+  def __call__(self, x: jnp.ndarray, y: jnp.ndarray) -> float:
"""Compute dot-product for unbalanced Bures.

Args:
@@ -939,12 +902,13 @@ def pairwise(self, x: jnp.ndarray, y: jnp.ndarray) -> float:
log_m_pi += -0.5 * ldet_c_ab

# if all logdet signs are 1, output value, nan otherwise
-    pos_signs = (sldet_c + sldet_c_ab + sldet_t_ab + sldet_t_ab) == 4
+    pos_signs = (sldet_c + sldet_c_ab + sldet_ab + sldet_t_ab) == 4

-    return jax.lax.cond(
+    cross_term = jax.lax.cond(
        pos_signs, lambda: 2 * sig2 * mass_x * mass_y - 2 *
        (sig2 + gam) * jnp.exp(log_m_pi), lambda: jnp.nan
    )
+    return self.norm(x) + self.norm(y) + cross_term

def tree_flatten(self): # noqa: D102
return (), (self._dimension, self._sigma, self._gamma, self._sqrtm_kw)
@@ -977,7 +941,7 @@ def __init__(
self.ground_cost = SqEuclidean() if ground_cost is None else ground_cost
self.debiased = debiased

-  def pairwise(self, x: jnp.ndarray, y: jnp.ndarray) -> float:  # noqa: D102
+  def __call__(self, x: jnp.ndarray, y: jnp.ndarray) -> float:  # noqa: D102
c_xy = self._soft_dtw(x, y)
if self.debiased:
return c_xy - 0.5 * (self._soft_dtw(x, x) + self._soft_dtw(y, y))
2 changes: 1 addition & 1 deletion src/ott/geometry/distrib_costs.py
@@ -51,7 +51,7 @@ def __init__(
)
self._solve_fn = solve_fn

-  def pairwise(self, x: jnp.ndarray, y: jnp.ndarray) -> float:
+  def __call__(self, x: jnp.ndarray, y: jnp.ndarray) -> float:
"""Wasserstein distance between :math:`x` and :math:`y` seen as a 1D dist.

Args:
9 changes: 5 additions & 4 deletions src/ott/geometry/geodesic.py
@@ -16,6 +16,7 @@
import jax
import jax.experimental.sparse as jesp
import jax.numpy as jnp
+import jax.tree_util as jtu
import numpy as np
from scipy.special import ive

@@ -28,7 +29,7 @@
Array_g = Union[jnp.ndarray, jesp.BCOO]


-@jax.tree_util.register_pytree_node_class
+@jtu.register_pytree_node_class
class Geodesic(geometry.Geometry):
r"""Graph distance approximation using heat kernel :cite:`huguet:2023`.

@@ -134,22 +135,22 @@ def from_graph(

def apply_kernel(
self,
-      scaling: jnp.ndarray,
+      vec: jnp.ndarray,
eps: Optional[float] = None,
axis: int = 0,
) -> jnp.ndarray:
r"""Apply :attr:`kernel_matrix` on positive scaling vector.

Args:
-      scaling: Scaling to apply the kernel to.
+      vec: Scaling to apply the kernel to.
eps: passed for consistency, not used yet.
axis: passed for consistency, not used yet.

Returns:
Kernel applied to ``scaling``.
"""
return expm_multiply(
-        self.scaled_laplacian, scaling, self.chebyshev_coeffs, 0.5 * self.eigval
+        self.scaled_laplacian, vec, self.chebyshev_coeffs, 0.5 * self.eigval
)

@property
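
The renamed argument is only a signature change; usage stays the same. A small sketch — `from_graph` and its `t` parameter exist elsewhere in this class, but the exact call below is an assumption, not part of this diff:

```python
import jax.numpy as jnp

from ott.geometry import geodesic

# Dense adjacency of a 3-node path graph.
g = jnp.array([
    [0.0, 1.0, 0.0],
    [1.0, 0.0, 1.0],
    [0.0, 1.0, 0.0],
])

geom = geodesic.Geodesic.from_graph(g, t=1.0)
out = geom.apply_kernel(jnp.ones(3))  # argument is now named `vec`
assert out.shape == (3,)
```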