From 8e2323d3e2f7c3af1f3de3324a75f03177a27787 Mon Sep 17 00:00:00 2001
From: E105D104U125 <72515278+E105D104U125@users.noreply.github.com>
Date: Thu, 15 Feb 2024 15:49:44 +0100
Subject: [PATCH 01/14] Converted tests to pytest and added FData tests.
Converted tests from old test_covariances file to pytest format and added tests for functional data objects.
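The new layout replaces unittest subtests with pytest fixtures and plain
assertion helpers. A minimal sketch of the pattern, illustrative only (the
fixture and test names here are hypothetical; the real ones are in the diff
below):

    import numpy as np
    import pytest

    import skfda.misc.covariances as cov

    @pytest.fixture(params=[cov.Gaussian(), cov.Exponential()])
    def kernel(request):
        """Run each test once per kernel instance."""
        return request.param

    def test_kernel_symmetry(kernel):
        x = np.linspace(-1, 1, 10)[:, np.newaxis]
        matrix = kernel(x, x)
        np.testing.assert_allclose(matrix, matrix.T)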
---
skfda/tests/test_covariances.py | 450 ++++++++++++++++++++++++--------
1 file changed, 344 insertions(+), 106 deletions(-)
diff --git a/skfda/tests/test_covariances.py b/skfda/tests/test_covariances.py
index 1005da49d..f9a54c17a 100644
--- a/skfda/tests/test_covariances.py
+++ b/skfda/tests/test_covariances.py
@@ -1,106 +1,344 @@
-import unittest
-
-import numpy as np
-
-import skfda.misc.covariances
-
-
-class TestsSklearn(unittest.TestCase):
-
- def setUp(self) -> None:
- unittest.TestCase.setUp(self)
-
- self.x = np.linspace(-1, 1, 1000)[:, np.newaxis]
-
- def _test_compare_sklearn(
- self,
- cov: skfda.misc.covariances.Covariance,
- ) -> None:
- cov_sklearn = cov.to_sklearn()
- cov_matrix = cov(self.x, self.x)
- cov_sklearn_matrix = cov_sklearn(self.x)
-
- np.testing.assert_array_almost_equal(cov_matrix, cov_sklearn_matrix)
-
- def test_linear(self) -> None:
-
- for variance in (1, 2):
- for intercept in (0, 1, 2):
- with self.subTest(variance=variance, intercept=intercept):
- cov = skfda.misc.covariances.Linear(
- variance=variance, intercept=intercept)
- self._test_compare_sklearn(cov)
-
- def test_polynomial(self) -> None:
-
- # Test a couple of non-default parameters only for speed
- for variance in (2,):
- for intercept in (0, 2):
- for slope in (1, 2):
- for degree in (1, 2, 3):
- with self.subTest(
- variance=variance,
- intercept=intercept,
- slope=slope,
- degree=degree,
- ):
- cov = skfda.misc.covariances.Polynomial(
- variance=variance,
- intercept=intercept,
- slope=slope,
- degree=degree,
- )
- self._test_compare_sklearn(cov)
-
- def test_gaussian(self) -> None:
-
- for variance in (1, 2):
- for length_scale in (0.5, 1, 2):
- with self.subTest(
- variance=variance,
- length_scale=length_scale,
- ):
- cov = skfda.misc.covariances.Gaussian(
- variance=variance,
- length_scale=length_scale,
- )
- self._test_compare_sklearn(cov)
-
- def test_exponential(self) -> None:
-
- for variance in (1, 2):
- for length_scale in (0.5, 1, 2):
- with self.subTest(
- variance=variance,
- length_scale=length_scale,
- ):
- cov = skfda.misc.covariances.Exponential(
- variance=variance,
- length_scale=length_scale,
- )
- self._test_compare_sklearn(cov)
-
- def test_matern(self) -> None:
-
- # Test a couple of non-default parameters only for speed
- for variance in (2,):
- for length_scale in (0.5,):
- for nu in (0.5, 1, 1.5, 2.5, 3.5, np.inf):
- with self.subTest(
- variance=variance,
- length_scale=length_scale,
- nu=nu,
- ):
- cov = skfda.misc.covariances.Matern(
- variance=variance,
- length_scale=length_scale,
- nu=nu,
- )
- self._test_compare_sklearn(cov)
-
- def test_white_noise(self) -> None:
-
- for variance in (1, 2):
- with self.subTest(variance=variance):
- cov = skfda.misc.covariances.WhiteNoise(variance=variance)
- self._test_compare_sklearn(cov)
+"""
+Tests for Covariance module.
+
+This file includes tests for multivariate data from the previous version
+of the file. It additionally incorporates test cases for functional data
+objects.
+"""
+import pytest
+from typing import Tuple
+
+import numpy as np
+
+import skfda.misc.covariances as cov
+from skfda import FDataGrid, FDataBasis
+from skfda.datasets import fetch_phoneme
+from skfda.representation.basis import MonomialBasis
+
+
+def _test_compare_sklearn(
+ multivariate_data,
+ cov: cov.Covariance,
+) -> None:
+ cov_sklearn = cov.to_sklearn()
+ cov_matrix = cov(multivariate_data, multivariate_data)
+ cov_sklearn_matrix = cov_sklearn(multivariate_data)
+
+ np.testing.assert_array_almost_equal(cov_matrix, cov_sklearn_matrix)
+
+##############################################################################
+# Fixtures
+##############################################################################
+
+
+@pytest.fixture
+def fetch_phoneme_fixture() -> FDataGrid:
+ """Fixture for loading the phoneme dataset example."""
+ fd, _ = fetch_phoneme(return_X_y=True)
+ return fd[:20]
+
+
+@pytest.fixture(
+ params=[
+ cov.Linear(),
+ cov.Polynomial(),
+ cov.Gaussian(),
+ cov.Exponential(),
+ cov.Matern(),
+ ],
+)
+def covariances_fixture(request) -> cov.Covariance:
+ """Fixture for getting a covariance kernel function."""
+ return request.param
+
+
+@pytest.fixture(
+ params=[
+ cov.Brownian(),
+ cov.WhiteNoise(),
+ ],
+)
+def covariances_raise_fixture(request) -> cov.Covariance:
+ """Fixture for getting a covariance kernel that raises a ValueError."""
+ return request.param
+
+
+@pytest.fixture
+def fdatabasis_data() -> Tuple[FDataBasis, FDataBasis]:
+ """Fixture for loading fdatabasis objects."""
+ basis = MonomialBasis(
+ n_basis=2,
+ domain_range=(-2, 2),
+ )
+
+ fd1 = FDataBasis(
+ basis=basis,
+ coefficients=[
+ [1, 0],
+ [1, 2],
+ ],
+ )
+
+ fd2 = FDataBasis(
+ basis=basis,
+ coefficients=[
+ [0, 1],
+ ],
+ )
+
+ return fd1, fd2
+
+
+@pytest.fixture
+def multivariate_data() -> np.ndarray:
+ """Fixture for loading multivariate data."""
+ return np.linspace(-1, 1, 1000)[:, np.newaxis]
+
+
+@pytest.fixture(
+ params=[1, 2],
+)
+def variance_param(request) -> float:
+ """Fixture for loading variance parameter."""
+ return request.param
+
+
+@pytest.fixture(
+ params=[0, 1, 2],
+)
+def intercept_param(request) -> float:
+ """Fixture for loading intercept parameter."""
+ return request.param
+
+
+@pytest.fixture(
+ params=[1, 2],
+)
+def slope_param(request) -> float:
+ """Fixture for loading slope parameter."""
+ return request.param
+
+
+@pytest.fixture(
+ params=[1, 2, 3],
+)
+def degree_param(request) -> float:
+ """Fixture for loading degree parameter."""
+ return request.param
+
+
+@pytest.fixture(
+ params=[1, 2, 3],
+)
+def length_scale_param(request) -> float:
+ """Fixture for loading length scale parameter."""
+ return request.param
+
+
+@pytest.fixture(
+ params=[0.5, 1, 1.5, 2.5, 3.5, np.inf],
+)
+def nu_param(request) -> float:
+ """Fixture for loading nu parameter."""
+ return request.param
+
+
+##############################################################################
+# Tests
+##############################################################################
+
+
+def test_covariances(fetch_phoneme_fixture, covariances_fixture) -> None:
+    """Check that one- and two-argument kernel calls give equal results."""
+ fd = fetch_phoneme_fixture
+ cov_kernel = covariances_fixture
+
+ # Also test that it does not fail
+ res1 = cov_kernel(fd, fd)
+ res2 = cov_kernel(fd)
+
+ np.testing.assert_allclose(
+ res1,
+ res2,
+ atol=1e-7,
+ )
+
+
+def test_raises(fetch_phoneme_fixture, covariances_raise_fixture):
+    """Check that kernels without functional support raise ValueError."""
+ fd = fetch_phoneme_fixture
+ cov_kernel = covariances_raise_fixture
+
+ np.testing.assert_raises(
+ ValueError,
+ cov_kernel,
+ fd,
+ )
+
+
+def test_fdatabasis_example_linear(fdatabasis_data):
+ """Check a precalculated example for Linear covariance kernel."""
+ fd1, fd2 = fdatabasis_data
+ res1 = cov.Linear(variance=1 / 2, intercept=3)(fd1, fd2)
+ res2 = np.array([[3 / 2], [3 / 2 + 32 / 6]])
+ np.testing.assert_allclose(
+ res1,
+ res2,
+ rtol=1e-6,
+ )
+
+
+def test_fdatabasis_example_polynomial(fdatabasis_data):
+ """Check a precalculated example for Polynomial covariance kernel."""
+ fd1, fd2 = fdatabasis_data
+ res1 = cov.Polynomial(
+ variance=1 / 3,
+ slope=2,
+ intercept=1,
+ degree=2,
+ )(fd1, fd2)
+ res2 = np.array([[1 / 3], [67**2 / 3**3]])
+ np.testing.assert_allclose(
+ res1,
+ res2,
+ rtol=1e-6,
+ )
+
+
+def test_fdatabasis_example_gaussian(fdatabasis_data):
+ """Check a precalculated example for Gaussian covariance kernel."""
+ fd1, fd2 = fdatabasis_data
+ res1 = cov.Gaussian(variance=3, length_scale=2)(fd1, fd2)
+ res2 = np.array([
+ [3 * np.exp(-7 / 6)],
+ [3 * np.exp(-7 / 6)],
+ ])
+ np.testing.assert_allclose(
+ res1,
+ res2,
+ rtol=1e-6,
+ )
+
+
+def test_fdatabasis_example_exponential(fdatabasis_data):
+ """Check a precalculated example for Exponential covariance kernel."""
+ fd1, fd2 = fdatabasis_data
+ res1 = cov.Exponential(variance=4, length_scale=5)(fd1, fd2)
+ res2 = np.array([
+ [4 * np.exp(-np.sqrt(28 / 3) / 5)],
+ [4 * np.exp(-np.sqrt(28 / 3) / 5)],
+ ])
+ np.testing.assert_allclose(
+ res1,
+ res2,
+ rtol=1e-6,
+ )
+
+
+def test_fdatabasis_example_matern(fdatabasis_data):
+ """Check a precalculated example for Matern covariance kernel."""
+ fd1, fd2 = fdatabasis_data
+ res1 = cov.Matern(variance=2, length_scale=3, nu=2)(fd1, fd2)
+ res2 = np.array([
+ [(2 / 3) ** 2 * (28 / 3) * 0.239775899566],
+ [(2 / 3) ** 2 * (28 / 3) * 0.239775899566],
+ ])
+ np.testing.assert_allclose(
+ res1,
+ res2,
+ rtol=1e-6,
+ )
+
+
+def test_multivariate_linear(
+ multivariate_data,
+ variance_param,
+ intercept_param,
+) -> None:
+    """Test linear covariance kernel against scikit-learn's kernel."""
+ _test_compare_sklearn(
+ multivariate_data,
+ cov.Linear(
+ variance=variance_param,
+ intercept=intercept_param,
+ ),
+ )
+
+
+def test_multivariate_polynomial(
+ multivariate_data,
+ variance_param,
+ intercept_param,
+ slope_param,
+ degree_param,
+) -> None:
+    """Test polynomial covariance kernel against scikit-learn's kernel."""
+ _test_compare_sklearn(
+ multivariate_data,
+ cov.Polynomial(
+ variance=variance_param,
+ intercept=intercept_param,
+ slope=slope_param,
+ degree=degree_param,
+ ),
+ )
+
+
+def test_multivariate_gaussian(
+ multivariate_data,
+ variance_param,
+ length_scale_param,
+) -> None:
+    """Test Gaussian covariance kernel against scikit-learn's kernel."""
+ _test_compare_sklearn(
+ multivariate_data,
+ cov.Gaussian(
+ variance=variance_param,
+ length_scale=length_scale_param,
+ ),
+ )
+
+
+def test_multivariate_exponential(
+ multivariate_data,
+ variance_param,
+ length_scale_param,
+) -> None:
+    """Test exponential covariance kernel against scikit-learn's kernel."""
+ _test_compare_sklearn(
+ multivariate_data,
+ cov.Exponential(
+ variance=variance_param,
+ length_scale=length_scale_param,
+ ),
+ )
+
+
+def test_multivariate_matern(
+ multivariate_data,
+ variance_param,
+ length_scale_param,
+ nu_param,
+) -> None:
+    """Test Matern covariance kernel against scikit-learn's kernel."""
+ _test_compare_sklearn(
+ multivariate_data,
+ cov.Matern(
+ variance=variance_param,
+ length_scale=length_scale_param,
+ nu=nu_param,
+ ),
+ )
+
+
+def test_multivariate_white_noise(
+ multivariate_data,
+ variance_param,
+) -> None:
+    """Test white noise covariance kernel against scikit-learn's kernel."""
+ _test_compare_sklearn(
+ multivariate_data,
+ cov.WhiteNoise(
+ variance=variance_param,
+ ),
+ )
From 16d8eb023a954a61d70c095bca8f1fa0d748cc7e Mon Sep 17 00:00:00 2001
From: E105D104U125 <72515278+E105D104U125@users.noreply.github.com>
Date: Thu, 15 Feb 2024 15:52:39 +0100
Subject: [PATCH 02/14] Adapted covariances to accept FData objects.
Adapted the Linear, Polynomial, Exponential, Gaussian and Matern covariances and added informative error messages for covariance kernels that do not support functional data objects.
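After this change a kernel can be evaluated directly on functional data
objects, for example (illustrative sketch; fetch_phoneme is the dataset the
tests already use):

    import skfda.misc.covariances as cov
    from skfda.datasets import fetch_phoneme

    fd, _ = fetch_phoneme(return_X_y=True)
    gaussian = cov.Gaussian(variance=1, length_scale=1)

    # n_samples x n_samples matrix built from pairwise L2 distances
    cov_matrix = gaussian(fd[:5], fd[:5])

    # Kernels with no functional counterpart now fail with a clear error
    cov.Brownian()(fd[:5])  # raises ValueError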
---
skfda/misc/covariances.py | 1811 +++++++++++++++++++------------------
1 file changed, 941 insertions(+), 870 deletions(-)
diff --git a/skfda/misc/covariances.py b/skfda/misc/covariances.py
index 43635eb02..9bce97d74 100644
--- a/skfda/misc/covariances.py
+++ b/skfda/misc/covariances.py
@@ -1,870 +1,941 @@
-from __future__ import annotations
-
-import abc
-from typing import Callable, Sequence, Tuple, Union
-
-import matplotlib.pyplot as plt
-import numpy as np
-import sklearn.gaussian_process.kernels as sklearn_kern
-from matplotlib.figure import Figure
-from scipy.special import gamma, kv
-
-from ..representation import FData, FDataBasis, FDataGrid
-from ..representation.basis import TensorBasis
-from ..typing._numpy import ArrayLike, NDArrayFloat
-
-
-def _squared_norms(x: NDArrayFloat, y: NDArrayFloat) -> NDArrayFloat:
- return ( # type: ignore[no-any-return]
- (x[:, np.newaxis, :] - y[np.newaxis, :, :]) ** 2
- ).sum(2)
-
-
-CovarianceLike = Union[
- float,
- NDArrayFloat,
- Callable[[ArrayLike, ArrayLike], NDArrayFloat],
-]
-
-
-def _transform_to_2d(t: ArrayLike) -> NDArrayFloat:
- """Transform 1d arrays in column vectors."""
- t = np.asfarray(t)
-
- dim = len(t.shape)
- assert dim <= 2
-
- if dim < 2:
- t = np.atleast_2d(t).T
-
- return t
-
-
-def _execute_covariance(
- covariance: CovarianceLike,
- x: ArrayLike,
- y: ArrayLike,
-) -> NDArrayFloat:
- """Execute a covariance function."""
- x = _transform_to_2d(x)
- y = _transform_to_2d(y)
-
- if isinstance(covariance, (int, float)):
- return np.array(covariance)
- else:
- if callable(covariance):
- result = covariance(x, y)
- elif isinstance(covariance, np.ndarray):
- result = covariance
- else:
- # GPy kernel
- result = covariance.K(x, y)
-
- assert result.shape[0] == len(x)
- assert result.shape[1] == len(y)
- return result
-
-
-class Covariance(abc.ABC):
- """Abstract class for covariance functions."""
-
- _parameters_str: Sequence[Tuple[str, str]]
- _latex_formula: str
-
- @abc.abstractmethod
- def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat:
- pass
-
- def heatmap(self, limits: Tuple[float, float] = (-1, 1)) -> Figure:
- """Return a heatmap plot of the covariance function."""
- from ..exploratory.visualization._utils import _create_figure
-
- x = np.linspace(*limits, 1000)
-
- cov_matrix = self(x, x)
-
- fig = _create_figure()
- ax = fig.add_subplot(1, 1, 1)
- ax.imshow(
- cov_matrix,
- extent=[limits[0], limits[1], limits[1], limits[0]],
- )
- ax.set_title(f"Covariance function in [{limits[0]}, {limits[1]}]")
-
- return fig
-
- def _sample_trajectories_plot(self) -> Figure:
- from ..datasets import make_gaussian_process
-
- fd = make_gaussian_process(
- start=-1,
- n_samples=10,
- cov=self,
- random_state=0,
- )
- fig = fd.plot()
- fig.axes[0].set_title("Sample trajectories")
- return fig
-
- def __repr__(self) -> str:
-
- params_str = ', '.join(
- f'{n}={getattr(self, n)}' for n, _ in self._parameters_str
- )
-
- return (
- f"{self.__module__}.{type(self).__qualname__}("
- f"{params_str}"
- f")"
- )
-
- def _latex_content(self) -> str:
- params_str = ''.join(
- fr'{l} &= {getattr(self, n)} \\' for n, l in self._parameters_str
- )
-
- return (
- fr"{self._latex_formula} \\"
- r"\text{where:}"
- r"\begin{aligned}"
- fr"\qquad{params_str}"
- r"\end{aligned}"
- )
-
- def _repr_latex_(self) -> str:
- return fr"\(\displaystyle {self._latex_content()}\)"
-
- def _repr_html_(self) -> str:
- from ..exploratory.visualization._utils import _figure_to_svg
-
- fig = self.heatmap()
- heatmap = _figure_to_svg(fig)
- plt.close(fig)
-
- fig = self._sample_trajectories_plot()
- sample_trajectories = _figure_to_svg(fig)
- plt.close(fig)
-
- row_style = ''
-
- def column_style(percent: float, margin_top: str = "0") -> str:
- return (
- f'style="display: inline-block; '
- f'margin:0; '
- f'margin-top: {margin_top}; '
- f'width:{percent}%; '
- f'height:auto;'
- f'vertical-align: middle"'
- )
-
-        return fr"""
-        <div {row_style}>
-            <div {column_style(100, margin_top='25px')}>
-                \[{self._latex_content()}\]
-            </div>
-        </div>
-        <div {row_style}>
-            <div {column_style(48)}>
-                {sample_trajectories}
-            </div>
-            <div {column_style(48)}>
-                {heatmap}
-            </div>
-        </div>
-        """
-
- def to_sklearn(self) -> sklearn_kern.Kernel:
- """Convert it to a sklearn kernel, if there is one."""
- raise NotImplementedError(
- f"{type(self).__name__} covariance not "
- f"implemented in scikit-learn",
- )
-
-
-class Brownian(Covariance):
- r"""
- Brownian covariance function.
-
- The covariance function is
-
- .. math::
- K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + |x' - \mathcal{O}|
- - |x - x'|}{2}
-
- where :math:`\sigma^2` is the variance at distance 1 from
- :math:`\mathcal{O}` and :math:`\mathcal{O}` is the origin point.
- If :math:`\mathcal{O} = 0` (the default) and we only
- consider positive values, the formula can be simplified as
-
- .. math::
- K(x, y) = \sigma^2 \min(x, y).
-
- Heatmap plot of the covariance function:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Brownian
- import matplotlib.pyplot as plt
-
- Brownian().heatmap(limits=(0, 1))
- plt.show()
-
- Example of Gaussian process trajectories using this covariance:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Brownian
- from skfda.datasets import make_gaussian_process
- import matplotlib.pyplot as plt
-
- gp = make_gaussian_process(
- n_samples=10, cov=Brownian(), random_state=0)
- gp.plot()
- plt.show()
-
- Default representation in a Jupyter notebook:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Brownian
-
- Brownian()
-
- """
- _latex_formula = (
- r"K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + "
- r"|x' - \mathcal{O}| - |x - x'|}{2}"
- )
-
- _parameters_str = [
- ("variance", r"\sigma^2"),
- ("origin", r"\mathcal{O}"),
- ]
-
- def __init__(self, *, variance: float = 1, origin: float = 0) -> None:
- self.variance = variance
- self.origin = origin
-
- def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat:
- x = _transform_to_2d(x) - self.origin
- y = _transform_to_2d(y) - self.origin
-
- sum_norms = np.add.outer(
- np.linalg.norm(x, axis=-1),
- np.linalg.norm(y, axis=-1),
- )
- norm_sub = np.linalg.norm(
- x[:, np.newaxis, :] - y[np.newaxis, :, :],
- axis=-1,
- )
-
- return ( # type: ignore[no-any-return]
- self.variance * (sum_norms - norm_sub) / 2
- )
-
-
-class Linear(Covariance):
- r"""
- Linear covariance function.
-
- The covariance function is
-
- .. math::
- K(x, x') = \sigma^2 (x^T x' + c)
-
- where :math:`\sigma^2` is the scale of the variance and
- :math:`c` is the intercept.
-
- Heatmap plot of the covariance function:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Linear
- import matplotlib.pyplot as plt
-
- Linear().heatmap(limits=(0, 1))
- plt.show()
-
- Example of Gaussian process trajectories using this covariance:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Linear
- from skfda.datasets import make_gaussian_process
- import matplotlib.pyplot as plt
-
- gp = make_gaussian_process(
- n_samples=10, cov=Linear(), random_state=0)
- gp.plot()
- plt.show()
-
- Default representation in a Jupyter notebook:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Linear
-
- Linear()
-
- """
-
- _latex_formula = r"K(x, x') = \sigma^2 (x^T x' + c)"
-
- _parameters_str = [
- ("variance", r"\sigma^2"),
- ("intercept", "c"),
- ]
-
- def __init__(self, *, variance: float = 1, intercept: float = 0) -> None:
- self.variance = variance
- self.intercept = intercept
-
- def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat:
- x = _transform_to_2d(x)
- y = _transform_to_2d(y)
-
- return self.variance * (x @ y.T + self.intercept)
-
- def to_sklearn(self) -> sklearn_kern.Kernel:
- return (
- self.variance
- * (sklearn_kern.DotProduct(0) + self.intercept)
- )
-
-
-class Polynomial(Covariance):
- r"""
- Polynomial covariance function.
-
- The covariance function is
-
- .. math::
- K(x, x') = \sigma^2 (\alpha x^T x' + c)^d
-
- where :math:`\sigma^2` is the scale of the variance,
- :math:`\alpha` is the slope, :math:`d` the degree of the
- polynomial and :math:`c` is the intercept.
-
- Heatmap plot of the covariance function:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Polynomial
- import matplotlib.pyplot as plt
-
- Polynomial().heatmap(limits=(0, 1))
- plt.show()
-
- Example of Gaussian process trajectories using this covariance:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Polynomial
- from skfda.datasets import make_gaussian_process
- import matplotlib.pyplot as plt
-
- gp = make_gaussian_process(
- n_samples=10, cov=Polynomial(), random_state=0)
- gp.plot()
- plt.show()
-
- Default representation in a Jupyter notebook:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Polynomial
-
- Polynomial()
-
- """
-
- _latex_formula = r"K(x, x') = \sigma^2 (\alpha x^T x' + c)^d"
-
- _parameters_str = [
- ("variance", r"\sigma^2"),
- ("intercept", "c"),
- ("slope", r"\alpha"),
- ("degree", "d"),
- ]
-
- def __init__(
- self,
- *,
- variance: float = 1,
- intercept: float = 0,
- slope: float = 1,
- degree: float = 2,
- ) -> None:
- self.variance = variance
- self.intercept = intercept
- self.slope = slope
- self.degree = degree
-
- def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat:
- x = _transform_to_2d(x)
- y = _transform_to_2d(y)
-
- return (
- self.variance
- * (self.slope * x @ y.T + self.intercept) ** self.degree
- )
-
- def to_sklearn(self) -> sklearn_kern.Kernel:
- return (
- self.variance
- * (self.slope * sklearn_kern.DotProduct(0) + self.intercept)
- ** self.degree
- )
-
-
-class Gaussian(Covariance):
- r"""
- Gaussian covariance function.
-
- The covariance function is
-
- .. math::
- K(x, x') = \sigma^2 \exp\left(-\frac{||x - x'||^2}{2l^2}\right)
-
- where :math:`\sigma^2` is the variance and :math:`l` is the length scale.
-
- Heatmap plot of the covariance function:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Gaussian
- import matplotlib.pyplot as plt
-
- Gaussian().heatmap(limits=(0, 1))
- plt.show()
-
- Example of Gaussian process trajectories using this covariance:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Gaussian
- from skfda.datasets import make_gaussian_process
- import matplotlib.pyplot as plt
-
- gp = make_gaussian_process(
- n_samples=10, cov=Gaussian(), random_state=0)
- gp.plot()
- plt.show()
-
- Default representation in a Jupyter notebook:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Gaussian
-
- Gaussian()
-
- """
-
- _latex_formula = (
- r"K(x, x') = \sigma^2 \exp\left(-\frac{\|x - x'\|^2}{2l^2}"
- r"\right)"
- )
-
- _parameters_str = [
- ("variance", r"\sigma^2"),
- ("length_scale", "l"),
- ]
-
- def __init__(self, *, variance: float = 1, length_scale: float = 1):
- self.variance = variance
- self.length_scale = length_scale
-
- def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat:
- x = _transform_to_2d(x)
- y = _transform_to_2d(y)
-
- x_y = _squared_norms(x, y)
-
- return self.variance * np.exp(-x_y / (2 * self.length_scale ** 2))
-
- def to_sklearn(self) -> sklearn_kern.Kernel:
- return (
- self.variance * sklearn_kern.RBF(length_scale=self.length_scale)
- )
-
-
-class Exponential(Covariance):
- r"""
- Exponential covariance function.
-
- The covariance function is
-
- .. math::
- K(x, x') = \sigma^2 \exp\left(-\frac{\|x - x'\|}{l}\right)
-
- where :math:`\sigma^2` is the variance and :math:`l` is the length scale.
-
- Heatmap plot of the covariance function:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Exponential
- import matplotlib.pyplot as plt
-
- Exponential().heatmap(limits=(0, 1))
- plt.show()
-
- Example of Gaussian process trajectories using this covariance:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Exponential
- from skfda.datasets import make_gaussian_process
- import matplotlib.pyplot as plt
-
- gp = make_gaussian_process(
- n_samples=10, cov=Exponential(), random_state=0)
- gp.plot()
- plt.show()
-
- Default representation in a Jupyter notebook:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Exponential
-
- Exponential()
-
- """
-
- _latex_formula = (
- r"K(x, x') = \sigma^2 \exp\left(-\frac{||x - x'||}{l}"
- r"\right)"
- )
-
- _parameters_str = [
- ("variance", r"\sigma^2"),
- ("length_scale", "l"),
- ]
-
- def __init__(
- self,
- *,
- variance: float = 1,
- length_scale: float = 1,
- ) -> None:
- self.variance = variance
- self.length_scale = length_scale
-
- def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat:
- x = _transform_to_2d(x)
- y = _transform_to_2d(y)
-
- x_y = _squared_norms(x, y)
- return self.variance * np.exp(-np.sqrt(x_y) / (self.length_scale))
-
- def to_sklearn(self) -> sklearn_kern.Kernel:
- return (
- self.variance
- * sklearn_kern.Matern(length_scale=self.length_scale, nu=0.5)
- )
-
-
-class WhiteNoise(Covariance):
- r"""
- Gaussian covariance function.
-
- The covariance function is
-
- .. math::
- K(x, x')= \begin{cases}
- \sigma^2, \quad x = x' \\
- 0, \quad x \neq x'\\
- \end{cases}
-
- where :math:`\sigma^2` is the variance.
-
- Heatmap plot of the covariance function:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import WhiteNoise
- import matplotlib.pyplot as plt
-
- WhiteNoise().heatmap(limits=(0, 1))
- plt.show()
-
- Example of Gaussian process trajectories using this covariance:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import WhiteNoise
- from skfda.datasets import make_gaussian_process
- import matplotlib.pyplot as plt
-
- gp = make_gaussian_process(
- n_samples=10, cov=WhiteNoise(), random_state=0)
- gp.plot()
- plt.show()
-
- Default representation in a Jupyter notebook:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import WhiteNoise
-
- WhiteNoise()
-
- """
-
- _latex_formula = (
- r"K(x, x')= \begin{cases} \sigma^2, \quad x = x' \\"
- r"0, \quad x \neq x'\\ \end{cases}"
- )
-
- _parameters_str = [("variance", r"\sigma^2")]
-
- def __init__(self, *, variance: float = 1):
- self.variance = variance
-
- def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat:
- x = _transform_to_2d(x)
- return self.variance * np.eye(x.shape[0])
-
- def to_sklearn(self) -> sklearn_kern.Kernel:
- return sklearn_kern.WhiteKernel(noise_level=self.variance)
-
-
-class Matern(Covariance):
- r"""
- Matérn covariance function.
-
- The covariance function is
-
- .. math::
- K(x, x') = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)}
- \left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)^{\nu}
- K_{\nu}\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)
-
- where :math:`\sigma^2` is the variance, :math:`l` is the length scale
- and :math:`\nu` controls the smoothness of the related Gaussian process.
- The trajectories of a Gaussian process with Matérn covariance is
- :math:`\lceil \nu \rceil - 1` times differentiable.
-
-
- Heatmap plot of the covariance function:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Matern
- import matplotlib.pyplot as plt
-
- Matern().heatmap(limits=(0, 1))
- plt.show()
-
- Example of Gaussian process trajectories using this covariance:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Matern
- from skfda.datasets import make_gaussian_process
- import matplotlib.pyplot as plt
-
- gp = make_gaussian_process(
- n_samples=10, cov=Matern(), random_state=0)
- gp.plot()
- plt.show()
-
- Default representation in a Jupyter notebook:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Matern
-
- Matern()
-
- """
- _latex_formula = (
- r"K(x, x') = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)}"
- r"\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)^{\nu}"
- r"K_{\nu}\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)"
- )
-
- _parameters_str = [
- ("variance", r"\sigma^2"),
- ("length_scale", "l"),
- ("nu", r"\nu"),
- ]
-
- def __init__(
- self,
- *,
- variance: float = 1,
- length_scale: float = 1,
- nu: float = 1.5,
- ) -> None:
- self.variance = variance
- self.length_scale = length_scale
- self.nu = nu
-
- def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat:
- x = _transform_to_2d(x)
- y = _transform_to_2d(y)
-
- x_y_squared = _squared_norms(x, y)
- x_y = np.sqrt(x_y_squared)
-
- p = self.nu - 0.5
- if p.is_integer():
- # Formula for half-integers
- p = int(p)
- body = np.sqrt(2 * p + 1) * x_y / self.length_scale
- exponential = np.exp(-body)
- power_list = np.full(shape=(p,) + body.shape, fill_value=2 * body)
- power_list = np.cumprod(power_list, axis=0)
- power_list = np.concatenate(
- (power_list[::-1], np.asarray([np.ones_like(body)])),
- )
- power_list = np.moveaxis(power_list, 0, -1)
- numerator = np.cumprod(np.arange(p, 0, -1))
- numerator = np.concatenate(([1], numerator))
- denom1 = np.cumprod(np.arange(2 * p, p, -1))
- denom1 = np.concatenate((denom1[::-1], [1]))
- denom2 = np.cumprod(np.arange(1, p + 1))
- denom2 = np.concatenate(([1], denom2))
-
- sum_terms = power_list * numerator / (denom1 * denom2)
- return ( # type: ignore[no-any-return]
- self.variance * exponential * np.sum(sum_terms, axis=-1)
- )
- elif self.nu == np.inf:
- return (
- self.variance * np.exp(
- -x_y_squared / (2 * self.length_scale ** 2),
- )
- )
- else:
- # General formula
- scaling = 2**(1 - self.nu) / gamma(self.nu)
- body = np.sqrt(2 * self.nu) * x_y / self.length_scale
- power = body**self.nu
- bessel = kv(self.nu, body)
-
- with np.errstate(invalid='ignore'):
- eval_cov = self.variance * scaling * power * bessel
-
- # Values with nan are where the distance is 0
- return np.nan_to_num( # type: ignore[no-any-return]
- eval_cov,
- nan=self.variance,
- )
-
- def to_sklearn(self) -> sklearn_kern.Kernel:
- return (
- self.variance
- * sklearn_kern.Matern(length_scale=self.length_scale, nu=self.nu)
- )
-
-
-class Empirical(Covariance):
- r"""
- Sample covariance function.
-
- The sample covariance function is defined as
- . math::
- K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N\left(x_n(t) -
- \bar{x}(t)\right) \left(x_n(s) - \bar{x}(s)\right)
-
- where :math:`x_n(t)` is the n-th sample and :math:`\bar{x}(t)` is the
- mean of the samples. :math:`N` is the number of samples,
- :math:`\text{correction}` is the degrees of freedom adjustment and is such
- that :math:`N-\text{correction}` is the divisor used in the calculation of
- the covariance function.
-
- """
-
- _latex_formula = (
- r"K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N"
- r"(x_n(t) - \bar{x}(t))(x_n(s) - \bar{x}(s))"
- )
- _parameters_str = [
- ("data", "data"),
- ]
-
- cov_fdata: FData
- correction: int
-
- @abc.abstractmethod
- def __init__(self, data: FData) -> None:
- if data.dim_domain != 1 or data.dim_codomain != 1:
- raise NotImplementedError(
- "Covariance only implemented "
- "for univariate functions",
- )
-
- def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat:
- """Evaluate the covariance function.
-
- Args:
- x: First array of points of evaluation.
- y: Second array of points of evaluation.
-
- Returns:
- Covariance function evaluated at the grid formed by x and y.
- """
- return self.cov_fdata([x, y], grid=True)[0, ..., 0]
-
-
-class EmpiricalGrid(Empirical):
- """Sample covariance function for FDataGrid."""
-
- cov_fdata: FDataGrid
- correction: int
-
- def __init__(self, data: FDataGrid, correction: int = 0) -> None:
- super().__init__(data=data)
-
- self.correction = correction
- self.cov_fdata = data.copy(
- data_matrix=np.cov(
- data.data_matrix[..., 0],
- rowvar=False,
- ddof=correction,
- )[np.newaxis, ...],
- grid_points=[
- data.grid_points[0],
- data.grid_points[0],
- ],
- domain_range=[
- data.domain_range[0],
- data.domain_range[0],
- ],
- argument_names=data.argument_names * 2,
- sample_names=("covariance",),
- )
-
-
-class EmpiricalBasis(Empirical):
- """
- Sample covariance function for FDataBasis.
-
- In this case one may use the basis expression of the samples to
- express the sample covariance function in the tensor product basis
- of the original basis.
- """
-
- cov_fdata: FDataBasis
- coeff_matrix: NDArrayFloat
- correction: int
-
- def __init__(self, data: FDataBasis, correction: int = 0) -> None:
- super().__init__(data=data)
-
- self.correction = correction
- self.coeff_matrix = np.cov(
- data.coefficients,
- rowvar=False,
- ddof=correction,
- )
- self.cov_fdata = FDataBasis(
- basis=TensorBasis([data.basis, data.basis]),
- coefficients=self.coeff_matrix.flatten(),
- argument_names=data.argument_names * 2,
- sample_names=("covariance",),
- )
+from __future__ import annotations
+
+import abc
+from typing import Callable, Sequence, Tuple, Union
+
+import matplotlib.pyplot as plt
+import numpy as np
+import sklearn.gaussian_process.kernels as sklearn_kern
+from matplotlib.figure import Figure
+from scipy.special import gamma, kv
+
+from ._math import inner_product_matrix
+from .metrics import PairwiseMetric, l2_distance
+from ..representation import FData, FDataBasis, FDataGrid
+from ..representation.basis import TensorBasis
+from ..typing._numpy import ArrayLike, NDArrayFloat
+
+
+def _squared_norms(x: NDArrayFloat, y: NDArrayFloat) -> NDArrayFloat:
+ return ( # type: ignore[no-any-return]
+ (x[:, np.newaxis, :] - y[np.newaxis, :, :]) ** 2
+ ).sum(2)
+
+
+CovarianceLike = Union[
+ float,
+ NDArrayFloat,
+ Callable[[ArrayLike, ArrayLike], NDArrayFloat],
+]
+
+InputAcceptable = Union[
+ np.ndarray,
+ FData,
+]
+
+
+def _transform_to_2d(t: ArrayLike) -> NDArrayFloat:
+ """Transform 1d arrays in column vectors."""
+ t = np.asfarray(t)
+
+ dim = len(t.shape)
+ assert dim <= 2
+
+ if dim < 2:
+ t = np.atleast_2d(t).T
+
+ return t
+
+
+def _execute_covariance(
+ covariance: CovarianceLike,
+ x: ArrayLike,
+ y: ArrayLike,
+) -> NDArrayFloat:
+ """Execute a covariance function."""
+ x = _transform_to_2d(x)
+ y = _transform_to_2d(y)
+
+ if isinstance(covariance, (int, float)):
+ return np.array(covariance)
+ else:
+ if callable(covariance):
+ result = covariance(x, y)
+ elif isinstance(covariance, np.ndarray):
+ result = covariance
+ else:
+ # GPy kernel
+ result = covariance.K(x, y)
+
+ assert result.shape[0] == len(x)
+ assert result.shape[1] == len(y)
+ return result
+
+
+class Covariance(abc.ABC):
+ """Abstract class for covariance functions."""
+
+ _parameters_str: Sequence[Tuple[str, str]]
+ _latex_formula: str
+
+ @abc.abstractmethod
+ def __call__(
+ self,
+ x: InputAcceptable,
+ y: InputAcceptable | None = None,
+ ) -> NDArrayFloat:
+ pass
+
+ def _param_check_and_transform(
+ self,
+ x: InputAcceptable,
+ y: InputAcceptable | None = None,
+ ) -> Tuple[np.ndarray | FData, np.ndarray | FData]:
+ # Param check
+ if y is None:
+ y = x
+
+ if type(x) is not type(y): # noqa: WPS516
+            raise ValueError(
+                'Cannot operate objects x and y from different classes '
+                f'({type(x)}, {type(y)}).',
+            )
+
+ if isinstance(x, np.ndarray) and isinstance(y, np.ndarray):
+ x, y = np.array(x), np.array(y)
+            if len(x.shape) < 2:
+                x = np.atleast_2d(x).T  # 1-d input becomes a column vector
+            if len(y.shape) < 2:
+                y = np.atleast_2d(y).T
+
+ return x, y
+
+ def heatmap(self, limits: Tuple[float, float] = (-1, 1)) -> Figure:
+ """Return a heatmap plot of the covariance function."""
+ from ..exploratory.visualization._utils import _create_figure
+
+ x = np.linspace(*limits, 1000)
+
+ cov_matrix = self(x, x)
+
+ fig = _create_figure()
+ ax = fig.add_subplot(1, 1, 1)
+ ax.imshow(
+ cov_matrix,
+ extent=[limits[0], limits[1], limits[1], limits[0]],
+ )
+ ax.set_title(f"Covariance function in [{limits[0]}, {limits[1]}]")
+
+ return fig
+
+ def _sample_trajectories_plot(self) -> Figure:
+ from ..datasets import make_gaussian_process
+
+ fd = make_gaussian_process(
+ start=-1,
+ n_samples=10,
+ cov=self,
+ random_state=0,
+ )
+ fig = fd.plot()
+ fig.axes[0].set_title("Sample trajectories")
+ return fig
+
+ def __repr__(self) -> str:
+
+ params_str = ', '.join(
+ f'{n}={getattr(self, n)}' for n, _ in self._parameters_str
+ )
+
+ return (
+ f"{self.__module__}.{type(self).__qualname__}("
+ f"{params_str}"
+ f")"
+ )
+
+ def _latex_content(self) -> str:
+ params_str = ''.join(
+ fr'{l} &= {getattr(self, n)} \\' for n, l in self._parameters_str
+ )
+
+ return (
+ fr"{self._latex_formula} \\"
+ r"\text{where:}"
+ r"\begin{aligned}"
+ fr"\qquad{params_str}"
+ r"\end{aligned}"
+ )
+
+ def _repr_latex_(self) -> str:
+ return fr"\(\displaystyle {self._latex_content()}\)"
+
+ def _repr_html_(self) -> str:
+ from ..exploratory.visualization._utils import _figure_to_svg
+
+ fig = self.heatmap()
+ heatmap = _figure_to_svg(fig)
+ plt.close(fig)
+
+ fig = self._sample_trajectories_plot()
+ sample_trajectories = _figure_to_svg(fig)
+ plt.close(fig)
+
+ row_style = ''
+
+ def column_style(percent: float, margin_top: str = "0") -> str:
+ return (
+ f'style="display: inline-block; '
+ f'margin:0; '
+ f'margin-top: {margin_top}; '
+ f'width:{percent}%; '
+ f'height:auto;'
+ f'vertical-align: middle"'
+ )
+
+        return fr"""
+        <div {row_style}>
+            <div {column_style(100, margin_top='25px')}>
+                \[{self._latex_content()}\]
+            </div>
+        </div>
+        <div {row_style}>
+            <div {column_style(48)}>
+                {sample_trajectories}
+            </div>
+            <div {column_style(48)}>
+                {heatmap}
+            </div>
+        </div>
+        """
+
+ def to_sklearn(self) -> sklearn_kern.Kernel:
+ """Convert it to a sklearn kernel, if there is one."""
+ raise NotImplementedError(
+ f"{type(self).__name__} covariance not "
+ f"implemented in scikit-learn",
+ )
+
+
+class Brownian(Covariance):
+ r"""
+ Brownian covariance function.
+
+ The covariance function is
+
+ .. math::
+ K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + |x' - \mathcal{O}|
+ - |x - x'|}{2}
+
+ where :math:`\sigma^2` is the variance at distance 1 from
+ :math:`\mathcal{O}` and :math:`\mathcal{O}` is the origin point.
+ If :math:`\mathcal{O} = 0` (the default) and we only
+ consider positive values, the formula can be simplified as
+
+ .. math::
+ K(x, y) = \sigma^2 \min(x, y).
+
+ Heatmap plot of the covariance function:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Brownian
+ import matplotlib.pyplot as plt
+
+ Brownian().heatmap(limits=(0, 1))
+ plt.show()
+
+ Example of Gaussian process trajectories using this covariance:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Brownian
+ from skfda.datasets import make_gaussian_process
+ import matplotlib.pyplot as plt
+
+ gp = make_gaussian_process(
+ n_samples=10, cov=Brownian(), random_state=0)
+ gp.plot()
+ plt.show()
+
+ Default representation in a Jupyter notebook:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Brownian
+
+ Brownian()
+
+ """
+ _latex_formula = (
+ r"K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + "
+ r"|x' - \mathcal{O}| - |x - x'|}{2}"
+ )
+
+ _parameters_str = [
+ ("variance", r"\sigma^2"),
+ ("origin", r"\mathcal{O}"),
+ ]
+
+ def __init__(self, *, variance: float = 1, origin: float = 0) -> None:
+ self.variance = variance
+ self.origin = origin
+
+ def __call__(
+ self,
+ x: InputAcceptable | FData,
+ y: InputAcceptable | None = None,
+ ) -> NDArrayFloat:
+ if isinstance(x, FData) or isinstance(y, FData):
+ raise ValueError('Not defined for FData objects.')
+
+ x = _transform_to_2d(x) - self.origin
+        y = x if y is None else _transform_to_2d(y) - self.origin
+
+ sum_norms = np.add.outer(
+ np.linalg.norm(x, axis=-1),
+ np.linalg.norm(y, axis=-1),
+ )
+ norm_sub = np.linalg.norm(
+ x[:, np.newaxis, :] - y[np.newaxis, :, :],
+ axis=-1,
+ )
+
+ return ( # type: ignore[no-any-return]
+ self.variance * (sum_norms - norm_sub) / 2
+ )
+
+
+class Linear(Covariance):
+ r"""
+ Linear covariance function.
+
+ The covariance function is
+
+ .. math::
+ K(x, x') = \sigma^2 (x^T x' + c)
+
+ where :math:`\sigma^2` is the scale of the variance and
+ :math:`c` is the intercept.
+
+ Heatmap plot of the covariance function:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Linear
+ import matplotlib.pyplot as plt
+
+ Linear().heatmap(limits=(0, 1))
+ plt.show()
+
+ Example of Gaussian process trajectories using this covariance:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Linear
+ from skfda.datasets import make_gaussian_process
+ import matplotlib.pyplot as plt
+
+ gp = make_gaussian_process(
+ n_samples=10, cov=Linear(), random_state=0)
+ gp.plot()
+ plt.show()
+
+ Default representation in a Jupyter notebook:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Linear
+
+ Linear()
+
+ """
+
+ _latex_formula = r"K(x, x') = \sigma^2 (x^T x' + c)"
+
+ _parameters_str = [
+ ("variance", r"\sigma^2"),
+ ("intercept", "c"),
+ ]
+
+ def __init__(self, *, variance: float = 1, intercept: float = 0) -> None:
+ self.variance = variance
+ self.intercept = intercept
+
+ def __call__(
+ self,
+ x: InputAcceptable,
+ y: InputAcceptable | None = None,
+ ) -> NDArrayFloat:
+ x, y = self._param_check_and_transform(x, y)
+
+ x_y = inner_product_matrix(x, y)
+ return self.variance * (x_y + self.intercept)
+
+ def to_sklearn(self) -> sklearn_kern.Kernel:
+ return (
+ self.variance
+ * (sklearn_kern.DotProduct(0) + self.intercept)
+ )
+
+
+class Polynomial(Covariance):
+ r"""
+ Polynomial covariance function.
+
+ The covariance function is
+
+ .. math::
+ K(x, x') = \sigma^2 (\alpha x^T x' + c)^d
+
+ where :math:`\sigma^2` is the scale of the variance,
+ :math:`\alpha` is the slope, :math:`d` the degree of the
+ polynomial and :math:`c` is the intercept.
+
+ Heatmap plot of the covariance function:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Polynomial
+ import matplotlib.pyplot as plt
+
+ Polynomial().heatmap(limits=(0, 1))
+ plt.show()
+
+ Example of Gaussian process trajectories using this covariance:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Polynomial
+ from skfda.datasets import make_gaussian_process
+ import matplotlib.pyplot as plt
+
+ gp = make_gaussian_process(
+ n_samples=10, cov=Polynomial(), random_state=0)
+ gp.plot()
+ plt.show()
+
+ Default representation in a Jupyter notebook:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Polynomial
+
+ Polynomial()
+
+ """
+
+ _latex_formula = r"K(x, x') = \sigma^2 (\alpha x^T x' + c)^d"
+
+ _parameters_str = [
+ ("variance", r"\sigma^2"),
+ ("intercept", "c"),
+ ("slope", r"\alpha"),
+ ("degree", "d"),
+ ]
+
+ def __init__(
+ self,
+ *,
+ variance: float = 1,
+ intercept: float = 0,
+ slope: float = 1,
+ degree: float = 2,
+ ) -> None:
+ self.variance = variance
+ self.intercept = intercept
+ self.slope = slope
+ self.degree = degree
+
+ def __call__(
+ self,
+ x: InputAcceptable,
+ y: InputAcceptable | None = None,
+ ) -> NDArrayFloat:
+ x, y = self._param_check_and_transform(x, y)
+
+ x_y = inner_product_matrix(x, y)
+ return (
+ self.variance
+ * (self.slope * x_y + self.intercept) ** self.degree
+ )
+
+ def to_sklearn(self) -> sklearn_kern.Kernel:
+ return (
+ self.variance
+ * (self.slope * sklearn_kern.DotProduct(0) + self.intercept)
+ ** self.degree
+ )
+
+
+class Gaussian(Covariance):
+ r"""
+ Gaussian covariance function.
+
+ The covariance function is
+
+ .. math::
+ K(x, x') = \sigma^2 \exp\left(-\frac{||x - x'||^2}{2l^2}\right)
+
+ where :math:`\sigma^2` is the variance and :math:`l` is the length scale.
+
+ Heatmap plot of the covariance function:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Gaussian
+ import matplotlib.pyplot as plt
+
+ Gaussian().heatmap(limits=(0, 1))
+ plt.show()
+
+ Example of Gaussian process trajectories using this covariance:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Gaussian
+ from skfda.datasets import make_gaussian_process
+ import matplotlib.pyplot as plt
+
+ gp = make_gaussian_process(
+ n_samples=10, cov=Gaussian(), random_state=0)
+ gp.plot()
+ plt.show()
+
+ Default representation in a Jupyter notebook:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Gaussian
+
+ Gaussian()
+
+ """
+
+ _latex_formula = (
+ r"K(x, x') = \sigma^2 \exp\left(-\frac{\|x - x'\|^2}{2l^2}"
+ r"\right)"
+ )
+
+ _parameters_str = [
+ ("variance", r"\sigma^2"),
+ ("length_scale", "l"),
+ ]
+
+ def __init__(self, *, variance: float = 1, length_scale: float = 1):
+ self.variance = variance
+ self.length_scale = length_scale
+
+ def __call__(
+ self,
+ x: InputAcceptable,
+ y: InputAcceptable | None = None,
+ ) -> NDArrayFloat:
+ x, y = self._param_check_and_transform(x, y)
+
+ distance_x_y = PairwiseMetric(l2_distance)(x, y)
+ return self.variance * np.exp(-distance_x_y ** 2 / (2 * self.length_scale ** 2))
+
+ def to_sklearn(self) -> sklearn_kern.Kernel:
+ return (
+ self.variance * sklearn_kern.RBF(length_scale=self.length_scale)
+ )
+
+
+class Exponential(Covariance):
+ r"""
+ Exponential covariance function.
+
+ The covariance function is
+
+ .. math::
+ K(x, x') = \sigma^2 \exp\left(-\frac{\|x - x'\|}{l}\right)
+
+ where :math:`\sigma^2` is the variance and :math:`l` is the length scale.
+
+ Heatmap plot of the covariance function:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Exponential
+ import matplotlib.pyplot as plt
+
+ Exponential().heatmap(limits=(0, 1))
+ plt.show()
+
+ Example of Gaussian process trajectories using this covariance:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Exponential
+ from skfda.datasets import make_gaussian_process
+ import matplotlib.pyplot as plt
+
+ gp = make_gaussian_process(
+ n_samples=10, cov=Exponential(), random_state=0)
+ gp.plot()
+ plt.show()
+
+ Default representation in a Jupyter notebook:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Exponential
+
+ Exponential()
+
+ """
+
+ _latex_formula = (
+ r"K(x, x') = \sigma^2 \exp\left(-\frac{||x - x'||}{l}"
+ r"\right)"
+ )
+
+ _parameters_str = [
+ ("variance", r"\sigma^2"),
+ ("length_scale", "l"),
+ ]
+
+ def __init__(
+ self,
+ *,
+ variance: float = 1,
+ length_scale: float = 1,
+ ) -> None:
+ self.variance = variance
+ self.length_scale = length_scale
+
+ def __call__(
+ self,
+ x: InputAcceptable,
+ y: InputAcceptable | None = None,
+ ) -> NDArrayFloat:
+ x, y = self._param_check_and_transform(x, y)
+
+ distance_x_y = PairwiseMetric(l2_distance)(x, y)
+ return self.variance * np.exp(-distance_x_y / (self.length_scale))
+
+ def to_sklearn(self) -> sklearn_kern.Kernel:
+ return (
+ self.variance
+ * sklearn_kern.Matern(length_scale=self.length_scale, nu=0.5)
+ )
+
+
+class WhiteNoise(Covariance):
+ r"""
+    White noise covariance function.
+
+ The covariance function is
+
+ .. math::
+ K(x, x')= \begin{cases}
+ \sigma^2, \quad x = x' \\
+ 0, \quad x \neq x'\\
+ \end{cases}
+
+ where :math:`\sigma^2` is the variance.
+
+ Heatmap plot of the covariance function:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import WhiteNoise
+ import matplotlib.pyplot as plt
+
+ WhiteNoise().heatmap(limits=(0, 1))
+ plt.show()
+
+ Example of Gaussian process trajectories using this covariance:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import WhiteNoise
+ from skfda.datasets import make_gaussian_process
+ import matplotlib.pyplot as plt
+
+ gp = make_gaussian_process(
+ n_samples=10, cov=WhiteNoise(), random_state=0)
+ gp.plot()
+ plt.show()
+
+ Default representation in a Jupyter notebook:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import WhiteNoise
+
+ WhiteNoise()
+
+ """
+
+ _latex_formula = (
+ r"K(x, x')= \begin{cases} \sigma^2, \quad x = x' \\"
+ r"0, \quad x \neq x'\\ \end{cases}"
+ )
+
+ _parameters_str = [("variance", r"\sigma^2")]
+
+ def __init__(self, *, variance: float = 1):
+ self.variance = variance
+
+ def __call__(
+ self,
+ x: InputAcceptable,
+ y: InputAcceptable | None = None,
+ ) -> NDArrayFloat:
+ if isinstance(x, FData) or isinstance(y, FData):
+ raise ValueError('Not defined for FData objects.')
+
+ x = _transform_to_2d(x)
+ return self.variance * np.eye(x.shape[0])
+
+ def to_sklearn(self) -> sklearn_kern.Kernel:
+ return sklearn_kern.WhiteKernel(noise_level=self.variance)
+
+
+class Matern(Covariance):
+ r"""
+ Matérn covariance function.
+
+ The covariance function is
+
+ .. math::
+ K(x, x') = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)}
+ \left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)^{\nu}
+ K_{\nu}\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)
+
+ where :math:`\sigma^2` is the variance, :math:`l` is the length scale
+ and :math:`\nu` controls the smoothness of the related Gaussian process.
+    The trajectories of a Gaussian process with Matérn covariance are
+ :math:`\lceil \nu \rceil - 1` times differentiable.
+
+
+ Heatmap plot of the covariance function:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Matern
+ import matplotlib.pyplot as plt
+
+ Matern().heatmap(limits=(0, 1))
+ plt.show()
+
+ Example of Gaussian process trajectories using this covariance:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Matern
+ from skfda.datasets import make_gaussian_process
+ import matplotlib.pyplot as plt
+
+ gp = make_gaussian_process(
+ n_samples=10, cov=Matern(), random_state=0)
+ gp.plot()
+ plt.show()
+
+ Default representation in a Jupyter notebook:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Matern
+
+ Matern()
+
+ """
+ _latex_formula = (
+ r"K(x, x') = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)}"
+ r"\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)^{\nu}"
+ r"K_{\nu}\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)"
+ )
+
+ _parameters_str = [
+ ("variance", r"\sigma^2"),
+ ("length_scale", "l"),
+ ("nu", r"\nu"),
+ ]
+
+ def __init__(
+ self,
+ *,
+ variance: float = 1,
+ length_scale: float = 1,
+ nu: float = 1.5,
+ ) -> None:
+ self.variance = variance
+ self.length_scale = length_scale
+ self.nu = nu
+
+ def __call__(
+ self,
+ x: InputAcceptable,
+ y: InputAcceptable | None = None,
+ ) -> NDArrayFloat:
+ x, y = self._param_check_and_transform(x, y)
+
+ distance_x_y = PairwiseMetric(l2_distance)(x, y)
+
+ p = self.nu - 0.5
+ if p.is_integer():
+ # Formula for half-integers
+ p = int(p)
+ body = np.sqrt(2 * p + 1) * distance_x_y / self.length_scale
+ exponential = np.exp(-body)
+ power_list = np.full(shape=(p,) + body.shape, fill_value=2 * body)
+ power_list = np.cumprod(power_list, axis=0)
+ power_list = np.concatenate(
+ (power_list[::-1], np.asarray([np.ones_like(body)])),
+ )
+ power_list = np.moveaxis(power_list, 0, -1)
+ numerator = np.cumprod(np.arange(p, 0, -1))
+ numerator = np.concatenate(([1], numerator))
+ denom1 = np.cumprod(np.arange(2 * p, p, -1))
+ denom1 = np.concatenate((denom1[::-1], [1]))
+ denom2 = np.cumprod(np.arange(1, p + 1))
+ denom2 = np.concatenate(([1], denom2))
+
+ sum_terms = power_list * numerator / (denom1 * denom2)
+ return ( # type: ignore[no-any-return]
+ self.variance * exponential * np.sum(sum_terms, axis=-1)
+ )
+ elif self.nu == np.inf:
+ return ( # type: ignore[no-any-return]
+ self.variance * np.exp(
+ -distance_x_y ** 2 / (2 * self.length_scale ** 2),
+ )
+ )
+ else:
+ # General formula
+ scaling = 2**(1 - self.nu) / gamma(self.nu)
+ body = np.sqrt(2 * self.nu) * distance_x_y / self.length_scale
+ power = body**self.nu
+ bessel = kv(self.nu, body)
+
+ with np.errstate(invalid='ignore'):
+ eval_cov = self.variance * scaling * power * bessel
+
+ # Values with nan are where the distance is 0
+ return np.nan_to_num( # type: ignore[no-any-return]
+ eval_cov,
+ nan=self.variance,
+ )
+
+ def to_sklearn(self) -> sklearn_kern.Kernel:
+ return (
+ self.variance
+ * sklearn_kern.Matern(length_scale=self.length_scale, nu=self.nu)
+ )
+
+
+class Empirical(Covariance):
+ r"""
+ Sample covariance function.
+
+ The sample covariance function is defined as
+    .. math::
+ K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N\left(x_n(t) -
+ \bar{x}(t)\right) \left(x_n(s) - \bar{x}(s)\right)
+
+ where :math:`x_n(t)` is the n-th sample and :math:`\bar{x}(t)` is the
+ mean of the samples. :math:`N` is the number of samples,
+ :math:`\text{correction}` is the degrees of freedom adjustment and is such
+ that :math:`N-\text{correction}` is the divisor used in the calculation of
+ the covariance function.
+
+ """
+
+ _latex_formula = (
+ r"K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N"
+ r"(x_n(t) - \bar{x}(t))(x_n(s) - \bar{x}(s))"
+ )
+ _parameters_str = [
+ ("data", "data"),
+ ]
+
+ cov_fdata: FData
+ correction: int
+
+ @abc.abstractmethod
+ def __init__(self, data: FData) -> None:
+ if data.dim_domain != 1 or data.dim_codomain != 1:
+ raise NotImplementedError(
+ "Covariance only implemented "
+ "for univariate functions",
+ )
+
+ def __call__(
+ self,
+ x: InputAcceptable,
+ y: InputAcceptable | None = None,
+ ) -> NDArrayFloat:
+        """Evaluate the covariance function.
+
+        Args:
+            x: First array of points of evaluation.
+            y: Second array of points of evaluation.
+
+        Returns:
+            Covariance function evaluated at the grid formed by x and y.
+        """
+        if isinstance(x, FData) or isinstance(y, FData):
+            raise ValueError('Not defined for FData objects.')
+
+        return self.cov_fdata([x, y], grid=True)[0, ..., 0]
+
+
+class EmpiricalGrid(Empirical):
+ """Sample covariance function for FDataGrid."""
+
+ cov_fdata: FDataGrid
+ correction: int
+
+ def __init__(self, data: FDataGrid, correction: int = 0) -> None:
+ super().__init__(data=data)
+
+ self.correction = correction
+ self.cov_fdata = data.copy(
+ data_matrix=np.cov(
+ data.data_matrix[..., 0],
+ rowvar=False,
+ ddof=correction,
+ )[np.newaxis, ...],
+ grid_points=[
+ data.grid_points[0],
+ data.grid_points[0],
+ ],
+ domain_range=[
+ data.domain_range[0],
+ data.domain_range[0],
+ ],
+ argument_names=data.argument_names * 2,
+ sample_names=("covariance",),
+ )
+
+
+class EmpiricalBasis(Empirical):
+ """
+ Sample covariance function for FDataBasis.
+
+ In this case one may use the basis expression of the samples to
+ express the sample covariance function in the tensor product basis
+ of the original basis.
+ """
+
+ cov_fdata: FDataBasis
+ coeff_matrix: NDArrayFloat
+ correction: int
+
+ def __init__(self, data: FDataBasis, correction: int = 0) -> None:
+ super().__init__(data=data)
+
+ self.correction = correction
+ self.coeff_matrix = np.cov(
+ data.coefficients,
+ rowvar=False,
+ ddof=correction,
+ )
+ self.cov_fdata = FDataBasis(
+ basis=TensorBasis([data.basis, data.basis]),
+ coefficients=self.coeff_matrix.flatten(),
+ argument_names=data.argument_names * 2,
+ sample_names=("covariance",),
+ )
From 4abb6e5e7540048d898469b274626f7fcfe12273 Mon Sep 17 00:00:00 2001
From: E69D68U85 <72515278+E105D104U125@users.noreply.github.com>
Date: Sun, 3 Mar 2024 22:27:26 +0100
Subject: [PATCH 03/14] Input parameters now enumerated using ParameterGrid.
Replaced the per-kernel multivariate tests with a single parametrized test covering all covariance functions, using scikit-learn's ParameterGrid to enumerate the parameter combinations.
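ParameterGrid expands a dict of parameter lists into every combination, so a
single fixture can drive the scikit-learn comparison for all kernels. A short
sketch (illustrative only; cov is the module alias used in the test file):

    from sklearn.model_selection import ParameterGrid

    import skfda.misc.covariances as cov

    grid = ParameterGrid({"variance": [1, 2], "intercept": [3, 4]})
    for params in grid:
        # params is e.g. {'intercept': 3, 'variance': 1}; all four
        # combinations are visited
        kernel = cov.Linear(**params)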
---
skfda/tests/test_covariances.py | 237 +++++++++++---------------------
1 file changed, 79 insertions(+), 158 deletions(-)
diff --git a/skfda/tests/test_covariances.py b/skfda/tests/test_covariances.py
index f9a54c17a..dc5982ca0 100644
--- a/skfda/tests/test_covariances.py
+++ b/skfda/tests/test_covariances.py
@@ -1,27 +1,22 @@
-"""
-Tests for Covariance module.
-
-This file includes tests for multivariate data from the previous version
-of the file. It additionally incorporates test cases for functional data
-objects.
-"""
-import pytest
-from typing import Tuple
+"""Tests for Covariance module."""
+from typing import Any, Tuple
import numpy as np
+import pytest
+from sklearn.model_selection import ParameterGrid
import skfda.misc.covariances as cov
-from skfda import FDataGrid, FDataBasis
+from skfda import FDataBasis, FDataGrid
from skfda.datasets import fetch_phoneme
from skfda.representation.basis import MonomialBasis
def _test_compare_sklearn(
- multivariate_data,
+ multivariate_data: Any,
cov: cov.Covariance,
) -> None:
cov_sklearn = cov.to_sklearn()
- cov_matrix = cov(multivariate_data, multivariate_data)
+ cov_matrix = cov(multivariate_data)
cov_sklearn_matrix = cov_sklearn(multivariate_data)
np.testing.assert_array_almost_equal(cov_matrix, cov_sklearn_matrix)
@@ -47,7 +42,7 @@ def fetch_phoneme_fixture() -> FDataGrid:
cov.Matern(),
],
)
-def covariances_fixture(request) -> cov.Covariance:
+def covariances_fixture(request: Any) -> Any:
"""Fixture for getting a covariance kernel function."""
return request.param
@@ -58,14 +53,14 @@ def covariances_fixture(request) -> cov.Covariance:
cov.WhiteNoise(),
],
)
-def covariances_raise_fixture(request) -> cov.Covariance:
+def covariances_raise_fixture(request: Any) -> Any:
"""Fixture for getting a covariance kernel that raises a ValueError."""
return request.param
@pytest.fixture
def fdatabasis_data() -> Tuple[FDataBasis, FDataBasis]:
- """Fixture for loading fdatabasis objects."""
+    """Fixture for getting FDataBasis objects."""
basis = MonomialBasis(
n_basis=2,
domain_range=(-2, 2),
@@ -90,56 +85,50 @@ def fdatabasis_data() -> Tuple[FDataBasis, FDataBasis]:
@pytest.fixture
-def multivariate_data() -> np.ndarray:
- """Fixture for loading multivariate data."""
+def multivariate_data() -> np.ndarray:
+ """Fixture for getting multivariate data."""
return np.linspace(-1, 1, 1000)[:, np.newaxis]
@pytest.fixture(
- params=[1, 2],
-)
-def variance_param(request) -> float:
- """Fixture for loading variance parameter."""
- return request.param
-
-
-@pytest.fixture(
- params=[0, 1, 2],
-)
-def intercept_param(request) -> float:
- """Fixture for loading intercept parameter."""
- return request.param
-
-
-@pytest.fixture(
- params=[1, 2],
-)
-def slope_param(request) -> float:
- """Fixture for loading slope parameter."""
- return request.param
-
-
-@pytest.fixture(
- params=[1, 2, 3],
-)
-def degree_param(request) -> float:
- """Fixture for loading degree parameter."""
- return request.param
-
-
-@pytest.fixture(
- params=[1, 2, 3],
-)
-def length_scale_param(request) -> float:
- """Fixture for loading length scale parameter."""
- return request.param
-
-
-@pytest.fixture(
- params=[0.5, 1, 1.5, 2.5, 3.5, np.inf],
+ params=[
+ (cov.Linear,
+ {
+ "variance": [1, 2],
+ "intercept": [3, 4],
+ },
+ ),
+ (cov.Polynomial,
+ {
+ "variance": [2],
+ "intercept": [0, 2],
+ "slope": [1, 2],
+ "degree": [1, 2, 3],
+ },
+ ),
+ (cov.Exponential,
+ {
+ "variance": [1, 2],
+ "length_scale": [0.5, 1, 2],
+ },
+ ),
+ (cov.Gaussian,
+ {
+ "variance": [1, 2],
+ "length_scale": [0.5, 1, 2],
+ },
+ ),
+ (cov.Matern,
+ {
+ "variance": [2],
+ "length_scale": [0.5],
+ "nu": [0.5, 1, 1.5, 2.5, 3.5, np.inf],
+ },
+ ),
+ ],
)
-def nu_param(request) -> np.ndarray:
- """Fixture for loading nu parameter."""
+def covariance_and_params(request: Any) -> Any:
+ """Fixture to load the covariance functions."""
return request.param
@@ -148,7 +137,10 @@ def nu_param(request) -> np.ndarray:
##############################################################################
-def test_covariances(fetch_phoneme_fixture, covariances_fixture) -> None:
+def test_covariances(
+ fetch_phoneme_fixture: Any,
+ covariances_fixture: Any,
+) -> None:
"""Check that invalid parameters in fit raise exception."""
fd = fetch_phoneme_fixture
cov_kernel = covariances_fixture
@@ -164,7 +156,10 @@ def test_covariances(fetch_phoneme_fixture, covariances_fixture) -> None:
)
-def test_raises(fetch_phoneme_fixture, covariances_raise_fixture):
+def test_raises(
+ fetch_phoneme_fixture: Any,
+ covariances_raise_fixture: Any,
+) -> None:
"""Check that it raises a ValueError exception."""
fd = fetch_phoneme_fixture
cov_kernel = covariances_raise_fixture
@@ -176,7 +171,9 @@ def test_raises(fetch_phoneme_fixture, covariances_raise_fixture):
)
-def test_fdatabasis_example_linear(fdatabasis_data):
+def test_fdatabasis_example_linear(
+ fdatabasis_data: Any,
+) -> None:
"""Check a precalculated example for Linear covariance kernel."""
fd1, fd2 = fdatabasis_data
res1 = cov.Linear(variance=1 / 2, intercept=3)(fd1, fd2)
@@ -188,7 +185,9 @@ def test_fdatabasis_example_linear(fdatabasis_data):
)
-def test_fdatabasis_example_polynomial(fdatabasis_data):
+def test_fdatabasis_example_polynomial(
+ fdatabasis_data: Any,
+) -> None:
"""Check a precalculated example for Polynomial covariance kernel."""
fd1, fd2 = fdatabasis_data
res1 = cov.Polynomial(
@@ -205,7 +204,9 @@ def test_fdatabasis_example_polynomial(fdatabasis_data):
)
-def test_fdatabasis_example_gaussian(fdatabasis_data):
+def test_fdatabasis_example_gaussian(
+ fdatabasis_data: Any,
+) -> None:
"""Check a precalculated example for Gaussian covariance kernel."""
fd1, fd2 = fdatabasis_data
res1 = cov.Gaussian(variance=3, length_scale=2)(fd1, fd2)
@@ -220,7 +221,9 @@ def test_fdatabasis_example_gaussian(fdatabasis_data):
)
-def test_fdatabasis_example_exponential(fdatabasis_data):
+def test_fdatabasis_example_exponential(
+ fdatabasis_data: Any,
+) -> None:
"""Check a precalculated example for Exponential covariance kernel."""
fd1, fd2 = fdatabasis_data
res1 = cov.Exponential(variance=4, length_scale=5)(fd1, fd2)
@@ -235,7 +238,9 @@ def test_fdatabasis_example_exponential(fdatabasis_data):
)
-def test_fdatabasis_example_matern(fdatabasis_data):
+def test_fdatabasis_example_matern(
+ fdatabasis_data: Any,
+) -> None:
"""Check a precalculated example for Matern covariance kernel."""
fd1, fd2 = fdatabasis_data
res1 = cov.Matern(variance=2, length_scale=3, nu=2)(fd1, fd2)
@@ -250,95 +255,11 @@ def test_fdatabasis_example_matern(fdatabasis_data):
)
-def test_multivariate_linear(
- multivariate_data,
- variance_param,
- intercept_param,
+def test_multivariate_covariance_kernel(
+ multivariate_data: Any,
+ covariance_and_params: Any,
) -> None:
- """Test linear covariance kernel against scikit learn's kernel."""
- _test_compare_sklearn(
- multivariate_data,
- cov.Linear(
- variance=variance_param,
- intercept=intercept_param,
- ),
- )
-
-
-def test_multivariate_polynomial(
- multivariate_data,
- variance_param,
- intercept_param,
- slope_param,
- degree_param,
-) -> None:
- """Test polynomial covariance kernel against scikit learn's kernel."""
- _test_compare_sklearn(
- multivariate_data,
- cov.Polynomial(
- variance=variance_param,
- intercept=intercept_param,
- slope=slope_param,
- degree=degree_param,
- ),
- )
-
-
-def test_multivariate_gaussian(
- multivariate_data,
- variance_param,
- length_scale_param,
-) -> None:
- """Test gaussian covariance kernel against scikit learn's kernel."""
- _test_compare_sklearn(
- multivariate_data,
- cov.Gaussian(
- variance=variance_param,
- length_scale=length_scale_param,
- ),
- )
-
-
-def test_multivariate_exponential(
- multivariate_data,
- variance_param,
- length_scale_param,
-) -> None:
- """Test exponential covariance kernel against scikit learn's kernel."""
- _test_compare_sklearn(
- multivariate_data,
- cov.Exponential(
- variance=variance_param,
- length_scale=length_scale_param,
- ),
- )
-
-
-def test_multivariate_matern(
- multivariate_data,
- variance_param,
- length_scale_param,
- nu_param,
-) -> None:
- """Test matern covariance kernel against scikit learn's kernel."""
- _test_compare_sklearn(
- multivariate_data,
- cov.Matern(
- variance=variance_param,
- length_scale=length_scale_param,
- nu=nu_param,
- ),
- )
-
-
-def test_multivariate_white_noise(
- multivariate_data,
- variance_param,
-) -> None:
- """Test white noise covariance kernel against scikit learn's kernel."""
- _test_compare_sklearn(
- multivariate_data,
- cov.WhiteNoise(
- variance=variance_param,
- ),
- )
+ """Test general covariance kernel against scikit-learn's kernel."""
+ cov_kernel, param_dict = covariance_and_params
+ for input_params in list(ParameterGrid(param_dict)):
+ _test_compare_sklearn(multivariate_data, cov_kernel(**input_params))
From 3a54b9f275775266611979d590aa6c0a99d387ad Mon Sep 17 00:00:00 2001
From: E69D68U85 <72515278+E105D104U125@users.noreply.github.com>
Date: Mon, 25 Mar 2024 19:56:52 +0100
Subject: [PATCH 04/14] Changed EOL to LF.
---
skfda/misc/covariances.py | 1882 ++++++++++++++++++-------------------
1 file changed, 941 insertions(+), 941 deletions(-)
diff --git a/skfda/misc/covariances.py b/skfda/misc/covariances.py
index 9bce97d74..b441892ff 100644
--- a/skfda/misc/covariances.py
+++ b/skfda/misc/covariances.py
@@ -1,941 +1,941 @@
-from __future__ import annotations
-
-import abc
-from typing import Callable, Sequence, Tuple, Union
-
-import matplotlib.pyplot as plt
-import numpy as np
-import sklearn.gaussian_process.kernels as sklearn_kern
-from matplotlib.figure import Figure
-from scipy.special import gamma, kv
-
-from ..misc import inner_product_matrix
-from ..misc.metrics import PairwiseMetric, LpNorm, l2_distance
-from ..representation import FData, FDataBasis, FDataGrid
-from ..representation.basis import TensorBasis
-from ..typing._numpy import ArrayLike, NDArrayFloat
-
-
-def _squared_norms(x: NDArrayFloat, y: NDArrayFloat) -> NDArrayFloat:
- return ( # type: ignore[no-any-return]
- (x[:, np.newaxis, :] - y[np.newaxis, :, :]) ** 2
- ).sum(2)
-
-
-CovarianceLike = Union[
- float,
- NDArrayFloat,
- Callable[[ArrayLike, ArrayLike], NDArrayFloat],
-]
-
-InputAcceptable = Union[
- np.ndarray,
- FData,
-]
-
-
-def _transform_to_2d(t: ArrayLike) -> NDArrayFloat:
- """Transform 1d arrays in column vectors."""
- t = np.asfarray(t)
-
- dim = len(t.shape)
- assert dim <= 2
-
- if dim < 2:
- t = np.atleast_2d(t).T
-
- return t
-
-
-def _execute_covariance(
- covariance: CovarianceLike,
- x: ArrayLike,
- y: ArrayLike,
-) -> NDArrayFloat:
- """Execute a covariance function."""
- x = _transform_to_2d(x)
- y = _transform_to_2d(y)
-
- if isinstance(covariance, (int, float)):
- return np.array(covariance)
- else:
- if callable(covariance):
- result = covariance(x, y)
- elif isinstance(covariance, np.ndarray):
- result = covariance
- else:
- # GPy kernel
- result = covariance.K(x, y)
-
- assert result.shape[0] == len(x)
- assert result.shape[1] == len(y)
- return result
-
-
-class Covariance(abc.ABC):
- """Abstract class for covariance functions."""
-
- _parameters_str: Sequence[Tuple[str, str]]
- _latex_formula: str
-
- @abc.abstractmethod
- def __call__(
- self,
- x: InputAcceptable,
- y: InputAcceptable | None = None,
- ) -> NDArrayFloat:
- pass
-
- def _param_check_and_transform(
- self,
- x: InputAcceptable,
- y: InputAcceptable | None = None,
- ) -> Tuple[np.ndarray | FData, np.ndarray | FData]:
- # Param check
- if y is None:
- y = x
-
- if type(x) is not type(y): # noqa: WPS516
- raise ValueError(
- 'Cannot operate objects x and y from different classes',
- f'({type(x)}, {type(y)}).',
- )
-
- if isinstance(x, np.ndarray) and isinstance(y, np.ndarray):
- x, y = np.array(x), np.array(y)
- if len(x.shape) < 2:
- x = np.atleast_2d(x)
- if len(y.shape) < 2:
- y = np.atleast_2d(y)
-
- return x, y
-
- def heatmap(self, limits: Tuple[float, float] = (-1, 1)) -> Figure:
- """Return a heatmap plot of the covariance function."""
- from ..exploratory.visualization._utils import _create_figure
-
- x = np.linspace(*limits, 1000)
-
- cov_matrix = self(x, x)
-
- fig = _create_figure()
- ax = fig.add_subplot(1, 1, 1)
- ax.imshow(
- cov_matrix,
- extent=[limits[0], limits[1], limits[1], limits[0]],
- )
- ax.set_title(f"Covariance function in [{limits[0]}, {limits[1]}]")
-
- return fig
-
- def _sample_trajectories_plot(self) -> Figure:
- from ..datasets import make_gaussian_process
-
- fd = make_gaussian_process(
- start=-1,
- n_samples=10,
- cov=self,
- random_state=0,
- )
- fig = fd.plot()
- fig.axes[0].set_title("Sample trajectories")
- return fig
-
- def __repr__(self) -> str:
-
- params_str = ', '.join(
- f'{n}={getattr(self, n)}' for n, _ in self._parameters_str
- )
-
- return (
- f"{self.__module__}.{type(self).__qualname__}("
- f"{params_str}"
- f")"
- )
-
- def _latex_content(self) -> str:
- params_str = ''.join(
- fr'{l} &= {getattr(self, n)} \\' for n, l in self._parameters_str
- )
-
- return (
- fr"{self._latex_formula} \\"
- r"\text{where:}"
- r"\begin{aligned}"
- fr"\qquad{params_str}"
- r"\end{aligned}"
- )
-
- def _repr_latex_(self) -> str:
- return fr"\(\displaystyle {self._latex_content()}\)"
-
- def _repr_html_(self) -> str:
- from ..exploratory.visualization._utils import _figure_to_svg
-
- fig = self.heatmap()
- heatmap = _figure_to_svg(fig)
- plt.close(fig)
-
- fig = self._sample_trajectories_plot()
- sample_trajectories = _figure_to_svg(fig)
- plt.close(fig)
-
- row_style = ''
-
- def column_style(percent: float, margin_top: str = "0") -> str:
- return (
- f'style="display: inline-block; '
- f'margin:0; '
- f'margin-top: {margin_top}; '
- f'width:{percent}%; '
- f'height:auto;'
- f'vertical-align: middle"'
- )
-
- return fr"""
-
-
- \[{self._latex_content()}\]
-
-
-
-
- {sample_trajectories}
-
-
- {heatmap}
-
-
- """
-
- def to_sklearn(self) -> sklearn_kern.Kernel:
- """Convert it to a sklearn kernel, if there is one."""
- raise NotImplementedError(
- f"{type(self).__name__} covariance not "
- f"implemented in scikit-learn",
- )
-
-
-class Brownian(Covariance):
- r"""
- Brownian covariance function.
-
- The covariance function is
-
- .. math::
- K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + |x' - \mathcal{O}|
- - |x - x'|}{2}
-
- where :math:`\sigma^2` is the variance at distance 1 from
- :math:`\mathcal{O}` and :math:`\mathcal{O}` is the origin point.
- If :math:`\mathcal{O} = 0` (the default) and we only
- consider positive values, the formula can be simplified as
-
- .. math::
- K(x, y) = \sigma^2 \min(x, y).
-
- Heatmap plot of the covariance function:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Brownian
- import matplotlib.pyplot as plt
-
- Brownian().heatmap(limits=(0, 1))
- plt.show()
-
- Example of Gaussian process trajectories using this covariance:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Brownian
- from skfda.datasets import make_gaussian_process
- import matplotlib.pyplot as plt
-
- gp = make_gaussian_process(
- n_samples=10, cov=Brownian(), random_state=0)
- gp.plot()
- plt.show()
-
- Default representation in a Jupyter notebook:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Brownian
-
- Brownian()
-
- """
- _latex_formula = (
- r"K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + "
- r"|x' - \mathcal{O}| - |x - x'|}{2}"
- )
-
- _parameters_str = [
- ("variance", r"\sigma^2"),
- ("origin", r"\mathcal{O}"),
- ]
-
- def __init__(self, *, variance: float = 1, origin: float = 0) -> None:
- self.variance = variance
- self.origin = origin
-
- def __call__(
- self,
- x: InputAcceptable | FData,
- y: InputAcceptable | None = None,
- ) -> NDArrayFloat:
- if isinstance(x, FData) or isinstance(y, FData):
- raise ValueError('Not defined for FData objects.')
-
- x = _transform_to_2d(x) - self.origin
- y = _transform_to_2d(y) - self.origin
-
- sum_norms = np.add.outer(
- np.linalg.norm(x, axis=-1),
- np.linalg.norm(y, axis=-1),
- )
- norm_sub = np.linalg.norm(
- x[:, np.newaxis, :] - y[np.newaxis, :, :],
- axis=-1,
- )
-
- return ( # type: ignore[no-any-return]
- self.variance * (sum_norms - norm_sub) / 2
- )
-
-
-class Linear(Covariance):
- r"""
- Linear covariance function.
-
- The covariance function is
-
- .. math::
- K(x, x') = \sigma^2 (x^T x' + c)
-
- where :math:`\sigma^2` is the scale of the variance and
- :math:`c` is the intercept.
-
- Heatmap plot of the covariance function:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Linear
- import matplotlib.pyplot as plt
-
- Linear().heatmap(limits=(0, 1))
- plt.show()
-
- Example of Gaussian process trajectories using this covariance:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Linear
- from skfda.datasets import make_gaussian_process
- import matplotlib.pyplot as plt
-
- gp = make_gaussian_process(
- n_samples=10, cov=Linear(), random_state=0)
- gp.plot()
- plt.show()
-
- Default representation in a Jupyter notebook:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Linear
-
- Linear()
-
- """
-
- _latex_formula = r"K(x, x') = \sigma^2 (x^T x' + c)"
-
- _parameters_str = [
- ("variance", r"\sigma^2"),
- ("intercept", "c"),
- ]
-
- def __init__(self, *, variance: float = 1, intercept: float = 0) -> None:
- self.variance = variance
- self.intercept = intercept
-
- def __call__(
- self,
- x: InputAcceptable,
- y: InputAcceptable | None = None,
- ) -> NDArrayFloat:
- x, y = self._param_check_and_transform(x, y)
-
- x_y = inner_product_matrix(x, y)
- return self.variance * (x_y + self.intercept)
-
- def to_sklearn(self) -> sklearn_kern.Kernel:
- return (
- self.variance
- * (sklearn_kern.DotProduct(0) + self.intercept)
- )
-
-
-class Polynomial(Covariance):
- r"""
- Polynomial covariance function.
-
- The covariance function is
-
- .. math::
- K(x, x') = \sigma^2 (\alpha x^T x' + c)^d
-
- where :math:`\sigma^2` is the scale of the variance,
- :math:`\alpha` is the slope, :math:`d` the degree of the
- polynomial and :math:`c` is the intercept.
-
- Heatmap plot of the covariance function:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Polynomial
- import matplotlib.pyplot as plt
-
- Polynomial().heatmap(limits=(0, 1))
- plt.show()
-
- Example of Gaussian process trajectories using this covariance:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Polynomial
- from skfda.datasets import make_gaussian_process
- import matplotlib.pyplot as plt
-
- gp = make_gaussian_process(
- n_samples=10, cov=Polynomial(), random_state=0)
- gp.plot()
- plt.show()
-
- Default representation in a Jupyter notebook:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Polynomial
-
- Polynomial()
-
- """
-
- _latex_formula = r"K(x, x') = \sigma^2 (\alpha x^T x' + c)^d"
-
- _parameters_str = [
- ("variance", r"\sigma^2"),
- ("intercept", "c"),
- ("slope", r"\alpha"),
- ("degree", "d"),
- ]
-
- def __init__(
- self,
- *,
- variance: float = 1,
- intercept: float = 0,
- slope: float = 1,
- degree: float = 2,
- ) -> None:
- self.variance = variance
- self.intercept = intercept
- self.slope = slope
- self.degree = degree
-
- def __call__(
- self,
- x: InputAcceptable,
- y: InputAcceptable | None = None,
- ) -> NDArrayFloat:
- x, y = self._param_check_and_transform(x, y)
-
- x_y = inner_product_matrix(x, y)
- return (
- self.variance
- * (self.slope * x_y + self.intercept) ** self.degree
- )
-
- def to_sklearn(self) -> sklearn_kern.Kernel:
- return (
- self.variance
- * (self.slope * sklearn_kern.DotProduct(0) + self.intercept)
- ** self.degree
- )
-
-
-class Gaussian(Covariance):
- r"""
- Gaussian covariance function.
-
- The covariance function is
-
- .. math::
- K(x, x') = \sigma^2 \exp\left(-\frac{||x - x'||^2}{2l^2}\right)
-
- where :math:`\sigma^2` is the variance and :math:`l` is the length scale.
-
- Heatmap plot of the covariance function:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Gaussian
- import matplotlib.pyplot as plt
-
- Gaussian().heatmap(limits=(0, 1))
- plt.show()
-
- Example of Gaussian process trajectories using this covariance:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Gaussian
- from skfda.datasets import make_gaussian_process
- import matplotlib.pyplot as plt
-
- gp = make_gaussian_process(
- n_samples=10, cov=Gaussian(), random_state=0)
- gp.plot()
- plt.show()
-
- Default representation in a Jupyter notebook:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Gaussian
-
- Gaussian()
-
- """
-
- _latex_formula = (
- r"K(x, x') = \sigma^2 \exp\left(-\frac{\|x - x'\|^2}{2l^2}"
- r"\right)"
- )
-
- _parameters_str = [
- ("variance", r"\sigma^2"),
- ("length_scale", "l"),
- ]
-
- def __init__(self, *, variance: float = 1, length_scale: float = 1):
- self.variance = variance
- self.length_scale = length_scale
-
- def __call__(
- self,
- x: InputAcceptable,
- y: InputAcceptable | None = None,
- ) -> NDArrayFloat:
- x, y = self._param_check_and_transform(x, y)
-
- distance_x_y = PairwiseMetric(l2_distance)(x, y)
-        return self.variance * np.exp(
-            -distance_x_y ** 2 / (2 * self.length_scale ** 2),
-        )
-
- def to_sklearn(self) -> sklearn_kern.Kernel:
- return (
- self.variance * sklearn_kern.RBF(length_scale=self.length_scale)
- )
-
-
-class Exponential(Covariance):
- r"""
- Exponential covariance function.
-
- The covariance function is
-
- .. math::
- K(x, x') = \sigma^2 \exp\left(-\frac{\|x - x'\|}{l}\right)
-
- where :math:`\sigma^2` is the variance and :math:`l` is the length scale.
-
- Heatmap plot of the covariance function:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Exponential
- import matplotlib.pyplot as plt
-
- Exponential().heatmap(limits=(0, 1))
- plt.show()
-
- Example of Gaussian process trajectories using this covariance:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Exponential
- from skfda.datasets import make_gaussian_process
- import matplotlib.pyplot as plt
-
- gp = make_gaussian_process(
- n_samples=10, cov=Exponential(), random_state=0)
- gp.plot()
- plt.show()
-
- Default representation in a Jupyter notebook:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Exponential
-
- Exponential()
-
- """
-
- _latex_formula = (
- r"K(x, x') = \sigma^2 \exp\left(-\frac{||x - x'||}{l}"
- r"\right)"
- )
-
- _parameters_str = [
- ("variance", r"\sigma^2"),
- ("length_scale", "l"),
- ]
-
- def __init__(
- self,
- *,
- variance: float = 1,
- length_scale: float = 1,
- ) -> None:
- self.variance = variance
- self.length_scale = length_scale
-
- def __call__(
- self,
- x: InputAcceptable,
- y: InputAcceptable | None = None,
- ) -> NDArrayFloat:
- x, y = self._param_check_and_transform(x, y)
-
- distance_x_y = PairwiseMetric(l2_distance)(x, y)
- return self.variance * np.exp(-distance_x_y / (self.length_scale))
-
- def to_sklearn(self) -> sklearn_kern.Kernel:
- return (
- self.variance
- * sklearn_kern.Matern(length_scale=self.length_scale, nu=0.5)
- )
-
-
-class WhiteNoise(Covariance):
- r"""
-    White noise covariance function.
-
- The covariance function is
-
- .. math::
- K(x, x')= \begin{cases}
- \sigma^2, \quad x = x' \\
- 0, \quad x \neq x'\\
- \end{cases}
-
- where :math:`\sigma^2` is the variance.
-
- Heatmap plot of the covariance function:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import WhiteNoise
- import matplotlib.pyplot as plt
-
- WhiteNoise().heatmap(limits=(0, 1))
- plt.show()
-
- Example of Gaussian process trajectories using this covariance:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import WhiteNoise
- from skfda.datasets import make_gaussian_process
- import matplotlib.pyplot as plt
-
- gp = make_gaussian_process(
- n_samples=10, cov=WhiteNoise(), random_state=0)
- gp.plot()
- plt.show()
-
- Default representation in a Jupyter notebook:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import WhiteNoise
-
- WhiteNoise()
-
- """
-
- _latex_formula = (
- r"K(x, x')= \begin{cases} \sigma^2, \quad x = x' \\"
- r"0, \quad x \neq x'\\ \end{cases}"
- )
-
- _parameters_str = [("variance", r"\sigma^2")]
-
- def __init__(self, *, variance: float = 1):
- self.variance = variance
-
- def __call__(
- self,
- x: InputAcceptable,
- y: InputAcceptable | None = None,
- ) -> NDArrayFloat:
- if isinstance(x, FData) or isinstance(y, FData):
- raise ValueError('Not defined for FData objects.')
-
- x = _transform_to_2d(x)
- return self.variance * np.eye(x.shape[0])
-
- def to_sklearn(self) -> sklearn_kern.Kernel:
- return sklearn_kern.WhiteKernel(noise_level=self.variance)
-
-
-class Matern(Covariance):
- r"""
- Matérn covariance function.
-
- The covariance function is
-
- .. math::
- K(x, x') = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)}
- \left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)^{\nu}
- K_{\nu}\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)
-
- where :math:`\sigma^2` is the variance, :math:`l` is the length scale
- and :math:`\nu` controls the smoothness of the related Gaussian process.
- The trajectories of a Gaussian process with Matérn covariance is
- :math:`\lceil \nu \rceil - 1` times differentiable.
-
-
- Heatmap plot of the covariance function:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Matern
- import matplotlib.pyplot as plt
-
- Matern().heatmap(limits=(0, 1))
- plt.show()
-
- Example of Gaussian process trajectories using this covariance:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Matern
- from skfda.datasets import make_gaussian_process
- import matplotlib.pyplot as plt
-
- gp = make_gaussian_process(
- n_samples=10, cov=Matern(), random_state=0)
- gp.plot()
- plt.show()
-
- Default representation in a Jupyter notebook:
-
- .. jupyter-execute::
-
- from skfda.misc.covariances import Matern
-
- Matern()
-
- """
- _latex_formula = (
- r"K(x, x') = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)}"
- r"\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)^{\nu}"
- r"K_{\nu}\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)"
- )
-
- _parameters_str = [
- ("variance", r"\sigma^2"),
- ("length_scale", "l"),
- ("nu", r"\nu"),
- ]
-
- def __init__(
- self,
- *,
- variance: float = 1,
- length_scale: float = 1,
- nu: float = 1.5,
- ) -> None:
- self.variance = variance
- self.length_scale = length_scale
- self.nu = nu
-
- def __call__(
- self,
- x: InputAcceptable,
- y: InputAcceptable | None = None,
- ) -> NDArrayFloat:
- x, y = self._param_check_and_transform(x, y)
-
- distance_x_y = PairwiseMetric(l2_distance)(x, y)
-
- p = self.nu - 0.5
- if p.is_integer():
- # Formula for half-integers
- p = int(p)
- body = np.sqrt(2 * p + 1) * distance_x_y / self.length_scale
- exponential = np.exp(-body)
- power_list = np.full(shape=(p,) + body.shape, fill_value=2 * body)
- power_list = np.cumprod(power_list, axis=0)
- power_list = np.concatenate(
- (power_list[::-1], np.asarray([np.ones_like(body)])),
- )
- power_list = np.moveaxis(power_list, 0, -1)
- numerator = np.cumprod(np.arange(p, 0, -1))
- numerator = np.concatenate(([1], numerator))
- denom1 = np.cumprod(np.arange(2 * p, p, -1))
- denom1 = np.concatenate((denom1[::-1], [1]))
- denom2 = np.cumprod(np.arange(1, p + 1))
- denom2 = np.concatenate(([1], denom2))
-
- sum_terms = power_list * numerator / (denom1 * denom2)
- return ( # type: ignore[no-any-return]
- self.variance * exponential * np.sum(sum_terms, axis=-1)
- )
- elif self.nu == np.inf:
- return ( # type: ignore[no-any-return]
- self.variance * np.exp(
- -distance_x_y ** 2 / (2 * self.length_scale ** 2),
- )
- )
- else:
- # General formula
- scaling = 2**(1 - self.nu) / gamma(self.nu)
- body = np.sqrt(2 * self.nu) * distance_x_y / self.length_scale
- power = body**self.nu
- bessel = kv(self.nu, body)
-
- with np.errstate(invalid='ignore'):
- eval_cov = self.variance * scaling * power * bessel
-
- # Values with nan are where the distance is 0
- return np.nan_to_num( # type: ignore[no-any-return]
- eval_cov,
- nan=self.variance,
- )
-
- def to_sklearn(self) -> sklearn_kern.Kernel:
- return (
- self.variance
- * sklearn_kern.Matern(length_scale=self.length_scale, nu=self.nu)
- )
-
-
-class Empirical(Covariance):
- r"""
- Sample covariance function.
-
- The sample covariance function is defined as
-
-    .. math::
- K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N\left(x_n(t) -
- \bar{x}(t)\right) \left(x_n(s) - \bar{x}(s)\right)
-
- where :math:`x_n(t)` is the n-th sample and :math:`\bar{x}(t)` is the
- mean of the samples. :math:`N` is the number of samples,
- :math:`\text{correction}` is the degrees of freedom adjustment and is such
- that :math:`N-\text{correction}` is the divisor used in the calculation of
- the covariance function.
-
- """
-
- _latex_formula = (
- r"K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N"
- r"(x_n(t) - \bar{x}(t))(x_n(s) - \bar{x}(s))"
- )
- _parameters_str = [
- ("data", "data"),
- ]
-
- cov_fdata: FData
- correction: int
-
- @abc.abstractmethod
- def __init__(self, data: FData) -> None:
- if data.dim_domain != 1 or data.dim_codomain != 1:
- raise NotImplementedError(
- "Covariance only implemented "
- "for univariate functions",
- )
-
- def __call__(
- self,
- x: InputAcceptable,
- y: InputAcceptable | None = None,
- ) -> NDArrayFloat:
-        """Evaluate the covariance function.
-
-        Args:
-            x: First array of points of evaluation.
-            y: Second array of points of evaluation.
-
-        Returns:
-            Covariance function evaluated at the grid formed by x and y.
-        """
-        if isinstance(x, FData) or isinstance(y, FData):
-            raise ValueError('Not defined for FData objects.')
-
-        return self.cov_fdata([x, y], grid=True)[0, ..., 0]
-
-
-class EmpiricalGrid(Empirical):
- """Sample covariance function for FDataGrid."""
-
- cov_fdata: FDataGrid
- correction: int
-
- def __init__(self, data: FDataGrid, correction: int = 0) -> None:
- super().__init__(data=data)
-
- self.correction = correction
- self.cov_fdata = data.copy(
- data_matrix=np.cov(
- data.data_matrix[..., 0],
- rowvar=False,
- ddof=correction,
- )[np.newaxis, ...],
- grid_points=[
- data.grid_points[0],
- data.grid_points[0],
- ],
- domain_range=[
- data.domain_range[0],
- data.domain_range[0],
- ],
- argument_names=data.argument_names * 2,
- sample_names=("covariance",),
- )
-
-
-class EmpiricalBasis(Empirical):
- """
- Sample covariance function for FDataBasis.
-
- In this case one may use the basis expression of the samples to
- express the sample covariance function in the tensor product basis
- of the original basis.
- """
-
- cov_fdata: FDataBasis
- coeff_matrix: NDArrayFloat
- correction: int
-
- def __init__(self, data: FDataBasis, correction: int = 0) -> None:
- super().__init__(data=data)
-
- self.correction = correction
- self.coeff_matrix = np.cov(
- data.coefficients,
- rowvar=False,
- ddof=correction,
- )
- self.cov_fdata = FDataBasis(
- basis=TensorBasis([data.basis, data.basis]),
- coefficients=self.coeff_matrix.flatten(),
- argument_names=data.argument_names * 2,
- sample_names=("covariance",),
- )
+from __future__ import annotations
+
+import abc
+from typing import Callable, Sequence, Tuple, Union
+
+import matplotlib.pyplot as plt
+import numpy as np
+import sklearn.gaussian_process.kernels as sklearn_kern
+from matplotlib.figure import Figure
+from scipy.special import gamma, kv
+
+from ..misc import inner_product_matrix
+from ..misc.metrics import PairwiseMetric, LpNorm, l2_distance
+from ..representation import FData, FDataBasis, FDataGrid
+from ..representation.basis import TensorBasis
+from ..typing._numpy import ArrayLike, NDArrayFloat
+
+
+def _squared_norms(x: NDArrayFloat, y: NDArrayFloat) -> NDArrayFloat:
+ return ( # type: ignore[no-any-return]
+ (x[:, np.newaxis, :] - y[np.newaxis, :, :]) ** 2
+ ).sum(2)
+
+
+CovarianceLike = Union[
+ float,
+ NDArrayFloat,
+ Callable[[ArrayLike, ArrayLike], NDArrayFloat],
+]
+
+InputAcceptable = Union[
+ np.ndarray,
+ FData,
+]
+
+
+def _transform_to_2d(t: ArrayLike) -> NDArrayFloat:
+ """Transform 1d arrays in column vectors."""
+ t = np.asfarray(t)
+
+ dim = len(t.shape)
+ assert dim <= 2
+
+ if dim < 2:
+ t = np.atleast_2d(t).T
+
+ return t
+
+
+def _execute_covariance(
+ covariance: CovarianceLike,
+ x: ArrayLike,
+ y: ArrayLike,
+) -> NDArrayFloat:
+ """Execute a covariance function."""
+ x = _transform_to_2d(x)
+ y = _transform_to_2d(y)
+
+ if isinstance(covariance, (int, float)):
+ return np.array(covariance)
+ else:
+ if callable(covariance):
+ result = covariance(x, y)
+ elif isinstance(covariance, np.ndarray):
+ result = covariance
+ else:
+ # GPy kernel
+ result = covariance.K(x, y)
+
+ assert result.shape[0] == len(x)
+ assert result.shape[1] == len(y)
+ return result
+
+
+class Covariance(abc.ABC):
+ """Abstract class for covariance functions."""
+
+ _parameters_str: Sequence[Tuple[str, str]]
+ _latex_formula: str
+
+ @abc.abstractmethod
+ def __call__(
+ self,
+ x: InputAcceptable,
+ y: InputAcceptable | None = None,
+ ) -> NDArrayFloat:
+ pass
+
+ def _param_check_and_transform(
+ self,
+ x: InputAcceptable,
+ y: InputAcceptable | None = None,
+ ) -> Tuple[np.ndarray | FData, np.ndarray | FData]:
+ # Param check
+ if y is None:
+ y = x
+
+ if type(x) is not type(y): # noqa: WPS516
+ raise ValueError(
+ 'Cannot operate objects x and y from different classes',
+ f'({type(x)}, {type(y)}).',
+ )
+
+ if isinstance(x, np.ndarray) and isinstance(y, np.ndarray):
+ x, y = np.array(x), np.array(y)
+ if len(x.shape) < 2:
+ x = np.atleast_2d(x)
+ if len(y.shape) < 2:
+ y = np.atleast_2d(y)
+
+ return x, y
+
+ def heatmap(self, limits: Tuple[float, float] = (-1, 1)) -> Figure:
+ """Return a heatmap plot of the covariance function."""
+ from ..exploratory.visualization._utils import _create_figure
+
+ x = np.linspace(*limits, 1000)
+
+ cov_matrix = self(x, x)
+
+ fig = _create_figure()
+ ax = fig.add_subplot(1, 1, 1)
+ ax.imshow(
+ cov_matrix,
+ extent=[limits[0], limits[1], limits[1], limits[0]],
+ )
+ ax.set_title(f"Covariance function in [{limits[0]}, {limits[1]}]")
+
+ return fig
+
+ def _sample_trajectories_plot(self) -> Figure:
+ from ..datasets import make_gaussian_process
+
+ fd = make_gaussian_process(
+ start=-1,
+ n_samples=10,
+ cov=self,
+ random_state=0,
+ )
+ fig = fd.plot()
+ fig.axes[0].set_title("Sample trajectories")
+ return fig
+
+ def __repr__(self) -> str:
+
+ params_str = ', '.join(
+ f'{n}={getattr(self, n)}' for n, _ in self._parameters_str
+ )
+
+ return (
+ f"{self.__module__}.{type(self).__qualname__}("
+ f"{params_str}"
+ f")"
+ )
+
+ def _latex_content(self) -> str:
+ params_str = ''.join(
+ fr'{l} &= {getattr(self, n)} \\' for n, l in self._parameters_str
+ )
+
+ return (
+ fr"{self._latex_formula} \\"
+ r"\text{where:}"
+ r"\begin{aligned}"
+ fr"\qquad{params_str}"
+ r"\end{aligned}"
+ )
+
+ def _repr_latex_(self) -> str:
+ return fr"\(\displaystyle {self._latex_content()}\)"
+
+ def _repr_html_(self) -> str:
+ from ..exploratory.visualization._utils import _figure_to_svg
+
+ fig = self.heatmap()
+ heatmap = _figure_to_svg(fig)
+ plt.close(fig)
+
+ fig = self._sample_trajectories_plot()
+ sample_trajectories = _figure_to_svg(fig)
+ plt.close(fig)
+
+ row_style = ''
+
+ def column_style(percent: float, margin_top: str = "0") -> str:
+ return (
+ f'style="display: inline-block; '
+ f'margin:0; '
+ f'margin-top: {margin_top}; '
+ f'width:{percent}%; '
+ f'height:auto;'
+ f'vertical-align: middle"'
+ )
+
+ return fr"""
+
+
+ \[{self._latex_content()}\]
+
+
+
+
+ {sample_trajectories}
+
+
+ {heatmap}
+
+
+ """
+
+ def to_sklearn(self) -> sklearn_kern.Kernel:
+ """Convert it to a sklearn kernel, if there is one."""
+ raise NotImplementedError(
+ f"{type(self).__name__} covariance not "
+ f"implemented in scikit-learn",
+ )
+
+
+class Brownian(Covariance):
+ r"""
+ Brownian covariance function.
+
+ The covariance function is
+
+ .. math::
+ K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + |x' - \mathcal{O}|
+ - |x - x'|}{2}
+
+ where :math:`\sigma^2` is the variance at distance 1 from
+ :math:`\mathcal{O}` and :math:`\mathcal{O}` is the origin point.
+ If :math:`\mathcal{O} = 0` (the default) and we only
+ consider positive values, the formula can be simplified as
+
+ .. math::
+ K(x, y) = \sigma^2 \min(x, y).
+
+ Heatmap plot of the covariance function:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Brownian
+ import matplotlib.pyplot as plt
+
+ Brownian().heatmap(limits=(0, 1))
+ plt.show()
+
+ Example of Gaussian process trajectories using this covariance:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Brownian
+ from skfda.datasets import make_gaussian_process
+ import matplotlib.pyplot as plt
+
+ gp = make_gaussian_process(
+ n_samples=10, cov=Brownian(), random_state=0)
+ gp.plot()
+ plt.show()
+
+ Default representation in a Jupyter notebook:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Brownian
+
+ Brownian()
+
+ """
+ _latex_formula = (
+ r"K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + "
+ r"|x' - \mathcal{O}| - |x - x'|}{2}"
+ )
+
+ _parameters_str = [
+ ("variance", r"\sigma^2"),
+ ("origin", r"\mathcal{O}"),
+ ]
+
+ def __init__(self, *, variance: float = 1, origin: float = 0) -> None:
+ self.variance = variance
+ self.origin = origin
+
+ def __call__(
+ self,
+ x: InputAcceptable | FData,
+ y: InputAcceptable | None = None,
+ ) -> NDArrayFloat:
+ if isinstance(x, FData) or isinstance(y, FData):
+ raise ValueError('Not defined for FData objects.')
+
+ x = _transform_to_2d(x) - self.origin
+ y = _transform_to_2d(y) - self.origin
+
+ sum_norms = np.add.outer(
+ np.linalg.norm(x, axis=-1),
+ np.linalg.norm(y, axis=-1),
+ )
+ norm_sub = np.linalg.norm(
+ x[:, np.newaxis, :] - y[np.newaxis, :, :],
+ axis=-1,
+ )
+
+ return ( # type: ignore[no-any-return]
+ self.variance * (sum_norms - norm_sub) / 2
+ )
+
+
+class Linear(Covariance):
+ r"""
+ Linear covariance function.
+
+ The covariance function is
+
+ .. math::
+ K(x, x') = \sigma^2 (x^T x' + c)
+
+ where :math:`\sigma^2` is the scale of the variance and
+ :math:`c` is the intercept.
+
+ Heatmap plot of the covariance function:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Linear
+ import matplotlib.pyplot as plt
+
+ Linear().heatmap(limits=(0, 1))
+ plt.show()
+
+ Example of Gaussian process trajectories using this covariance:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Linear
+ from skfda.datasets import make_gaussian_process
+ import matplotlib.pyplot as plt
+
+ gp = make_gaussian_process(
+ n_samples=10, cov=Linear(), random_state=0)
+ gp.plot()
+ plt.show()
+
+ Default representation in a Jupyter notebook:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Linear
+
+ Linear()
+
+ """
+
+ _latex_formula = r"K(x, x') = \sigma^2 (x^T x' + c)"
+
+ _parameters_str = [
+ ("variance", r"\sigma^2"),
+ ("intercept", "c"),
+ ]
+
+ def __init__(self, *, variance: float = 1, intercept: float = 0) -> None:
+ self.variance = variance
+ self.intercept = intercept
+
+ def __call__(
+ self,
+ x: InputAcceptable,
+ y: InputAcceptable | None = None,
+ ) -> NDArrayFloat:
+ x, y = self._param_check_and_transform(x, y)
+
+ x_y = inner_product_matrix(x, y)
+ return self.variance * (x_y + self.intercept)
+
+ def to_sklearn(self) -> sklearn_kern.Kernel:
+ return (
+ self.variance
+ * (sklearn_kern.DotProduct(0) + self.intercept)
+ )
+
+
+class Polynomial(Covariance):
+ r"""
+ Polynomial covariance function.
+
+ The covariance function is
+
+ .. math::
+ K(x, x') = \sigma^2 (\alpha x^T x' + c)^d
+
+ where :math:`\sigma^2` is the scale of the variance,
+ :math:`\alpha` is the slope, :math:`d` the degree of the
+ polynomial and :math:`c` is the intercept.
+
+ Heatmap plot of the covariance function:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Polynomial
+ import matplotlib.pyplot as plt
+
+ Polynomial().heatmap(limits=(0, 1))
+ plt.show()
+
+ Example of Gaussian process trajectories using this covariance:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Polynomial
+ from skfda.datasets import make_gaussian_process
+ import matplotlib.pyplot as plt
+
+ gp = make_gaussian_process(
+ n_samples=10, cov=Polynomial(), random_state=0)
+ gp.plot()
+ plt.show()
+
+ Default representation in a Jupyter notebook:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Polynomial
+
+ Polynomial()
+
+ """
+
+ _latex_formula = r"K(x, x') = \sigma^2 (\alpha x^T x' + c)^d"
+
+ _parameters_str = [
+ ("variance", r"\sigma^2"),
+ ("intercept", "c"),
+ ("slope", r"\alpha"),
+ ("degree", "d"),
+ ]
+
+ def __init__(
+ self,
+ *,
+ variance: float = 1,
+ intercept: float = 0,
+ slope: float = 1,
+ degree: float = 2,
+ ) -> None:
+ self.variance = variance
+ self.intercept = intercept
+ self.slope = slope
+ self.degree = degree
+
+ def __call__(
+ self,
+ x: InputAcceptable,
+ y: InputAcceptable | None = None,
+ ) -> NDArrayFloat:
+ x, y = self._param_check_and_transform(x, y)
+
+ x_y = inner_product_matrix(x, y)
+ return (
+ self.variance
+ * (self.slope * x_y + self.intercept) ** self.degree
+ )
+
+ def to_sklearn(self) -> sklearn_kern.Kernel:
+ return (
+ self.variance
+ * (self.slope * sklearn_kern.DotProduct(0) + self.intercept)
+ ** self.degree
+ )
+
+
+class Gaussian(Covariance):
+ r"""
+ Gaussian covariance function.
+
+ The covariance function is
+
+ .. math::
+ K(x, x') = \sigma^2 \exp\left(-\frac{||x - x'||^2}{2l^2}\right)
+
+ where :math:`\sigma^2` is the variance and :math:`l` is the length scale.
+
+ Heatmap plot of the covariance function:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Gaussian
+ import matplotlib.pyplot as plt
+
+ Gaussian().heatmap(limits=(0, 1))
+ plt.show()
+
+ Example of Gaussian process trajectories using this covariance:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Gaussian
+ from skfda.datasets import make_gaussian_process
+ import matplotlib.pyplot as plt
+
+ gp = make_gaussian_process(
+ n_samples=10, cov=Gaussian(), random_state=0)
+ gp.plot()
+ plt.show()
+
+ Default representation in a Jupyter notebook:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Gaussian
+
+ Gaussian()
+
+ """
+
+ _latex_formula = (
+ r"K(x, x') = \sigma^2 \exp\left(-\frac{\|x - x'\|^2}{2l^2}"
+ r"\right)"
+ )
+
+ _parameters_str = [
+ ("variance", r"\sigma^2"),
+ ("length_scale", "l"),
+ ]
+
+ def __init__(self, *, variance: float = 1, length_scale: float = 1):
+ self.variance = variance
+ self.length_scale = length_scale
+
+ def __call__(
+ self,
+ x: InputAcceptable,
+ y: InputAcceptable | None = None,
+ ) -> NDArrayFloat:
+ x, y = self._param_check_and_transform(x, y)
+
+ distance_x_y = PairwiseMetric(l2_distance)(x, y)
+        return self.variance * np.exp(
+            -distance_x_y ** 2 / (2 * self.length_scale ** 2),
+        )
+
+ def to_sklearn(self) -> sklearn_kern.Kernel:
+ return (
+ self.variance * sklearn_kern.RBF(length_scale=self.length_scale)
+ )
+
+
+class Exponential(Covariance):
+ r"""
+ Exponential covariance function.
+
+ The covariance function is
+
+ .. math::
+ K(x, x') = \sigma^2 \exp\left(-\frac{\|x - x'\|}{l}\right)
+
+ where :math:`\sigma^2` is the variance and :math:`l` is the length scale.
+
+ Heatmap plot of the covariance function:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Exponential
+ import matplotlib.pyplot as plt
+
+ Exponential().heatmap(limits=(0, 1))
+ plt.show()
+
+ Example of Gaussian process trajectories using this covariance:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Exponential
+ from skfda.datasets import make_gaussian_process
+ import matplotlib.pyplot as plt
+
+ gp = make_gaussian_process(
+ n_samples=10, cov=Exponential(), random_state=0)
+ gp.plot()
+ plt.show()
+
+ Default representation in a Jupyter notebook:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Exponential
+
+ Exponential()
+
+ """
+
+ _latex_formula = (
+ r"K(x, x') = \sigma^2 \exp\left(-\frac{||x - x'||}{l}"
+ r"\right)"
+ )
+
+ _parameters_str = [
+ ("variance", r"\sigma^2"),
+ ("length_scale", "l"),
+ ]
+
+ def __init__(
+ self,
+ *,
+ variance: float = 1,
+ length_scale: float = 1,
+ ) -> None:
+ self.variance = variance
+ self.length_scale = length_scale
+
+ def __call__(
+ self,
+ x: InputAcceptable,
+ y: InputAcceptable | None = None,
+ ) -> NDArrayFloat:
+ x, y = self._param_check_and_transform(x, y)
+
+ distance_x_y = PairwiseMetric(l2_distance)(x, y)
+ return self.variance * np.exp(-distance_x_y / (self.length_scale))
+
+ def to_sklearn(self) -> sklearn_kern.Kernel:
+ return (
+ self.variance
+ * sklearn_kern.Matern(length_scale=self.length_scale, nu=0.5)
+ )
+
+
+class WhiteNoise(Covariance):
+ r"""
+    White noise covariance function.
+
+ The covariance function is
+
+ .. math::
+ K(x, x')= \begin{cases}
+ \sigma^2, \quad x = x' \\
+ 0, \quad x \neq x'\\
+ \end{cases}
+
+ where :math:`\sigma^2` is the variance.
+
+ Heatmap plot of the covariance function:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import WhiteNoise
+ import matplotlib.pyplot as plt
+
+ WhiteNoise().heatmap(limits=(0, 1))
+ plt.show()
+
+ Example of Gaussian process trajectories using this covariance:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import WhiteNoise
+ from skfda.datasets import make_gaussian_process
+ import matplotlib.pyplot as plt
+
+ gp = make_gaussian_process(
+ n_samples=10, cov=WhiteNoise(), random_state=0)
+ gp.plot()
+ plt.show()
+
+ Default representation in a Jupyter notebook:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import WhiteNoise
+
+ WhiteNoise()
+
+ """
+
+ _latex_formula = (
+ r"K(x, x')= \begin{cases} \sigma^2, \quad x = x' \\"
+ r"0, \quad x \neq x'\\ \end{cases}"
+ )
+
+ _parameters_str = [("variance", r"\sigma^2")]
+
+ def __init__(self, *, variance: float = 1):
+ self.variance = variance
+
+ def __call__(
+ self,
+ x: InputAcceptable,
+ y: InputAcceptable | None = None,
+ ) -> NDArrayFloat:
+ if isinstance(x, FData) or isinstance(y, FData):
+ raise ValueError('Not defined for FData objects.')
+
+ x = _transform_to_2d(x)
+ return self.variance * np.eye(x.shape[0])
+
+ def to_sklearn(self) -> sklearn_kern.Kernel:
+ return sklearn_kern.WhiteKernel(noise_level=self.variance)
+
+
+class Matern(Covariance):
+ r"""
+ Matérn covariance function.
+
+ The covariance function is
+
+ .. math::
+ K(x, x') = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)}
+ \left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)^{\nu}
+ K_{\nu}\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)
+
+ where :math:`\sigma^2` is the variance, :math:`l` is the length scale
+ and :math:`\nu` controls the smoothness of the related Gaussian process.
+ The trajectories of a Gaussian process with Matérn covariance is
+ :math:`\lceil \nu \rceil - 1` times differentiable.
+
+
+ Heatmap plot of the covariance function:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Matern
+ import matplotlib.pyplot as plt
+
+ Matern().heatmap(limits=(0, 1))
+ plt.show()
+
+ Example of Gaussian process trajectories using this covariance:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Matern
+ from skfda.datasets import make_gaussian_process
+ import matplotlib.pyplot as plt
+
+ gp = make_gaussian_process(
+ n_samples=10, cov=Matern(), random_state=0)
+ gp.plot()
+ plt.show()
+
+ Default representation in a Jupyter notebook:
+
+ .. jupyter-execute::
+
+ from skfda.misc.covariances import Matern
+
+ Matern()
+
+ """
+ _latex_formula = (
+ r"K(x, x') = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)}"
+ r"\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)^{\nu}"
+ r"K_{\nu}\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)"
+ )
+
+ _parameters_str = [
+ ("variance", r"\sigma^2"),
+ ("length_scale", "l"),
+ ("nu", r"\nu"),
+ ]
+
+ def __init__(
+ self,
+ *,
+ variance: float = 1,
+ length_scale: float = 1,
+ nu: float = 1.5,
+ ) -> None:
+ self.variance = variance
+ self.length_scale = length_scale
+ self.nu = nu
+
+ def __call__(
+ self,
+ x: InputAcceptable,
+ y: InputAcceptable | None = None,
+ ) -> NDArrayFloat:
+ x, y = self._param_check_and_transform(x, y)
+
+ distance_x_y = PairwiseMetric(l2_distance)(x, y)
+
+ p = self.nu - 0.5
+ if p.is_integer():
+ # Formula for half-integers
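+            # For nu = p + 1/2 the Matern kernel has the standard closed form
+            #   K(r) = sigma^2 * exp(-sqrt(2p + 1) * r / l) * (p! / (2p)!)
+            #          * sum_{i=0}^{p} [(p + i)! / (i! * (p - i)!)]
+            #          * (2 * sqrt(2p + 1) * r / l)^(p - i),
+            # which is evaluated below term by term, avoiding the Bessel
+            # function.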
+ p = int(p)
+ body = np.sqrt(2 * p + 1) * distance_x_y / self.length_scale
+ exponential = np.exp(-body)
+ power_list = np.full(shape=(p,) + body.shape, fill_value=2 * body)
+ power_list = np.cumprod(power_list, axis=0)
+ power_list = np.concatenate(
+ (power_list[::-1], np.asarray([np.ones_like(body)])),
+ )
+ power_list = np.moveaxis(power_list, 0, -1)
+ numerator = np.cumprod(np.arange(p, 0, -1))
+ numerator = np.concatenate(([1], numerator))
+ denom1 = np.cumprod(np.arange(2 * p, p, -1))
+ denom1 = np.concatenate((denom1[::-1], [1]))
+ denom2 = np.cumprod(np.arange(1, p + 1))
+ denom2 = np.concatenate(([1], denom2))
+
+ sum_terms = power_list * numerator / (denom1 * denom2)
+ return ( # type: ignore[no-any-return]
+ self.variance * exponential * np.sum(sum_terms, axis=-1)
+ )
+ elif self.nu == np.inf:
+ return ( # type: ignore[no-any-return]
+ self.variance * np.exp(
+ -distance_x_y ** 2 / (2 * self.length_scale ** 2),
+ )
+ )
+ else:
+ # General formula
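+            # Direct evaluation of
+            #   K(r) = sigma^2 * (2^(1 - nu) / Gamma(nu))
+            #          * (sqrt(2 * nu) * r / l)^nu * K_nu(sqrt(2 * nu) * r / l),
+            # where K_nu (scipy.special.kv) is the modified Bessel function
+            # of the second kind.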
+ scaling = 2**(1 - self.nu) / gamma(self.nu)
+ body = np.sqrt(2 * self.nu) * distance_x_y / self.length_scale
+ power = body**self.nu
+ bessel = kv(self.nu, body)
+
+ with np.errstate(invalid='ignore'):
+ eval_cov = self.variance * scaling * power * bessel
+
+ # Values with nan are where the distance is 0
+ return np.nan_to_num( # type: ignore[no-any-return]
+ eval_cov,
+ nan=self.variance,
+ )
+
+ def to_sklearn(self) -> sklearn_kern.Kernel:
+ return (
+ self.variance
+ * sklearn_kern.Matern(length_scale=self.length_scale, nu=self.nu)
+ )
+
+
+class Empirical(Covariance):
+ r"""
+ Sample covariance function.
+
+ The sample covariance function is defined as
+
+    .. math::
+ K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N\left(x_n(t) -
+ \bar{x}(t)\right) \left(x_n(s) - \bar{x}(s)\right)
+
+ where :math:`x_n(t)` is the n-th sample and :math:`\bar{x}(t)` is the
+ mean of the samples. :math:`N` is the number of samples,
+ :math:`\text{correction}` is the degrees of freedom adjustment and is such
+ that :math:`N-\text{correction}` is the divisor used in the calculation of
+ the covariance function.
+
+ """
+
+ _latex_formula = (
+ r"K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N"
+ r"(x_n(t) - \bar{x}(t))(x_n(s) - \bar{x}(s))"
+ )
+ _parameters_str = [
+ ("data", "data"),
+ ]
+
+ cov_fdata: FData
+ correction: int
+
+ @abc.abstractmethod
+ def __init__(self, data: FData) -> None:
+ if data.dim_domain != 1 or data.dim_codomain != 1:
+ raise NotImplementedError(
+ "Covariance only implemented "
+ "for univariate functions",
+ )
+
+ def __call__(
+ self,
+ x: InputAcceptable,
+ y: InputAcceptable | None = None,
+ ) -> NDArrayFloat:
+        """Evaluate the covariance function.
+
+        Args:
+            x: First array of points of evaluation.
+            y: Second array of points of evaluation.
+
+        Returns:
+            Covariance function evaluated at the grid formed by x and y.
+        """
+        if isinstance(x, FData) or isinstance(y, FData):
+            raise ValueError('Not defined for FData objects.')
+
+        return self.cov_fdata([x, y], grid=True)[0, ..., 0]
+
+
+class EmpiricalGrid(Empirical):
+ """Sample covariance function for FDataGrid."""
+
+ cov_fdata: FDataGrid
+ correction: int
+
+ def __init__(self, data: FDataGrid, correction: int = 0) -> None:
+ super().__init__(data=data)
+
+ self.correction = correction
+ self.cov_fdata = data.copy(
+ data_matrix=np.cov(
+ data.data_matrix[..., 0],
+ rowvar=False,
+ ddof=correction,
+ )[np.newaxis, ...],
+ grid_points=[
+ data.grid_points[0],
+ data.grid_points[0],
+ ],
+ domain_range=[
+ data.domain_range[0],
+ data.domain_range[0],
+ ],
+ argument_names=data.argument_names * 2,
+ sample_names=("covariance",),
+ )
+
+
+class EmpiricalBasis(Empirical):
+ """
+ Sample covariance function for FDataBasis.
+
+ In this case one may use the basis expression of the samples to
+ express the sample covariance function in the tensor product basis
+ of the original basis.
+ """
+
+ cov_fdata: FDataBasis
+ coeff_matrix: NDArrayFloat
+ correction: int
+
+ def __init__(self, data: FDataBasis, correction: int = 0) -> None:
+ super().__init__(data=data)
+
+ self.correction = correction
+ self.coeff_matrix = np.cov(
+ data.coefficients,
+ rowvar=False,
+ ddof=correction,
+ )
+ self.cov_fdata = FDataBasis(
+ basis=TensorBasis([data.basis, data.basis]),
+ coefficients=self.coeff_matrix.flatten(),
+ argument_names=data.argument_names * 2,
+ sample_names=("covariance",),
+ )
From f31229dd36e2741d168b8510348372c8957b3e8d Mon Sep 17 00:00:00 2001
From: E105D104U125
Date: Mon, 25 Mar 2024 20:00:57 +0100
Subject: [PATCH 05/14] Replaced EOL with LF and switched test fixtures to the weather dataset.
---
skfda/tests/test_covariances.py | 530 ++++++++++++++++----------------
1 file changed, 265 insertions(+), 265 deletions(-)
diff --git a/skfda/tests/test_covariances.py b/skfda/tests/test_covariances.py
index dc5982ca0..598280342 100644
--- a/skfda/tests/test_covariances.py
+++ b/skfda/tests/test_covariances.py
@@ -1,265 +1,265 @@
-"""Tests for Covariance module."""
-from typing import Any, Tuple
-
-import numpy as np
-import pytest
-from sklearn.model_selection import ParameterGrid
-
-import skfda.misc.covariances as cov
-from skfda import FDataBasis, FDataGrid
-from skfda.datasets import fetch_phoneme
-from skfda.representation.basis import MonomialBasis
-
-
-def _test_compare_sklearn(
- multivariate_data: Any,
- cov: cov.Covariance,
-) -> None:
- cov_sklearn = cov.to_sklearn()
- cov_matrix = cov(multivariate_data)
- cov_sklearn_matrix = cov_sklearn(multivariate_data)
-
- np.testing.assert_array_almost_equal(cov_matrix, cov_sklearn_matrix)
-
-##############################################################################
-# Fixtures
-##############################################################################
-
-
-@pytest.fixture
-def fetch_phoneme_fixture() -> FDataGrid:
- """Fixture for loading the phoneme dataset example."""
- fd, _ = fetch_phoneme(return_X_y=True)
- return fd[:20]
-
-
-@pytest.fixture(
- params=[
- cov.Linear(),
- cov.Polynomial(),
- cov.Gaussian(),
- cov.Exponential(),
- cov.Matern(),
- ],
-)
-def covariances_fixture(request: Any) -> Any:
- """Fixture for getting a covariance kernel function."""
- return request.param
-
-
-@pytest.fixture(
- params=[
- cov.Brownian(),
- cov.WhiteNoise(),
- ],
-)
-def covariances_raise_fixture(request: Any) -> Any:
- """Fixture for getting a covariance kernel that raises a ValueError."""
- return request.param
-
-
-@pytest.fixture
-def fdatabasis_data() -> Tuple[FDataBasis, FDataBasis]:
- """Fixture for getting fdatabasis objects."""
- basis = MonomialBasis(
- n_basis=2,
- domain_range=(-2, 2),
- )
-
- fd1 = FDataBasis(
- basis=basis,
- coefficients=[
- [1, 0],
- [1, 2],
- ],
- )
-
- fd2 = FDataBasis(
- basis=basis,
- coefficients=[
- [0, 1],
- ],
- )
-
- return fd1, fd2
-
-
-@pytest.fixture
-def multivariate_data() -> np.ndarray:
- """Fixture for getting multivariate data."""
- return np.linspace(-1, 1, 1000)[:, np.newaxis]
-
-
-@pytest.fixture(
- params=[
- (cov.Linear,
- {
- "variance": [1, 2],
- "intercept": [3, 4],
- },
- ),
- (cov.Polynomial,
- {
- "variance": [2],
- "intercept": [0, 2],
- "slope": [1, 2],
- "degree": [1, 2, 3],
- },
- ),
- (cov.Exponential,
- {
- "variance": [1, 2],
- "length_scale": [0.5, 1, 2],
- },
- ),
- (cov.Gaussian,
- {
- "variance": [1, 2],
- "length_scale": [0.5, 1, 2],
- },
- ),
- (cov.Matern,
- {
- "variance": [2],
- "length_scale": [0.5],
- "nu": [0.5, 1, 1.5, 2.5, 3.5, np.inf],
- },
- ),
- ],
-)
-def covariance_and_params(request: Any) -> Any:
- """Fixture to load the covariance functions."""
- return request.param
-
-
-##############################################################################
-# Tests
-##############################################################################
-
-
-def test_covariances(
- fetch_phoneme_fixture: Any,
- covariances_fixture: Any,
-) -> None:
- """Check that invalid parameters in fit raise exception."""
- fd = fetch_phoneme_fixture
- cov_kernel = covariances_fixture
-
- # Also test that it does not fail
- res1 = cov_kernel(fd, fd)
- res2 = cov_kernel(fd)
-
- np.testing.assert_allclose(
- res1,
- res2,
- atol=1e-7,
- )
-
-
-def test_raises(
- fetch_phoneme_fixture: Any,
- covariances_raise_fixture: Any,
-) -> None:
- """Check that it raises a ValueError exception."""
- fd = fetch_phoneme_fixture
- cov_kernel = covariances_raise_fixture
-
- np.testing.assert_raises(
- ValueError,
- cov_kernel,
- fd,
- )
-
-
-def test_fdatabasis_example_linear(
- fdatabasis_data: Any,
-) -> None:
- """Check a precalculated example for Linear covariance kernel."""
- fd1, fd2 = fdatabasis_data
- res1 = cov.Linear(variance=1 / 2, intercept=3)(fd1, fd2)
- res2 = np.array([[3 / 2], [3 / 2 + 32 / 6]])
- np.testing.assert_allclose(
- res1,
- res2,
- rtol=1e-6,
- )
-
-
-def test_fdatabasis_example_polynomial(
- fdatabasis_data: Any,
-) -> None:
- """Check a precalculated example for Polynomial covariance kernel."""
- fd1, fd2 = fdatabasis_data
- res1 = cov.Polynomial(
- variance=1 / 3,
- slope=2,
- intercept=1,
- degree=2,
- )(fd1, fd2)
- res2 = np.array([[1 / 3], [67**2 / 3**3]])
- np.testing.assert_allclose(
- res1,
- res2,
- rtol=1e-6,
- )
-
-
-def test_fdatabasis_example_gaussian(
- fdatabasis_data: Any,
-) -> None:
- """Check a precalculated example for Gaussian covariance kernel."""
- fd1, fd2 = fdatabasis_data
- res1 = cov.Gaussian(variance=3, length_scale=2)(fd1, fd2)
- res2 = np.array([
- [3 * np.exp(-7 / 6)],
- [3 * np.exp(-7 / 6)],
- ])
- np.testing.assert_allclose(
- res1,
- res2,
- rtol=1e-6,
- )
-
-
-def test_fdatabasis_example_exponential(
- fdatabasis_data: Any,
-) -> None:
- """Check a precalculated example for Exponential covariance kernel."""
- fd1, fd2 = fdatabasis_data
- res1 = cov.Exponential(variance=4, length_scale=5)(fd1, fd2)
- res2 = np.array([
- [4 * np.exp(-np.sqrt(28 / 3) / 5)],
- [4 * np.exp(-np.sqrt(28 / 3) / 5)],
- ])
- np.testing.assert_allclose(
- res1,
- res2,
- rtol=1e-6,
- )
-
-
-def test_fdatabasis_example_matern(
- fdatabasis_data: Any,
-) -> None:
- """Check a precalculated example for Matern covariance kernel."""
- fd1, fd2 = fdatabasis_data
- res1 = cov.Matern(variance=2, length_scale=3, nu=2)(fd1, fd2)
- res2 = np.array([
- [(2 / 3) ** 2 * (28 / 3) * 0.239775899566],
- [(2 / 3) ** 2 * (28 / 3) * 0.239775899566],
- ])
- np.testing.assert_allclose(
- res1,
- res2,
- rtol=1e-6,
- )
-
-
-def test_multivariate_covariance_kernel(
- multivariate_data: Any,
- covariance_and_params: Any,
-) -> None:
- """Test general covariance kernel against scikit-learn's kernel."""
- cov_kernel, param_dict = covariance_and_params
- for input_params in list(ParameterGrid(param_dict)):
- _test_compare_sklearn(multivariate_data, cov_kernel(**input_params))
+"""Tests for Covariance module."""
+from typing import Any, Tuple
+
+import numpy as np
+import pytest
+from sklearn.model_selection import ParameterGrid
+
+import skfda.misc.covariances as cov
+from skfda import FDataBasis, FDataGrid
+from skfda.datasets import fetch_weather
+from skfda.representation.basis import MonomialBasis
+
+
+def _test_compare_sklearn(
+ multivariate_data: Any,
+ cov: cov.Covariance,
+) -> None:
+ cov_sklearn = cov.to_sklearn()
+ cov_matrix = cov(multivariate_data)
+ cov_sklearn_matrix = cov_sklearn(multivariate_data)
+
+ np.testing.assert_array_almost_equal(cov_matrix, cov_sklearn_matrix)
+
+##############################################################################
+# Fixtures
+##############################################################################
+
+
+@pytest.fixture
+def fetch_functional_data() -> FDataGrid:
+ """Fixture for loading the phoneme dataset example."""
+ fd, _ = fetch_weather(return_X_y=True)
+ return fd[:20]
+
+
+@pytest.fixture(
+ params=[
+ cov.Linear(),
+ cov.Polynomial(),
+ cov.Gaussian(),
+ cov.Exponential(),
+ cov.Matern(),
+ ],
+)
+def covariances_fixture(request: Any) -> Any:
+ """Fixture for getting a covariance kernel function."""
+ return request.param
+
+
+@pytest.fixture(
+ params=[
+ cov.Brownian(),
+ cov.WhiteNoise(),
+ ],
+)
+def covariances_raise_fixture(request: Any) -> Any:
+ """Fixture for getting a covariance kernel that raises a ValueError."""
+ return request.param
+
+
+@pytest.fixture
+def fdatabasis_data() -> Tuple[FDataBasis, FDataBasis]:
+ """Fixture for getting fdatabasis objects."""
+ basis = MonomialBasis(
+ n_basis=2,
+ domain_range=(-2, 2),
+ )
+
+ fd1 = FDataBasis(
+ basis=basis,
+ coefficients=[
+ [1, 0],
+ [1, 2],
+ ],
+ )
+
+ fd2 = FDataBasis(
+ basis=basis,
+ coefficients=[
+ [0, 1],
+ ],
+ )
+
+ return fd1, fd2
+
+
+@pytest.fixture
+def multivariate_data() -> np.array:
+ """Fixture for getting multivariate data."""
+ return np.linspace(-1, 1, 1000)[:, np.newaxis]
+
+
+@pytest.fixture(
+ params=[
+ (cov.Linear,
+ {
+ "variance": [1, 2],
+ "intercept": [3, 4],
+ },
+ ),
+ (cov.Polynomial,
+ {
+ "variance": [2],
+ "intercept": [0, 2],
+ "slope": [1, 2],
+ "degree": [1, 2, 3],
+ },
+ ),
+ (cov.Exponential,
+ {
+ "variance": [1, 2],
+ "length_scale": [0.5, 1, 2],
+ },
+ ),
+ (cov.Gaussian,
+ {
+ "variance": [1, 2],
+ "length_scale": [0.5, 1, 2],
+ },
+ ),
+ (cov.Matern,
+ {
+ "variance": [2],
+ "length_scale": [0.5],
+ "nu": [0.5, 1, 1.5, 2.5, 3.5, np.inf],
+ },
+ ),
+ ],
+)
+def covariance_and_params(request: Any) -> Any:
+ """Fixture to load the covariance functions."""
+ return request.param
+
+
+##############################################################################
+# Tests
+##############################################################################
+
+
+def test_covariances(
+ fetch_phoneme_fixture: FDataGrid,
+ covariances_fixture: cov.Covariance,
+) -> None:
+ """Check that invalid parameters in fit raise exception."""
+ fd = fetch_phoneme_fixture
+ cov_kernel = covariances_fixture
+
+ # Also test that it does not fail
+ res1 = cov_kernel(fd, fd)
+ res2 = cov_kernel(fd)
+
+ np.testing.assert_allclose(
+ res1,
+ res2,
+ atol=1e-7,
+ )
+
+
+def test_raises(
+ fetch_phoneme_fixture: FDataGrid,
+ covariances_raise_fixture: Any,
+) -> None:
+ """Check that it raises a ValueError exception."""
+ fd = fetch_phoneme_fixture
+ cov_kernel = covariances_raise_fixture
+
+ np.testing.assert_raises(
+ ValueError,
+ cov_kernel,
+ fd,
+ )
+
+
+def test_fdatabasis_example_linear(
+ fdatabasis_data: Tuple[FDataBasis, FDataBasis],
+) -> None:
+ """Check a precalculated example for Linear covariance kernel."""
+ fd1, fd2 = fdatabasis_data
+ res1 = cov.Linear(variance=1 / 2, intercept=3)(fd1, fd2)
+ res2 = np.array([[3 / 2], [3 / 2 + 32 / 6]])
+ np.testing.assert_allclose(
+ res1,
+ res2,
+ rtol=1e-6,
+ )
+
+
+def test_fdatabasis_example_polynomial(
+ fdatabasis_data: Tuple[FDataBasis, FDataBasis],
+) -> None:
+ """Check a precalculated example for Polynomial covariance kernel."""
+ fd1, fd2 = fdatabasis_data
+ res1 = cov.Polynomial(
+ variance=1 / 3,
+ slope=2,
+ intercept=1,
+ degree=2,
+ )(fd1, fd2)
+ res2 = np.array([[1 / 3], [67**2 / 3**3]])
+ np.testing.assert_allclose(
+ res1,
+ res2,
+ rtol=1e-6,
+ )
+
+
+def test_fdatabasis_example_gaussian(
+ fdatabasis_data: Tuple[FDataBasis, FDataBasis],
+) -> None:
+ """Check a precalculated example for Gaussian covariance kernel."""
+ fd1, fd2 = fdatabasis_data
+ res1 = cov.Gaussian(variance=3, length_scale=2)(fd1, fd2)
+ res2 = np.array([
+ [3 * np.exp(-7 / 6)],
+ [3 * np.exp(-7 / 6)],
+ ])
+ np.testing.assert_allclose(
+ res1,
+ res2,
+ rtol=1e-6,
+ )
+
+
+def test_fdatabasis_example_exponential(
+ fdatabasis_data: Tuple[FDataBasis, FDataBasis],
+) -> None:
+ """Check a precalculated example for Exponential covariance kernel."""
+ fd1, fd2 = fdatabasis_data
+ res1 = cov.Exponential(variance=4, length_scale=5)(fd1, fd2)
+ res2 = np.array([
+ [4 * np.exp(-np.sqrt(28 / 3) / 5)],
+ [4 * np.exp(-np.sqrt(28 / 3) / 5)],
+ ])
+ np.testing.assert_allclose(
+ res1,
+ res2,
+ rtol=1e-6,
+ )
+
+
+def test_fdatabasis_example_matern(
+ fdatabasis_data: Tuple[FDataBasis, FDataBasis],
+) -> None:
+ """Check a precalculated example for Matern covariance kernel."""
+ fd1, fd2 = fdatabasis_data
+ res1 = cov.Matern(variance=2, length_scale=3, nu=2)(fd1, fd2)
+ res2 = np.array([
+ [(2 / 3) ** 2 * (28 / 3) * 0.239775899566],
+ [(2 / 3) ** 2 * (28 / 3) * 0.239775899566],
+ ])
+ np.testing.assert_allclose(
+ res1,
+ res2,
+ rtol=1e-6,
+ )
+
+
+def test_multivariate_covariance_kernel(
+ multivariate_data: np.array,
+ covariance_and_params: Any,
+) -> None:
+ """Test general covariance kernel against scikit-learn's kernel."""
+ cov_kernel, param_dict = covariance_and_params
+ for input_params in list(ParameterGrid(param_dict)):
+ _test_compare_sklearn(multivariate_data, cov_kernel(**input_params))
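+
+# Illustrative note (added, not part of the original patch): sklearn's
+# ParameterGrid expands each parameter dictionary into every combination
+# of values, so the loop above builds and checks one kernel per
+# combination, e.g.
+#
+# list(ParameterGrid({"variance": [1, 2], "intercept": [3, 4]}))
+#
+# yields four dictionaries pairing each variance with each intercept.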
From 2c95ed16d39bd1e516f4796e5fa812e6d305b146 Mon Sep 17 00:00:00 2001
From: E105D104U125
Date: Tue, 26 Mar 2024 11:08:26 +0100
Subject: [PATCH 06/14] Changed import to avoid circular dependencies.
---
skfda/misc/covariances.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/skfda/misc/covariances.py b/skfda/misc/covariances.py
index bff439734..4b25c439a 100644
--- a/skfda/misc/covariances.py
+++ b/skfda/misc/covariances.py
@@ -9,8 +9,8 @@
from matplotlib.figure import Figure
from scipy.special import gamma, kv
-from ..misc import inner_product_matrix
-from ..misc.metrics import PairwiseMetric, LpNorm, l2_distance
+from ..misc._math import inner_product_matrix
+from ..misc.metrics import PairwiseMetric, l2_distance
from ..representation import FData, FDataBasis, FDataGrid
from ..representation.basis import TensorBasis
from ..typing._numpy import ArrayLike, NDArrayFloat
@@ -38,7 +38,7 @@ def _transform_to_2d(t: ArrayLike) -> NDArrayFloat:
"""Transform 1d arrays in column vectors."""
t = np.asfarray(t)
- dim = t.ndim
+ dim = len(t.shape)
assert dim <= 2
if dim < 2:
From 2f993453b65143443ece22edf92aa0720dfdb2f4 Mon Sep 17 00:00:00 2001
From: E105D104U125
Date: Tue, 26 Mar 2024 11:28:05 +0100
Subject: [PATCH 07/14] Changed data fetching function name
---
skfda/tests/test_covariances.py | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/skfda/tests/test_covariances.py b/skfda/tests/test_covariances.py
index 598280342..c28019afc 100644
--- a/skfda/tests/test_covariances.py
+++ b/skfda/tests/test_covariances.py
@@ -7,7 +7,7 @@
import skfda.misc.covariances as cov
from skfda import FDataBasis, FDataGrid
-from skfda.datasets import fetch_weather # fetch_phoneme,
+from skfda.datasets import fetch_weather
from skfda.representation.basis import MonomialBasis
@@ -138,11 +138,11 @@ def covariance_and_params(request: Any) -> Any:
def test_covariances(
- fetch_phoneme_fixture: FDataGrid,
+ fetch_functional_data: FDataGrid,
covariances_fixture: cov.Covariance,
) -> None:
"""Check that invalid parameters in fit raise exception."""
- fd = fetch_phoneme_fixture
+ fd = fetch_functional_data
cov_kernel = covariances_fixture
# Also test that it does not fail
@@ -157,11 +157,11 @@ def test_covariances(
def test_raises(
- fetch_phoneme_fixture: FDataGrid,
+ fetch_functional_data: FDataGrid,
covariances_raise_fixture: Any,
) -> None:
"""Check that it raises a ValueError exception."""
- fd = fetch_phoneme_fixture
+ fd = fetch_functional_data
cov_kernel = covariances_raise_fixture
np.testing.assert_raises(
From 81f6e585b991dbbcc3dab74122cab974efa0d886 Mon Sep 17 00:00:00 2001
From: E105D104U125
Date: Thu, 4 Jul 2024 12:04:46 +0200
Subject: [PATCH 08/14] Covariance_kernels pull request changes incorporated.
---
skfda/misc/covariances.py | 123 ++++++++++++++++++--------------
skfda/tests/test_covariances.py | 16 +++--
2 files changed, 81 insertions(+), 58 deletions(-)
diff --git a/skfda/misc/covariances.py b/skfda/misc/covariances.py
index 4b25c439a..d5d7a1abd 100644
--- a/skfda/misc/covariances.py
+++ b/skfda/misc/covariances.py
@@ -1,12 +1,14 @@
+"""Covariances module."""
from __future__ import annotations
import abc
-from typing import Callable, Sequence, Tuple, Union
+from typing import Any, Callable, Sequence
import matplotlib.pyplot as plt
import numpy as np
import sklearn.gaussian_process.kernels as sklearn_kern
from matplotlib.figure import Figure
+from numpy.typing import NDArray
from scipy.special import gamma, kv
from ..misc._math import inner_product_matrix
@@ -22,23 +24,20 @@ def _squared_norms(x: NDArrayFloat, y: NDArrayFloat) -> NDArrayFloat:
).sum(2)
-CovarianceLike = Union[
- float,
- NDArrayFloat,
- Callable[[ArrayLike, ArrayLike], NDArrayFloat],
-]
+CovarianceLike = (
+ float
+ | NDArrayFloat
+ | Callable[[ArrayLike, ArrayLike], NDArrayFloat]
+)
-InputAcceptable = Union[
- np.ndarray,
- FData,
-]
+Input = NDArray[Any] | FData
def _transform_to_2d(t: ArrayLike) -> NDArrayFloat:
"""Transform 1d arrays in column vectors."""
t = np.asfarray(t)
- dim = len(t.shape)
+ dim = t.ndim
assert dim <= 2
if dim < 2:
@@ -58,39 +57,40 @@ def _execute_covariance(
if isinstance(covariance, (int, float)):
return np.array(covariance)
+
+ if callable(covariance):
+ result = covariance(x, y)
+ elif isinstance(covariance, np.ndarray):
+ result = covariance
else:
- if callable(covariance):
- result = covariance(x, y)
- elif isinstance(covariance, np.ndarray):
- result = covariance
- else:
- # GPy kernel
- result = covariance.K(x, y)
+ # GPy kernel
+ result = covariance.K(x, y)
- assert result.shape[0] == len(x)
- assert result.shape[1] == len(y)
- return result
+ assert result.shape[0] == len(x)
+ assert result.shape[1] == len(y)
+ return result
class Covariance(abc.ABC):
"""Abstract class for covariance functions."""
- _parameters_str: Sequence[Tuple[str, str]]
+ _parameters_str: Sequence[tuple[str, str]]
_latex_formula: str
@abc.abstractmethod
def __call__(
self,
- x: InputAcceptable,
- y: InputAcceptable | None = None,
+ x: Input,
+ y: Input | None = None,
) -> NDArrayFloat:
+ """Compute covariance function on input data."""
pass
def _param_check_and_transform(
self,
- x: InputAcceptable,
- y: InputAcceptable | None = None,
- ) -> Tuple[np.ndarray | FData, np.ndarray | FData]:
+ x: Input,
+ y: Input | None = None,
+ ) -> tuple[Input, Input]:
# Param check
if y is None:
y = x
@@ -101,8 +101,7 @@ def _param_check_and_transform(
f'({type(x)}, {type(y)}).',
)
- if isinstance(x, np.ndarray) and isinstance(y, np.ndarray):
- x, y = np.array(x), np.array(y)
+ if not isinstance(x, FData) and not isinstance(y, FData):
if len(x.shape) < 2:
x = np.atleast_2d(x)
if len(y.shape) < 2:
@@ -110,7 +109,7 @@ def _param_check_and_transform(
return x, y
- def heatmap(self, limits: Tuple[float, float] = (-1, 1)) -> Figure:
+ def heatmap(self, limits: tuple[float, float] = (-1, 1)) -> Figure:
"""Return a heatmap plot of the covariance function."""
from ..exploratory.visualization._utils import _create_figure
@@ -266,6 +265,7 @@ class Brownian(Covariance):
Brownian()
"""
+
_latex_formula = (
r"K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + "
r"|x' - \mathcal{O}| - |x - x'|}{2}"
@@ -282,14 +282,21 @@ def __init__(self, *, variance: float = 1, origin: float = 0) -> None:
def __call__(
self,
- x: InputAcceptable | FData,
- y: InputAcceptable | None = None,
+ x: NDArray[Any],
+ y: NDArray[Any] | None = None,
) -> NDArrayFloat:
+ """Compute Brownian covariance function on input data."""
if isinstance(x, FData) or isinstance(y, FData):
- raise ValueError('Not defined for FData objects.')
+ raise ValueError(
+ 'Brownian covariance not defined for FData objects.',
+ )
+
+ xx: NDArray[Any]
+ yy: NDArray[Any]
+ xx, yy = self._param_check_and_transform(x, y)
- x = _transform_to_2d(x) - self.origin
- y = _transform_to_2d(y) - self.origin
+ xx = xx - self.origin
+ yy = yy - self.origin
sum_norms = np.add.outer(
np.linalg.norm(x, axis=-1),
@@ -363,9 +370,10 @@ def __init__(self, *, variance: float = 1, intercept: float = 0) -> None:
def __call__(
self,
- x: InputAcceptable,
- y: InputAcceptable | None = None,
+ x: Input,
+ y: Input | None = None,
) -> NDArrayFloat:
+ """Compute linear covariance function on input data."""
x, y = self._param_check_and_transform(x, y)
x_y = inner_product_matrix(x, y)
@@ -445,12 +453,13 @@ def __init__(
self.intercept = intercept
self.slope = slope
self.degree = degree
-
+
def __call__(
self,
- x: InputAcceptable,
- y: InputAcceptable | None = None,
+ x: Input,
+ y: Input | None = None,
) -> NDArrayFloat:
+ """Compute polynomial covariance function on input data."""
x, y = self._param_check_and_transform(x, y)
x_y = inner_product_matrix(x, y)
@@ -527,13 +536,16 @@ def __init__(self, *, variance: float = 1, length_scale: float = 1):
def __call__(
self,
- x: InputAcceptable,
- y: InputAcceptable | None = None,
+ x: Input,
+ y: Input | None = None,
) -> NDArrayFloat:
+ """Compute Gaussian covariance function on input data."""
x, y = self._param_check_and_transform(x, y)
distance_x_y = PairwiseMetric(l2_distance)(x, y)
- return self.variance * np.exp(-distance_x_y ** 2 / (2 * self.length_scale ** 2))
+ return self.variance * np.exp( # type: ignore[no-any-return]
+ -distance_x_y ** 2 / (2 * self.length_scale ** 2),
+ )
def to_sklearn(self) -> sklearn_kern.Kernel:
return (
@@ -606,9 +618,10 @@ def __init__(
def __call__(
self,
- x: InputAcceptable,
- y: InputAcceptable | None = None,
+ x: Input,
+ y: Input | None = None,
) -> NDArrayFloat:
+ """Compute exponential covariance function on input data."""
x, y = self._param_check_and_transform(x, y)
distance_x_y = PairwiseMetric(l2_distance)(x, y)
@@ -680,9 +693,10 @@ def __init__(self, *, variance: float = 1):
def __call__(
self,
- x: InputAcceptable,
- y: InputAcceptable | None = None,
+ x: Input,
+ y: Input | None = None,
) -> NDArrayFloat:
+ """Compute white noise covariance function on input data."""
if isinstance(x, FData) or isinstance(y, FData):
raise ValueError('Not defined for FData objects.')
@@ -767,9 +781,10 @@ def __init__(
def __call__(
self,
- x: InputAcceptable,
- y: InputAcceptable | None = None,
+ x: Input,
+ y: Input | None = None,
) -> NDArrayFloat:
+ """Compute Matern covariance function on input data."""
x, y = self._param_check_and_transform(x, y)
distance_x_y = PairwiseMetric(l2_distance)(x, y)
@@ -864,12 +879,9 @@ def __init__(self, data: FData) -> None:
def __call__(
self,
- x: InputAcceptable,
- y: InputAcceptable | None = None,
+ x: Input,
+ y: Input | None = None,
) -> NDArrayFloat:
- if isinstance(x, FData) or isinstance(y, FData):
- raise ValueError('Not defined for FData objects.')
-
"""Evaluate the covariance function.
Args:
@@ -879,6 +891,9 @@ def __call__(
Returns:
Covariance function evaluated at the grid formed by x and y.
"""
+ if isinstance(x, FData) or isinstance(y, FData):
+ raise ValueError('Not defined for FData objects.')
+
return self.cov_fdata([x, y], grid=True)[0, ..., 0]
diff --git a/skfda/tests/test_covariances.py b/skfda/tests/test_covariances.py
index c28019afc..ebf78f20d 100644
--- a/skfda/tests/test_covariances.py
+++ b/skfda/tests/test_covariances.py
@@ -60,7 +60,11 @@ def covariances_raise_fixture(request: Any) -> Any:
@pytest.fixture
def fdatabasis_data() -> Tuple[FDataBasis, FDataBasis]:
- """Fixture for getting fdatabasis objects."""
+ """Fixture for getting fdatabasis objects.
+
+ The dataset is used to test manual calculations of the covariance functions
+ against the implementation.
+ """
basis = MonomialBasis(
n_basis=2,
domain_range=(-2, 2),
@@ -141,7 +145,7 @@ def test_covariances(
fetch_functional_data: FDataGrid,
covariances_fixture: cov.Covariance,
) -> None:
- """Check that invalid parameters in fit raise exception."""
+ """Check that parameter conversion is done correctly."""
fd = fetch_functional_data
cov_kernel = covariances_fixture
@@ -160,11 +164,15 @@ def test_raises(
fetch_functional_data: FDataGrid,
covariances_raise_fixture: Any,
) -> None:
- """Check that it raises a ValueError exception."""
+ """Check raises ValueError.
+
+ Check that non-functional kernels raise a ValueError exception
+ with functional data.
+ """
fd = fetch_functional_data
cov_kernel = covariances_raise_fixture
- np.testing.assert_raises(
+ pytest.raises(
ValueError,
cov_kernel,
fd,
From b18326967aa50036cf3082dc76e937cb84ac1c70 Mon Sep 17 00:00:00 2001
From: E105D104U125
Date: Thu, 4 Jul 2024 12:32:12 +0200
Subject: [PATCH 09/14] Covariance_kernels pull request changes incorporated.
---
skfda/misc/covariances.py | 58 +++++++++++++++++++++++----------------
1 file changed, 35 insertions(+), 23 deletions(-)
diff --git a/skfda/misc/covariances.py b/skfda/misc/covariances.py
index d5d7a1abd..a36142afb 100644
--- a/skfda/misc/covariances.py
+++ b/skfda/misc/covariances.py
@@ -181,7 +181,10 @@ def _repr_html_(self) -> str:
row_style = ''
- def column_style(percent: float, margin_top: str = "0") -> str:
+ def column_style( # noqa: WPS430
+ percent: float,
+ margin_top: str = "0",
+ ) -> str:
return (
f'style="display: inline-block; '
f'margin:0; '
@@ -205,7 +208,7 @@ def column_style(percent: float, margin_top: str = "0") -> str:
{heatmap}
- """
+ """ # noqa: WPS432
def to_sklearn(self) -> sklearn_kern.Kernel:
"""Convert it to a sklearn kernel, if there is one."""
@@ -291,12 +294,11 @@ def __call__(
'Brownian covariance not defined for FData objects.',
)
- xx: NDArray[Any]
- yy: NDArray[Any]
- xx, yy = self._param_check_and_transform(x, y)
+ x = _transform_to_2d(x)
+ y = _transform_to_2d(y)
- xx = xx - self.origin
- yy = yy - self.origin
+ x = x - self.origin
+ y = y - self.origin
sum_norms = np.add.outer(
np.linalg.norm(x, axis=-1),
@@ -380,6 +382,7 @@ def __call__(
return self.variance * (x_y + self.intercept)
def to_sklearn(self) -> sklearn_kern.Kernel:
+ """Obtain corresponding scikit-learn kernel type."""
return (
self.variance
* (sklearn_kern.DotProduct(0) + self.intercept)
@@ -469,6 +472,7 @@ def __call__(
)
def to_sklearn(self) -> sklearn_kern.Kernel:
+ """Obtain corresponding scikit-learn kernel type."""
return (
self.variance
* (self.slope * sklearn_kern.DotProduct(0) + self.intercept)
@@ -548,6 +552,7 @@ def __call__(
)
def to_sklearn(self) -> sklearn_kern.Kernel:
+ """Obtain corresponding scikit-learn kernel type."""
return (
self.variance * sklearn_kern.RBF(length_scale=self.length_scale)
)
@@ -625,9 +630,13 @@ def __call__(
x, y = self._param_check_and_transform(x, y)
distance_x_y = PairwiseMetric(l2_distance)(x, y)
- return self.variance * np.exp(-distance_x_y / (self.length_scale))
+ return self.variance * np.exp( # type: ignore[no-any-return]
+ -distance_x_y
+ / (self.length_scale),
+ )
def to_sklearn(self) -> sklearn_kern.Kernel:
+ """Obtain corresponding scikit-learn kernel type."""
return (
self.variance
* sklearn_kern.Matern(length_scale=self.length_scale, nu=0.5)
@@ -704,6 +713,7 @@ def __call__(
return self.variance * np.eye(x.shape[0])
def to_sklearn(self) -> sklearn_kern.Kernel:
+ """Obtain corresponding scikit-learn kernel type."""
return sklearn_kern.WhiteKernel(noise_level=self.variance)
@@ -756,6 +766,7 @@ class Matern(Covariance):
Matern()
"""
+
_latex_formula = (
r"K(x, x') = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)}"
r"\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)^{\nu}"
@@ -818,23 +829,24 @@ def __call__(
-distance_x_y ** 2 / (2 * self.length_scale ** 2),
)
)
- else:
- # General formula
- scaling = 2**(1 - self.nu) / gamma(self.nu)
- body = np.sqrt(2 * self.nu) * distance_x_y / self.length_scale
- power = body**self.nu
- bessel = kv(self.nu, body)
-
- with np.errstate(invalid='ignore'):
- eval_cov = self.variance * scaling * power * bessel
-
- # Values with nan are where the distance is 0
- return np.nan_to_num( # type: ignore[no-any-return]
- eval_cov,
- nan=self.variance,
- )
+
+ # General formula
+ scaling = 2**(1 - self.nu) / gamma(self.nu)
+ body = np.sqrt(2 * self.nu) * distance_x_y / self.length_scale
+ power = body**self.nu
+ bessel = kv(self.nu, body)
+
+ with np.errstate(invalid='ignore'):
+ eval_cov = self.variance * scaling * power * bessel
+
+ # Values with nan are where the distance is 0
+ return np.nan_to_num( # type: ignore[no-any-return]
+ eval_cov,
+ nan=self.variance,
+ )
def to_sklearn(self) -> sklearn_kern.Kernel:
+ """Obtain corresponding scikit-learn kernel type."""
return (
self.variance
* sklearn_kern.Matern(length_scale=self.length_scale, nu=self.nu)
From fb79ed1bc4fbfc7c3b9fd2b75f41dc49e62dccd1 Mon Sep 17 00:00:00 2001
From: E105D104U125
Date: Mon, 8 Jul 2024 11:33:44 +0200
Subject: [PATCH 10/14] Merge develop into covariance_kernels
---
skfda/misc/covariances.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/skfda/misc/covariances.py b/skfda/misc/covariances.py
index a36142afb..c130fc454 100644
--- a/skfda/misc/covariances.py
+++ b/skfda/misc/covariances.py
@@ -730,7 +730,7 @@ class Matern(Covariance):
where :math:`\sigma^2` is the variance, :math:`l` is the length scale
and :math:`\nu` controls the smoothness of the related Gaussian process.
- The trajectories of a Gaussian process with Matérn covariance is
+ The trajectories of a Gaussian process with Matérn covariance are
:math:`\lceil \nu \rceil - 1` times differentiable.
From 4e9db76673313716bfb8f9178c2d3daebee6d902 Mon Sep 17 00:00:00 2001
From: E105D104U125
Date: Mon, 8 Jul 2024 12:22:42 +0200
Subject: [PATCH 11/14] Refactored covariances test module and corrected style
problems.
---
skfda/tests/test_covariances.py | 252 ++++++++++++++------------------
1 file changed, 110 insertions(+), 142 deletions(-)
diff --git a/skfda/tests/test_covariances.py b/skfda/tests/test_covariances.py
index ebf78f20d..1e8550564 100644
--- a/skfda/tests/test_covariances.py
+++ b/skfda/tests/test_covariances.py
@@ -1,5 +1,5 @@
"""Tests for Covariance module."""
-from typing import Any, Tuple
+from typing import Any
import numpy as np
import pytest
@@ -21,14 +21,27 @@ def _test_compare_sklearn(
np.testing.assert_array_almost_equal(cov_matrix, cov_sklearn_matrix)
+###############################################################################
+# Example datasets for which to calculate the evaluation of the kernel by hand
+# to compare it against the results yielded by the implementation.
+###############################################################################
+
+
+basis = MonomialBasis(n_basis=2, domain_range=(-2, 2))
+
+fd = [
+ FDataBasis(basis=basis, coefficients=[[1, 0], [1, 2]]),
+ FDataBasis(basis=basis, coefficients=[[0, 1]]),
+]
+
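+# Worked example (added for clarity, not in the original patch): with the
+# monomial basis {1, x} on (-2, 2), the samples above are f1(x) = 1,
+# f2(x) = 1 + 2x and g(x) = x. For the Linear kernel used below, with
+# variance 1/2 and intercept 3, the expected values follow from the L2
+# inner products:
+# <f1, g> = integral of x over (-2, 2) = 0 -> (1/2) * (0 + 3) = 3/2
+# <f2, g> = integral of (x + 2x^2) over (-2, 2) = 32/3
+# -> (1/2) * (32/3 + 3) = 3/2 + 32/6
+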
##############################################################################
# Fixtures
##############################################################################
@pytest.fixture
-def fetch_functional_data() -> FDataGrid:
- """Fixture for loading the phoneme dataset example."""
+def fetch_weather_subset() -> FDataGrid:
+ """Fixture for loading the canadian weather dataset example."""
fd, _ = fetch_weather(return_X_y=True)
return fd[:20]
@@ -58,34 +71,46 @@ def covariances_raise_fixture(request: Any) -> Any:
return request.param
-@pytest.fixture
-def fdatabasis_data() -> Tuple[FDataBasis, FDataBasis]:
+@pytest.fixture(
+ params=[
+ [
+ cov.Linear(variance=1 / 2, intercept=3),
+ np.array([[3 / 2], [3 / 2 + 32 / 6]]),
+ ],
+ [
+ cov.Polynomial(variance=1 / 3, slope=2, intercept=1, degree=2),
+ np.array([[1 / 3], [67**2 / 3**3]]),
+ ],
+ [
+ cov.Gaussian(variance=3, length_scale=2),
+ np.array([[3 * np.exp(-7 / 6)], [3 * np.exp(-7 / 6)]]),
+ ],
+ [
+ cov.Exponential(variance=4, length_scale=5),
+ np.array([
+ [4 * np.exp(-np.sqrt(28 / 3) / 5)],
+ [4 * np.exp(-np.sqrt(28 / 3) / 5)],
+ ]),
+ ],
+ [
+ cov.Matern(variance=2, length_scale=3, nu=2),
+ np.array([
+ [(2 / 3) ** 2 * (28 / 3) * 0.239775899566],
+ [(2 / 3) ** 2 * (28 / 3) * 0.239775899566],
+ ]),
+ ],
+ ],
+)
+def precalc_example_data(
+ request: Any,
+) -> list[FDataBasis, FDataBasis, cov.Covariance, np.array]:
"""Fixture for getting fdatabasis objects.
The dataset is used to test manual calculations of the covariance functions
against the implementation.
"""
- basis = MonomialBasis(
- n_basis=2,
- domain_range=(-2, 2),
- )
-
- fd1 = FDataBasis(
- basis=basis,
- coefficients=[
- [1, 0],
- [1, 2],
- ],
- )
-
- fd2 = FDataBasis(
- basis=basis,
- coefficients=[
- [0, 1],
- ],
- )
-
- return fd1, fd2
+ # First fd, Second fd, kernel used, result
+ return *fd, *request.param
@pytest.fixture
@@ -96,39 +121,44 @@ def multivariate_data() -> np.array:
@pytest.fixture(
params=[
- (cov.Linear,
- {
- "variance": [1, 2],
- "intercept": [3, 4],
- },
- ),
- (cov.Polynomial,
- {
- "variance": [2],
- "intercept": [0, 2],
- "slope": [1, 2],
- "degree": [1, 2, 3],
- },
- ),
- (cov.Exponential,
- {
- "variance": [1, 2],
- "length_scale": [0.5, 1, 2],
- },
- ),
- (cov.Gaussian,
- {
- "variance": [1, 2],
- "length_scale": [0.5, 1, 2],
- },
- ),
- (cov.Matern,
- {
- "variance": [2],
- "length_scale": [0.5],
- "nu": [0.5, 1, 1.5, 2.5, 3.5, np.inf],
- },
- ),
+ [
+ cov.Linear,
+ {
+ "variance": [1, 2],
+ "intercept": [3, 4],
+ },
+ ],
+ [
+ cov.Polynomial,
+ {
+ "variance": [2],
+ "intercept": [0, 2],
+ "slope": [1, 2],
+ "degree": [1, 2, 3],
+ },
+ ],
+ [
+ cov.Exponential,
+ {
+ "variance": [1, 2],
+ "length_scale": [0.5, 1, 2],
+ },
+ ],
+ [
+ cov.Gaussian,
+ {
+ "variance": [1, 2],
+ "length_scale": [0.5, 1, 2],
+ },
+ ],
+ [
+ cov.Matern,
+ {
+ "variance": [2],
+ "length_scale": [0.5],
+ "nu": [0.5, 1, 1.5, 2.5, 3.5, np.inf],
+ },
+ ],
],
)
def covariance_and_params(request: Any) -> Any:
@@ -142,11 +172,11 @@ def covariance_and_params(request: Any) -> Any:
def test_covariances(
- fetch_functional_data: FDataGrid,
+ fetch_weather_subset: FDataGrid,
covariances_fixture: cov.Covariance,
) -> None:
"""Check that parameter conversion is done correctly."""
- fd = fetch_functional_data
+ fd = fetch_weather_subset
cov_kernel = covariances_fixture
# Also test that it does not fail
@@ -161,7 +191,7 @@ def test_covariances(
def test_raises(
- fetch_functional_data: FDataGrid,
+ fetch_weather_subset: FDataGrid,
covariances_raise_fixture: Any,
) -> None:
"""Check raises ValueError.
@@ -169,7 +199,7 @@ def test_raises(
Check that non-functional kernels raise a ValueError exception
with functional data.
"""
- fd = fetch_functional_data
+ fd = fetch_weather_subset
cov_kernel = covariances_raise_fixture
pytest.raises(
@@ -179,86 +209,24 @@ def test_raises(
)
-def test_fdatabasis_example_linear(
- fdatabasis_data: Tuple[FDataBasis, FDataBasis],
-) -> None:
- """Check a precalculated example for Linear covariance kernel."""
- fd1, fd2 = fdatabasis_data
- res1 = cov.Linear(variance=1 / 2, intercept=3)(fd1, fd2)
- res2 = np.array([[3 / 2], [3 / 2 + 32 / 6]])
- np.testing.assert_allclose(
- res1,
- res2,
- rtol=1e-6,
- )
-
-
-def test_fdatabasis_example_polynomial(
- fdatabasis_data: Tuple[FDataBasis, FDataBasis],
-) -> None:
- """Check a precalculated example for Polynomial covariance kernel."""
- fd1, fd2 = fdatabasis_data
- res1 = cov.Polynomial(
- variance=1 / 3,
- slope=2,
- intercept=1,
- degree=2,
- )(fd1, fd2)
- res2 = np.array([[1 / 3], [67**2 / 3**3]])
- np.testing.assert_allclose(
- res1,
- res2,
- rtol=1e-6,
- )
-
-
-def test_fdatabasis_example_gaussian(
- fdatabasis_data: Tuple[FDataBasis, FDataBasis],
-) -> None:
- """Check a precalculated example for Gaussian covariance kernel."""
- fd1, fd2 = fdatabasis_data
- res1 = cov.Gaussian(variance=3, length_scale=2)(fd1, fd2)
- res2 = np.array([
- [3 * np.exp(-7 / 6)],
- [3 * np.exp(-7 / 6)],
- ])
- np.testing.assert_allclose(
- res1,
- res2,
- rtol=1e-6,
- )
-
-
-def test_fdatabasis_example_exponential(
- fdatabasis_data: Tuple[FDataBasis, FDataBasis],
-) -> None:
- """Check a precalculated example for Exponential covariance kernel."""
- fd1, fd2 = fdatabasis_data
- res1 = cov.Exponential(variance=4, length_scale=5)(fd1, fd2)
- res2 = np.array([
- [4 * np.exp(-np.sqrt(28 / 3) / 5)],
- [4 * np.exp(-np.sqrt(28 / 3) / 5)],
- ])
- np.testing.assert_allclose(
- res1,
- res2,
- rtol=1e-6,
- )
-
-
-def test_fdatabasis_example_matern(
- fdatabasis_data: Tuple[FDataBasis, FDataBasis],
-) -> None:
- """Check a precalculated example for Matern covariance kernel."""
- fd1, fd2 = fdatabasis_data
- res1 = cov.Matern(variance=2, length_scale=3, nu=2)(fd1, fd2)
- res2 = np.array([
- [(2 / 3) ** 2 * (28 / 3) * 0.239775899566],
- [(2 / 3) ** 2 * (28 / 3) * 0.239775899566],
- ])
+def test_precalc_example(
+ precalc_example_data: list[ # noqa: WPS320
+ FDataBasis, FDataBasis, cov.Covariance, np.array,
+ ],
+):
+ """Check the precalculated example for Linear covariance kernel.
+
+ Compare the theoretical precalculated results against the covariance kernel
+ implementation, for different kernels.
+ The structure of the input is a list containing:
+ [First functional dataset, Second functional dataset,
+ Covariance kernel used, Result]
+ """
+ fd1, fd2, kernel, precalc_result = precalc_example_data
+ computed_result = kernel(fd1, fd2)
np.testing.assert_allclose(
- res1,
- res2,
+ computed_result,
+ precalc_result,
rtol=1e-6,
)
From 26c0e73b82fae6e76e645c25488e70db2a523e31 Mon Sep 17 00:00:00 2001
From: E105D104U125
Date: Mon, 8 Jul 2024 12:28:17 +0200
Subject: [PATCH 12/14] Changes in style
---
skfda/misc/covariances.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/skfda/misc/covariances.py b/skfda/misc/covariances.py
index c130fc454..00baa7e66 100644
--- a/skfda/misc/covariances.py
+++ b/skfda/misc/covariances.py
@@ -208,7 +208,7 @@ def column_style( # noqa: WPS430
{heatmap}
- """ # noqa: WPS432
+ """ # noqa: WPS432, WPS318
def to_sklearn(self) -> sklearn_kern.Kernel:
"""Convert it to a sklearn kernel, if there is one."""
From 85c0dccf75df25927fa027f80770e4b368c5973e Mon Sep 17 00:00:00 2001
From: E105D104U125
Date: Tue, 9 Jul 2024 18:53:14 +0200
Subject: [PATCH 13/14] Add FDM feature, tests and doc example.
---
docs/modules/preprocessing/dim_reduction.rst | 1 +
docs/refs.bib | 12 +
examples/plot_fdm.py | 412 ++++++++++++++++++
skfda/preprocessing/dim_reduction/__init__.py | 2 +
skfda/preprocessing/dim_reduction/_fdm.py | 230 ++++++++++
skfda/tests/test_fdm.py | 351 +++++++++++++++
6 files changed, 1008 insertions(+)
create mode 100644 examples/plot_fdm.py
create mode 100644 skfda/preprocessing/dim_reduction/_fdm.py
create mode 100644 skfda/tests/test_fdm.py
diff --git a/docs/modules/preprocessing/dim_reduction.rst b/docs/modules/preprocessing/dim_reduction.rst
index d78d8705c..7bf81dc4c 100644
--- a/docs/modules/preprocessing/dim_reduction.rst
+++ b/docs/modules/preprocessing/dim_reduction.rst
@@ -43,5 +43,6 @@ of the covariance between the two data blocks.
.. autosummary::
:toctree: autosummary
+ skfda.preprocessing.dim_reduction.FDM
skfda.preprocessing.dim_reduction.FPCA
skfda.preprocessing.dim_reduction.FPLS
\ No newline at end of file
diff --git a/docs/refs.bib b/docs/refs.bib
index 005bc0112..cc88606cc 100644
--- a/docs/refs.bib
+++ b/docs/refs.bib
@@ -31,6 +31,18 @@ @article{baillo+grane_2009_local
keywords = {62G08,62G30,Cross-validation,Fourier expansion,Functional data,Kernel regression,Local linear regression,Nonparametric smoothing}
}
+@article{barroso++_2023_fdm,
+ author = {M. Barroso and C. M. Alaíz and J. L. Torrecilla and A. Fernández},
+ title = {Functional diffusion maps},
+ journal = {Statistics and Computing},
+ year = {2023},
+ volume = {34},
+ number = {1},
+ pages = {22},
+ doi = {10.1007/s11222-023-10332-1},
+ url = {https://doi.org/10.1007/s11222-023-10332-1}
+}
+
@article{berrendero++_2016_mrmr,
title = {The {{mRMR}} Variable Selection Method: A Comparative Study for Functional Data},
shorttitle = {The {{mRMR}} Variable Selection Method},
diff --git a/examples/plot_fdm.py b/examples/plot_fdm.py
new file mode 100644
index 000000000..a75efdeef
--- /dev/null
+++ b/examples/plot_fdm.py
@@ -0,0 +1,412 @@
+"""
+Functional Diffusion Maps
+============================================================================
+In this example, the use of the Functional Diffusion Map (FDM) technique is
+shown on different datasets.
+Firstly, an example of basic use of the technique is provided.
+Later, two examples of parameter tuning are presented, for embedding spaces
+of different dimensions.
+Finally, an application of the method to a real, non-synthetic dataset is
+provided.
+"""
+
+# Author: Eduardo Terrés Caballero
+# License: MIT
+
+from itertools import product
+
+import matplotlib.pyplot as plt
+import numpy as np
+from matplotlib.colors import ListedColormap
+from sklearn import datasets
+
+from skfda.datasets import fetch_phoneme
+from skfda.misc.covariances import Gaussian
+from skfda.preprocessing.dim_reduction import FDM
+from skfda.representation import FDataGrid
+
+random_state = 0
+
+####################################################################
+# Some examples shown here are further explained in the
+# article :footcite:t:`barroso++_2023_fdm`.
+
+
+####################################################################
+# **MOONS DATASET EXAMPLE**
+#
+# Firstly, a basic example of execution is presented using a functional version
+# of the moons dataset, a dataset consisting of two-dimensional coordinates
+# representing the positions of two different moons.
+n_samples, n_grid_pts = 100, 50
+data_moons, y = datasets.make_moons(
+ n_samples=n_samples,
+ noise=0,
+ random_state=random_state,
+)
+
+colors = ["blue", "orange"]
+cmap = ListedColormap(colors)
+plt.scatter(data_moons[:, 0], data_moons[:, 1], c=y, cmap=cmap)
+plt.title("Moons data")
+plt.show()
+
+####################################################################
+# Using a basis of two elements, the functional observation corresponding
+# to a multivariate observation is obtained by treating the coordinates
+# as coefficients that multiply the elements of the basis.
+# In other words, the multivariate vectors are interpreted as elements
+# of the basis.
+# Below is the code to generate the synthetic moons functional data.
+grid = np.linspace(-np.pi, np.pi, n_grid_pts)
+basis = np.array([np.sin(4 * grid), grid ** 2 + 2 * grid - 2])
+fd_moons = FDataGrid(
+ data_matrix=data_moons @ basis,
+ grid_points=grid,
+ dataset_name="Functional moons data",
+ argument_names=("x",),
+ coordinate_names=("f (x)",),
+)
+fd_moons.plot(linewidth=0.5, group=y, group_colors=colors)
+plt.xlim((-np.pi, np.pi))
+plt.show()
+
+####################################################################
+# Once the functional data is available, it simply remains to choose
+# the value of the parameters of the model.
+#
+# The FDM technique involves the use of a kernel operator that acts
+# as a measure of similarity for the data. In this case we will use
+# the Gaussian kernel, with a length scale parameter of 0.25.
+fdm = FDM(
+ n_components=2,
+ kernel=Gaussian(length_scale=0.25),
+ alpha=0,
+ n_steps=1,
+)
+embedding = fdm.fit_transform(fd_moons)
+
+plt.scatter(embedding[:, 0], embedding[:, 1], c=y, cmap=cmap)
+plt.title("Diffusion coordinates for the functional moons data")
+plt.show()
+
+####################################################################
+# As we can see, the functional diffusion map has correctly interpreted
+# the topological nature of the data, successfully separating the
+# coordinates associated with both moons.
+
+
+####################################################################
+# **SPIRALS DATASET EXAMPLE**
+#
+# Next is an example of parameter tuning in the form of a grid
+# search for a set of given values for the length_scale kernel parameter
+# and the alpha parameter of the FDM method.
+# We have two spirals, which are initially entangled and thus
+# indistinguishable to the machine.
+#
+# Below is the code for the generation of the spiral data and its
+# functional equivalent, following a similar dynamic as in the moons dataset.
+n_samples, n_grid_pts = 100, 50
+t = (np.pi / 4 + np.linspace(0, 4, n_samples)) * np.pi
+dx, dy = 10 * t * np.cos(t), 10 * t * np.sin(t)
+y = np.array([0] * n_samples + [1] * n_samples)
+data_spirals = np.column_stack((
+ np.concatenate((dx, -dx)), np.concatenate((dy, -dy)),
+))
+
+colors = ["yellow", "purple"]
+cmap = ListedColormap(colors)
+plt.scatter(data_spirals[:, 0], data_spirals[:, 1], c=y, cmap=cmap)
+plt.gca().set_aspect("equal", adjustable="box")
+plt.title("Spirals data")
+plt.show()
+
+# Define functional data object
+grid = np.linspace(-np.pi, np.pi, n_grid_pts)
+basis = np.array([grid * np.cos(grid) / 3, grid * np.sin(grid) / 3])
+fd_spirals = FDataGrid(
+ data_matrix=data_spirals @ basis,
+ grid_points=grid,
+ dataset_name="Functional spirals data",
+ argument_names=("x",),
+ coordinate_names=("f (x)",),
+)
+fd_spirals.plot(linewidth=0.5, group=y, group_colors=colors)
+plt.show()
+
+
+####################################################################
+# Once the functional data is ready, we will perform a grid search
+# for the following values of the parameters, as well as plot
+# the resulting embeddings for visual comparison.
+alpha_set = [0, 0.33, 0.66, 1]
+length_scale_set = [2.5, 3, 4.5, 7, 10, 11, 15]
+param_grid = product(alpha_set, length_scale_set)
+
+####################################################################
+fig, axes = plt.subplots(
+ len(alpha_set), len(length_scale_set), figsize=(16, 8),
+)
+
+for (alpha, length_scale), ax in zip(param_grid, axes.ravel()):
+ fdm = FDM(
+ n_components=2,
+ kernel=Gaussian(length_scale=length_scale),
+ alpha=alpha,
+ n_steps=1,
+ )
+ embedding = fdm.fit_transform(fd_spirals)
+
+ ax.scatter(embedding[:, 0], embedding[:, 1], c=y, cmap=cmap)
+ ax.set_xticklabels([])
+ ax.set_yticklabels([])
+
+for ax, alpha in zip(axes[:, 0], alpha_set):
+ ax.set_ylabel(f"$\\alpha$: {alpha}", size=20, rotation=0, ha="right")
+
+for ax, length_scale in zip(axes[0], length_scale_set):
+ ax.set_title(f"$len\_sc$: {length_scale}", size=20, va="bottom")
+
+plt.show()
+
+####################################################################
+# The first thing to notice is that the length scale parameter exerts
+# a greater influence on the resulting embedding than the parameter alpha.
+# In this sense, the figures of any given column are more similar than those
+# of any given row.
+# Thus, we shall set alpha equal to 0 because, in theory, it is
+# equivalent to skipping a normalization step in the process.
+#
+# Moreover, we can see that the optimal choice of the length scale parameter
+# of the kernel is 4.5, because it visually presents the clearest separation
+# between the trajectories of both spirals.
+# Hence, for a kernel length scale of 4.5 the method is able
+# to capture the local geometry of the spirals dataset.
+# For a small value of the kernel parameter (for example 1), contiguous points
+# in the same arm of the spiral are not considered close because the kernel is
+# too narrow, resulting in apparently random diffusion coordinates.
+# For a large value of the kernel parameter (for example 15), the kernel is
+# wide enough that points in contiguous spiral arms, which belong to different
+# trajectories, are considered similar. Hence the diffusion coordinates keep
+# these relations by maintaining both trajectories entangled.
+# In summary, for a length scale of 4.5 the kernel is wide enough
+# that points in the same arm of a trajectory are considered similar,
+# but not so wide that points in contiguous arms of the spiral are
+# also considered similar.
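+
+####################################################################
+# As a quick numeric check of this intuition (an added illustration, not
+# part of the original example), the Gaussian kernel value for two
+# observations at an arbitrary distance of 5 goes from essentially zero
+# for a narrow kernel to close to one for a wide kernel:
+for length_scale in (1, 4.5, 15):
+ similarity = np.exp(-5 ** 2 / (2 * length_scale ** 2))
+ print(f"length_scale={length_scale}: similarity={similarity:.3f}")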
+
+####################################################################
+# For a reliable comparison between embeddings, it is advisable to use
+# the same scale on all axes.
+# To illustrate this idea, the row for alpha equal to 0 is re-executed
+# below.
+alpha_set = [0]
+length_scale_set = [2.5, 3, 4.5, 7, 10, 11, 15]
+param_grid = product(alpha_set, length_scale_set)
+
+fig, axes = plt.subplots(
+ len(alpha_set),
+ len(length_scale_set),
+ figsize=(16, 4),
+)
+
+for (alpha, length_scale), ax in zip(param_grid, axes.ravel()):
+ fdm = FDM(
+ n_components=2,
+ kernel=Gaussian(length_scale=length_scale),
+ alpha=alpha,
+ n_steps=1,
+ )
+ embedding = fdm.fit_transform(fd_spirals)
+
+ ax.scatter(embedding[:, 0], embedding[:, 1], c=y, cmap=cmap)
+ ax.set_xlim((-0.4, 0.4))
+ ax.set_ylim((-0.4, 0.4))
+
+axes[0].set_ylabel(
+ f"$\\alpha$: {alpha_set[0]}", size=20, rotation=0, ha="right",
+)
+
+for ax, length_scale in zip(axes, length_scale_set):
+ ax.set_title(f"$len\_sc$: {length_scale}", size=20, va="bottom")
+
+plt.show()
+
+####################################################################
+# **SWISS ROLL DATASET EXAMPLE**
+#
+# So far, the above examples have been computed with a value of the
+# n_components parameter of 2. This implies that the resulting
+# diffusion coordinate points belong to a two-dimensional space and thus we
+# can provide a graphical representation.
+# The aim of this new section is to explore further possibilities
+# regarding n_components.
+#
+# We will now apply the method to a more complex example, the
+# Swiss roll dataset. This dataset consists of three-dimensional points
+# that lie on a manifold shaped like a Swiss roll.
+n_samples, n_grid_pts = 500, 100
+data_swiss, y = datasets.make_swiss_roll(
+ n_samples=n_samples,
+ noise=0,
+ random_state=random_state,
+)
+fig = plt.figure()
+axis = fig.add_subplot(111, projection="3d")
+axis.set_title("Swiss roll data")
+axis.scatter(data_swiss[:, 0], data_swiss[:, 1], data_swiss[:, 2], c=y)
+plt.show()
+
+####################################################################
+# Similarly to the previous examples, the functional data object is defined.
+# In this case a basis of three elements will be used, since the multivariate
+# data points belong to a three-dimensional space.
+# For clarity, only the first fifty functional observations are
+# plotted.
+grid = np.linspace(-np.pi, np.pi, n_grid_pts)
+basis = np.array([np.sin(4 * grid), np.cos(8 * grid), np.sin(12 * grid)])
+data_matrix = np.array(data_swiss) @ basis
+fd_swiss = FDataGrid(
+ data_matrix=data_matrix,
+ grid_points=grid,
+ dataset_name="Functional Swiss roll data",
+ argument_names=("x",),
+ coordinate_names=("f (x)",),
+)
+fd_swiss[:50].plot(linewidth=0.5, group=y[:50])
+plt.show()
+
+
+####################################################################
+# Now, the FDM method will be applied for different values of the
+# parameters, again in the form of a grid search.
+# Note that the diffusion coordinates will now consist of three components.
+alpha_set = [0, 0.5, 1]
+length_scale_set = [1.5, 2.5, 4, 5]
+param_grid = product(alpha_set, length_scale_set)
+
+####################################################################
+fig, axes = plt.subplots(
+ len(alpha_set),
+ len(length_scale_set),
+ figsize=(16, 8),
+ subplot_kw={"projection": "3d"},
+)
+
+for (alpha, length_scale), ax in zip(param_grid, axes.ravel()):
+ fdm = FDM(
+ n_components=3,
+ kernel=Gaussian(length_scale=length_scale),
+ alpha=alpha,
+ n_steps=1,
+ )
+ embedding = fdm.fit_transform(fd_swiss)
+
+ ax.scatter(embedding[:, 0], embedding[:, 1], embedding[:, 2], c=y)
+ ax.set_xticklabels([])
+ ax.set_yticklabels([])
+ ax.set_zticklabels([])
+ ax.set_title(f"$\\alpha$: {alpha} $len\_sc$: {length_scale}")
+
+plt.show()
+
+####################################################################
+# Let's take a closer look at the resulting embedding for a value
+# of length_scale and alpha equal to 2.5 and 0, respectively.
+alpha, length_scale = 0, 2.5
+fdm = FDM(
+ n_components=3,
+ kernel=Gaussian(length_scale=length_scale),
+ alpha=alpha,
+ n_steps=1,
+)
+embedding = fdm.fit_transform(fd_swiss)
+
+fig = plt.figure()
+ax = fig.add_subplot(111, projection="3d")
+ax.scatter(embedding[:, 0], embedding[:, 1], embedding[:, 2], c=y)
+ax.set_title(
+ "Diffusion coordinates for \n"
+ f"$\\alpha$: {alpha} $len\_sc$: {length_scale}",
+)
+plt.show()
+
+####################################################################
+# The choice of optimal parameters depends on the problem at hand.
+# The goal behind choosing values of length_scale equal to 2.5 and alpha equal
+# to 0 is to obtain an unrolled transformation of the Swiss roll.
+# Note that in the roll there are pairs of points whose Euclidean
+# distance is small but whose shortest path contained in the
+# manifold is significantly larger, since it must complete an entire loop.
+#
+# In this sense, the method has taken into account the
+# shortest path distance rather than the Euclidean one. Thus, one may argue
+# that the topological nature of the data has been respected.
+# These new diffusion coordinates could be useful to gain more insight into
+# the initial data through further analysis.
+
+#########################################################################
+# **REAL DATASET: PHONEME**
+#
+# The aim of this section is to provide an example of application of
+# the FDM method to a non-synthetic dataset.
+# Below is an example of execution using the phoneme dataset,
+# which consists of computed log-periodograms for five distinct
+# phonemes coming from recorded male speech in the TIMIT database.
+n_samples = 300
+colors = ["C0", "C1", "C2", "C3", "C4"]
+group_names = ["aa", "ao", "dcl", "iy", "sh"]
+
+# Fetch phoneme dataset
+fd_phoneme, y = fetch_phoneme(return_X_y=True)
+fd_phoneme, y = fd_phoneme[:n_samples], y[:n_samples]
+fd_phoneme.plot(
+ linewidth=0.7,
+ group=y,
+ group_colors=colors,
+ group_names=group_names,
+ legend=True,
+)
+plt.show()
+
+####################################################################
+# The resulting diffusion coordinates in three dimensions will be
+# plotted, using different views to better understand the plot.
+cmap = ListedColormap(colors)
+alpha, length_scale = 1, 10
+fdm = FDM(
+ n_components=3,
+ kernel=Gaussian(length_scale=length_scale),
+ alpha=alpha,
+ n_steps=1,
+)
+diffusion_coord = fdm.fit_transform(fd_phoneme)
+
+# Plot three views of the diffusion coordinates
+view_points = [(30, 70), (28, 0), (10, -120)]
+
+fig, axes = plt.subplots(
+ 1, len(view_points), figsize=(18, 6), subplot_kw={"projection": "3d"},
+)
+
+for view, ax in zip(view_points, axes.ravel()):
+ ax.scatter(
+ diffusion_coord[:, 0], diffusion_coord[:, 1], diffusion_coord[:, 2],
+ c=y, cmap=cmap,
+ )
+ ax.view_init(*view)
+ ax.set_title(f"View {view}", fontsize=26)
+plt.show()
+
+####################################################################
+# We can see that the diffusion coordinates for the different phonemes
+# have been clustered in the 3D space. This representation enables a
+# clearer separation of the data into the different phoneme groups.
+# In this way, the phoneme groups that are similar to each other, namely /aa/
+# and /ao/, are closer in the space. In fact, these two groups partly overlap
+# (orange and blue).
+
+
+###############################################################################
+# **References:**
+# .. footbibliography::
diff --git a/skfda/preprocessing/dim_reduction/__init__.py b/skfda/preprocessing/dim_reduction/__init__.py
index 5c8ae8405..6aad2fca8 100644
--- a/skfda/preprocessing/dim_reduction/__init__.py
+++ b/skfda/preprocessing/dim_reduction/__init__.py
@@ -12,6 +12,7 @@
"variable_selection",
],
submod_attrs={
+ "_fdm": ["FDM"],
"_fpca": ["FPCA"],
"_fpls": ["FPLS"],
"_neighbor_transforms": ["KNeighborsTransformer"],
@@ -19,6 +20,7 @@
)
if TYPE_CHECKING:
+ from ._fdm import FDM as FDM
from ._fpca import FPCA as FPCA
from ._fpls import FPLS as FPLS
from ._neighbor_transforms import (
diff --git a/skfda/preprocessing/dim_reduction/_fdm.py b/skfda/preprocessing/dim_reduction/_fdm.py
new file mode 100644
index 000000000..490fbd604
--- /dev/null
+++ b/skfda/preprocessing/dim_reduction/_fdm.py
@@ -0,0 +1,230 @@
+"""Functional Diffusion Maps Module."""
+from __future__ import annotations
+
+from typing import Callable
+
+import numpy as np
+import scipy
+from sklearn.utils.extmath import svd_flip
+from typing_extensions import Self
+
+from ..._utils._sklearn_adapter import BaseEstimator, InductiveTransformerMixin
+from ...representation import FData
+from ...typing._numpy import NDArrayFloat
+
+KernelFunction = Callable[[FData, FData | None], NDArrayFloat]
+
+
+class FDM(
+ BaseEstimator,
+ InductiveTransformerMixin[FData, NDArrayFloat, object],
+):
+ r"""
+ Functional diffusion maps.
+
+ Class that implements functional diffusion maps for both
+ basis and grid representations of the data.
+
+ Parameters:
+ n_components: Dimension of the space to which the embedded
+ functional data belongs. For visualization purposes,
+ a value of 2 or 3 should be used.
+ kernel: kernel function used over the functional observations.
+ It serves as a measure of connectivity or similarity between
+ points, where a higher value means greater connectivity.
+ alpha: density parameter in the interval [0, 1] used in the
+ normalization step. A value of 0 means the data distribution
+ is not taken into account during the normalization step.
+ The opposite holds for a higher value of alpha.
+ n_steps: Number of steps in the random walk.
+
+
+ Attributes:
+ transition_matrix\_: transition matrix computed from the data.
+ eigenvalues\_: highest n_components eigenvalues of transition_matrix\_
+ in descending order starting from the second highest.
+ eigenvectors\_right\_: right eigenvectors of transition\_matrix\_
+ corresponding to eigenvalues\_.
+ d\_alpha\_: vector of densities of the weighted graph.
+ training\_dataset\_: dataset used for training the method. It is needed
+ for the transform method.
+
+ Examples:
+ In this example we fetch the Canadian weather dataset and divide it
+ into train and test sets. We then obtain the diffusion coordinates
+ for the train set and predict these coordinates for the test set.
+
+ >>> from skfda.datasets import fetch_weather
+ >>> from skfda.representation import FDataGrid
+ >>> from skfda.misc.covariances import Gaussian
+ >>> X, y = fetch_weather(return_X_y=True, as_frame=True)
+ >>> fd : FDataGrid = X.iloc[:, 0].values
+ >>> fd_train = fd[:25]
+ >>> fd_test = fd[25:]
+ >>> fdm = FDM(
+ ... n_components=2,
+ ... kernel=Gaussian(variance=1, length_scale=1),
+ ... alpha=1,
+ ... n_steps=1
+ ... )
+ >>> embedding_train = fdm.fit_transform(X=fd_train)
+ >>> embedding_test = fdm.transform(X=fd_test)
+
+ Notes:
+ Performing fit and transform actions is not equivalent to performing
+ fit_transform. In the former case an approximation of the diffusion
+ coordinates is computed via the Nyström method. In the latter, the
+ true diffusion coordinates are computed.
+ """
+
+ def __init__(
+ self,
+ *,
+ n_components: int = 2,
+ kernel: KernelFunction,
+ alpha: float = 0,
+ n_steps: int = 1,
+ ) -> None:
+ self.n_components = n_components
+ self.kernel = kernel
+ self.alpha = alpha
+ self.n_steps = n_steps
+
+ def fit( # noqa: WPS238
+ self,
+ X: FData,
+ y: object = None,
+ ) -> Self:
+ """
+ Compute the transition matrix and save it.
+
+ Args:
+ X: Functional data for which to obtain diffusion coordinates.
+ y: Ignored.
+
+ Returns:
+ self
+
+ """
+ # Parameter validation
+ if self.n_components < 1:
+ raise ValueError(
+ f'Embedding dimension ({self.n_components}) cannot '
+ 'be less than 1. ',
+ )
+
+ if self.n_components >= X.n_samples:
+ raise ValueError(
+ f'Embedding dimension ({self.n_components}) cannot be '
+ f'greater or equal to the number of samples ({X.n_samples}).',
+ )
+
+ if self.alpha < 0 or self.alpha > 1:
+ raise ValueError(
+ f'Parameter alpha (= {self.alpha}) must '
+ 'be in the interval [0, 1].',
+ )
+
+ if self.n_steps < 0:
+ raise ValueError(
+ f'Parameter n_steps (= {self.n_steps}) must '
+ 'be greater than or equal to 0.',
+ )
+
+ # Construct the weighted graph
+ weighted_graph = self.kernel(X, X)
+
+ # Compute density of each vertex by adding the rows
+ self.d_alpha_ = np.sum(weighted_graph, axis=1) ** self.alpha
+
+ # Construct the normalized graph
+ norm_w_graph = weighted_graph / np.outer(self.d_alpha_, self.d_alpha_)
+
+ rows_sum = np.sum(norm_w_graph, axis=1)
+ self.transition_matrix_ = norm_w_graph / rows_sum[:, np.newaxis]
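+
+ # (Added note, not in the original patch: the two steps above implement
+ # the standard diffusion-maps normalization. With densities
+ # d_i = sum_j w_ij, the kernel is rescaled to
+ # w_ij / (d_i**alpha * d_j**alpha) and each row is then divided by its
+ # sum, so that transition_matrix_ is row-stochastic.)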
+
+ eigenvalues, eigenvectors_right_ = scipy.linalg.eig(
+ self.transition_matrix_,
+ )
+
+ # Remove highest eigenvalue and take the n_components
+ # highest eigenvalues in descending order
+ index_order = eigenvalues.argsort()[-2:-self.n_components - 2:-1]
+ eigenvalues = np.real(eigenvalues[index_order])
+ eigenvectors_right, _ = svd_flip(
+ eigenvectors_right_[:, index_order],
+ np.zeros_like(eigenvectors_right_[:, index_order]).T,
+ )
+ self.eigenvalues_ = eigenvalues
+ self.eigenvectors_right_ = eigenvectors_right
+
+ self.training_dataset_ = X
+
+ return self
+
+ def fit_transform(
+ self,
+ X: FData,
+ y: object = None,
+ ) -> NDArrayFloat:
+ """
+ Compute the diffusion coordinates for the functional data X.
+
+ The diffusion coordinate corresponding to the i-th curve is stored
+ in the i-th row of the returned matrix.
+ Note that fit_transform is not equivalent to applying fit and
+ transform.
+
+ Args:
+ X: Functional data for which to obtain diffusion coordinates.
+ y: Ignored.
+
+ Returns:
+ Diffusion coordinates for the functional data X.
+
+ """
+ self.fit(X, y)
+
+ # Compute and return the diffusion map
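+ # (Added note, not in the original patch: column i of the result is
+ # eigenvalues_[i] ** n_steps times the i-th right eigenvector, i.e.
+ # the diffusion coordinates after n_steps steps of the random walk.)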
+ return np.real(
+ self.eigenvectors_right_
+ * self.eigenvalues_ ** self.n_steps,
+ )
+
+ def transform(
+ self,
+ X: FData,
+ ) -> NDArrayFloat:
+ """
+ Compute the diffusion coordinates for the functional data X.
+
+ Compute the diffusion coordinates of out-of-sample data using the
+ eigenvectors and eigenvalues computed during the training.
+
+ Args:
+ X: Functional data for which to predict diffusion coordinates.
+
+ Returns:
+ Diffusion coordinates for the out-of-sample functional data X.
+
+ """
+ # Construct the weighted graph crossing the training
+ # and out-of-sample data
+ weighted_graph_out = self.kernel(X, self.training_dataset_)
+
+ # Compute density of each vertex by adding the rows
+ h_alpha = np.sum(weighted_graph_out, axis=1) ** self.alpha
+
+ # Construct the normalized graph
+ norm_w_graph_out = (
+ weighted_graph_out / np.outer(h_alpha, self.d_alpha_)
+ )
+
+ # Define transition matrix by normalizing by rows
+ rows_sum = np.sum(norm_w_graph_out, axis=1)
+ transition_matrix_out = norm_w_graph_out / rows_sum[:, np.newaxis]
+
+ return transition_matrix_out @ ( # type: ignore[no-any-return]
+ self.eigenvectors_right_
+ * self.eigenvalues_ ** (self.n_steps - 1)
+ )
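+
+# (Added note, not in the original patch: this is the Nystrom out-of-sample
+# extension. Since the cross transition matrix maps each training
+# eigenvector psi_i approximately to lambda_i * psi_i, the product above
+# approximates the diffusion coordinates lambda_i ** n_steps * psi_i for
+# the new samples, matching fit_transform on training data.)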
diff --git a/skfda/tests/test_fdm.py b/skfda/tests/test_fdm.py
new file mode 100644
index 000000000..566686bc5
--- /dev/null
+++ b/skfda/tests/test_fdm.py
@@ -0,0 +1,351 @@
+"""Tests for FDM."""
+from typing import Tuple
+
+import numpy as np
+import pytest
+from sklearn import datasets
+
+from skfda import FData, FDataBasis, FDataGrid
+from skfda.misc.covariances import Gaussian
+from skfda.misc.metrics import PairwiseMetric, l1_distance, l2_distance
+from skfda.preprocessing.dim_reduction import FDM
+from skfda.representation.basis import MonomialBasis
+from skfda.typing._numpy import NDArrayFloat
+
+
+def _discretize_fdatabasis(fd_basis: FDataBasis) -> FDataGrid:
+ return fd_basis.to_grid(
+ np.linspace(*fd_basis.basis.domain_range[0], 300),
+ )
+
+
+def rbf_kernel(fd: FData, sigma: float) -> NDArrayFloat:
+ """Calculate the radial basis function kernel.
+
+ Args:
+ fd: Functional data object
+ sigma: locality parameter
+
+ Returns:
+ Computed kernel matrix
+ """
+ norm_matrix = PairwiseMetric(l2_distance)(fd)
+ return np.exp( # type: ignore[no-any-return]
+ - np.square(norm_matrix) / (2 * sigma ** 2),
+ )
+
+
+def laplacian_kernel(fd: FData, sigma: float) -> NDArrayFloat:
+ """Calculate the radial basis function kernel.
+
+ Args:
+ fd: Functional data object
+ sigma: locality parameter
+
+ Returns:
+ Computed real value
+ """
+ norm_matrix = PairwiseMetric(l1_distance)(fd)
+ return np.exp(- norm_matrix / sigma) # type: ignore[no-any-return]
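+
+# Note (illustrative, not part of the original patch): with variance 1,
+# rbf_kernel(fd, sigma) coincides with the Gaussian covariance kernel
+# Gaussian(length_scale=sigma) applied to (fd, fd); for example:
+#
+# np.testing.assert_allclose(
+# rbf_kernel(fd, 2.0),
+# Gaussian(length_scale=2.0)(fd, fd),
+# )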
+
+
+##############################################################################
+# Fixtures
+##############################################################################
+dummy_fdata = FDataGrid(data_matrix=[[0.9]])
+
+
+@pytest.fixture(
+ params=[
+ FDM(kernel=Gaussian(), alpha=-1),
+ FDM(kernel=Gaussian(), n_components=0),
+ FDM(kernel=Gaussian(), n_components=2),
+ FDM(kernel=Gaussian(), n_steps=0),
+ ],
+)
+def data_grid_param_check(request: FDM) -> Tuple[FDM, FDataGrid]:
+ """Fixture for testing parameter checks."""
+ return request.param, dummy_fdata
+
+
+@pytest.fixture(
+ params=[
+ FDataGrid(
+ data_matrix=[
+ [52, 93, 15],
+ [72, 61, 21],
+ [83, 87, 75],
+ [75, 88, 24],
+ ],
+ grid_points=[0, 1 / 2, 1],
+ ),
+ ],
+)
+def precalculated_fdatagrid_example(
+ request: FDataGrid,
+) -> FDataGrid:
+ """Fixture for loading a precalculated FDataGrid example."""
+ return request.param # type: ignore[no-any-return]
+
+
+@pytest.fixture(
+ params=[
+ (
+ FDataBasis(
+ basis=MonomialBasis(domain_range=(0, 2), n_basis=3),
+ coefficients=[
+ [0, 0, 1],
+ [1, 1, 0],
+ [0, 2, 0],
+ ],
+ ),
+ FDataBasis(
+ basis=MonomialBasis(domain_range=(0, 2), n_basis=3),
+ coefficients=[
+ [-1, 1, 0],
+ ],
+ ),
+ ),
+ ],
+)
+def precalculated_fdatabasis_example(
+ request: FDataBasis,
+) -> Tuple[FDataBasis, FDataGrid, FDataBasis]:
+ """Load FDataBasis example.
+
+ Fixture for loading a precalculated FDataBasis example and discretizing
+ an FDataBasis into an FDataGrid object.
+ """
+ fd_basis: FDataBasis
+ fd_out: FDataBasis
+ fd_basis, fd_out = request.param
+ return (
+ fd_basis,
+ _discretize_fdatabasis(fd_basis),
+ fd_out,
+ )
+
+
+@pytest.fixture
+def moons_functional_dataset() -> FDataGrid:
+ """Fixture for loading moons dataset example."""
+ n_grid_pts, n_samples = 100, 20
+ data_moon, _ = datasets.make_moons(
+ n_samples=n_samples,
+ noise=0.0,
+ random_state=0,
+ )
+ grid = np.linspace(-np.pi, np.pi, n_grid_pts)
+
+ # Basis
+ basis_1 = np.sin(4 * grid)
+ basis_2 = (grid ** 2 + 2 * grid - 2)
+ basis = np.array([basis_1, basis_2])
+
+ # Generate functional data object
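+ # Each 2-D moons sample provides the coefficients of the two basis
+ # functions, so the resulting curves inherit the moons geometry.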
+ data_matrix = np.array(data_moon) @ basis
+ return FDataGrid(
+ data_matrix=data_matrix,
+ grid_points=grid,
+ )
+
+##############################################################################
+# Tests
+##############################################################################
+
+
+def test_param_check(data_grid_param_check: Tuple[FDM, FDataGrid]) -> None:
+ """Check that invalid parameters in fit raise exception."""
+ fdm, fd = data_grid_param_check
+
+ with pytest.raises(ValueError):
+ fdm.fit(fd)
+
+
+def test_precalculated_grid_example(
+ precalculated_fdatagrid_example: FDataGrid,
+) -> None:
+ """Compare the embedding in grid against the fda package."""
+ fd = precalculated_fdatagrid_example
+ fdm = FDM(
+ n_components=2,
+ kernel=Gaussian(length_scale=10),
+ alpha=0.9,
+ n_steps=2,
+ )
+ embedding = fdm.fit_transform(fd)
+
+ expected_embedding = [
+ [-0.0513176, 0.4572292],
+ [0.6188756, -0.2537263],
+ [-0.5478710, -0.3488652],
+ [-0.0525144, 0.3176671],
+ ]
+
+ np.testing.assert_allclose(
+ embedding,
+ expected_embedding,
+ atol=1e-7,
+ )
+
+
+def test_precalculated_basis_example(
+ precalculated_fdatabasis_example: Tuple[FDataBasis, FDataGrid, FDataBasis],
+) -> None:
+ """Compare the embedding in basis and grid against the fda package."""
+ fd_basis, fd_grid, _ = precalculated_fdatabasis_example
+
+ fdm_basis = FDM(
+ n_components=2,
+ kernel=Gaussian(),
+ alpha=1,
+ n_steps=2,
+ )
+ embedding_basis = fdm_basis.fit_transform(fd_basis)
+
+ fdm_grid = FDM(
+ n_components=2,
+ kernel=Gaussian(),
+ alpha=1,
+ n_steps=2,
+ )
+ embedding_grid = fdm_grid.fit_transform(fd_grid)
+
+ expected_transition_matrix = [
+ [0.52466, 0.20713, 0.26821],
+ [0.21187, 0.47341, 0.31472],
+ [0.27529, 0.31581, 0.40891],
+ ]
+
+ # Check transition matrix
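+ # (row-stochastic: each row of the expected matrix sums to 1)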
+ for tran_matrix in (
+ fdm_basis.transition_matrix_,
+ fdm_grid.transition_matrix_,
+ ):
+ np.testing.assert_allclose(
+ tran_matrix,
+ expected_transition_matrix,
+ atol=1e-5,
+ )
+
+ # Check diffusion coordinates
+ expected_embedding = [
+ [0.0687, -0.00285],
+ [-0.05426, -0.00648],
+ [-0.01607, 0.00944],
+ ]
+
+ for embedding in (embedding_basis, embedding_grid):
+ np.testing.assert_allclose(
+ embedding,
+ expected_embedding,
+ atol=1e-5,
+ )
+
+
+def test_nystrom(
+ precalculated_fdatabasis_example: Tuple[FDataBasis, FDataGrid, FDataBasis],
+) -> None:
+ """Test Nystrom method.
+
+ Compare the embedding of out-of-sample points in basis and grid
+ against the fda package via the Nystrom method.
+ """
+ fd_basis, fd_grid, fd_out = precalculated_fdatabasis_example
+
+ fdm_basis = FDM(
+ n_components=2,
+ kernel=Gaussian(),
+ alpha=1,
+ n_steps=2,
+ )
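+ # fit() learns the diffusion map on the training sample; transform()
+ # embeds the held-out function via the Nystrom extension.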
+ embedding_basis = fdm_basis.fit(fd_basis).transform(fd_out)
+
+ fdm_grid = FDM(
+ n_components=2,
+ kernel=Gaussian(),
+ alpha=1,
+ n_steps=2,
+ )
+ embedding_grid = fdm_grid.fit(fd_grid).transform(
+ _discretize_fdatabasis(fd_out),
+ )
+
+ expected_embedding = [
+ [0.156125, -0.021132],
+ ]
+
+ np.testing.assert_allclose(
+ embedding_basis,
+ expected_embedding,
+ atol=1e-3,
+ )
+
+ np.testing.assert_allclose(
+ embedding_grid,
+ expected_embedding,
+ atol=1e-3,
+ )
+
+
+def test_moons_dataset(
+ moons_functional_dataset: FDataGrid,
+) -> None:
+ """Test the embedding for a small version of the moons dataset example."""
+ fdata = moons_functional_dataset
+ alpha, sigma = (1.0, 2.5)
+ fdm = FDM(
+ n_components=2,
+ kernel=Gaussian(length_scale=sigma),
+ alpha=alpha,
+ n_steps=1,
+ )
+ embedding = fdm.fit_transform(fdata)
+
+ expected_embedding = [
+ [-0.07094005, -0.19475867],
+ [0.04054447, -0.21670899],
+ [0.09244529, -0.19569411],
+ [0.07094005, -0.19475867],
+ [0.13443593, -0.12554232],
+ [-0.21055562, 0.01815767],
+ [0.27732511, 0.1802182],
+ [-0.26962763, 0.16043396],
+ [0.29220798, 0.22086707],
+ [0.18339034, -0.04185513],
+ [0.29344042, 0.22420163],
+ [-0.29220798, 0.22086707],
+ [-0.09244529, -0.19569411],
+ [0.21055562, 0.01815767],
+ [-0.27732511, 0.1802182],
+ [-0.04054447, -0.21670899],
+ [0.26962763, 0.16043396],
+ [-0.13443593, -0.12554232],
+ [-0.29344042, 0.22420163],
+ [-0.18339034, -0.04185513],
+ ]
+
+ np.testing.assert_allclose(
+ embedding,
+ expected_embedding,
+ atol=1e-5,
+ )
+
+
+def test_gaussian_covariance(
+ moons_functional_dataset: FDataGrid,
+) -> None:
+ """Test Gaussian covariance in covariance module against RBF kernel."""
+ sigma = 0.1
+ fdata = moons_functional_dataset
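+ # Both expressions evaluate exp(-d_2(x, y)^2 / (2 * sigma^2)), so the
+ # resulting matrices should match.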
+ gaussian_cov_matrix = Gaussian(length_scale=sigma)(fdata)
+ rbf_kernel_matrix = rbf_kernel(fdata, sigma)
+
+ np.testing.assert_allclose(
+ gaussian_cov_matrix,
+ rbf_kernel_matrix,
+ atol=1e-7,
+ )
From eb33f51bb0fce5ed64e08df459d880455baa2ab8 Mon Sep 17 00:00:00 2001
From: E105D104U125
Date: Tue, 9 Jul 2024 19:14:04 +0200
Subject: [PATCH 14/14] Changes in style.
---
examples/plot_fdm.py | 21 +++++++++++++--------
1 file changed, 13 insertions(+), 8 deletions(-)
diff --git a/examples/plot_fdm.py b/examples/plot_fdm.py
index a75efdeef..10e9da21a 100644
--- a/examples/plot_fdm.py
+++ b/examples/plot_fdm.py
@@ -1,3 +1,5 @@
+# flake8: noqa: WPS440, WPS441
+
"""
Functional Diffusion Maps
============================================================================
@@ -166,7 +168,7 @@
ax.set_ylabel(f"$\\alpha$: {alpha}", size=20, rotation=0, ha="right")
for ax, length_scale in zip(axes[0], length_scale_set):
- ax.set_title(f"$len\_sc$: {length_scale}", size=20, va="bottom")
+ ax.set_title(f"$len-sc$: {length_scale}", size=20, va="bottom")
plt.show()
@@ -227,8 +229,8 @@
f"$\\alpha$: {alpha_set[0]}", size=20, rotation=0, ha="right",
)
-for ax, length_scale in zip(axes, length_scale_set):
- ax.set_title(f"$len\_sc$: {length_scale}", size=20, va="bottom")
+for ax, length_scale in zip(axes, length_scale_set):
+ ax.set_title(f"$len-sc$: {length_scale}", size=20, va="bottom")
plt.show()
@@ -293,7 +295,7 @@
subplot_kw={"projection": "3d"},
)
-for (alpha, length_scale), ax in zip(param_grid, axes.ravel()):
+for (alpha, length_scale), ax in zip(param_grid, axes.ravel()):
fdm = FDM(
n_components=3,
kernel=Gaussian(length_scale=length_scale),
@@ -306,7 +308,7 @@
ax.set_xticklabels([])
ax.set_yticklabels([])
ax.set_zticklabels([])
- ax.set_title(f"$\\alpha$: {alpha} $len\_sc$: {length_scale}")
+ ax.set_title(f"$\\alpha$: {alpha} $len-sc$: {length_scale}")
plt.show()
@@ -327,7 +329,7 @@
ax.scatter(embedding[:, 0], embedding[:, 1], embedding[:, 2], c=y)
ax.set_title(
"Diffusion coordinates for \n"
- f"$\\alpha$: {alpha} $len\_sc$: {length_scale}",
+ f"$\\alpha$: {alpha} $len-sc$: {length_scale}",
)
plt.show()
@@ -391,8 +393,11 @@
for view, ax in zip(view_points, axes.ravel()):
ax.scatter(
- diffusion_coord[:, 0], diffusion_coord[:, 1], diffusion_coord[:, 2],
- c=y, cmap=cmap,
+ diffusion_coord[:, 0],
+ diffusion_coord[:, 1],
+ diffusion_coord[:, 2],
+ c=y,
+ cmap=cmap,
)
ax.view_init(*view)
ax.set_title(f"View {view}", fontsize=26)