From 8e2323d3e2f7c3af1f3de3324a75f03177a27787 Mon Sep 17 00:00:00 2001 From: E105D104U125 <72515278+E105D104U125@users.noreply.github.com> Date: Thu, 15 Feb 2024 15:49:44 +0100 Subject: [PATCH 01/14] Converted tests to pytests and added FData tests. Converted tests from old test_covariances file to pytest format and added tests for functional data objects. --- skfda/tests/test_covariances.py | 450 ++++++++++++++++++++++++-------- 1 file changed, 344 insertions(+), 106 deletions(-) diff --git a/skfda/tests/test_covariances.py b/skfda/tests/test_covariances.py index 1005da49d..f9a54c17a 100644 --- a/skfda/tests/test_covariances.py +++ b/skfda/tests/test_covariances.py @@ -1,106 +1,344 @@ -import unittest - -import numpy as np - -import skfda.misc.covariances - - -class TestsSklearn(unittest.TestCase): - - def setUp(self) -> None: - unittest.TestCase.setUp(self) - - self.x = np.linspace(-1, 1, 1000)[:, np.newaxis] - - def _test_compare_sklearn( - self, - cov: skfda.misc.covariances.Covariance, - ) -> None: - cov_sklearn = cov.to_sklearn() - cov_matrix = cov(self.x, self.x) - cov_sklearn_matrix = cov_sklearn(self.x) - - np.testing.assert_array_almost_equal(cov_matrix, cov_sklearn_matrix) - - def test_linear(self) -> None: - - for variance in (1, 2): - for intercept in (0, 1, 2): - with self.subTest(variance=variance, intercept=intercept): - cov = skfda.misc.covariances.Linear( - variance=variance, intercept=intercept) - self._test_compare_sklearn(cov) - - def test_polynomial(self) -> None: - - # Test a couple of non-default parameters only for speed - for variance in (2,): - for intercept in (0, 2): - for slope in (1, 2): - for degree in (1, 2, 3): - with self.subTest( - variance=variance, - intercept=intercept, - slope=slope, - degree=degree, - ): - cov = skfda.misc.covariances.Polynomial( - variance=variance, - intercept=intercept, - slope=slope, - degree=degree, - ) - self._test_compare_sklearn(cov) - - def test_gaussian(self) -> None: - - for variance in (1, 2): - for length_scale in (0.5, 1, 2): - with self.subTest( - variance=variance, - length_scale=length_scale, - ): - cov = skfda.misc.covariances.Gaussian( - variance=variance, - length_scale=length_scale, - ) - self._test_compare_sklearn(cov) - - def test_exponential(self) -> None: - - for variance in (1, 2): - for length_scale in (0.5, 1, 2): - with self.subTest( - variance=variance, - length_scale=length_scale, - ): - cov = skfda.misc.covariances.Exponential( - variance=variance, - length_scale=length_scale, - ) - self._test_compare_sklearn(cov) - - def test_matern(self) -> None: - - # Test a couple of non-default parameters only for speed - for variance in (2,): - for length_scale in (0.5,): - for nu in (0.5, 1, 1.5, 2.5, 3.5, np.inf): - with self.subTest( - variance=variance, - length_scale=length_scale, - nu=nu, - ): - cov = skfda.misc.covariances.Matern( - variance=variance, - length_scale=length_scale, - nu=nu, - ) - self._test_compare_sklearn(cov) - - def test_white_noise(self) -> None: - - for variance in (1, 2): - with self.subTest(variance=variance): - cov = skfda.misc.covariances.WhiteNoise(variance=variance) - self._test_compare_sklearn(cov) +""" +Tests for Covariance module. + +This file includes tests for multivariate data from the previous version +of the file. It additionally incorporates tests cases for functional data +objects. 
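+
+The multivariate kernels are compared against their scikit-learn
+counterparts, obtained through each kernel's ``to_sklearn`` method.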
+""" +import pytest +from typing import Tuple + +import numpy as np + +import skfda.misc.covariances as cov +from skfda import FDataGrid, FDataBasis +from skfda.datasets import fetch_phoneme +from skfda.representation.basis import MonomialBasis + + +def _test_compare_sklearn( + multivariate_data, + cov: cov.Covariance, +) -> None: + cov_sklearn = cov.to_sklearn() + cov_matrix = cov(multivariate_data, multivariate_data) + cov_sklearn_matrix = cov_sklearn(multivariate_data) + + np.testing.assert_array_almost_equal(cov_matrix, cov_sklearn_matrix) + +############################################################################## +# Fixtures +############################################################################## + + +@pytest.fixture +def fetch_phoneme_fixture() -> FDataGrid: + """Fixture for loading the phoneme dataset example.""" + fd, _ = fetch_phoneme(return_X_y=True) + return fd[:20] + + +@pytest.fixture( + params=[ + cov.Linear(), + cov.Polynomial(), + cov.Gaussian(), + cov.Exponential(), + cov.Matern(), + ], +) +def covariances_fixture(request) -> cov.Covariance: + """Fixture for getting a covariance kernel function.""" + return request.param + + +@pytest.fixture( + params=[ + cov.Brownian(), + cov.WhiteNoise(), + ], +) +def covariances_raise_fixture(request) -> cov.Covariance: + """Fixture for getting a covariance kernel that raises a ValueError.""" + return request.param + + +@pytest.fixture +def fdatabasis_data() -> Tuple[FDataBasis, FDataBasis]: + """Fixture for loading fdatabasis objects.""" + basis = MonomialBasis( + n_basis=2, + domain_range=(-2, 2), + ) + + fd1 = FDataBasis( + basis=basis, + coefficients=[ + [1, 0], + [1, 2], + ], + ) + + fd2 = FDataBasis( + basis=basis, + coefficients=[ + [0, 1], + ], + ) + + return fd1, fd2 + + +@pytest.fixture +def multivariate_data() -> None: + """Fixture for loading multivariate data.""" + return np.linspace(-1, 1, 1000)[:, np.newaxis] + + +@pytest.fixture( + params=[1, 2], +) +def variance_param(request) -> np.ndarray: + """Fixture for loading variance parameter.""" + return request.param + + +@pytest.fixture( + params=[0, 1, 2], +) +def intercept_param(request) -> np.ndarray: + """Fixture for loading intercept parameter.""" + return request.param + + +@pytest.fixture( + params=[1, 2], +) +def slope_param(request) -> np.ndarray: + """Fixture for loading slope parameter.""" + return request.param + + +@pytest.fixture( + params=[1, 2, 3], +) +def degree_param(request) -> np.ndarray: + """Fixture for loading degree parameter.""" + return request.param + + +@pytest.fixture( + params=[1, 2, 3], +) +def length_scale_param(request) -> np.ndarray: + """Fixture for loading length scale parameter.""" + return request.param + + +@pytest.fixture( + params=[0.5, 1, 1.5, 2.5, 3.5, np.inf], +) +def nu_param(request) -> np.ndarray: + """Fixture for loading nu parameter.""" + return request.param + + +############################################################################## +# Tests +############################################################################## + + +def test_covariances(fetch_phoneme_fixture, covariances_fixture) -> None: + """Check that invalid parameters in fit raise exception.""" + fd = fetch_phoneme_fixture + cov_kernel = covariances_fixture + + # Also test that it does not fail + res1 = cov_kernel(fd, fd) + res2 = cov_kernel(fd) + + np.testing.assert_allclose( + res1, + res2, + atol=1e-7, + ) + + +def test_raises(fetch_phoneme_fixture, covariances_raise_fixture): + """Check that it raises a ValueError exception.""" + fd = 
fetch_phoneme_fixture + cov_kernel = covariances_raise_fixture + + np.testing.assert_raises( + ValueError, + cov_kernel, + fd, + ) + + +def test_fdatabasis_example_linear(fdatabasis_data): + """Check a precalculated example for Linear covariance kernel.""" + fd1, fd2 = fdatabasis_data + res1 = cov.Linear(variance=1 / 2, intercept=3)(fd1, fd2) + res2 = np.array([[3 / 2], [3 / 2 + 32 / 6]]) + np.testing.assert_allclose( + res1, + res2, + rtol=1e-6, + ) + + +def test_fdatabasis_example_polynomial(fdatabasis_data): + """Check a precalculated example for Polynomial covariance kernel.""" + fd1, fd2 = fdatabasis_data + res1 = cov.Polynomial( + variance=1 / 3, + slope=2, + intercept=1, + degree=2, + )(fd1, fd2) + res2 = np.array([[1 / 3], [67**2 / 3**3]]) + np.testing.assert_allclose( + res1, + res2, + rtol=1e-6, + ) + + +def test_fdatabasis_example_gaussian(fdatabasis_data): + """Check a precalculated example for Gaussian covariance kernel.""" + fd1, fd2 = fdatabasis_data + res1 = cov.Gaussian(variance=3, length_scale=2)(fd1, fd2) + res2 = np.array([ + [3 * np.exp(-7 / 6)], + [3 * np.exp(-7 / 6)], + ]) + np.testing.assert_allclose( + res1, + res2, + rtol=1e-6, + ) + + +def test_fdatabasis_example_exponential(fdatabasis_data): + """Check a precalculated example for Exponential covariance kernel.""" + fd1, fd2 = fdatabasis_data + res1 = cov.Exponential(variance=4, length_scale=5)(fd1, fd2) + res2 = np.array([ + [4 * np.exp(-np.sqrt(28 / 3) / 5)], + [4 * np.exp(-np.sqrt(28 / 3) / 5)], + ]) + np.testing.assert_allclose( + res1, + res2, + rtol=1e-6, + ) + + +def test_fdatabasis_example_matern(fdatabasis_data): + """Check a precalculated example for Matern covariance kernel.""" + fd1, fd2 = fdatabasis_data + res1 = cov.Matern(variance=2, length_scale=3, nu=2)(fd1, fd2) + res2 = np.array([ + [(2 / 3) ** 2 * (28 / 3) * 0.239775899566], + [(2 / 3) ** 2 * (28 / 3) * 0.239775899566], + ]) + np.testing.assert_allclose( + res1, + res2, + rtol=1e-6, + ) + + +def test_multivariate_linear( + multivariate_data, + variance_param, + intercept_param, +) -> None: + """Test linear covariance kernel against scikit learn's kernel.""" + _test_compare_sklearn( + multivariate_data, + cov.Linear( + variance=variance_param, + intercept=intercept_param, + ), + ) + + +def test_multivariate_polynomial( + multivariate_data, + variance_param, + intercept_param, + slope_param, + degree_param, +) -> None: + """Test polynomial covariance kernel against scikit learn's kernel.""" + _test_compare_sklearn( + multivariate_data, + cov.Polynomial( + variance=variance_param, + intercept=intercept_param, + slope=slope_param, + degree=degree_param, + ), + ) + + +def test_multivariate_gaussian( + multivariate_data, + variance_param, + length_scale_param, +) -> None: + """Test gaussian covariance kernel against scikit learn's kernel.""" + _test_compare_sklearn( + multivariate_data, + cov.Gaussian( + variance=variance_param, + length_scale=length_scale_param, + ), + ) + + +def test_multivariate_exponential( + multivariate_data, + variance_param, + length_scale_param, +) -> None: + """Test exponential covariance kernel against scikit learn's kernel.""" + _test_compare_sklearn( + multivariate_data, + cov.Exponential( + variance=variance_param, + length_scale=length_scale_param, + ), + ) + + +def test_multivariate_matern( + multivariate_data, + variance_param, + length_scale_param, + nu_param, +) -> None: + """Test matern covariance kernel against scikit learn's kernel.""" + _test_compare_sklearn( + multivariate_data, + cov.Matern( + 
variance=variance_param, + length_scale=length_scale_param, + nu=nu_param, + ), + ) + + +def test_multivariate_white_noise( + multivariate_data, + variance_param, +) -> None: + """Test white noise covariance kernel against scikit learn's kernel.""" + _test_compare_sklearn( + multivariate_data, + cov.WhiteNoise( + variance=variance_param, + ), + ) From 16d8eb023a954a61d70c095bca8f1fa0d748cc7e Mon Sep 17 00:00:00 2001 From: E105D104U125 <72515278+E105D104U125@users.noreply.github.com> Date: Thu, 15 Feb 2024 15:52:39 +0100 Subject: [PATCH 02/14] Adapted covariances to accept FData objects. Adapted Linear, Polynomial, Exponential, Gaussian and Matern covariances and added control messages for covariance kernels that do not support functional data objects. --- skfda/misc/covariances.py | 1811 +++++++++++++++++++------------------ 1 file changed, 941 insertions(+), 870 deletions(-) diff --git a/skfda/misc/covariances.py b/skfda/misc/covariances.py index 43635eb02..9bce97d74 100644 --- a/skfda/misc/covariances.py +++ b/skfda/misc/covariances.py @@ -1,870 +1,941 @@ -from __future__ import annotations - -import abc -from typing import Callable, Sequence, Tuple, Union - -import matplotlib.pyplot as plt -import numpy as np -import sklearn.gaussian_process.kernels as sklearn_kern -from matplotlib.figure import Figure -from scipy.special import gamma, kv - -from ..representation import FData, FDataBasis, FDataGrid -from ..representation.basis import TensorBasis -from ..typing._numpy import ArrayLike, NDArrayFloat - - -def _squared_norms(x: NDArrayFloat, y: NDArrayFloat) -> NDArrayFloat: - return ( # type: ignore[no-any-return] - (x[:, np.newaxis, :] - y[np.newaxis, :, :]) ** 2 - ).sum(2) - - -CovarianceLike = Union[ - float, - NDArrayFloat, - Callable[[ArrayLike, ArrayLike], NDArrayFloat], -] - - -def _transform_to_2d(t: ArrayLike) -> NDArrayFloat: - """Transform 1d arrays in column vectors.""" - t = np.asfarray(t) - - dim = len(t.shape) - assert dim <= 2 - - if dim < 2: - t = np.atleast_2d(t).T - - return t - - -def _execute_covariance( - covariance: CovarianceLike, - x: ArrayLike, - y: ArrayLike, -) -> NDArrayFloat: - """Execute a covariance function.""" - x = _transform_to_2d(x) - y = _transform_to_2d(y) - - if isinstance(covariance, (int, float)): - return np.array(covariance) - else: - if callable(covariance): - result = covariance(x, y) - elif isinstance(covariance, np.ndarray): - result = covariance - else: - # GPy kernel - result = covariance.K(x, y) - - assert result.shape[0] == len(x) - assert result.shape[1] == len(y) - return result - - -class Covariance(abc.ABC): - """Abstract class for covariance functions.""" - - _parameters_str: Sequence[Tuple[str, str]] - _latex_formula: str - - @abc.abstractmethod - def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat: - pass - - def heatmap(self, limits: Tuple[float, float] = (-1, 1)) -> Figure: - """Return a heatmap plot of the covariance function.""" - from ..exploratory.visualization._utils import _create_figure - - x = np.linspace(*limits, 1000) - - cov_matrix = self(x, x) - - fig = _create_figure() - ax = fig.add_subplot(1, 1, 1) - ax.imshow( - cov_matrix, - extent=[limits[0], limits[1], limits[1], limits[0]], - ) - ax.set_title(f"Covariance function in [{limits[0]}, {limits[1]}]") - - return fig - - def _sample_trajectories_plot(self) -> Figure: - from ..datasets import make_gaussian_process - - fd = make_gaussian_process( - start=-1, - n_samples=10, - cov=self, - random_state=0, - ) - fig = fd.plot() - fig.axes[0].set_title("Sample 
trajectories") - return fig - - def __repr__(self) -> str: - - params_str = ', '.join( - f'{n}={getattr(self, n)}' for n, _ in self._parameters_str - ) - - return ( - f"{self.__module__}.{type(self).__qualname__}(" - f"{params_str}" - f")" - ) - - def _latex_content(self) -> str: - params_str = ''.join( - fr'{l} &= {getattr(self, n)} \\' for n, l in self._parameters_str - ) - - return ( - fr"{self._latex_formula} \\" - r"\text{where:}" - r"\begin{aligned}" - fr"\qquad{params_str}" - r"\end{aligned}" - ) - - def _repr_latex_(self) -> str: - return fr"\(\displaystyle {self._latex_content()}\)" - - def _repr_html_(self) -> str: - from ..exploratory.visualization._utils import _figure_to_svg - - fig = self.heatmap() - heatmap = _figure_to_svg(fig) - plt.close(fig) - - fig = self._sample_trajectories_plot() - sample_trajectories = _figure_to_svg(fig) - plt.close(fig) - - row_style = '' - - def column_style(percent: float, margin_top: str = "0") -> str: - return ( - f'style="display: inline-block; ' - f'margin:0; ' - f'margin-top: {margin_top}; ' - f'width:{percent}%; ' - f'height:auto;' - f'vertical-align: middle"' - ) - - return fr""" -
-        <div {row_style}>
-            <div {column_style(100, margin_top='25px')}>
-                \[{self._latex_content()}\]
-            </div>
-        </div>
-        <div {row_style}>
-            <div {column_style(48)}>
-                {sample_trajectories}
-            </div>
-            <div {column_style(48)}>
-                {heatmap}
-            </div>
-        </div>
- """ - - def to_sklearn(self) -> sklearn_kern.Kernel: - """Convert it to a sklearn kernel, if there is one.""" - raise NotImplementedError( - f"{type(self).__name__} covariance not " - f"implemented in scikit-learn", - ) - - -class Brownian(Covariance): - r""" - Brownian covariance function. - - The covariance function is - - .. math:: - K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + |x' - \mathcal{O}| - - |x - x'|}{2} - - where :math:`\sigma^2` is the variance at distance 1 from - :math:`\mathcal{O}` and :math:`\mathcal{O}` is the origin point. - If :math:`\mathcal{O} = 0` (the default) and we only - consider positive values, the formula can be simplified as - - .. math:: - K(x, y) = \sigma^2 \min(x, y). - - Heatmap plot of the covariance function: - - .. jupyter-execute:: - - from skfda.misc.covariances import Brownian - import matplotlib.pyplot as plt - - Brownian().heatmap(limits=(0, 1)) - plt.show() - - Example of Gaussian process trajectories using this covariance: - - .. jupyter-execute:: - - from skfda.misc.covariances import Brownian - from skfda.datasets import make_gaussian_process - import matplotlib.pyplot as plt - - gp = make_gaussian_process( - n_samples=10, cov=Brownian(), random_state=0) - gp.plot() - plt.show() - - Default representation in a Jupyter notebook: - - .. jupyter-execute:: - - from skfda.misc.covariances import Brownian - - Brownian() - - """ - _latex_formula = ( - r"K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + " - r"|x' - \mathcal{O}| - |x - x'|}{2}" - ) - - _parameters_str = [ - ("variance", r"\sigma^2"), - ("origin", r"\mathcal{O}"), - ] - - def __init__(self, *, variance: float = 1, origin: float = 0) -> None: - self.variance = variance - self.origin = origin - - def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat: - x = _transform_to_2d(x) - self.origin - y = _transform_to_2d(y) - self.origin - - sum_norms = np.add.outer( - np.linalg.norm(x, axis=-1), - np.linalg.norm(y, axis=-1), - ) - norm_sub = np.linalg.norm( - x[:, np.newaxis, :] - y[np.newaxis, :, :], - axis=-1, - ) - - return ( # type: ignore[no-any-return] - self.variance * (sum_norms - norm_sub) / 2 - ) - - -class Linear(Covariance): - r""" - Linear covariance function. - - The covariance function is - - .. math:: - K(x, x') = \sigma^2 (x^T x' + c) - - where :math:`\sigma^2` is the scale of the variance and - :math:`c` is the intercept. - - Heatmap plot of the covariance function: - - .. jupyter-execute:: - - from skfda.misc.covariances import Linear - import matplotlib.pyplot as plt - - Linear().heatmap(limits=(0, 1)) - plt.show() - - Example of Gaussian process trajectories using this covariance: - - .. jupyter-execute:: - - from skfda.misc.covariances import Linear - from skfda.datasets import make_gaussian_process - import matplotlib.pyplot as plt - - gp = make_gaussian_process( - n_samples=10, cov=Linear(), random_state=0) - gp.plot() - plt.show() - - Default representation in a Jupyter notebook: - - .. 
jupyter-execute:: - - from skfda.misc.covariances import Linear - - Linear() - - """ - - _latex_formula = r"K(x, x') = \sigma^2 (x^T x' + c)" - - _parameters_str = [ - ("variance", r"\sigma^2"), - ("intercept", "c"), - ] - - def __init__(self, *, variance: float = 1, intercept: float = 0) -> None: - self.variance = variance - self.intercept = intercept - - def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat: - x = _transform_to_2d(x) - y = _transform_to_2d(y) - - return self.variance * (x @ y.T + self.intercept) - - def to_sklearn(self) -> sklearn_kern.Kernel: - return ( - self.variance - * (sklearn_kern.DotProduct(0) + self.intercept) - ) - - -class Polynomial(Covariance): - r""" - Polynomial covariance function. - - The covariance function is - - .. math:: - K(x, x') = \sigma^2 (\alpha x^T x' + c)^d - - where :math:`\sigma^2` is the scale of the variance, - :math:`\alpha` is the slope, :math:`d` the degree of the - polynomial and :math:`c` is the intercept. - - Heatmap plot of the covariance function: - - .. jupyter-execute:: - - from skfda.misc.covariances import Polynomial - import matplotlib.pyplot as plt - - Polynomial().heatmap(limits=(0, 1)) - plt.show() - - Example of Gaussian process trajectories using this covariance: - - .. jupyter-execute:: - - from skfda.misc.covariances import Polynomial - from skfda.datasets import make_gaussian_process - import matplotlib.pyplot as plt - - gp = make_gaussian_process( - n_samples=10, cov=Polynomial(), random_state=0) - gp.plot() - plt.show() - - Default representation in a Jupyter notebook: - - .. jupyter-execute:: - - from skfda.misc.covariances import Polynomial - - Polynomial() - - """ - - _latex_formula = r"K(x, x') = \sigma^2 (\alpha x^T x' + c)^d" - - _parameters_str = [ - ("variance", r"\sigma^2"), - ("intercept", "c"), - ("slope", r"\alpha"), - ("degree", "d"), - ] - - def __init__( - self, - *, - variance: float = 1, - intercept: float = 0, - slope: float = 1, - degree: float = 2, - ) -> None: - self.variance = variance - self.intercept = intercept - self.slope = slope - self.degree = degree - - def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat: - x = _transform_to_2d(x) - y = _transform_to_2d(y) - - return ( - self.variance - * (self.slope * x @ y.T + self.intercept) ** self.degree - ) - - def to_sklearn(self) -> sklearn_kern.Kernel: - return ( - self.variance - * (self.slope * sklearn_kern.DotProduct(0) + self.intercept) - ** self.degree - ) - - -class Gaussian(Covariance): - r""" - Gaussian covariance function. - - The covariance function is - - .. math:: - K(x, x') = \sigma^2 \exp\left(-\frac{||x - x'||^2}{2l^2}\right) - - where :math:`\sigma^2` is the variance and :math:`l` is the length scale. - - Heatmap plot of the covariance function: - - .. jupyter-execute:: - - from skfda.misc.covariances import Gaussian - import matplotlib.pyplot as plt - - Gaussian().heatmap(limits=(0, 1)) - plt.show() - - Example of Gaussian process trajectories using this covariance: - - .. jupyter-execute:: - - from skfda.misc.covariances import Gaussian - from skfda.datasets import make_gaussian_process - import matplotlib.pyplot as plt - - gp = make_gaussian_process( - n_samples=10, cov=Gaussian(), random_state=0) - gp.plot() - plt.show() - - Default representation in a Jupyter notebook: - - .. 
jupyter-execute:: - - from skfda.misc.covariances import Gaussian - - Gaussian() - - """ - - _latex_formula = ( - r"K(x, x') = \sigma^2 \exp\left(-\frac{\|x - x'\|^2}{2l^2}" - r"\right)" - ) - - _parameters_str = [ - ("variance", r"\sigma^2"), - ("length_scale", "l"), - ] - - def __init__(self, *, variance: float = 1, length_scale: float = 1): - self.variance = variance - self.length_scale = length_scale - - def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat: - x = _transform_to_2d(x) - y = _transform_to_2d(y) - - x_y = _squared_norms(x, y) - - return self.variance * np.exp(-x_y / (2 * self.length_scale ** 2)) - - def to_sklearn(self) -> sklearn_kern.Kernel: - return ( - self.variance * sklearn_kern.RBF(length_scale=self.length_scale) - ) - - -class Exponential(Covariance): - r""" - Exponential covariance function. - - The covariance function is - - .. math:: - K(x, x') = \sigma^2 \exp\left(-\frac{\|x - x'\|}{l}\right) - - where :math:`\sigma^2` is the variance and :math:`l` is the length scale. - - Heatmap plot of the covariance function: - - .. jupyter-execute:: - - from skfda.misc.covariances import Exponential - import matplotlib.pyplot as plt - - Exponential().heatmap(limits=(0, 1)) - plt.show() - - Example of Gaussian process trajectories using this covariance: - - .. jupyter-execute:: - - from skfda.misc.covariances import Exponential - from skfda.datasets import make_gaussian_process - import matplotlib.pyplot as plt - - gp = make_gaussian_process( - n_samples=10, cov=Exponential(), random_state=0) - gp.plot() - plt.show() - - Default representation in a Jupyter notebook: - - .. jupyter-execute:: - - from skfda.misc.covariances import Exponential - - Exponential() - - """ - - _latex_formula = ( - r"K(x, x') = \sigma^2 \exp\left(-\frac{||x - x'||}{l}" - r"\right)" - ) - - _parameters_str = [ - ("variance", r"\sigma^2"), - ("length_scale", "l"), - ] - - def __init__( - self, - *, - variance: float = 1, - length_scale: float = 1, - ) -> None: - self.variance = variance - self.length_scale = length_scale - - def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat: - x = _transform_to_2d(x) - y = _transform_to_2d(y) - - x_y = _squared_norms(x, y) - return self.variance * np.exp(-np.sqrt(x_y) / (self.length_scale)) - - def to_sklearn(self) -> sklearn_kern.Kernel: - return ( - self.variance - * sklearn_kern.Matern(length_scale=self.length_scale, nu=0.5) - ) - - -class WhiteNoise(Covariance): - r""" - Gaussian covariance function. - - The covariance function is - - .. math:: - K(x, x')= \begin{cases} - \sigma^2, \quad x = x' \\ - 0, \quad x \neq x'\\ - \end{cases} - - where :math:`\sigma^2` is the variance. - - Heatmap plot of the covariance function: - - .. jupyter-execute:: - - from skfda.misc.covariances import WhiteNoise - import matplotlib.pyplot as plt - - WhiteNoise().heatmap(limits=(0, 1)) - plt.show() - - Example of Gaussian process trajectories using this covariance: - - .. jupyter-execute:: - - from skfda.misc.covariances import WhiteNoise - from skfda.datasets import make_gaussian_process - import matplotlib.pyplot as plt - - gp = make_gaussian_process( - n_samples=10, cov=WhiteNoise(), random_state=0) - gp.plot() - plt.show() - - Default representation in a Jupyter notebook: - - .. 
jupyter-execute:: - - from skfda.misc.covariances import WhiteNoise - - WhiteNoise() - - """ - - _latex_formula = ( - r"K(x, x')= \begin{cases} \sigma^2, \quad x = x' \\" - r"0, \quad x \neq x'\\ \end{cases}" - ) - - _parameters_str = [("variance", r"\sigma^2")] - - def __init__(self, *, variance: float = 1): - self.variance = variance - - def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat: - x = _transform_to_2d(x) - return self.variance * np.eye(x.shape[0]) - - def to_sklearn(self) -> sklearn_kern.Kernel: - return sklearn_kern.WhiteKernel(noise_level=self.variance) - - -class Matern(Covariance): - r""" - Matérn covariance function. - - The covariance function is - - .. math:: - K(x, x') = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)} - \left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)^{\nu} - K_{\nu}\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right) - - where :math:`\sigma^2` is the variance, :math:`l` is the length scale - and :math:`\nu` controls the smoothness of the related Gaussian process. - The trajectories of a Gaussian process with Matérn covariance is - :math:`\lceil \nu \rceil - 1` times differentiable. - - - Heatmap plot of the covariance function: - - .. jupyter-execute:: - - from skfda.misc.covariances import Matern - import matplotlib.pyplot as plt - - Matern().heatmap(limits=(0, 1)) - plt.show() - - Example of Gaussian process trajectories using this covariance: - - .. jupyter-execute:: - - from skfda.misc.covariances import Matern - from skfda.datasets import make_gaussian_process - import matplotlib.pyplot as plt - - gp = make_gaussian_process( - n_samples=10, cov=Matern(), random_state=0) - gp.plot() - plt.show() - - Default representation in a Jupyter notebook: - - .. jupyter-execute:: - - from skfda.misc.covariances import Matern - - Matern() - - """ - _latex_formula = ( - r"K(x, x') = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)}" - r"\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)^{\nu}" - r"K_{\nu}\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)" - ) - - _parameters_str = [ - ("variance", r"\sigma^2"), - ("length_scale", "l"), - ("nu", r"\nu"), - ] - - def __init__( - self, - *, - variance: float = 1, - length_scale: float = 1, - nu: float = 1.5, - ) -> None: - self.variance = variance - self.length_scale = length_scale - self.nu = nu - - def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat: - x = _transform_to_2d(x) - y = _transform_to_2d(y) - - x_y_squared = _squared_norms(x, y) - x_y = np.sqrt(x_y_squared) - - p = self.nu - 0.5 - if p.is_integer(): - # Formula for half-integers - p = int(p) - body = np.sqrt(2 * p + 1) * x_y / self.length_scale - exponential = np.exp(-body) - power_list = np.full(shape=(p,) + body.shape, fill_value=2 * body) - power_list = np.cumprod(power_list, axis=0) - power_list = np.concatenate( - (power_list[::-1], np.asarray([np.ones_like(body)])), - ) - power_list = np.moveaxis(power_list, 0, -1) - numerator = np.cumprod(np.arange(p, 0, -1)) - numerator = np.concatenate(([1], numerator)) - denom1 = np.cumprod(np.arange(2 * p, p, -1)) - denom1 = np.concatenate((denom1[::-1], [1])) - denom2 = np.cumprod(np.arange(1, p + 1)) - denom2 = np.concatenate(([1], denom2)) - - sum_terms = power_list * numerator / (denom1 * denom2) - return ( # type: ignore[no-any-return] - self.variance * exponential * np.sum(sum_terms, axis=-1) - ) - elif self.nu == np.inf: - return ( - self.variance * np.exp( - -x_y_squared / (2 * self.length_scale ** 2), - ) - ) - else: - # General formula - scaling = 2**(1 - self.nu) / gamma(self.nu) - body = np.sqrt(2 * self.nu) * x_y / 
self.length_scale - power = body**self.nu - bessel = kv(self.nu, body) - - with np.errstate(invalid='ignore'): - eval_cov = self.variance * scaling * power * bessel - - # Values with nan are where the distance is 0 - return np.nan_to_num( # type: ignore[no-any-return] - eval_cov, - nan=self.variance, - ) - - def to_sklearn(self) -> sklearn_kern.Kernel: - return ( - self.variance - * sklearn_kern.Matern(length_scale=self.length_scale, nu=self.nu) - ) - - -class Empirical(Covariance): - r""" - Sample covariance function. - - The sample covariance function is defined as - . math:: - K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N\left(x_n(t) - - \bar{x}(t)\right) \left(x_n(s) - \bar{x}(s)\right) - - where :math:`x_n(t)` is the n-th sample and :math:`\bar{x}(t)` is the - mean of the samples. :math:`N` is the number of samples, - :math:`\text{correction}` is the degrees of freedom adjustment and is such - that :math:`N-\text{correction}` is the divisor used in the calculation of - the covariance function. - - """ - - _latex_formula = ( - r"K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N" - r"(x_n(t) - \bar{x}(t))(x_n(s) - \bar{x}(s))" - ) - _parameters_str = [ - ("data", "data"), - ] - - cov_fdata: FData - correction: int - - @abc.abstractmethod - def __init__(self, data: FData) -> None: - if data.dim_domain != 1 or data.dim_codomain != 1: - raise NotImplementedError( - "Covariance only implemented " - "for univariate functions", - ) - - def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat: - """Evaluate the covariance function. - - Args: - x: First array of points of evaluation. - y: Second array of points of evaluation. - - Returns: - Covariance function evaluated at the grid formed by x and y. - """ - return self.cov_fdata([x, y], grid=True)[0, ..., 0] - - -class EmpiricalGrid(Empirical): - """Sample covariance function for FDataGrid.""" - - cov_fdata: FDataGrid - correction: int - - def __init__(self, data: FDataGrid, correction: int = 0) -> None: - super().__init__(data=data) - - self.correction = correction - self.cov_fdata = data.copy( - data_matrix=np.cov( - data.data_matrix[..., 0], - rowvar=False, - ddof=correction, - )[np.newaxis, ...], - grid_points=[ - data.grid_points[0], - data.grid_points[0], - ], - domain_range=[ - data.domain_range[0], - data.domain_range[0], - ], - argument_names=data.argument_names * 2, - sample_names=("covariance",), - ) - - -class EmpiricalBasis(Empirical): - """ - Sample covariance function for FDataBasis. - - In this case one may use the basis expression of the samples to - express the sample covariance function in the tensor product basis - of the original basis. 
- """ - - cov_fdata: FDataBasis - coeff_matrix: NDArrayFloat - correction: int - - def __init__(self, data: FDataBasis, correction: int = 0) -> None: - super().__init__(data=data) - - self.correction = correction - self.coeff_matrix = np.cov( - data.coefficients, - rowvar=False, - ddof=correction, - ) - self.cov_fdata = FDataBasis( - basis=TensorBasis([data.basis, data.basis]), - coefficients=self.coeff_matrix.flatten(), - argument_names=data.argument_names * 2, - sample_names=("covariance",), - ) +from __future__ import annotations + +import abc +from typing import Callable, Sequence, Tuple, Union + +import matplotlib.pyplot as plt +import numpy as np +import sklearn.gaussian_process.kernels as sklearn_kern +from matplotlib.figure import Figure +from scipy.special import gamma, kv + +from ..misc import inner_product_matrix +from ..misc.metrics import PairwiseMetric, LpNorm, l2_distance +from ..representation import FData, FDataBasis, FDataGrid +from ..representation.basis import TensorBasis +from ..typing._numpy import ArrayLike, NDArrayFloat + + +def _squared_norms(x: NDArrayFloat, y: NDArrayFloat) -> NDArrayFloat: + return ( # type: ignore[no-any-return] + (x[:, np.newaxis, :] - y[np.newaxis, :, :]) ** 2 + ).sum(2) + + +CovarianceLike = Union[ + float, + NDArrayFloat, + Callable[[ArrayLike, ArrayLike], NDArrayFloat], +] + +InputAcceptable = Union[ + np.ndarray, + FData, +] + + +def _transform_to_2d(t: ArrayLike) -> NDArrayFloat: + """Transform 1d arrays in column vectors.""" + t = np.asfarray(t) + + dim = len(t.shape) + assert dim <= 2 + + if dim < 2: + t = np.atleast_2d(t).T + + return t + + +def _execute_covariance( + covariance: CovarianceLike, + x: ArrayLike, + y: ArrayLike, +) -> NDArrayFloat: + """Execute a covariance function.""" + x = _transform_to_2d(x) + y = _transform_to_2d(y) + + if isinstance(covariance, (int, float)): + return np.array(covariance) + else: + if callable(covariance): + result = covariance(x, y) + elif isinstance(covariance, np.ndarray): + result = covariance + else: + # GPy kernel + result = covariance.K(x, y) + + assert result.shape[0] == len(x) + assert result.shape[1] == len(y) + return result + + +class Covariance(abc.ABC): + """Abstract class for covariance functions.""" + + _parameters_str: Sequence[Tuple[str, str]] + _latex_formula: str + + @abc.abstractmethod + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + pass + + def _param_check_and_transform( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> Tuple[np.ndarray | FData, np.ndarray | FData]: + # Param check + if y is None: + y = x + + if type(x) is not type(y): # noqa: WPS516 + raise ValueError( + 'Cannot operate objects x and y from different classes', + f'({type(x)}, {type(y)}).', + ) + + if isinstance(x, np.ndarray) and isinstance(y, np.ndarray): + x, y = np.array(x), np.array(y) + if len(x.shape) < 2: + x = np.atleast_2d(x) + if len(y.shape) < 2: + y = np.atleast_2d(y) + + return x, y + + def heatmap(self, limits: Tuple[float, float] = (-1, 1)) -> Figure: + """Return a heatmap plot of the covariance function.""" + from ..exploratory.visualization._utils import _create_figure + + x = np.linspace(*limits, 1000) + + cov_matrix = self(x, x) + + fig = _create_figure() + ax = fig.add_subplot(1, 1, 1) + ax.imshow( + cov_matrix, + extent=[limits[0], limits[1], limits[1], limits[0]], + ) + ax.set_title(f"Covariance function in [{limits[0]}, {limits[1]}]") + + return fig + + def _sample_trajectories_plot(self) -> Figure: + from 
..datasets import make_gaussian_process + + fd = make_gaussian_process( + start=-1, + n_samples=10, + cov=self, + random_state=0, + ) + fig = fd.plot() + fig.axes[0].set_title("Sample trajectories") + return fig + + def __repr__(self) -> str: + + params_str = ', '.join( + f'{n}={getattr(self, n)}' for n, _ in self._parameters_str + ) + + return ( + f"{self.__module__}.{type(self).__qualname__}(" + f"{params_str}" + f")" + ) + + def _latex_content(self) -> str: + params_str = ''.join( + fr'{l} &= {getattr(self, n)} \\' for n, l in self._parameters_str + ) + + return ( + fr"{self._latex_formula} \\" + r"\text{where:}" + r"\begin{aligned}" + fr"\qquad{params_str}" + r"\end{aligned}" + ) + + def _repr_latex_(self) -> str: + return fr"\(\displaystyle {self._latex_content()}\)" + + def _repr_html_(self) -> str: + from ..exploratory.visualization._utils import _figure_to_svg + + fig = self.heatmap() + heatmap = _figure_to_svg(fig) + plt.close(fig) + + fig = self._sample_trajectories_plot() + sample_trajectories = _figure_to_svg(fig) + plt.close(fig) + + row_style = '' + + def column_style(percent: float, margin_top: str = "0") -> str: + return ( + f'style="display: inline-block; ' + f'margin:0; ' + f'margin-top: {margin_top}; ' + f'width:{percent}%; ' + f'height:auto;' + f'vertical-align: middle"' + ) + + return fr""" +
+        <div {row_style}>
+            <div {column_style(100, margin_top='25px')}>
+                \[{self._latex_content()}\]
+            </div>
+        </div>
+        <div {row_style}>
+            <div {column_style(48)}>
+                {sample_trajectories}
+            </div>
+            <div {column_style(48)}>
+                {heatmap}
+            </div>
+        </div>
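+        <!-- layout: formula row above; trajectories and heatmap row below -->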
+ """ + + def to_sklearn(self) -> sklearn_kern.Kernel: + """Convert it to a sklearn kernel, if there is one.""" + raise NotImplementedError( + f"{type(self).__name__} covariance not " + f"implemented in scikit-learn", + ) + + +class Brownian(Covariance): + r""" + Brownian covariance function. + + The covariance function is + + .. math:: + K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + |x' - \mathcal{O}| + - |x - x'|}{2} + + where :math:`\sigma^2` is the variance at distance 1 from + :math:`\mathcal{O}` and :math:`\mathcal{O}` is the origin point. + If :math:`\mathcal{O} = 0` (the default) and we only + consider positive values, the formula can be simplified as + + .. math:: + K(x, y) = \sigma^2 \min(x, y). + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import Brownian + import matplotlib.pyplot as plt + + Brownian().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import Brownian + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=Brownian(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. jupyter-execute:: + + from skfda.misc.covariances import Brownian + + Brownian() + + """ + _latex_formula = ( + r"K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + " + r"|x' - \mathcal{O}| - |x - x'|}{2}" + ) + + _parameters_str = [ + ("variance", r"\sigma^2"), + ("origin", r"\mathcal{O}"), + ] + + def __init__(self, *, variance: float = 1, origin: float = 0) -> None: + self.variance = variance + self.origin = origin + + def __call__( + self, + x: InputAcceptable | FData, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + if isinstance(x, FData) or isinstance(y, FData): + raise ValueError('Not defined for FData objects.') + + x = _transform_to_2d(x) - self.origin + y = _transform_to_2d(y) - self.origin + + sum_norms = np.add.outer( + np.linalg.norm(x, axis=-1), + np.linalg.norm(y, axis=-1), + ) + norm_sub = np.linalg.norm( + x[:, np.newaxis, :] - y[np.newaxis, :, :], + axis=-1, + ) + + return ( # type: ignore[no-any-return] + self.variance * (sum_norms - norm_sub) / 2 + ) + + +class Linear(Covariance): + r""" + Linear covariance function. + + The covariance function is + + .. math:: + K(x, x') = \sigma^2 (x^T x' + c) + + where :math:`\sigma^2` is the scale of the variance and + :math:`c` is the intercept. + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import Linear + import matplotlib.pyplot as plt + + Linear().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import Linear + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=Linear(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. 
jupyter-execute:: + + from skfda.misc.covariances import Linear + + Linear() + + """ + + _latex_formula = r"K(x, x') = \sigma^2 (x^T x' + c)" + + _parameters_str = [ + ("variance", r"\sigma^2"), + ("intercept", "c"), + ] + + def __init__(self, *, variance: float = 1, intercept: float = 0) -> None: + self.variance = variance + self.intercept = intercept + + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + x, y = self._param_check_and_transform(x, y) + + x_y = inner_product_matrix(x, y) + return self.variance * (x_y + self.intercept) + + def to_sklearn(self) -> sklearn_kern.Kernel: + return ( + self.variance + * (sklearn_kern.DotProduct(0) + self.intercept) + ) + + +class Polynomial(Covariance): + r""" + Polynomial covariance function. + + The covariance function is + + .. math:: + K(x, x') = \sigma^2 (\alpha x^T x' + c)^d + + where :math:`\sigma^2` is the scale of the variance, + :math:`\alpha` is the slope, :math:`d` the degree of the + polynomial and :math:`c` is the intercept. + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import Polynomial + import matplotlib.pyplot as plt + + Polynomial().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import Polynomial + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=Polynomial(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. jupyter-execute:: + + from skfda.misc.covariances import Polynomial + + Polynomial() + + """ + + _latex_formula = r"K(x, x') = \sigma^2 (\alpha x^T x' + c)^d" + + _parameters_str = [ + ("variance", r"\sigma^2"), + ("intercept", "c"), + ("slope", r"\alpha"), + ("degree", "d"), + ] + + def __init__( + self, + *, + variance: float = 1, + intercept: float = 0, + slope: float = 1, + degree: float = 2, + ) -> None: + self.variance = variance + self.intercept = intercept + self.slope = slope + self.degree = degree + + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + x, y = self._param_check_and_transform(x, y) + + x_y = inner_product_matrix(x, y) + return ( + self.variance + * (self.slope * x_y + self.intercept) ** self.degree + ) + + def to_sklearn(self) -> sklearn_kern.Kernel: + return ( + self.variance + * (self.slope * sklearn_kern.DotProduct(0) + self.intercept) + ** self.degree + ) + + +class Gaussian(Covariance): + r""" + Gaussian covariance function. + + The covariance function is + + .. math:: + K(x, x') = \sigma^2 \exp\left(-\frac{||x - x'||^2}{2l^2}\right) + + where :math:`\sigma^2` is the variance and :math:`l` is the length scale. + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import Gaussian + import matplotlib.pyplot as plt + + Gaussian().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import Gaussian + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=Gaussian(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. 
jupyter-execute:: + + from skfda.misc.covariances import Gaussian + + Gaussian() + + """ + + _latex_formula = ( + r"K(x, x') = \sigma^2 \exp\left(-\frac{\|x - x'\|^2}{2l^2}" + r"\right)" + ) + + _parameters_str = [ + ("variance", r"\sigma^2"), + ("length_scale", "l"), + ] + + def __init__(self, *, variance: float = 1, length_scale: float = 1): + self.variance = variance + self.length_scale = length_scale + + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + x, y = self._param_check_and_transform(x, y) + + distance_x_y = PairwiseMetric(l2_distance)(x, y) + return self.variance * np.exp(-distance_x_y ** 2 / (2 * self.length_scale ** 2)) + + def to_sklearn(self) -> sklearn_kern.Kernel: + return ( + self.variance * sklearn_kern.RBF(length_scale=self.length_scale) + ) + + +class Exponential(Covariance): + r""" + Exponential covariance function. + + The covariance function is + + .. math:: + K(x, x') = \sigma^2 \exp\left(-\frac{\|x - x'\|}{l}\right) + + where :math:`\sigma^2` is the variance and :math:`l` is the length scale. + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import Exponential + import matplotlib.pyplot as plt + + Exponential().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import Exponential + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=Exponential(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. jupyter-execute:: + + from skfda.misc.covariances import Exponential + + Exponential() + + """ + + _latex_formula = ( + r"K(x, x') = \sigma^2 \exp\left(-\frac{||x - x'||}{l}" + r"\right)" + ) + + _parameters_str = [ + ("variance", r"\sigma^2"), + ("length_scale", "l"), + ] + + def __init__( + self, + *, + variance: float = 1, + length_scale: float = 1, + ) -> None: + self.variance = variance + self.length_scale = length_scale + + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + x, y = self._param_check_and_transform(x, y) + + distance_x_y = PairwiseMetric(l2_distance)(x, y) + return self.variance * np.exp(-distance_x_y / (self.length_scale)) + + def to_sklearn(self) -> sklearn_kern.Kernel: + return ( + self.variance + * sklearn_kern.Matern(length_scale=self.length_scale, nu=0.5) + ) + + +class WhiteNoise(Covariance): + r""" + Gaussian covariance function. + + The covariance function is + + .. math:: + K(x, x')= \begin{cases} + \sigma^2, \quad x = x' \\ + 0, \quad x \neq x'\\ + \end{cases} + + where :math:`\sigma^2` is the variance. + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import WhiteNoise + import matplotlib.pyplot as plt + + WhiteNoise().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import WhiteNoise + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=WhiteNoise(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. 
jupyter-execute:: + + from skfda.misc.covariances import WhiteNoise + + WhiteNoise() + + """ + + _latex_formula = ( + r"K(x, x')= \begin{cases} \sigma^2, \quad x = x' \\" + r"0, \quad x \neq x'\\ \end{cases}" + ) + + _parameters_str = [("variance", r"\sigma^2")] + + def __init__(self, *, variance: float = 1): + self.variance = variance + + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + if isinstance(x, FData) or isinstance(y, FData): + raise ValueError('Not defined for FData objects.') + + x = _transform_to_2d(x) + return self.variance * np.eye(x.shape[0]) + + def to_sklearn(self) -> sklearn_kern.Kernel: + return sklearn_kern.WhiteKernel(noise_level=self.variance) + + +class Matern(Covariance): + r""" + Matérn covariance function. + + The covariance function is + + .. math:: + K(x, x') = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)} + \left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)^{\nu} + K_{\nu}\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right) + + where :math:`\sigma^2` is the variance, :math:`l` is the length scale + and :math:`\nu` controls the smoothness of the related Gaussian process. + The trajectories of a Gaussian process with Matérn covariance is + :math:`\lceil \nu \rceil - 1` times differentiable. + + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import Matern + import matplotlib.pyplot as plt + + Matern().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import Matern + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=Matern(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. 
jupyter-execute:: + + from skfda.misc.covariances import Matern + + Matern() + + """ + _latex_formula = ( + r"K(x, x') = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)}" + r"\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)^{\nu}" + r"K_{\nu}\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)" + ) + + _parameters_str = [ + ("variance", r"\sigma^2"), + ("length_scale", "l"), + ("nu", r"\nu"), + ] + + def __init__( + self, + *, + variance: float = 1, + length_scale: float = 1, + nu: float = 1.5, + ) -> None: + self.variance = variance + self.length_scale = length_scale + self.nu = nu + + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + x, y = self._param_check_and_transform(x, y) + + distance_x_y = PairwiseMetric(l2_distance)(x, y) + + p = self.nu - 0.5 + if p.is_integer(): + # Formula for half-integers + p = int(p) + body = np.sqrt(2 * p + 1) * distance_x_y / self.length_scale + exponential = np.exp(-body) + power_list = np.full(shape=(p,) + body.shape, fill_value=2 * body) + power_list = np.cumprod(power_list, axis=0) + power_list = np.concatenate( + (power_list[::-1], np.asarray([np.ones_like(body)])), + ) + power_list = np.moveaxis(power_list, 0, -1) + numerator = np.cumprod(np.arange(p, 0, -1)) + numerator = np.concatenate(([1], numerator)) + denom1 = np.cumprod(np.arange(2 * p, p, -1)) + denom1 = np.concatenate((denom1[::-1], [1])) + denom2 = np.cumprod(np.arange(1, p + 1)) + denom2 = np.concatenate(([1], denom2)) + + sum_terms = power_list * numerator / (denom1 * denom2) + return ( # type: ignore[no-any-return] + self.variance * exponential * np.sum(sum_terms, axis=-1) + ) + elif self.nu == np.inf: + return ( # type: ignore[no-any-return] + self.variance * np.exp( + -distance_x_y ** 2 / (2 * self.length_scale ** 2), + ) + ) + else: + # General formula + scaling = 2**(1 - self.nu) / gamma(self.nu) + body = np.sqrt(2 * self.nu) * distance_x_y / self.length_scale + power = body**self.nu + bessel = kv(self.nu, body) + + with np.errstate(invalid='ignore'): + eval_cov = self.variance * scaling * power * bessel + + # Values with nan are where the distance is 0 + return np.nan_to_num( # type: ignore[no-any-return] + eval_cov, + nan=self.variance, + ) + + def to_sklearn(self) -> sklearn_kern.Kernel: + return ( + self.variance + * sklearn_kern.Matern(length_scale=self.length_scale, nu=self.nu) + ) + + +class Empirical(Covariance): + r""" + Sample covariance function. + + The sample covariance function is defined as + . math:: + K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N\left(x_n(t) - + \bar{x}(t)\right) \left(x_n(s) - \bar{x}(s)\right) + + where :math:`x_n(t)` is the n-th sample and :math:`\bar{x}(t)` is the + mean of the samples. :math:`N` is the number of samples, + :math:`\text{correction}` is the degrees of freedom adjustment and is such + that :math:`N-\text{correction}` is the divisor used in the calculation of + the covariance function. 
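+
+    For ``FDataGrid`` objects the estimate is computed with ``np.cov`` on
+    the data matrix over the grid points, while for ``FDataBasis`` objects
+    it is expressed in the tensor product basis of the original basis (see
+    ``EmpiricalGrid`` and ``EmpiricalBasis``).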
+ + """ + + _latex_formula = ( + r"K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N" + r"(x_n(t) - \bar{x}(t))(x_n(s) - \bar{x}(s))" + ) + _parameters_str = [ + ("data", "data"), + ] + + cov_fdata: FData + correction: int + + @abc.abstractmethod + def __init__(self, data: FData) -> None: + if data.dim_domain != 1 or data.dim_codomain != 1: + raise NotImplementedError( + "Covariance only implemented " + "for univariate functions", + ) + + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + if isinstance(x, FData) or isinstance(y, FData): + raise ValueError('Not defined for FData objects.') + + """Evaluate the covariance function. + + Args: + x: First array of points of evaluation. + y: Second array of points of evaluation. + + Returns: + Covariance function evaluated at the grid formed by x and y. + """ + return self.cov_fdata([x, y], grid=True)[0, ..., 0] + + +class EmpiricalGrid(Empirical): + """Sample covariance function for FDataGrid.""" + + cov_fdata: FDataGrid + correction: int + + def __init__(self, data: FDataGrid, correction: int = 0) -> None: + super().__init__(data=data) + + self.correction = correction + self.cov_fdata = data.copy( + data_matrix=np.cov( + data.data_matrix[..., 0], + rowvar=False, + ddof=correction, + )[np.newaxis, ...], + grid_points=[ + data.grid_points[0], + data.grid_points[0], + ], + domain_range=[ + data.domain_range[0], + data.domain_range[0], + ], + argument_names=data.argument_names * 2, + sample_names=("covariance",), + ) + + +class EmpiricalBasis(Empirical): + """ + Sample covariance function for FDataBasis. + + In this case one may use the basis expression of the samples to + express the sample covariance function in the tensor product basis + of the original basis. + """ + + cov_fdata: FDataBasis + coeff_matrix: NDArrayFloat + correction: int + + def __init__(self, data: FDataBasis, correction: int = 0) -> None: + super().__init__(data=data) + + self.correction = correction + self.coeff_matrix = np.cov( + data.coefficients, + rowvar=False, + ddof=correction, + ) + self.cov_fdata = FDataBasis( + basis=TensorBasis([data.basis, data.basis]), + coefficients=self.coeff_matrix.flatten(), + argument_names=data.argument_names * 2, + sample_names=("covariance",), + ) From 4abb6e5e7540048d898469b274626f7fcfe12273 Mon Sep 17 00:00:00 2001 From: E69D68U85 <72515278+E105D104U125@users.noreply.github.com> Date: Sun, 3 Mar 2024 22:27:26 +0100 Subject: [PATCH 03/14] Input parameters now parsed using ParameterGrid. Changed function to test multivariate data for all covariance functions. Now uses ParameterGrid. --- skfda/tests/test_covariances.py | 237 +++++++++++--------------------- 1 file changed, 79 insertions(+), 158 deletions(-) diff --git a/skfda/tests/test_covariances.py b/skfda/tests/test_covariances.py index f9a54c17a..dc5982ca0 100644 --- a/skfda/tests/test_covariances.py +++ b/skfda/tests/test_covariances.py @@ -1,27 +1,22 @@ -""" -Tests for Covariance module. - -This file includes tests for multivariate data from the previous version -of the file. It additionally incorporates tests cases for functional data -objects. 
-""" -import pytest -from typing import Tuple +"""Tests for Covariance module.""" +from typing import Any, Tuple import numpy as np +import pytest +from sklearn.model_selection import ParameterGrid import skfda.misc.covariances as cov -from skfda import FDataGrid, FDataBasis +from skfda import FDataBasis, FDataGrid from skfda.datasets import fetch_phoneme from skfda.representation.basis import MonomialBasis def _test_compare_sklearn( - multivariate_data, + multivariate_data: Any, cov: cov.Covariance, ) -> None: cov_sklearn = cov.to_sklearn() - cov_matrix = cov(multivariate_data, multivariate_data) + cov_matrix = cov(multivariate_data) cov_sklearn_matrix = cov_sklearn(multivariate_data) np.testing.assert_array_almost_equal(cov_matrix, cov_sklearn_matrix) @@ -47,7 +42,7 @@ def fetch_phoneme_fixture() -> FDataGrid: cov.Matern(), ], ) -def covariances_fixture(request) -> cov.Covariance: +def covariances_fixture(request: Any) -> Any: """Fixture for getting a covariance kernel function.""" return request.param @@ -58,14 +53,14 @@ def covariances_fixture(request) -> cov.Covariance: cov.WhiteNoise(), ], ) -def covariances_raise_fixture(request) -> cov.Covariance: +def covariances_raise_fixture(request: Any) -> Any: """Fixture for getting a covariance kernel that raises a ValueError.""" return request.param @pytest.fixture def fdatabasis_data() -> Tuple[FDataBasis, FDataBasis]: - """Fixture for loading fdatabasis objects.""" + """Fixture for getting fdatabasis objects.""" basis = MonomialBasis( n_basis=2, domain_range=(-2, 2), @@ -90,56 +85,50 @@ def fdatabasis_data() -> Tuple[FDataBasis, FDataBasis]: @pytest.fixture -def multivariate_data() -> None: - """Fixture for loading multivariate data.""" +def multivariate_data() -> np.array: + """Fixture for getting multivariate data.""" return np.linspace(-1, 1, 1000)[:, np.newaxis] @pytest.fixture( - params=[1, 2], -) -def variance_param(request) -> np.ndarray: - """Fixture for loading variance parameter.""" - return request.param - - -@pytest.fixture( - params=[0, 1, 2], -) -def intercept_param(request) -> np.ndarray: - """Fixture for loading intercept parameter.""" - return request.param - - -@pytest.fixture( - params=[1, 2], -) -def slope_param(request) -> np.ndarray: - """Fixture for loading slope parameter.""" - return request.param - - -@pytest.fixture( - params=[1, 2, 3], -) -def degree_param(request) -> np.ndarray: - """Fixture for loading degree parameter.""" - return request.param - - -@pytest.fixture( - params=[1, 2, 3], -) -def length_scale_param(request) -> np.ndarray: - """Fixture for loading length scale parameter.""" - return request.param - - -@pytest.fixture( - params=[0.5, 1, 1.5, 2.5, 3.5, np.inf], + params=[ + (cov.Linear, + { + "variance": [1, 2], + "intercept": [3, 4], + }, + ), + (cov.Polynomial, + { + "variance": [2], + "intercept": [0, 2], + "slope": [1, 2], + "degree": [1, 2, 3], + }, + ), + (cov.Exponential, + { + "variance": [1, 2], + "length_scale": [0.5, 1, 2], + }, + ), + (cov.Gaussian, + { + "variance": [1, 2], + "length_scale": [0.5, 1, 2], + }, + ), + (cov.Matern, + { + "variance": [2], + "length_scale": [0.5], + "nu": [0.5, 1, 1.5, 2.5, 3.5, np.inf], + }, + ), + ], ) -def nu_param(request) -> np.ndarray: - """Fixture for loading nu parameter.""" +def covariance_and_params(request: Any) -> Any: + """Fixture to load the covariance functions.""" return request.param @@ -148,7 +137,10 @@ def nu_param(request) -> np.ndarray: ############################################################################## -def 
test_covariances(fetch_phoneme_fixture, covariances_fixture) -> None: +def test_covariances( + fetch_phoneme_fixture: Any, + covariances_fixture: Any, +) -> None: """Check that invalid parameters in fit raise exception.""" fd = fetch_phoneme_fixture cov_kernel = covariances_fixture @@ -164,7 +156,10 @@ def test_covariances(fetch_phoneme_fixture, covariances_fixture) -> None: ) -def test_raises(fetch_phoneme_fixture, covariances_raise_fixture): +def test_raises( + fetch_phoneme_fixture: Any, + covariances_raise_fixture: Any, +) -> None: """Check that it raises a ValueError exception.""" fd = fetch_phoneme_fixture cov_kernel = covariances_raise_fixture @@ -176,7 +171,9 @@ def test_raises(fetch_phoneme_fixture, covariances_raise_fixture): ) -def test_fdatabasis_example_linear(fdatabasis_data): +def test_fdatabasis_example_linear( + fdatabasis_data: Any, +) -> None: """Check a precalculated example for Linear covariance kernel.""" fd1, fd2 = fdatabasis_data res1 = cov.Linear(variance=1 / 2, intercept=3)(fd1, fd2) @@ -188,7 +185,9 @@ def test_fdatabasis_example_linear(fdatabasis_data): ) -def test_fdatabasis_example_polynomial(fdatabasis_data): +def test_fdatabasis_example_polynomial( + fdatabasis_data: Any, +) -> None: """Check a precalculated example for Polynomial covariance kernel.""" fd1, fd2 = fdatabasis_data res1 = cov.Polynomial( @@ -205,7 +204,9 @@ def test_fdatabasis_example_polynomial(fdatabasis_data): ) -def test_fdatabasis_example_gaussian(fdatabasis_data): +def test_fdatabasis_example_gaussian( + fdatabasis_data: Any, +) -> None: """Check a precalculated example for Gaussian covariance kernel.""" fd1, fd2 = fdatabasis_data res1 = cov.Gaussian(variance=3, length_scale=2)(fd1, fd2) @@ -220,7 +221,9 @@ def test_fdatabasis_example_gaussian(fdatabasis_data): ) -def test_fdatabasis_example_exponential(fdatabasis_data): +def test_fdatabasis_example_exponential( + fdatabasis_data: Any, +) -> None: """Check a precalculated example for Exponential covariance kernel.""" fd1, fd2 = fdatabasis_data res1 = cov.Exponential(variance=4, length_scale=5)(fd1, fd2) @@ -235,7 +238,9 @@ def test_fdatabasis_example_exponential(fdatabasis_data): ) -def test_fdatabasis_example_matern(fdatabasis_data): +def test_fdatabasis_example_matern( + fdatabasis_data: Any, +) -> None: """Check a precalculated example for Matern covariance kernel.""" fd1, fd2 = fdatabasis_data res1 = cov.Matern(variance=2, length_scale=3, nu=2)(fd1, fd2) @@ -250,95 +255,11 @@ def test_fdatabasis_example_matern(fdatabasis_data): ) -def test_multivariate_linear( - multivariate_data, - variance_param, - intercept_param, +def test_multivariate_covariance_kernel( + multivariate_data: Any, + covariance_and_params: Any, ) -> None: - """Test linear covariance kernel against scikit learn's kernel.""" - _test_compare_sklearn( - multivariate_data, - cov.Linear( - variance=variance_param, - intercept=intercept_param, - ), - ) - - -def test_multivariate_polynomial( - multivariate_data, - variance_param, - intercept_param, - slope_param, - degree_param, -) -> None: - """Test polynomial covariance kernel against scikit learn's kernel.""" - _test_compare_sklearn( - multivariate_data, - cov.Polynomial( - variance=variance_param, - intercept=intercept_param, - slope=slope_param, - degree=degree_param, - ), - ) - - -def test_multivariate_gaussian( - multivariate_data, - variance_param, - length_scale_param, -) -> None: - """Test gaussian covariance kernel against scikit learn's kernel.""" - _test_compare_sklearn( - multivariate_data, - cov.Gaussian( - 
variance=variance_param, - length_scale=length_scale_param, - ), - ) - - -def test_multivariate_exponential( - multivariate_data, - variance_param, - length_scale_param, -) -> None: - """Test exponential covariance kernel against scikit learn's kernel.""" - _test_compare_sklearn( - multivariate_data, - cov.Exponential( - variance=variance_param, - length_scale=length_scale_param, - ), - ) - - -def test_multivariate_matern( - multivariate_data, - variance_param, - length_scale_param, - nu_param, -) -> None: - """Test matern covariance kernel against scikit learn's kernel.""" - _test_compare_sklearn( - multivariate_data, - cov.Matern( - variance=variance_param, - length_scale=length_scale_param, - nu=nu_param, - ), - ) - - -def test_multivariate_white_noise( - multivariate_data, - variance_param, -) -> None: - """Test white noise covariance kernel against scikit learn's kernel.""" - _test_compare_sklearn( - multivariate_data, - cov.WhiteNoise( - variance=variance_param, - ), - ) + """Test general covariance kernel against scikit-learn's kernel.""" + cov_kernel, param_dict = covariance_and_params + for input_params in list(ParameterGrid(param_dict)): + _test_compare_sklearn(multivariate_data, cov_kernel(**input_params)) From 3a54b9f275775266611979d590aa6c0a99d387ad Mon Sep 17 00:00:00 2001 From: E69D68U85 <72515278+E105D104U125@users.noreply.github.com> Date: Mon, 25 Mar 2024 19:56:52 +0100 Subject: [PATCH 04/14] Changed EOL to LF. --- skfda/misc/covariances.py | 1882 ++++++++++++++++++------------------- 1 file changed, 941 insertions(+), 941 deletions(-) diff --git a/skfda/misc/covariances.py b/skfda/misc/covariances.py index 9bce97d74..b441892ff 100644 --- a/skfda/misc/covariances.py +++ b/skfda/misc/covariances.py @@ -1,941 +1,941 @@ -from __future__ import annotations - -import abc -from typing import Callable, Sequence, Tuple, Union - -import matplotlib.pyplot as plt -import numpy as np -import sklearn.gaussian_process.kernels as sklearn_kern -from matplotlib.figure import Figure -from scipy.special import gamma, kv - -from ..misc import inner_product_matrix -from ..misc.metrics import PairwiseMetric, LpNorm, l2_distance -from ..representation import FData, FDataBasis, FDataGrid -from ..representation.basis import TensorBasis -from ..typing._numpy import ArrayLike, NDArrayFloat - - -def _squared_norms(x: NDArrayFloat, y: NDArrayFloat) -> NDArrayFloat: - return ( # type: ignore[no-any-return] - (x[:, np.newaxis, :] - y[np.newaxis, :, :]) ** 2 - ).sum(2) - - -CovarianceLike = Union[ - float, - NDArrayFloat, - Callable[[ArrayLike, ArrayLike], NDArrayFloat], -] - -InputAcceptable = Union[ - np.ndarray, - FData, -] - - -def _transform_to_2d(t: ArrayLike) -> NDArrayFloat: - """Transform 1d arrays in column vectors.""" - t = np.asfarray(t) - - dim = len(t.shape) - assert dim <= 2 - - if dim < 2: - t = np.atleast_2d(t).T - - return t - - -def _execute_covariance( - covariance: CovarianceLike, - x: ArrayLike, - y: ArrayLike, -) -> NDArrayFloat: - """Execute a covariance function.""" - x = _transform_to_2d(x) - y = _transform_to_2d(y) - - if isinstance(covariance, (int, float)): - return np.array(covariance) - else: - if callable(covariance): - result = covariance(x, y) - elif isinstance(covariance, np.ndarray): - result = covariance - else: - # GPy kernel - result = covariance.K(x, y) - - assert result.shape[0] == len(x) - assert result.shape[1] == len(y) - return result - - -class Covariance(abc.ABC): - """Abstract class for covariance functions.""" - - _parameters_str: Sequence[Tuple[str, 
str]] - _latex_formula: str - - @abc.abstractmethod - def __call__( - self, - x: InputAcceptable, - y: InputAcceptable | None = None, - ) -> NDArrayFloat: - pass - - def _param_check_and_transform( - self, - x: InputAcceptable, - y: InputAcceptable | None = None, - ) -> Tuple[np.ndarray | FData, np.ndarray | FData]: - # Param check - if y is None: - y = x - - if type(x) is not type(y): # noqa: WPS516 - raise ValueError( - 'Cannot operate objects x and y from different classes', - f'({type(x)}, {type(y)}).', - ) - - if isinstance(x, np.ndarray) and isinstance(y, np.ndarray): - x, y = np.array(x), np.array(y) - if len(x.shape) < 2: - x = np.atleast_2d(x) - if len(y.shape) < 2: - y = np.atleast_2d(y) - - return x, y - - def heatmap(self, limits: Tuple[float, float] = (-1, 1)) -> Figure: - """Return a heatmap plot of the covariance function.""" - from ..exploratory.visualization._utils import _create_figure - - x = np.linspace(*limits, 1000) - - cov_matrix = self(x, x) - - fig = _create_figure() - ax = fig.add_subplot(1, 1, 1) - ax.imshow( - cov_matrix, - extent=[limits[0], limits[1], limits[1], limits[0]], - ) - ax.set_title(f"Covariance function in [{limits[0]}, {limits[1]}]") - - return fig - - def _sample_trajectories_plot(self) -> Figure: - from ..datasets import make_gaussian_process - - fd = make_gaussian_process( - start=-1, - n_samples=10, - cov=self, - random_state=0, - ) - fig = fd.plot() - fig.axes[0].set_title("Sample trajectories") - return fig - - def __repr__(self) -> str: - - params_str = ', '.join( - f'{n}={getattr(self, n)}' for n, _ in self._parameters_str - ) - - return ( - f"{self.__module__}.{type(self).__qualname__}(" - f"{params_str}" - f")" - ) - - def _latex_content(self) -> str: - params_str = ''.join( - fr'{l} &= {getattr(self, n)} \\' for n, l in self._parameters_str - ) - - return ( - fr"{self._latex_formula} \\" - r"\text{where:}" - r"\begin{aligned}" - fr"\qquad{params_str}" - r"\end{aligned}" - ) - - def _repr_latex_(self) -> str: - return fr"\(\displaystyle {self._latex_content()}\)" - - def _repr_html_(self) -> str: - from ..exploratory.visualization._utils import _figure_to_svg - - fig = self.heatmap() - heatmap = _figure_to_svg(fig) - plt.close(fig) - - fig = self._sample_trajectories_plot() - sample_trajectories = _figure_to_svg(fig) - plt.close(fig) - - row_style = '' - - def column_style(percent: float, margin_top: str = "0") -> str: - return ( - f'style="display: inline-block; ' - f'margin:0; ' - f'margin-top: {margin_top}; ' - f'width:{percent}%; ' - f'height:auto;' - f'vertical-align: middle"' - ) - - return fr""" -
-        <div {row_style}>
-            <div {column_style(100, margin_top='25px')}>
-                \[{self._latex_content()}\]
-            </div>
-        </div>
-        <div {row_style}>
-            <div {column_style(48)}>
-                {sample_trajectories}
-            </div>
-            <div {column_style(48)}>
-                {heatmap}
-            </div>
-        </div>
- """ - - def to_sklearn(self) -> sklearn_kern.Kernel: - """Convert it to a sklearn kernel, if there is one.""" - raise NotImplementedError( - f"{type(self).__name__} covariance not " - f"implemented in scikit-learn", - ) - - -class Brownian(Covariance): - r""" - Brownian covariance function. - - The covariance function is - - .. math:: - K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + |x' - \mathcal{O}| - - |x - x'|}{2} - - where :math:`\sigma^2` is the variance at distance 1 from - :math:`\mathcal{O}` and :math:`\mathcal{O}` is the origin point. - If :math:`\mathcal{O} = 0` (the default) and we only - consider positive values, the formula can be simplified as - - .. math:: - K(x, y) = \sigma^2 \min(x, y). - - Heatmap plot of the covariance function: - - .. jupyter-execute:: - - from skfda.misc.covariances import Brownian - import matplotlib.pyplot as plt - - Brownian().heatmap(limits=(0, 1)) - plt.show() - - Example of Gaussian process trajectories using this covariance: - - .. jupyter-execute:: - - from skfda.misc.covariances import Brownian - from skfda.datasets import make_gaussian_process - import matplotlib.pyplot as plt - - gp = make_gaussian_process( - n_samples=10, cov=Brownian(), random_state=0) - gp.plot() - plt.show() - - Default representation in a Jupyter notebook: - - .. jupyter-execute:: - - from skfda.misc.covariances import Brownian - - Brownian() - - """ - _latex_formula = ( - r"K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + " - r"|x' - \mathcal{O}| - |x - x'|}{2}" - ) - - _parameters_str = [ - ("variance", r"\sigma^2"), - ("origin", r"\mathcal{O}"), - ] - - def __init__(self, *, variance: float = 1, origin: float = 0) -> None: - self.variance = variance - self.origin = origin - - def __call__( - self, - x: InputAcceptable | FData, - y: InputAcceptable | None = None, - ) -> NDArrayFloat: - if isinstance(x, FData) or isinstance(y, FData): - raise ValueError('Not defined for FData objects.') - - x = _transform_to_2d(x) - self.origin - y = _transform_to_2d(y) - self.origin - - sum_norms = np.add.outer( - np.linalg.norm(x, axis=-1), - np.linalg.norm(y, axis=-1), - ) - norm_sub = np.linalg.norm( - x[:, np.newaxis, :] - y[np.newaxis, :, :], - axis=-1, - ) - - return ( # type: ignore[no-any-return] - self.variance * (sum_norms - norm_sub) / 2 - ) - - -class Linear(Covariance): - r""" - Linear covariance function. - - The covariance function is - - .. math:: - K(x, x') = \sigma^2 (x^T x' + c) - - where :math:`\sigma^2` is the scale of the variance and - :math:`c` is the intercept. - - Heatmap plot of the covariance function: - - .. jupyter-execute:: - - from skfda.misc.covariances import Linear - import matplotlib.pyplot as plt - - Linear().heatmap(limits=(0, 1)) - plt.show() - - Example of Gaussian process trajectories using this covariance: - - .. jupyter-execute:: - - from skfda.misc.covariances import Linear - from skfda.datasets import make_gaussian_process - import matplotlib.pyplot as plt - - gp = make_gaussian_process( - n_samples=10, cov=Linear(), random_state=0) - gp.plot() - plt.show() - - Default representation in a Jupyter notebook: - - .. 
jupyter-execute:: - - from skfda.misc.covariances import Linear - - Linear() - - """ - - _latex_formula = r"K(x, x') = \sigma^2 (x^T x' + c)" - - _parameters_str = [ - ("variance", r"\sigma^2"), - ("intercept", "c"), - ] - - def __init__(self, *, variance: float = 1, intercept: float = 0) -> None: - self.variance = variance - self.intercept = intercept - - def __call__( - self, - x: InputAcceptable, - y: InputAcceptable | None = None, - ) -> NDArrayFloat: - x, y = self._param_check_and_transform(x, y) - - x_y = inner_product_matrix(x, y) - return self.variance * (x_y + self.intercept) - - def to_sklearn(self) -> sklearn_kern.Kernel: - return ( - self.variance - * (sklearn_kern.DotProduct(0) + self.intercept) - ) - - -class Polynomial(Covariance): - r""" - Polynomial covariance function. - - The covariance function is - - .. math:: - K(x, x') = \sigma^2 (\alpha x^T x' + c)^d - - where :math:`\sigma^2` is the scale of the variance, - :math:`\alpha` is the slope, :math:`d` the degree of the - polynomial and :math:`c` is the intercept. - - Heatmap plot of the covariance function: - - .. jupyter-execute:: - - from skfda.misc.covariances import Polynomial - import matplotlib.pyplot as plt - - Polynomial().heatmap(limits=(0, 1)) - plt.show() - - Example of Gaussian process trajectories using this covariance: - - .. jupyter-execute:: - - from skfda.misc.covariances import Polynomial - from skfda.datasets import make_gaussian_process - import matplotlib.pyplot as plt - - gp = make_gaussian_process( - n_samples=10, cov=Polynomial(), random_state=0) - gp.plot() - plt.show() - - Default representation in a Jupyter notebook: - - .. jupyter-execute:: - - from skfda.misc.covariances import Polynomial - - Polynomial() - - """ - - _latex_formula = r"K(x, x') = \sigma^2 (\alpha x^T x' + c)^d" - - _parameters_str = [ - ("variance", r"\sigma^2"), - ("intercept", "c"), - ("slope", r"\alpha"), - ("degree", "d"), - ] - - def __init__( - self, - *, - variance: float = 1, - intercept: float = 0, - slope: float = 1, - degree: float = 2, - ) -> None: - self.variance = variance - self.intercept = intercept - self.slope = slope - self.degree = degree - - def __call__( - self, - x: InputAcceptable, - y: InputAcceptable | None = None, - ) -> NDArrayFloat: - x, y = self._param_check_and_transform(x, y) - - x_y = inner_product_matrix(x, y) - return ( - self.variance - * (self.slope * x_y + self.intercept) ** self.degree - ) - - def to_sklearn(self) -> sklearn_kern.Kernel: - return ( - self.variance - * (self.slope * sklearn_kern.DotProduct(0) + self.intercept) - ** self.degree - ) - - -class Gaussian(Covariance): - r""" - Gaussian covariance function. - - The covariance function is - - .. math:: - K(x, x') = \sigma^2 \exp\left(-\frac{||x - x'||^2}{2l^2}\right) - - where :math:`\sigma^2` is the variance and :math:`l` is the length scale. - - Heatmap plot of the covariance function: - - .. jupyter-execute:: - - from skfda.misc.covariances import Gaussian - import matplotlib.pyplot as plt - - Gaussian().heatmap(limits=(0, 1)) - plt.show() - - Example of Gaussian process trajectories using this covariance: - - .. jupyter-execute:: - - from skfda.misc.covariances import Gaussian - from skfda.datasets import make_gaussian_process - import matplotlib.pyplot as plt - - gp = make_gaussian_process( - n_samples=10, cov=Gaussian(), random_state=0) - gp.plot() - plt.show() - - Default representation in a Jupyter notebook: - - .. 
jupyter-execute:: - - from skfda.misc.covariances import Gaussian - - Gaussian() - - """ - - _latex_formula = ( - r"K(x, x') = \sigma^2 \exp\left(-\frac{\|x - x'\|^2}{2l^2}" - r"\right)" - ) - - _parameters_str = [ - ("variance", r"\sigma^2"), - ("length_scale", "l"), - ] - - def __init__(self, *, variance: float = 1, length_scale: float = 1): - self.variance = variance - self.length_scale = length_scale - - def __call__( - self, - x: InputAcceptable, - y: InputAcceptable | None = None, - ) -> NDArrayFloat: - x, y = self._param_check_and_transform(x, y) - - distance_x_y = PairwiseMetric(l2_distance)(x, y) - return self.variance * np.exp(-distance_x_y ** 2 / (2 * self.length_scale ** 2)) - - def to_sklearn(self) -> sklearn_kern.Kernel: - return ( - self.variance * sklearn_kern.RBF(length_scale=self.length_scale) - ) - - -class Exponential(Covariance): - r""" - Exponential covariance function. - - The covariance function is - - .. math:: - K(x, x') = \sigma^2 \exp\left(-\frac{\|x - x'\|}{l}\right) - - where :math:`\sigma^2` is the variance and :math:`l` is the length scale. - - Heatmap plot of the covariance function: - - .. jupyter-execute:: - - from skfda.misc.covariances import Exponential - import matplotlib.pyplot as plt - - Exponential().heatmap(limits=(0, 1)) - plt.show() - - Example of Gaussian process trajectories using this covariance: - - .. jupyter-execute:: - - from skfda.misc.covariances import Exponential - from skfda.datasets import make_gaussian_process - import matplotlib.pyplot as plt - - gp = make_gaussian_process( - n_samples=10, cov=Exponential(), random_state=0) - gp.plot() - plt.show() - - Default representation in a Jupyter notebook: - - .. jupyter-execute:: - - from skfda.misc.covariances import Exponential - - Exponential() - - """ - - _latex_formula = ( - r"K(x, x') = \sigma^2 \exp\left(-\frac{||x - x'||}{l}" - r"\right)" - ) - - _parameters_str = [ - ("variance", r"\sigma^2"), - ("length_scale", "l"), - ] - - def __init__( - self, - *, - variance: float = 1, - length_scale: float = 1, - ) -> None: - self.variance = variance - self.length_scale = length_scale - - def __call__( - self, - x: InputAcceptable, - y: InputAcceptable | None = None, - ) -> NDArrayFloat: - x, y = self._param_check_and_transform(x, y) - - distance_x_y = PairwiseMetric(l2_distance)(x, y) - return self.variance * np.exp(-distance_x_y / (self.length_scale)) - - def to_sklearn(self) -> sklearn_kern.Kernel: - return ( - self.variance - * sklearn_kern.Matern(length_scale=self.length_scale, nu=0.5) - ) - - -class WhiteNoise(Covariance): - r""" - Gaussian covariance function. - - The covariance function is - - .. math:: - K(x, x')= \begin{cases} - \sigma^2, \quad x = x' \\ - 0, \quad x \neq x'\\ - \end{cases} - - where :math:`\sigma^2` is the variance. - - Heatmap plot of the covariance function: - - .. jupyter-execute:: - - from skfda.misc.covariances import WhiteNoise - import matplotlib.pyplot as plt - - WhiteNoise().heatmap(limits=(0, 1)) - plt.show() - - Example of Gaussian process trajectories using this covariance: - - .. jupyter-execute:: - - from skfda.misc.covariances import WhiteNoise - from skfda.datasets import make_gaussian_process - import matplotlib.pyplot as plt - - gp = make_gaussian_process( - n_samples=10, cov=WhiteNoise(), random_state=0) - gp.plot() - plt.show() - - Default representation in a Jupyter notebook: - - .. 
jupyter-execute:: - - from skfda.misc.covariances import WhiteNoise - - WhiteNoise() - - """ - - _latex_formula = ( - r"K(x, x')= \begin{cases} \sigma^2, \quad x = x' \\" - r"0, \quad x \neq x'\\ \end{cases}" - ) - - _parameters_str = [("variance", r"\sigma^2")] - - def __init__(self, *, variance: float = 1): - self.variance = variance - - def __call__( - self, - x: InputAcceptable, - y: InputAcceptable | None = None, - ) -> NDArrayFloat: - if isinstance(x, FData) or isinstance(y, FData): - raise ValueError('Not defined for FData objects.') - - x = _transform_to_2d(x) - return self.variance * np.eye(x.shape[0]) - - def to_sklearn(self) -> sklearn_kern.Kernel: - return sklearn_kern.WhiteKernel(noise_level=self.variance) - - -class Matern(Covariance): - r""" - Matérn covariance function. - - The covariance function is - - .. math:: - K(x, x') = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)} - \left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)^{\nu} - K_{\nu}\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right) - - where :math:`\sigma^2` is the variance, :math:`l` is the length scale - and :math:`\nu` controls the smoothness of the related Gaussian process. - The trajectories of a Gaussian process with Matérn covariance is - :math:`\lceil \nu \rceil - 1` times differentiable. - - - Heatmap plot of the covariance function: - - .. jupyter-execute:: - - from skfda.misc.covariances import Matern - import matplotlib.pyplot as plt - - Matern().heatmap(limits=(0, 1)) - plt.show() - - Example of Gaussian process trajectories using this covariance: - - .. jupyter-execute:: - - from skfda.misc.covariances import Matern - from skfda.datasets import make_gaussian_process - import matplotlib.pyplot as plt - - gp = make_gaussian_process( - n_samples=10, cov=Matern(), random_state=0) - gp.plot() - plt.show() - - Default representation in a Jupyter notebook: - - .. 
jupyter-execute:: - - from skfda.misc.covariances import Matern - - Matern() - - """ - _latex_formula = ( - r"K(x, x') = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)}" - r"\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)^{\nu}" - r"K_{\nu}\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)" - ) - - _parameters_str = [ - ("variance", r"\sigma^2"), - ("length_scale", "l"), - ("nu", r"\nu"), - ] - - def __init__( - self, - *, - variance: float = 1, - length_scale: float = 1, - nu: float = 1.5, - ) -> None: - self.variance = variance - self.length_scale = length_scale - self.nu = nu - - def __call__( - self, - x: InputAcceptable, - y: InputAcceptable | None = None, - ) -> NDArrayFloat: - x, y = self._param_check_and_transform(x, y) - - distance_x_y = PairwiseMetric(l2_distance)(x, y) - - p = self.nu - 0.5 - if p.is_integer(): - # Formula for half-integers - p = int(p) - body = np.sqrt(2 * p + 1) * distance_x_y / self.length_scale - exponential = np.exp(-body) - power_list = np.full(shape=(p,) + body.shape, fill_value=2 * body) - power_list = np.cumprod(power_list, axis=0) - power_list = np.concatenate( - (power_list[::-1], np.asarray([np.ones_like(body)])), - ) - power_list = np.moveaxis(power_list, 0, -1) - numerator = np.cumprod(np.arange(p, 0, -1)) - numerator = np.concatenate(([1], numerator)) - denom1 = np.cumprod(np.arange(2 * p, p, -1)) - denom1 = np.concatenate((denom1[::-1], [1])) - denom2 = np.cumprod(np.arange(1, p + 1)) - denom2 = np.concatenate(([1], denom2)) - - sum_terms = power_list * numerator / (denom1 * denom2) - return ( # type: ignore[no-any-return] - self.variance * exponential * np.sum(sum_terms, axis=-1) - ) - elif self.nu == np.inf: - return ( # type: ignore[no-any-return] - self.variance * np.exp( - -distance_x_y ** 2 / (2 * self.length_scale ** 2), - ) - ) - else: - # General formula - scaling = 2**(1 - self.nu) / gamma(self.nu) - body = np.sqrt(2 * self.nu) * distance_x_y / self.length_scale - power = body**self.nu - bessel = kv(self.nu, body) - - with np.errstate(invalid='ignore'): - eval_cov = self.variance * scaling * power * bessel - - # Values with nan are where the distance is 0 - return np.nan_to_num( # type: ignore[no-any-return] - eval_cov, - nan=self.variance, - ) - - def to_sklearn(self) -> sklearn_kern.Kernel: - return ( - self.variance - * sklearn_kern.Matern(length_scale=self.length_scale, nu=self.nu) - ) - - -class Empirical(Covariance): - r""" - Sample covariance function. - - The sample covariance function is defined as - . math:: - K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N\left(x_n(t) - - \bar{x}(t)\right) \left(x_n(s) - \bar{x}(s)\right) - - where :math:`x_n(t)` is the n-th sample and :math:`\bar{x}(t)` is the - mean of the samples. :math:`N` is the number of samples, - :math:`\text{correction}` is the degrees of freedom adjustment and is such - that :math:`N-\text{correction}` is the divisor used in the calculation of - the covariance function. 
- - """ - - _latex_formula = ( - r"K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N" - r"(x_n(t) - \bar{x}(t))(x_n(s) - \bar{x}(s))" - ) - _parameters_str = [ - ("data", "data"), - ] - - cov_fdata: FData - correction: int - - @abc.abstractmethod - def __init__(self, data: FData) -> None: - if data.dim_domain != 1 or data.dim_codomain != 1: - raise NotImplementedError( - "Covariance only implemented " - "for univariate functions", - ) - - def __call__( - self, - x: InputAcceptable, - y: InputAcceptable | None = None, - ) -> NDArrayFloat: - if isinstance(x, FData) or isinstance(y, FData): - raise ValueError('Not defined for FData objects.') - - """Evaluate the covariance function. - - Args: - x: First array of points of evaluation. - y: Second array of points of evaluation. - - Returns: - Covariance function evaluated at the grid formed by x and y. - """ - return self.cov_fdata([x, y], grid=True)[0, ..., 0] - - -class EmpiricalGrid(Empirical): - """Sample covariance function for FDataGrid.""" - - cov_fdata: FDataGrid - correction: int - - def __init__(self, data: FDataGrid, correction: int = 0) -> None: - super().__init__(data=data) - - self.correction = correction - self.cov_fdata = data.copy( - data_matrix=np.cov( - data.data_matrix[..., 0], - rowvar=False, - ddof=correction, - )[np.newaxis, ...], - grid_points=[ - data.grid_points[0], - data.grid_points[0], - ], - domain_range=[ - data.domain_range[0], - data.domain_range[0], - ], - argument_names=data.argument_names * 2, - sample_names=("covariance",), - ) - - -class EmpiricalBasis(Empirical): - """ - Sample covariance function for FDataBasis. - - In this case one may use the basis expression of the samples to - express the sample covariance function in the tensor product basis - of the original basis. 
- """ - - cov_fdata: FDataBasis - coeff_matrix: NDArrayFloat - correction: int - - def __init__(self, data: FDataBasis, correction: int = 0) -> None: - super().__init__(data=data) - - self.correction = correction - self.coeff_matrix = np.cov( - data.coefficients, - rowvar=False, - ddof=correction, - ) - self.cov_fdata = FDataBasis( - basis=TensorBasis([data.basis, data.basis]), - coefficients=self.coeff_matrix.flatten(), - argument_names=data.argument_names * 2, - sample_names=("covariance",), - ) +from __future__ import annotations + +import abc +from typing import Callable, Sequence, Tuple, Union + +import matplotlib.pyplot as plt +import numpy as np +import sklearn.gaussian_process.kernels as sklearn_kern +from matplotlib.figure import Figure +from scipy.special import gamma, kv + +from ..misc import inner_product_matrix +from ..misc.metrics import PairwiseMetric, LpNorm, l2_distance +from ..representation import FData, FDataBasis, FDataGrid +from ..representation.basis import TensorBasis +from ..typing._numpy import ArrayLike, NDArrayFloat + + +def _squared_norms(x: NDArrayFloat, y: NDArrayFloat) -> NDArrayFloat: + return ( # type: ignore[no-any-return] + (x[:, np.newaxis, :] - y[np.newaxis, :, :]) ** 2 + ).sum(2) + + +CovarianceLike = Union[ + float, + NDArrayFloat, + Callable[[ArrayLike, ArrayLike], NDArrayFloat], +] + +InputAcceptable = Union[ + np.ndarray, + FData, +] + + +def _transform_to_2d(t: ArrayLike) -> NDArrayFloat: + """Transform 1d arrays in column vectors.""" + t = np.asfarray(t) + + dim = len(t.shape) + assert dim <= 2 + + if dim < 2: + t = np.atleast_2d(t).T + + return t + + +def _execute_covariance( + covariance: CovarianceLike, + x: ArrayLike, + y: ArrayLike, +) -> NDArrayFloat: + """Execute a covariance function.""" + x = _transform_to_2d(x) + y = _transform_to_2d(y) + + if isinstance(covariance, (int, float)): + return np.array(covariance) + else: + if callable(covariance): + result = covariance(x, y) + elif isinstance(covariance, np.ndarray): + result = covariance + else: + # GPy kernel + result = covariance.K(x, y) + + assert result.shape[0] == len(x) + assert result.shape[1] == len(y) + return result + + +class Covariance(abc.ABC): + """Abstract class for covariance functions.""" + + _parameters_str: Sequence[Tuple[str, str]] + _latex_formula: str + + @abc.abstractmethod + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + pass + + def _param_check_and_transform( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> Tuple[np.ndarray | FData, np.ndarray | FData]: + # Param check + if y is None: + y = x + + if type(x) is not type(y): # noqa: WPS516 + raise ValueError( + 'Cannot operate objects x and y from different classes', + f'({type(x)}, {type(y)}).', + ) + + if isinstance(x, np.ndarray) and isinstance(y, np.ndarray): + x, y = np.array(x), np.array(y) + if len(x.shape) < 2: + x = np.atleast_2d(x) + if len(y.shape) < 2: + y = np.atleast_2d(y) + + return x, y + + def heatmap(self, limits: Tuple[float, float] = (-1, 1)) -> Figure: + """Return a heatmap plot of the covariance function.""" + from ..exploratory.visualization._utils import _create_figure + + x = np.linspace(*limits, 1000) + + cov_matrix = self(x, x) + + fig = _create_figure() + ax = fig.add_subplot(1, 1, 1) + ax.imshow( + cov_matrix, + extent=[limits[0], limits[1], limits[1], limits[0]], + ) + ax.set_title(f"Covariance function in [{limits[0]}, {limits[1]}]") + + return fig + + def _sample_trajectories_plot(self) -> Figure: + from 
..datasets import make_gaussian_process + + fd = make_gaussian_process( + start=-1, + n_samples=10, + cov=self, + random_state=0, + ) + fig = fd.plot() + fig.axes[0].set_title("Sample trajectories") + return fig + + def __repr__(self) -> str: + + params_str = ', '.join( + f'{n}={getattr(self, n)}' for n, _ in self._parameters_str + ) + + return ( + f"{self.__module__}.{type(self).__qualname__}(" + f"{params_str}" + f")" + ) + + def _latex_content(self) -> str: + params_str = ''.join( + fr'{l} &= {getattr(self, n)} \\' for n, l in self._parameters_str + ) + + return ( + fr"{self._latex_formula} \\" + r"\text{where:}" + r"\begin{aligned}" + fr"\qquad{params_str}" + r"\end{aligned}" + ) + + def _repr_latex_(self) -> str: + return fr"\(\displaystyle {self._latex_content()}\)" + + def _repr_html_(self) -> str: + from ..exploratory.visualization._utils import _figure_to_svg + + fig = self.heatmap() + heatmap = _figure_to_svg(fig) + plt.close(fig) + + fig = self._sample_trajectories_plot() + sample_trajectories = _figure_to_svg(fig) + plt.close(fig) + + row_style = '' + + def column_style(percent: float, margin_top: str = "0") -> str: + return ( + f'style="display: inline-block; ' + f'margin:0; ' + f'margin-top: {margin_top}; ' + f'width:{percent}%; ' + f'height:auto;' + f'vertical-align: middle"' + ) + + return fr""" +
+        <div {row_style}>
+            <div {column_style(100, margin_top='25px')}>
+                \[{self._latex_content()}\]
+            </div>
+        </div>
+        <div {row_style}>
+            <div {column_style(48)}>
+                {sample_trajectories}
+            </div>
+            <div {column_style(48)}>
+                {heatmap}
+            </div>
+        </div>
+ """ + + def to_sklearn(self) -> sklearn_kern.Kernel: + """Convert it to a sklearn kernel, if there is one.""" + raise NotImplementedError( + f"{type(self).__name__} covariance not " + f"implemented in scikit-learn", + ) + + +class Brownian(Covariance): + r""" + Brownian covariance function. + + The covariance function is + + .. math:: + K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + |x' - \mathcal{O}| + - |x - x'|}{2} + + where :math:`\sigma^2` is the variance at distance 1 from + :math:`\mathcal{O}` and :math:`\mathcal{O}` is the origin point. + If :math:`\mathcal{O} = 0` (the default) and we only + consider positive values, the formula can be simplified as + + .. math:: + K(x, y) = \sigma^2 \min(x, y). + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import Brownian + import matplotlib.pyplot as plt + + Brownian().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import Brownian + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=Brownian(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. jupyter-execute:: + + from skfda.misc.covariances import Brownian + + Brownian() + + """ + _latex_formula = ( + r"K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + " + r"|x' - \mathcal{O}| - |x - x'|}{2}" + ) + + _parameters_str = [ + ("variance", r"\sigma^2"), + ("origin", r"\mathcal{O}"), + ] + + def __init__(self, *, variance: float = 1, origin: float = 0) -> None: + self.variance = variance + self.origin = origin + + def __call__( + self, + x: InputAcceptable | FData, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + if isinstance(x, FData) or isinstance(y, FData): + raise ValueError('Not defined for FData objects.') + + x = _transform_to_2d(x) - self.origin + y = _transform_to_2d(y) - self.origin + + sum_norms = np.add.outer( + np.linalg.norm(x, axis=-1), + np.linalg.norm(y, axis=-1), + ) + norm_sub = np.linalg.norm( + x[:, np.newaxis, :] - y[np.newaxis, :, :], + axis=-1, + ) + + return ( # type: ignore[no-any-return] + self.variance * (sum_norms - norm_sub) / 2 + ) + + +class Linear(Covariance): + r""" + Linear covariance function. + + The covariance function is + + .. math:: + K(x, x') = \sigma^2 (x^T x' + c) + + where :math:`\sigma^2` is the scale of the variance and + :math:`c` is the intercept. + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import Linear + import matplotlib.pyplot as plt + + Linear().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import Linear + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=Linear(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. 
jupyter-execute:: + + from skfda.misc.covariances import Linear + + Linear() + + """ + + _latex_formula = r"K(x, x') = \sigma^2 (x^T x' + c)" + + _parameters_str = [ + ("variance", r"\sigma^2"), + ("intercept", "c"), + ] + + def __init__(self, *, variance: float = 1, intercept: float = 0) -> None: + self.variance = variance + self.intercept = intercept + + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + x, y = self._param_check_and_transform(x, y) + + x_y = inner_product_matrix(x, y) + return self.variance * (x_y + self.intercept) + + def to_sklearn(self) -> sklearn_kern.Kernel: + return ( + self.variance + * (sklearn_kern.DotProduct(0) + self.intercept) + ) + + +class Polynomial(Covariance): + r""" + Polynomial covariance function. + + The covariance function is + + .. math:: + K(x, x') = \sigma^2 (\alpha x^T x' + c)^d + + where :math:`\sigma^2` is the scale of the variance, + :math:`\alpha` is the slope, :math:`d` the degree of the + polynomial and :math:`c` is the intercept. + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import Polynomial + import matplotlib.pyplot as plt + + Polynomial().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import Polynomial + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=Polynomial(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. jupyter-execute:: + + from skfda.misc.covariances import Polynomial + + Polynomial() + + """ + + _latex_formula = r"K(x, x') = \sigma^2 (\alpha x^T x' + c)^d" + + _parameters_str = [ + ("variance", r"\sigma^2"), + ("intercept", "c"), + ("slope", r"\alpha"), + ("degree", "d"), + ] + + def __init__( + self, + *, + variance: float = 1, + intercept: float = 0, + slope: float = 1, + degree: float = 2, + ) -> None: + self.variance = variance + self.intercept = intercept + self.slope = slope + self.degree = degree + + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + x, y = self._param_check_and_transform(x, y) + + x_y = inner_product_matrix(x, y) + return ( + self.variance + * (self.slope * x_y + self.intercept) ** self.degree + ) + + def to_sklearn(self) -> sklearn_kern.Kernel: + return ( + self.variance + * (self.slope * sklearn_kern.DotProduct(0) + self.intercept) + ** self.degree + ) + + +class Gaussian(Covariance): + r""" + Gaussian covariance function. + + The covariance function is + + .. math:: + K(x, x') = \sigma^2 \exp\left(-\frac{||x - x'||^2}{2l^2}\right) + + where :math:`\sigma^2` is the variance and :math:`l` is the length scale. + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import Gaussian + import matplotlib.pyplot as plt + + Gaussian().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import Gaussian + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=Gaussian(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. 
jupyter-execute:: + + from skfda.misc.covariances import Gaussian + + Gaussian() + + """ + + _latex_formula = ( + r"K(x, x') = \sigma^2 \exp\left(-\frac{\|x - x'\|^2}{2l^2}" + r"\right)" + ) + + _parameters_str = [ + ("variance", r"\sigma^2"), + ("length_scale", "l"), + ] + + def __init__(self, *, variance: float = 1, length_scale: float = 1): + self.variance = variance + self.length_scale = length_scale + + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + x, y = self._param_check_and_transform(x, y) + + distance_x_y = PairwiseMetric(l2_distance)(x, y) + return self.variance * np.exp(-distance_x_y ** 2 / (2 * self.length_scale ** 2)) + + def to_sklearn(self) -> sklearn_kern.Kernel: + return ( + self.variance * sklearn_kern.RBF(length_scale=self.length_scale) + ) + + +class Exponential(Covariance): + r""" + Exponential covariance function. + + The covariance function is + + .. math:: + K(x, x') = \sigma^2 \exp\left(-\frac{\|x - x'\|}{l}\right) + + where :math:`\sigma^2` is the variance and :math:`l` is the length scale. + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import Exponential + import matplotlib.pyplot as plt + + Exponential().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import Exponential + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=Exponential(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. jupyter-execute:: + + from skfda.misc.covariances import Exponential + + Exponential() + + """ + + _latex_formula = ( + r"K(x, x') = \sigma^2 \exp\left(-\frac{||x - x'||}{l}" + r"\right)" + ) + + _parameters_str = [ + ("variance", r"\sigma^2"), + ("length_scale", "l"), + ] + + def __init__( + self, + *, + variance: float = 1, + length_scale: float = 1, + ) -> None: + self.variance = variance + self.length_scale = length_scale + + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + x, y = self._param_check_and_transform(x, y) + + distance_x_y = PairwiseMetric(l2_distance)(x, y) + return self.variance * np.exp(-distance_x_y / (self.length_scale)) + + def to_sklearn(self) -> sklearn_kern.Kernel: + return ( + self.variance + * sklearn_kern.Matern(length_scale=self.length_scale, nu=0.5) + ) + + +class WhiteNoise(Covariance): + r""" + Gaussian covariance function. + + The covariance function is + + .. math:: + K(x, x')= \begin{cases} + \sigma^2, \quad x = x' \\ + 0, \quad x \neq x'\\ + \end{cases} + + where :math:`\sigma^2` is the variance. + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import WhiteNoise + import matplotlib.pyplot as plt + + WhiteNoise().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import WhiteNoise + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=WhiteNoise(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. 
jupyter-execute:: + + from skfda.misc.covariances import WhiteNoise + + WhiteNoise() + + """ + + _latex_formula = ( + r"K(x, x')= \begin{cases} \sigma^2, \quad x = x' \\" + r"0, \quad x \neq x'\\ \end{cases}" + ) + + _parameters_str = [("variance", r"\sigma^2")] + + def __init__(self, *, variance: float = 1): + self.variance = variance + + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + if isinstance(x, FData) or isinstance(y, FData): + raise ValueError('Not defined for FData objects.') + + x = _transform_to_2d(x) + return self.variance * np.eye(x.shape[0]) + + def to_sklearn(self) -> sklearn_kern.Kernel: + return sklearn_kern.WhiteKernel(noise_level=self.variance) + + +class Matern(Covariance): + r""" + Matérn covariance function. + + The covariance function is + + .. math:: + K(x, x') = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)} + \left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)^{\nu} + K_{\nu}\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right) + + where :math:`\sigma^2` is the variance, :math:`l` is the length scale + and :math:`\nu` controls the smoothness of the related Gaussian process. + The trajectories of a Gaussian process with Matérn covariance is + :math:`\lceil \nu \rceil - 1` times differentiable. + + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import Matern + import matplotlib.pyplot as plt + + Matern().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import Matern + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=Matern(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. 
jupyter-execute:: + + from skfda.misc.covariances import Matern + + Matern() + + """ + _latex_formula = ( + r"K(x, x') = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)}" + r"\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)^{\nu}" + r"K_{\nu}\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)" + ) + + _parameters_str = [ + ("variance", r"\sigma^2"), + ("length_scale", "l"), + ("nu", r"\nu"), + ] + + def __init__( + self, + *, + variance: float = 1, + length_scale: float = 1, + nu: float = 1.5, + ) -> None: + self.variance = variance + self.length_scale = length_scale + self.nu = nu + + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + x, y = self._param_check_and_transform(x, y) + + distance_x_y = PairwiseMetric(l2_distance)(x, y) + + p = self.nu - 0.5 + if p.is_integer(): + # Formula for half-integers + p = int(p) + body = np.sqrt(2 * p + 1) * distance_x_y / self.length_scale + exponential = np.exp(-body) + power_list = np.full(shape=(p,) + body.shape, fill_value=2 * body) + power_list = np.cumprod(power_list, axis=0) + power_list = np.concatenate( + (power_list[::-1], np.asarray([np.ones_like(body)])), + ) + power_list = np.moveaxis(power_list, 0, -1) + numerator = np.cumprod(np.arange(p, 0, -1)) + numerator = np.concatenate(([1], numerator)) + denom1 = np.cumprod(np.arange(2 * p, p, -1)) + denom1 = np.concatenate((denom1[::-1], [1])) + denom2 = np.cumprod(np.arange(1, p + 1)) + denom2 = np.concatenate(([1], denom2)) + + sum_terms = power_list * numerator / (denom1 * denom2) + return ( # type: ignore[no-any-return] + self.variance * exponential * np.sum(sum_terms, axis=-1) + ) + elif self.nu == np.inf: + return ( # type: ignore[no-any-return] + self.variance * np.exp( + -distance_x_y ** 2 / (2 * self.length_scale ** 2), + ) + ) + else: + # General formula + scaling = 2**(1 - self.nu) / gamma(self.nu) + body = np.sqrt(2 * self.nu) * distance_x_y / self.length_scale + power = body**self.nu + bessel = kv(self.nu, body) + + with np.errstate(invalid='ignore'): + eval_cov = self.variance * scaling * power * bessel + + # Values with nan are where the distance is 0 + return np.nan_to_num( # type: ignore[no-any-return] + eval_cov, + nan=self.variance, + ) + + def to_sklearn(self) -> sklearn_kern.Kernel: + return ( + self.variance + * sklearn_kern.Matern(length_scale=self.length_scale, nu=self.nu) + ) + + +class Empirical(Covariance): + r""" + Sample covariance function. + + The sample covariance function is defined as + . math:: + K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N\left(x_n(t) - + \bar{x}(t)\right) \left(x_n(s) - \bar{x}(s)\right) + + where :math:`x_n(t)` is the n-th sample and :math:`\bar{x}(t)` is the + mean of the samples. :math:`N` is the number of samples, + :math:`\text{correction}` is the degrees of freedom adjustment and is such + that :math:`N-\text{correction}` is the divisor used in the calculation of + the covariance function. 
+ + """ + + _latex_formula = ( + r"K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N" + r"(x_n(t) - \bar{x}(t))(x_n(s) - \bar{x}(s))" + ) + _parameters_str = [ + ("data", "data"), + ] + + cov_fdata: FData + correction: int + + @abc.abstractmethod + def __init__(self, data: FData) -> None: + if data.dim_domain != 1 or data.dim_codomain != 1: + raise NotImplementedError( + "Covariance only implemented " + "for univariate functions", + ) + + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + if isinstance(x, FData) or isinstance(y, FData): + raise ValueError('Not defined for FData objects.') + + """Evaluate the covariance function. + + Args: + x: First array of points of evaluation. + y: Second array of points of evaluation. + + Returns: + Covariance function evaluated at the grid formed by x and y. + """ + return self.cov_fdata([x, y], grid=True)[0, ..., 0] + + +class EmpiricalGrid(Empirical): + """Sample covariance function for FDataGrid.""" + + cov_fdata: FDataGrid + correction: int + + def __init__(self, data: FDataGrid, correction: int = 0) -> None: + super().__init__(data=data) + + self.correction = correction + self.cov_fdata = data.copy( + data_matrix=np.cov( + data.data_matrix[..., 0], + rowvar=False, + ddof=correction, + )[np.newaxis, ...], + grid_points=[ + data.grid_points[0], + data.grid_points[0], + ], + domain_range=[ + data.domain_range[0], + data.domain_range[0], + ], + argument_names=data.argument_names * 2, + sample_names=("covariance",), + ) + + +class EmpiricalBasis(Empirical): + """ + Sample covariance function for FDataBasis. + + In this case one may use the basis expression of the samples to + express the sample covariance function in the tensor product basis + of the original basis. 
+ """ + + cov_fdata: FDataBasis + coeff_matrix: NDArrayFloat + correction: int + + def __init__(self, data: FDataBasis, correction: int = 0) -> None: + super().__init__(data=data) + + self.correction = correction + self.coeff_matrix = np.cov( + data.coefficients, + rowvar=False, + ddof=correction, + ) + self.cov_fdata = FDataBasis( + basis=TensorBasis([data.basis, data.basis]), + coefficients=self.coeff_matrix.flatten(), + argument_names=data.argument_names * 2, + sample_names=("covariance",), + ) From f31229dd36e2741d168b8510348372c8957b3e8d Mon Sep 17 00:00:00 2001 From: E105D104U125 Date: Mon, 25 Mar 2024 20:00:57 +0100 Subject: [PATCH 05/14] Replaced EOL to LF --- skfda/tests/test_covariances.py | 530 ++++++++++++++++---------------- 1 file changed, 265 insertions(+), 265 deletions(-) diff --git a/skfda/tests/test_covariances.py b/skfda/tests/test_covariances.py index dc5982ca0..598280342 100644 --- a/skfda/tests/test_covariances.py +++ b/skfda/tests/test_covariances.py @@ -1,265 +1,265 @@ -"""Tests for Covariance module.""" -from typing import Any, Tuple - -import numpy as np -import pytest -from sklearn.model_selection import ParameterGrid - -import skfda.misc.covariances as cov -from skfda import FDataBasis, FDataGrid -from skfda.datasets import fetch_phoneme -from skfda.representation.basis import MonomialBasis - - -def _test_compare_sklearn( - multivariate_data: Any, - cov: cov.Covariance, -) -> None: - cov_sklearn = cov.to_sklearn() - cov_matrix = cov(multivariate_data) - cov_sklearn_matrix = cov_sklearn(multivariate_data) - - np.testing.assert_array_almost_equal(cov_matrix, cov_sklearn_matrix) - -############################################################################## -# Fixtures -############################################################################## - - -@pytest.fixture -def fetch_phoneme_fixture() -> FDataGrid: - """Fixture for loading the phoneme dataset example.""" - fd, _ = fetch_phoneme(return_X_y=True) - return fd[:20] - - -@pytest.fixture( - params=[ - cov.Linear(), - cov.Polynomial(), - cov.Gaussian(), - cov.Exponential(), - cov.Matern(), - ], -) -def covariances_fixture(request: Any) -> Any: - """Fixture for getting a covariance kernel function.""" - return request.param - - -@pytest.fixture( - params=[ - cov.Brownian(), - cov.WhiteNoise(), - ], -) -def covariances_raise_fixture(request: Any) -> Any: - """Fixture for getting a covariance kernel that raises a ValueError.""" - return request.param - - -@pytest.fixture -def fdatabasis_data() -> Tuple[FDataBasis, FDataBasis]: - """Fixture for getting fdatabasis objects.""" - basis = MonomialBasis( - n_basis=2, - domain_range=(-2, 2), - ) - - fd1 = FDataBasis( - basis=basis, - coefficients=[ - [1, 0], - [1, 2], - ], - ) - - fd2 = FDataBasis( - basis=basis, - coefficients=[ - [0, 1], - ], - ) - - return fd1, fd2 - - -@pytest.fixture -def multivariate_data() -> np.array: - """Fixture for getting multivariate data.""" - return np.linspace(-1, 1, 1000)[:, np.newaxis] - - -@pytest.fixture( - params=[ - (cov.Linear, - { - "variance": [1, 2], - "intercept": [3, 4], - }, - ), - (cov.Polynomial, - { - "variance": [2], - "intercept": [0, 2], - "slope": [1, 2], - "degree": [1, 2, 3], - }, - ), - (cov.Exponential, - { - "variance": [1, 2], - "length_scale": [0.5, 1, 2], - }, - ), - (cov.Gaussian, - { - "variance": [1, 2], - "length_scale": [0.5, 1, 2], - }, - ), - (cov.Matern, - { - "variance": [2], - "length_scale": [0.5], - "nu": [0.5, 1, 1.5, 2.5, 3.5, np.inf], - }, - ), - ], -) -def covariance_and_params(request: Any) 
-> Any: - """Fixture to load the covariance functions.""" - return request.param - - -############################################################################## -# Tests -############################################################################## - - -def test_covariances( - fetch_phoneme_fixture: Any, - covariances_fixture: Any, -) -> None: - """Check that invalid parameters in fit raise exception.""" - fd = fetch_phoneme_fixture - cov_kernel = covariances_fixture - - # Also test that it does not fail - res1 = cov_kernel(fd, fd) - res2 = cov_kernel(fd) - - np.testing.assert_allclose( - res1, - res2, - atol=1e-7, - ) - - -def test_raises( - fetch_phoneme_fixture: Any, - covariances_raise_fixture: Any, -) -> None: - """Check that it raises a ValueError exception.""" - fd = fetch_phoneme_fixture - cov_kernel = covariances_raise_fixture - - np.testing.assert_raises( - ValueError, - cov_kernel, - fd, - ) - - -def test_fdatabasis_example_linear( - fdatabasis_data: Any, -) -> None: - """Check a precalculated example for Linear covariance kernel.""" - fd1, fd2 = fdatabasis_data - res1 = cov.Linear(variance=1 / 2, intercept=3)(fd1, fd2) - res2 = np.array([[3 / 2], [3 / 2 + 32 / 6]]) - np.testing.assert_allclose( - res1, - res2, - rtol=1e-6, - ) - - -def test_fdatabasis_example_polynomial( - fdatabasis_data: Any, -) -> None: - """Check a precalculated example for Polynomial covariance kernel.""" - fd1, fd2 = fdatabasis_data - res1 = cov.Polynomial( - variance=1 / 3, - slope=2, - intercept=1, - degree=2, - )(fd1, fd2) - res2 = np.array([[1 / 3], [67**2 / 3**3]]) - np.testing.assert_allclose( - res1, - res2, - rtol=1e-6, - ) - - -def test_fdatabasis_example_gaussian( - fdatabasis_data: Any, -) -> None: - """Check a precalculated example for Gaussian covariance kernel.""" - fd1, fd2 = fdatabasis_data - res1 = cov.Gaussian(variance=3, length_scale=2)(fd1, fd2) - res2 = np.array([ - [3 * np.exp(-7 / 6)], - [3 * np.exp(-7 / 6)], - ]) - np.testing.assert_allclose( - res1, - res2, - rtol=1e-6, - ) - - -def test_fdatabasis_example_exponential( - fdatabasis_data: Any, -) -> None: - """Check a precalculated example for Exponential covariance kernel.""" - fd1, fd2 = fdatabasis_data - res1 = cov.Exponential(variance=4, length_scale=5)(fd1, fd2) - res2 = np.array([ - [4 * np.exp(-np.sqrt(28 / 3) / 5)], - [4 * np.exp(-np.sqrt(28 / 3) / 5)], - ]) - np.testing.assert_allclose( - res1, - res2, - rtol=1e-6, - ) - - -def test_fdatabasis_example_matern( - fdatabasis_data: Any, -) -> None: - """Check a precalculated example for Matern covariance kernel.""" - fd1, fd2 = fdatabasis_data - res1 = cov.Matern(variance=2, length_scale=3, nu=2)(fd1, fd2) - res2 = np.array([ - [(2 / 3) ** 2 * (28 / 3) * 0.239775899566], - [(2 / 3) ** 2 * (28 / 3) * 0.239775899566], - ]) - np.testing.assert_allclose( - res1, - res2, - rtol=1e-6, - ) - - -def test_multivariate_covariance_kernel( - multivariate_data: Any, - covariance_and_params: Any, -) -> None: - """Test general covariance kernel against scikit-learn's kernel.""" - cov_kernel, param_dict = covariance_and_params - for input_params in list(ParameterGrid(param_dict)): - _test_compare_sklearn(multivariate_data, cov_kernel(**input_params)) +"""Tests for Covariance module.""" +from typing import Any, Tuple + +import numpy as np +import pytest +from sklearn.model_selection import ParameterGrid + +import skfda.misc.covariances as cov +from skfda import FDataBasis, FDataGrid +from skfda.datasets import fetch_weather # fetch_phoneme, +from skfda.representation.basis import 
MonomialBasis + + +def _test_compare_sklearn( + multivariate_data: Any, + cov: cov.Covariance, +) -> None: + cov_sklearn = cov.to_sklearn() + cov_matrix = cov(multivariate_data) + cov_sklearn_matrix = cov_sklearn(multivariate_data) + + np.testing.assert_array_almost_equal(cov_matrix, cov_sklearn_matrix) + +############################################################################## +# Fixtures +############################################################################## + + +@pytest.fixture +def fetch_functional_data() -> FDataGrid: + """Fixture for loading the phoneme dataset example.""" + fd, _ = fetch_weather(return_X_y=True) + return fd[:20] + + +@pytest.fixture( + params=[ + cov.Linear(), + cov.Polynomial(), + cov.Gaussian(), + cov.Exponential(), + cov.Matern(), + ], +) +def covariances_fixture(request: Any) -> Any: + """Fixture for getting a covariance kernel function.""" + return request.param + + +@pytest.fixture( + params=[ + cov.Brownian(), + cov.WhiteNoise(), + ], +) +def covariances_raise_fixture(request: Any) -> Any: + """Fixture for getting a covariance kernel that raises a ValueError.""" + return request.param + + +@pytest.fixture +def fdatabasis_data() -> Tuple[FDataBasis, FDataBasis]: + """Fixture for getting fdatabasis objects.""" + basis = MonomialBasis( + n_basis=2, + domain_range=(-2, 2), + ) + + fd1 = FDataBasis( + basis=basis, + coefficients=[ + [1, 0], + [1, 2], + ], + ) + + fd2 = FDataBasis( + basis=basis, + coefficients=[ + [0, 1], + ], + ) + + return fd1, fd2 + + +@pytest.fixture +def multivariate_data() -> np.array: + """Fixture for getting multivariate data.""" + return np.linspace(-1, 1, 1000)[:, np.newaxis] + + +@pytest.fixture( + params=[ + (cov.Linear, + { + "variance": [1, 2], + "intercept": [3, 4], + }, + ), + (cov.Polynomial, + { + "variance": [2], + "intercept": [0, 2], + "slope": [1, 2], + "degree": [1, 2, 3], + }, + ), + (cov.Exponential, + { + "variance": [1, 2], + "length_scale": [0.5, 1, 2], + }, + ), + (cov.Gaussian, + { + "variance": [1, 2], + "length_scale": [0.5, 1, 2], + }, + ), + (cov.Matern, + { + "variance": [2], + "length_scale": [0.5], + "nu": [0.5, 1, 1.5, 2.5, 3.5, np.inf], + }, + ), + ], +) +def covariance_and_params(request: Any) -> Any: + """Fixture to load the covariance functions.""" + return request.param + + +############################################################################## +# Tests +############################################################################## + + +def test_covariances( + fetch_phoneme_fixture: FDataGrid, + covariances_fixture: cov.Covariance, +) -> None: + """Check that invalid parameters in fit raise exception.""" + fd = fetch_phoneme_fixture + cov_kernel = covariances_fixture + + # Also test that it does not fail + res1 = cov_kernel(fd, fd) + res2 = cov_kernel(fd) + + np.testing.assert_allclose( + res1, + res2, + atol=1e-7, + ) + + +def test_raises( + fetch_phoneme_fixture: FDataGrid, + covariances_raise_fixture: Any, +) -> None: + """Check that it raises a ValueError exception.""" + fd = fetch_phoneme_fixture + cov_kernel = covariances_raise_fixture + + np.testing.assert_raises( + ValueError, + cov_kernel, + fd, + ) + + +def test_fdatabasis_example_linear( + fdatabasis_data: Tuple[FDataBasis, FDataBasis], +) -> None: + """Check a precalculated example for Linear covariance kernel.""" + fd1, fd2 = fdatabasis_data + res1 = cov.Linear(variance=1 / 2, intercept=3)(fd1, fd2) + res2 = np.array([[3 / 2], [3 / 2 + 32 / 6]]) + np.testing.assert_allclose( + res1, + res2, + rtol=1e-6, + ) + + +def 
test_fdatabasis_example_polynomial( + fdatabasis_data: Tuple[FDataBasis, FDataBasis], +) -> None: + """Check a precalculated example for Polynomial covariance kernel.""" + fd1, fd2 = fdatabasis_data + res1 = cov.Polynomial( + variance=1 / 3, + slope=2, + intercept=1, + degree=2, + )(fd1, fd2) + res2 = np.array([[1 / 3], [67**2 / 3**3]]) + np.testing.assert_allclose( + res1, + res2, + rtol=1e-6, + ) + + +def test_fdatabasis_example_gaussian( + fdatabasis_data: Tuple[FDataBasis, FDataBasis], +) -> None: + """Check a precalculated example for Gaussian covariance kernel.""" + fd1, fd2 = fdatabasis_data + res1 = cov.Gaussian(variance=3, length_scale=2)(fd1, fd2) + res2 = np.array([ + [3 * np.exp(-7 / 6)], + [3 * np.exp(-7 / 6)], + ]) + np.testing.assert_allclose( + res1, + res2, + rtol=1e-6, + ) + + +def test_fdatabasis_example_exponential( + fdatabasis_data: Tuple[FDataBasis, FDataBasis], +) -> None: + """Check a precalculated example for Exponential covariance kernel.""" + fd1, fd2 = fdatabasis_data + res1 = cov.Exponential(variance=4, length_scale=5)(fd1, fd2) + res2 = np.array([ + [4 * np.exp(-np.sqrt(28 / 3) / 5)], + [4 * np.exp(-np.sqrt(28 / 3) / 5)], + ]) + np.testing.assert_allclose( + res1, + res2, + rtol=1e-6, + ) + + +def test_fdatabasis_example_matern( + fdatabasis_data: Tuple[FDataBasis, FDataBasis], +) -> None: + """Check a precalculated example for Matern covariance kernel.""" + fd1, fd2 = fdatabasis_data + res1 = cov.Matern(variance=2, length_scale=3, nu=2)(fd1, fd2) + res2 = np.array([ + [(2 / 3) ** 2 * (28 / 3) * 0.239775899566], + [(2 / 3) ** 2 * (28 / 3) * 0.239775899566], + ]) + np.testing.assert_allclose( + res1, + res2, + rtol=1e-6, + ) + + +def test_multivariate_covariance_kernel( + multivariate_data: np.array, + covariance_and_params: Any, +) -> None: + """Test general covariance kernel against scikit-learn's kernel.""" + cov_kernel, param_dict = covariance_and_params + for input_params in list(ParameterGrid(param_dict)): + _test_compare_sklearn(multivariate_data, cov_kernel(**input_params)) From 2c95ed16d39bd1e516f4796e5fa812e6d305b146 Mon Sep 17 00:00:00 2001 From: E105D104U125 Date: Tue, 26 Mar 2024 11:08:26 +0100 Subject: [PATCH 06/14] Changed import to avoid circular dependencies. 
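
Background for the one-line import change below: `skfda.misc.covariances`
is itself imported while `skfda.misc.__init__` is still running, so
resolving the name through the package (`from ..misc import
inner_product_matrix`) can hit a partially initialized module. Importing
from the submodule that actually defines the symbol avoids re-entering the
package `__init__`. A minimal sketch of the pattern (names illustrative,
not the exact skfda layout):

    # Cycle-prone: goes through the package __init__, which may still be
    # in the middle of importing this very module.
    # from ..misc import inner_product_matrix

    # Safe: binds directly to the defining module.
    from ..misc._math import inner_product_matrix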
---
 skfda/misc/covariances.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/skfda/misc/covariances.py b/skfda/misc/covariances.py
index bff439734..4b25c439a 100644
--- a/skfda/misc/covariances.py
+++ b/skfda/misc/covariances.py
@@ -9,8 +9,8 @@
 from matplotlib.figure import Figure
 from scipy.special import gamma, kv
 
-from ..misc import inner_product_matrix
-from ..misc.metrics import PairwiseMetric, LpNorm, l2_distance
+from ..misc._math import inner_product_matrix
+from ..misc.metrics import PairwiseMetric, l2_distance
 from ..representation import FData, FDataBasis, FDataGrid
 from ..representation.basis import TensorBasis
 from ..typing._numpy import ArrayLike, NDArrayFloat
@@ -38,7 +38,7 @@ def _transform_to_2d(t: ArrayLike) -> NDArrayFloat:
     """Transform 1d arrays in column vectors."""
     t = np.asfarray(t)
 
-    dim = t.ndim
+    dim = len(t.shape)
     assert dim <= 2
 
     if dim < 2:

From 2f993453b65143443ece22edf92aa0720dfdb2f4 Mon Sep 17 00:00:00 2001
From: E105D104U125
Date: Tue, 26 Mar 2024 11:28:05 +0100
Subject: [PATCH 07/14] Changed data fetching function name

---
 skfda/tests/test_covariances.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/skfda/tests/test_covariances.py b/skfda/tests/test_covariances.py
index 598280342..c28019afc 100644
--- a/skfda/tests/test_covariances.py
+++ b/skfda/tests/test_covariances.py
@@ -7,7 +7,7 @@
 
 import skfda.misc.covariances as cov
 from skfda import FDataBasis, FDataGrid
-from skfda.datasets import fetch_weather  # fetch_phoneme,
+from skfda.datasets import fetch_weather
 from skfda.representation.basis import MonomialBasis
 
 
@@ -138,11 +138,11 @@ def covariance_and_params(request: Any) -> Any:
 
 
 def test_covariances(
-    fetch_phoneme_fixture: FDataGrid,
+    fetch_functional_data: FDataGrid,
     covariances_fixture: cov.Covariance,
 ) -> None:
     """Check that invalid parameters in fit raise exception."""
-    fd = fetch_phoneme_fixture
+    fd = fetch_functional_data
     cov_kernel = covariances_fixture
 
     # Also test that it does not fail
@@ -157,11 +157,11 @@ def test_covariances(
 
 
 def test_raises(
-    fetch_phoneme_fixture: FDataGrid,
+    fetch_functional_data: FDataGrid,
     covariances_raise_fixture: Any,
 ) -> None:
     """Check that it raises a ValueError exception."""
-    fd = fetch_phoneme_fixture
+    fd = fetch_functional_data
     cov_kernel = covariances_raise_fixture
 
     np.testing.assert_raises(

From 81f6e585b991dbbcc3dab74122cab974efa0d886 Mon Sep 17 00:00:00 2001
From: E105D104U125
Date: Thu, 4 Jul 2024 12:04:46 +0200
Subject: [PATCH 08/14] Covariance_kernels pull request changes incorporated.
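
The functional change running through this patch is that every kernel now
accepts either multivariate arrays or functional data through the same
entry point. A rough sketch of the shared input check, simplified from the
`_param_check_and_transform` method edited below (not the verbatim code):

    import numpy as np
    from skfda.representation import FData

    def check_inputs(x, y=None):
        # Mixed array/FData pairs are rejected; pure array inputs are
        # promoted to 2d so that downstream code sees column data.
        y = x if y is None else y
        if isinstance(x, FData) != isinstance(y, FData):
            raise ValueError(f'Invalid inputs ({type(x)}, {type(y)}).')
        if not isinstance(x, FData):
            x, y = np.atleast_2d(x), np.atleast_2d(y)
        return x, y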
--- skfda/misc/covariances.py | 123 ++++++++++++++++++-------------- skfda/tests/test_covariances.py | 16 +++-- 2 files changed, 81 insertions(+), 58 deletions(-) diff --git a/skfda/misc/covariances.py b/skfda/misc/covariances.py index 4b25c439a..d5d7a1abd 100644 --- a/skfda/misc/covariances.py +++ b/skfda/misc/covariances.py @@ -1,12 +1,14 @@ +"""Covariances module.""" from __future__ import annotations import abc -from typing import Callable, Sequence, Tuple, Union +from typing import Any, Callable, Sequence import matplotlib.pyplot as plt import numpy as np import sklearn.gaussian_process.kernels as sklearn_kern from matplotlib.figure import Figure +from numpy.typing import NDArray from scipy.special import gamma, kv from ..misc._math import inner_product_matrix @@ -22,23 +24,20 @@ def _squared_norms(x: NDArrayFloat, y: NDArrayFloat) -> NDArrayFloat: ).sum(2) -CovarianceLike = Union[ - float, - NDArrayFloat, - Callable[[ArrayLike, ArrayLike], NDArrayFloat], -] +CovarianceLike = ( + float + | NDArrayFloat + | Callable[[ArrayLike, ArrayLike], NDArrayFloat] +) -InputAcceptable = Union[ - np.ndarray, - FData, -] +Input = NDArray[Any] | FData def _transform_to_2d(t: ArrayLike) -> NDArrayFloat: """Transform 1d arrays in column vectors.""" t = np.asfarray(t) - dim = len(t.shape) + dim = t.ndim assert dim <= 2 if dim < 2: @@ -58,39 +57,40 @@ def _execute_covariance( if isinstance(covariance, (int, float)): return np.array(covariance) + + if callable(covariance): + result = covariance(x, y) + elif isinstance(covariance, np.ndarray): + result = covariance else: - if callable(covariance): - result = covariance(x, y) - elif isinstance(covariance, np.ndarray): - result = covariance - else: - # GPy kernel - result = covariance.K(x, y) + # GPy kernel + result = covariance.K(x, y) - assert result.shape[0] == len(x) - assert result.shape[1] == len(y) - return result + assert result.shape[0] == len(x) + assert result.shape[1] == len(y) + return result class Covariance(abc.ABC): """Abstract class for covariance functions.""" - _parameters_str: Sequence[Tuple[str, str]] + _parameters_str: Sequence[tuple[str, str]] _latex_formula: str @abc.abstractmethod def __call__( self, - x: InputAcceptable, - y: InputAcceptable | None = None, + x: Input, + y: Input | None = None, ) -> NDArrayFloat: + """Compute covariance function on input data.""" pass def _param_check_and_transform( self, - x: InputAcceptable, - y: InputAcceptable | None = None, - ) -> Tuple[np.ndarray | FData, np.ndarray | FData]: + x: Input, + y: Input | None = None, + ) -> tuple[Input, Input]: # Param check if y is None: y = x @@ -101,8 +101,7 @@ def _param_check_and_transform( f'({type(x)}, {type(y)}).', ) - if isinstance(x, np.ndarray) and isinstance(y, np.ndarray): - x, y = np.array(x), np.array(y) + if not isinstance(x, FData) and not isinstance(y, FData): if len(x.shape) < 2: x = np.atleast_2d(x) if len(y.shape) < 2: @@ -110,7 +109,7 @@ def _param_check_and_transform( return x, y - def heatmap(self, limits: Tuple[float, float] = (-1, 1)) -> Figure: + def heatmap(self, limits: tuple[float, float] = (-1, 1)) -> Figure: """Return a heatmap plot of the covariance function.""" from ..exploratory.visualization._utils import _create_figure @@ -266,6 +265,7 @@ class Brownian(Covariance): Brownian() """ + _latex_formula = ( r"K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + " r"|x' - \mathcal{O}| - |x - x'|}{2}" @@ -282,14 +282,21 @@ def __init__(self, *, variance: float = 1, origin: float = 0) -> None: def __call__( self, - x: InputAcceptable | FData, - 
y: InputAcceptable | None = None, + x: NDArray[Any], + y: NDArray[Any] | None = None, ) -> NDArrayFloat: + """Compute Brownian covariance function on input data.""" if isinstance(x, FData) or isinstance(y, FData): - raise ValueError('Not defined for FData objects.') + raise ValueError( + 'Brownian covariance not defined for FData objects.', + ) + + xx: NDArray[Any] + yy: NDArray[Any] + xx, yy = self._param_check_and_transform(x, y) - x = _transform_to_2d(x) - self.origin - y = _transform_to_2d(y) - self.origin + xx = xx - self.origin + yy = yy - self.origin sum_norms = np.add.outer( np.linalg.norm(x, axis=-1), @@ -363,9 +370,10 @@ def __init__(self, *, variance: float = 1, intercept: float = 0) -> None: def __call__( self, - x: InputAcceptable, - y: InputAcceptable | None = None, + x: Input, + y: Input | None = None, ) -> NDArrayFloat: + """Compute linear covariance function on input data.""" x, y = self._param_check_and_transform(x, y) x_y = inner_product_matrix(x, y) @@ -445,12 +453,13 @@ def __init__( self.intercept = intercept self.slope = slope self.degree = degree - + def __call__( self, - x: InputAcceptable, - y: InputAcceptable | None = None, + x: Input, + y: Input | None = None, ) -> NDArrayFloat: + """Compute polynomial covariance function on input data.""" x, y = self._param_check_and_transform(x, y) x_y = inner_product_matrix(x, y) @@ -527,13 +536,16 @@ def __init__(self, *, variance: float = 1, length_scale: float = 1): def __call__( self, - x: InputAcceptable, - y: InputAcceptable | None = None, + x: Input, + y: Input | None = None, ) -> NDArrayFloat: + """Compute Gaussian covariance function on input data.""" x, y = self._param_check_and_transform(x, y) distance_x_y = PairwiseMetric(l2_distance)(x, y) - return self.variance * np.exp(-distance_x_y ** 2 / (2 * self.length_scale ** 2)) + return self.variance * np.exp( # type: ignore[no-any-return] + -distance_x_y ** 2 / (2 * self.length_scale ** 2), + ) def to_sklearn(self) -> sklearn_kern.Kernel: return ( @@ -606,9 +618,10 @@ def __init__( def __call__( self, - x: InputAcceptable, - y: InputAcceptable | None = None, + x: Input, + y: Input | None = None, ) -> NDArrayFloat: + """Compute exponential covariance function on input data.""" x, y = self._param_check_and_transform(x, y) distance_x_y = PairwiseMetric(l2_distance)(x, y) @@ -680,9 +693,10 @@ def __init__(self, *, variance: float = 1): def __call__( self, - x: InputAcceptable, - y: InputAcceptable | None = None, + x: Input, + y: Input | None = None, ) -> NDArrayFloat: + """Compute white noise covariance function on input data.""" if isinstance(x, FData) or isinstance(y, FData): raise ValueError('Not defined for FData objects.') @@ -767,9 +781,10 @@ def __init__( def __call__( self, - x: InputAcceptable, - y: InputAcceptable | None = None, + x: Input, + y: Input | None = None, ) -> NDArrayFloat: + """Compute Matern covariance function on input data.""" x, y = self._param_check_and_transform(x, y) distance_x_y = PairwiseMetric(l2_distance)(x, y) @@ -864,12 +879,9 @@ def __init__(self, data: FData) -> None: def __call__( self, - x: InputAcceptable, - y: InputAcceptable | None = None, + x: Input, + y: Input | None = None, ) -> NDArrayFloat: - if isinstance(x, FData) or isinstance(y, FData): - raise ValueError('Not defined for FData objects.') - """Evaluate the covariance function. Args: @@ -879,6 +891,9 @@ def __call__( Returns: Covariance function evaluated at the grid formed by x and y. 
""" + if isinstance(x, FData) or isinstance(y, FData): + raise ValueError('Not defined for FData objects.') + return self.cov_fdata([x, y], grid=True)[0, ..., 0] diff --git a/skfda/tests/test_covariances.py b/skfda/tests/test_covariances.py index c28019afc..ebf78f20d 100644 --- a/skfda/tests/test_covariances.py +++ b/skfda/tests/test_covariances.py @@ -60,7 +60,11 @@ def covariances_raise_fixture(request: Any) -> Any: @pytest.fixture def fdatabasis_data() -> Tuple[FDataBasis, FDataBasis]: - """Fixture for getting fdatabasis objects.""" + """Fixture for getting fdatabasis objects. + + The dataset is used to test manual calculations of the covariance functions + against the implementation. + """ basis = MonomialBasis( n_basis=2, domain_range=(-2, 2), @@ -141,7 +145,7 @@ def test_covariances( fetch_functional_data: FDataGrid, covariances_fixture: cov.Covariance, ) -> None: - """Check that invalid parameters in fit raise exception.""" + """Check that parameter conversion is done correctly.""" fd = fetch_functional_data cov_kernel = covariances_fixture @@ -160,11 +164,15 @@ def test_raises( fetch_functional_data: FDataGrid, covariances_raise_fixture: Any, ) -> None: - """Check that it raises a ValueError exception.""" + """Check raises ValueError. + + Check that non-functional kernels raise a ValueError exception + with functional data. + """ fd = fetch_functional_data cov_kernel = covariances_raise_fixture - np.testing.assert_raises( + pytest.raises( ValueError, cov_kernel, fd, From b18326967aa50036cf3082dc76e937cb84ac1c70 Mon Sep 17 00:00:00 2001 From: E105D104U125 Date: Thu, 4 Jul 2024 12:32:12 +0200 Subject: [PATCH 09/14] Covariance_kernels pullrequest changes incorporated. --- skfda/misc/covariances.py | 58 +++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/skfda/misc/covariances.py b/skfda/misc/covariances.py index d5d7a1abd..a36142afb 100644 --- a/skfda/misc/covariances.py +++ b/skfda/misc/covariances.py @@ -181,7 +181,10 @@ def _repr_html_(self) -> str: row_style = '' - def column_style(percent: float, margin_top: str = "0") -> str: + def column_style( # noqa: WPS430 + percent: float, + margin_top: str = "0", + ) -> str: return ( f'style="display: inline-block; ' f'margin:0; ' @@ -205,7 +208,7 @@ def column_style(percent: float, margin_top: str = "0") -> str: {heatmap} - """ + """ # noqa: WPS432 def to_sklearn(self) -> sklearn_kern.Kernel: """Convert it to a sklearn kernel, if there is one.""" @@ -291,12 +294,11 @@ def __call__( 'Brownian covariance not defined for FData objects.', ) - xx: NDArray[Any] - yy: NDArray[Any] - xx, yy = self._param_check_and_transform(x, y) + x = _transform_to_2d(x) + y = _transform_to_2d(y) - xx = xx - self.origin - yy = yy - self.origin + x = x - self.origin + y = y - self.origin sum_norms = np.add.outer( np.linalg.norm(x, axis=-1), @@ -380,6 +382,7 @@ def __call__( return self.variance * (x_y + self.intercept) def to_sklearn(self) -> sklearn_kern.Kernel: + """Obtain corresponding scikit-learn kernel type.""" return ( self.variance * (sklearn_kern.DotProduct(0) + self.intercept) @@ -469,6 +472,7 @@ def __call__( ) def to_sklearn(self) -> sklearn_kern.Kernel: + """Obtain corresponding scikit-learn kernel type.""" return ( self.variance * (self.slope * sklearn_kern.DotProduct(0) + self.intercept) @@ -548,6 +552,7 @@ def __call__( ) def to_sklearn(self) -> sklearn_kern.Kernel: + """Obtain corresponding scikit-learn kernel type.""" return ( self.variance * sklearn_kern.RBF(length_scale=self.length_scale) ) @@ 
-625,9 +630,13 @@ def __call__( x, y = self._param_check_and_transform(x, y) distance_x_y = PairwiseMetric(l2_distance)(x, y) - return self.variance * np.exp(-distance_x_y / (self.length_scale)) + return self.variance * np.exp( # type: ignore[no-any-return] + -distance_x_y + / (self.length_scale), + ) def to_sklearn(self) -> sklearn_kern.Kernel: + """Obtain corresponding scikit-learn kernel type.""" return ( self.variance * sklearn_kern.Matern(length_scale=self.length_scale, nu=0.5) @@ -704,6 +713,7 @@ def __call__( return self.variance * np.eye(x.shape[0]) def to_sklearn(self) -> sklearn_kern.Kernel: + """Obtain corresponding scikit-learn kernel type.""" return sklearn_kern.WhiteKernel(noise_level=self.variance) @@ -756,6 +766,7 @@ class Matern(Covariance): Matern() """ + _latex_formula = ( r"K(x, x') = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)}" r"\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)^{\nu}" @@ -818,23 +829,24 @@ def __call__( -distance_x_y ** 2 / (2 * self.length_scale ** 2), ) ) - else: - # General formula - scaling = 2**(1 - self.nu) / gamma(self.nu) - body = np.sqrt(2 * self.nu) * distance_x_y / self.length_scale - power = body**self.nu - bessel = kv(self.nu, body) - - with np.errstate(invalid='ignore'): - eval_cov = self.variance * scaling * power * bessel - - # Values with nan are where the distance is 0 - return np.nan_to_num( # type: ignore[no-any-return] - eval_cov, - nan=self.variance, - ) + + # General formula + scaling = 2**(1 - self.nu) / gamma(self.nu) + body = np.sqrt(2 * self.nu) * distance_x_y / self.length_scale + power = body**self.nu + bessel = kv(self.nu, body) + + with np.errstate(invalid='ignore'): + eval_cov = self.variance * scaling * power * bessel + + # Values with nan are where the distance is 0 + return np.nan_to_num( # type: ignore[no-any-return] + eval_cov, + nan=self.variance, + ) def to_sklearn(self) -> sklearn_kern.Kernel: + """Obtain corresponding scikit-learn kernel type.""" return ( self.variance * sklearn_kern.Matern(length_scale=self.length_scale, nu=self.nu) From fb79ed1bc4fbfc7c3b9fd2b75f41dc49e62dccd1 Mon Sep 17 00:00:00 2001 From: E105D104U125 Date: Mon, 8 Jul 2024 11:33:44 +0200 Subject: [PATCH 10/14] Merge develop into covariance_kernels --- skfda/misc/covariances.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/misc/covariances.py b/skfda/misc/covariances.py index a36142afb..c130fc454 100644 --- a/skfda/misc/covariances.py +++ b/skfda/misc/covariances.py @@ -730,7 +730,7 @@ class Matern(Covariance): where :math:`\sigma^2` is the variance, :math:`l` is the length scale and :math:`\nu` controls the smoothness of the related Gaussian process. - The trajectories of a Gaussian process with Matérn covariance is + The trajectories of a Gaussian process with Matérn covariance is :math:`\lceil \nu \rceil - 1` times differentiable. From 4e9db76673313716bfb8f9178c2d3daebee6d902 Mon Sep 17 00:00:00 2001 From: E105D104U125 Date: Mon, 8 Jul 2024 12:22:42 +0200 Subject: [PATCH 11/14] Refactored covariances test module and corrected style problems. 
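
The five per-kernel example tests are folded below into a single
parametrized test plus fixtures. For reference, sklearn's ParameterGrid
(already used by test_multivariate_covariance_kernel) expands each
parameter dictionary into the full cartesian product of settings, so one
entry per kernel still exercises every combination:

    from sklearn.model_selection import ParameterGrid

    grid = ParameterGrid({'variance': [1, 2], 'length_scale': [0.5, 1, 2]})
    for params in grid:
        print(params)  # e.g. {'length_scale': 0.5, 'variance': 1}; 6 total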
--- skfda/tests/test_covariances.py | 252 ++++++++++++++------------------ 1 file changed, 110 insertions(+), 142 deletions(-) diff --git a/skfda/tests/test_covariances.py b/skfda/tests/test_covariances.py index ebf78f20d..1e8550564 100644 --- a/skfda/tests/test_covariances.py +++ b/skfda/tests/test_covariances.py @@ -1,5 +1,5 @@ """Tests for Covariance module.""" -from typing import Any, Tuple +from typing import Any import numpy as np import pytest @@ -21,14 +21,27 @@ def _test_compare_sklearn( np.testing.assert_array_almost_equal(cov_matrix, cov_sklearn_matrix) +############################################################################### +# Example datasets for which to calculate the evaluation of the kernel by hand +# to compare it against the results yielded by the implementation. +############################################################################### + + +basis = MonomialBasis(n_basis=2, domain_range=(-2, 2)) + +fd = [ + FDataBasis(basis=basis, coefficients=[[1, 0], [1, 2]]), + FDataBasis(basis=basis, coefficients=[[0, 1]]), +] + ############################################################################## # Fixtures ############################################################################## @pytest.fixture -def fetch_functional_data() -> FDataGrid: - """Fixture for loading the phoneme dataset example.""" +def fetch_weather_subset() -> FDataGrid: + """Fixture for loading the canadian weather dataset example.""" fd, _ = fetch_weather(return_X_y=True) return fd[:20] @@ -58,34 +71,46 @@ def covariances_raise_fixture(request: Any) -> Any: return request.param -@pytest.fixture -def fdatabasis_data() -> Tuple[FDataBasis, FDataBasis]: +@pytest.fixture( + params=[ + [ + cov.Linear(variance=1 / 2, intercept=3), + np.array([[3 / 2], [3 / 2 + 32 / 6]]), + ], + [ + cov.Polynomial(variance=1 / 3, slope=2, intercept=1, degree=2), + np.array([[1 / 3], [67**2 / 3**3]]), + ], + [ + cov.Gaussian(variance=3, length_scale=2), + np.array([[3 * np.exp(-7 / 6)], [3 * np.exp(-7 / 6)]]), + ], + [ + cov.Exponential(variance=4, length_scale=5), + np.array([ + [4 * np.exp(-np.sqrt(28 / 3) / 5)], + [4 * np.exp(-np.sqrt(28 / 3) / 5)], + ]), + ], + [ + cov.Matern(variance=2, length_scale=3, nu=2), + np.array([ + [(2 / 3) ** 2 * (28 / 3) * 0.239775899566], + [(2 / 3) ** 2 * (28 / 3) * 0.239775899566], + ]), + ], + ], +) +def precalc_example_data( + request: Any, +) -> list[FDataBasis, FDataBasis, cov.Covariance, np.array]: """Fixture for getting fdatabasis objects. The dataset is used to test manual calculations of the covariance functions against the implementation. 
""" - basis = MonomialBasis( - n_basis=2, - domain_range=(-2, 2), - ) - - fd1 = FDataBasis( - basis=basis, - coefficients=[ - [1, 0], - [1, 2], - ], - ) - - fd2 = FDataBasis( - basis=basis, - coefficients=[ - [0, 1], - ], - ) - - return fd1, fd2 + # First fd, Second fd, kernel used, result + return *fd, *request.param @pytest.fixture @@ -96,39 +121,44 @@ def multivariate_data() -> np.array: @pytest.fixture( params=[ - (cov.Linear, - { - "variance": [1, 2], - "intercept": [3, 4], - }, - ), - (cov.Polynomial, - { - "variance": [2], - "intercept": [0, 2], - "slope": [1, 2], - "degree": [1, 2, 3], - }, - ), - (cov.Exponential, - { - "variance": [1, 2], - "length_scale": [0.5, 1, 2], - }, - ), - (cov.Gaussian, - { - "variance": [1, 2], - "length_scale": [0.5, 1, 2], - }, - ), - (cov.Matern, - { - "variance": [2], - "length_scale": [0.5], - "nu": [0.5, 1, 1.5, 2.5, 3.5, np.inf], - }, - ), + [ + cov.Linear, + { + "variance": [1, 2], + "intercept": [3, 4], + }, + ], + [ + cov.Polynomial, + { + "variance": [2], + "intercept": [0, 2], + "slope": [1, 2], + "degree": [1, 2, 3], + }, + ], + [ + cov.Exponential, + { + "variance": [1, 2], + "length_scale": [0.5, 1, 2], + }, + ], + [ + cov.Gaussian, + { + "variance": [1, 2], + "length_scale": [0.5, 1, 2], + }, + ], + [ + cov.Matern, + { + "variance": [2], + "length_scale": [0.5], + "nu": [0.5, 1, 1.5, 2.5, 3.5, np.inf], + }, + ], ], ) def covariance_and_params(request: Any) -> Any: @@ -142,11 +172,11 @@ def covariance_and_params(request: Any) -> Any: def test_covariances( - fetch_functional_data: FDataGrid, + fetch_weather_subset: FDataGrid, covariances_fixture: cov.Covariance, ) -> None: """Check that parameter conversion is done correctly.""" - fd = fetch_functional_data + fd = fetch_weather_subset cov_kernel = covariances_fixture # Also test that it does not fail @@ -161,7 +191,7 @@ def test_covariances( def test_raises( - fetch_functional_data: FDataGrid, + fetch_weather_subset: FDataGrid, covariances_raise_fixture: Any, ) -> None: """Check raises ValueError. @@ -169,7 +199,7 @@ def test_raises( Check that non-functional kernels raise a ValueError exception with functional data. 
""" - fd = fetch_functional_data + fd = fetch_weather_subset cov_kernel = covariances_raise_fixture pytest.raises( @@ -179,86 +209,24 @@ def test_raises( ) -def test_fdatabasis_example_linear( - fdatabasis_data: Tuple[FDataBasis, FDataBasis], -) -> None: - """Check a precalculated example for Linear covariance kernel.""" - fd1, fd2 = fdatabasis_data - res1 = cov.Linear(variance=1 / 2, intercept=3)(fd1, fd2) - res2 = np.array([[3 / 2], [3 / 2 + 32 / 6]]) - np.testing.assert_allclose( - res1, - res2, - rtol=1e-6, - ) - - -def test_fdatabasis_example_polynomial( - fdatabasis_data: Tuple[FDataBasis, FDataBasis], -) -> None: - """Check a precalculated example for Polynomial covariance kernel.""" - fd1, fd2 = fdatabasis_data - res1 = cov.Polynomial( - variance=1 / 3, - slope=2, - intercept=1, - degree=2, - )(fd1, fd2) - res2 = np.array([[1 / 3], [67**2 / 3**3]]) - np.testing.assert_allclose( - res1, - res2, - rtol=1e-6, - ) - - -def test_fdatabasis_example_gaussian( - fdatabasis_data: Tuple[FDataBasis, FDataBasis], -) -> None: - """Check a precalculated example for Gaussian covariance kernel.""" - fd1, fd2 = fdatabasis_data - res1 = cov.Gaussian(variance=3, length_scale=2)(fd1, fd2) - res2 = np.array([ - [3 * np.exp(-7 / 6)], - [3 * np.exp(-7 / 6)], - ]) - np.testing.assert_allclose( - res1, - res2, - rtol=1e-6, - ) - - -def test_fdatabasis_example_exponential( - fdatabasis_data: Tuple[FDataBasis, FDataBasis], -) -> None: - """Check a precalculated example for Exponential covariance kernel.""" - fd1, fd2 = fdatabasis_data - res1 = cov.Exponential(variance=4, length_scale=5)(fd1, fd2) - res2 = np.array([ - [4 * np.exp(-np.sqrt(28 / 3) / 5)], - [4 * np.exp(-np.sqrt(28 / 3) / 5)], - ]) - np.testing.assert_allclose( - res1, - res2, - rtol=1e-6, - ) - - -def test_fdatabasis_example_matern( - fdatabasis_data: Tuple[FDataBasis, FDataBasis], -) -> None: - """Check a precalculated example for Matern covariance kernel.""" - fd1, fd2 = fdatabasis_data - res1 = cov.Matern(variance=2, length_scale=3, nu=2)(fd1, fd2) - res2 = np.array([ - [(2 / 3) ** 2 * (28 / 3) * 0.239775899566], - [(2 / 3) ** 2 * (28 / 3) * 0.239775899566], - ]) +def test_precalc_example( + precalc_example_data: list[ # noqa: WPS320 + FDataBasis, FDataBasis, cov.Covariance, np.array, + ], +): + """Check the precalculated example for Linear covariance kernel. + + Compare the theoretical precalculated results against the covariance kernel + implementation, for different kernels. 
+    The structure of the input is a list containing:
+    [First functional dataset, Second functional dataset,
+    Covariance kernel used, Result]
+    """
+    fd1, fd2, kernel, precalc_result = precalc_example_data
+    computed_result = kernel(fd1, fd2)
     np.testing.assert_allclose(
-        res1,
-        res2,
+        computed_result,
+        precalc_result,
         rtol=1e-6,
     )

From 26c0e73b82fae6e76e645c25488e70db2a523e31 Mon Sep 17 00:00:00 2001
From: E105D104U125
Date: Mon, 8 Jul 2024 12:28:17 +0200
Subject: [PATCH 12/14] Changes in style

---
 skfda/misc/covariances.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/skfda/misc/covariances.py b/skfda/misc/covariances.py
index c130fc454..00baa7e66 100644
--- a/skfda/misc/covariances.py
+++ b/skfda/misc/covariances.py
@@ -208,7 +208,7 @@ def column_style(  # noqa: WPS430
         {heatmap}
         </td>
     </tr>
-    """  # noqa: WPS432
+    """  # noqa: WPS432, WPS318
 
     def to_sklearn(self) -> sklearn_kern.Kernel:
         """Convert it to a sklearn kernel, if there is one."""

From 85c0dccf75df25927fa027f80770e4b368c5973e Mon Sep 17 00:00:00 2001
From: E105D104U125
Date: Tue, 9 Jul 2024 18:53:14 +0200
Subject: [PATCH 13/14] Add FDM feature, tests and doc example.

---
 docs/modules/preprocessing/dim_reduction.rst  |   1 +
 docs/refs.bib                                 |  12 +
 examples/plot_fdm.py                          | 412 ++++++++++++++++++
 skfda/preprocessing/dim_reduction/__init__.py |   2 +
 skfda/preprocessing/dim_reduction/_fdm.py     | 230 ++++++++++
 skfda/tests/test_fdm.py                       | 351 +++++++++++++++
 6 files changed, 1008 insertions(+)
 create mode 100644 examples/plot_fdm.py
 create mode 100644 skfda/preprocessing/dim_reduction/_fdm.py
 create mode 100644 skfda/tests/test_fdm.py

diff --git a/docs/modules/preprocessing/dim_reduction.rst b/docs/modules/preprocessing/dim_reduction.rst
index d78d8705c..7bf81dc4c 100644
--- a/docs/modules/preprocessing/dim_reduction.rst
+++ b/docs/modules/preprocessing/dim_reduction.rst
@@ -43,5 +43,6 @@ of the covariance between the two data blocks.
 .. autosummary::
    :toctree: autosummary
 
+   skfda.preprocessing.dim_reduction.FDM
    skfda.preprocessing.dim_reduction.FPCA
    skfda.preprocessing.dim_reduction.FPLS
\ No newline at end of file

diff --git a/docs/refs.bib b/docs/refs.bib
index 005bc0112..cc88606cc 100644
--- a/docs/refs.bib
+++ b/docs/refs.bib
@@ -31,6 +31,18 @@ @article{baillo+grane_2009_local
   keywords = {62G08,62G30,Cross-validation,Fourier expansion,Functional data,Kernel regression,Local linear regression,Nonparametric smoothing}
 }
 
+@article{barroso++_2023_fdm,
+  author = {M. Barroso and C. M. Alaíz and J. L. Torrecilla and A. Fernández},
+  title = {Functional diffusion maps},
+  journal = {Statistics and Computing},
+  year = {2023},
+  volume = {34},
+  number = {1},
+  pages = {22},
+  doi = {10.1007/s11222-023-10332-1},
+  url = {https://doi.org/10.1007/s11222-023-10332-1}
+}
+
 @article{berrendero++_2016_mrmr,
   title = {The {{mRMR}} Variable Selection Method: A Comparative Study for Functional Data},
   shorttitle = {The {{mRMR}} Variable Selection Method},

diff --git a/examples/plot_fdm.py b/examples/plot_fdm.py
new file mode 100644
index 000000000..a75efdeef
--- /dev/null
+++ b/examples/plot_fdm.py
@@ -0,0 +1,412 @@
+"""
+Functional Diffusion Maps
+============================================================================
+In this example, the use of the Functional Diffusion Map (FDM) technique is
+shown over different datasets.
+Firstly, an example of basic use of the technique is provided.
+Later, two examples of parameter tuning are presented, for embedding spaces
+of different dimensions.
+Finally, an application of the method for a real, non-synthetic, dataset is
+provided.
+"""

+# Author: Eduardo Terrés Caballero
+# License: MIT

+from itertools import product

+import matplotlib.pyplot as plt
+import numpy as np
+from matplotlib.colors import ListedColormap
+from sklearn import datasets

+from skfda.datasets import fetch_phoneme
+from skfda.misc.covariances import Gaussian
+from skfda.preprocessing.dim_reduction import FDM
+from skfda.representation import FDataGrid

+random_state = 0

+####################################################################
+# Some examples shown here are further explained in the
+# article :footcite:t:`barroso++_2023_fdm`.


+####################################################################
+# **MOONS DATASET EXAMPLE**
+#
+# Firstly, a basic example of execution is presented using a functional
+# version of the moons dataset, a dataset consisting of two-dimensional
+# coordinates representing the positions of two different moons.
+n_samples, n_grid_pts = 100, 50
+data_moons, y = datasets.make_moons(
+    n_samples=n_samples,
+    noise=0,
+    random_state=random_state,
+)

+colors = ["blue", "orange"]
+cmap = ListedColormap(colors)
+plt.scatter(data_moons[:, 0], data_moons[:, 1], c=y, cmap=cmap)
+plt.title("Moons data")
+plt.show()

+####################################################################
+# Using a basis of two elements, the functional observation corresponding
+# to a multivariate observation is obtained by treating the coordinates
+# as coefficients that multiply the elements of the basis.
+# In other words, the multivariate vectors are interpreted as coefficient
+# vectors with respect to the basis.
+# Below is the code to generate the synthetic moons functional data.
+grid = np.linspace(-np.pi, np.pi, n_grid_pts)
+basis = np.array([np.sin(4 * grid), grid ** 2 + 2 * grid - 2])
+fd_moons = FDataGrid(
+    data_matrix=data_moons @ basis,
+    grid_points=grid,
+    dataset_name="Functional moons data",
+    argument_names=("x",),
+    coordinate_names=("f (x)",),
+)
+fd_moons.plot(linewidth=0.5, group=y, group_colors=colors)
+plt.xlim((-np.pi, np.pi))
+plt.show()

+####################################################################
+# Once the functional data is available, it only remains to choose
+# the values of the parameters of the model.
+#
+# The FDM technique involves the use of a kernel operator that acts
+# as a measure of similarity for the data. In this case we will be using
+# the Gaussian kernel, with a length scale parameter of 0.25.
+fdm = FDM(
+    n_components=2,
+    kernel=Gaussian(length_scale=0.25),
+    alpha=0,
+    n_steps=1,
+)
+embedding = fdm.fit_transform(fd_moons)

+plt.scatter(embedding[:, 0], embedding[:, 1], c=y, cmap=cmap)
+plt.title("Diffusion coordinates for the functional moons data")
+plt.show()

+####################################################################
+# As we can see, the functional diffusion map has correctly interpreted
+# the topological nature of the data, successfully separating the
+# coordinates associated with the two moons.


+####################################################################
+# **SPIRALS DATASET EXAMPLE**
+#
+# Next is an example of parameter tuning in the form of a grid
+# search over a set of given values for the length_scale kernel parameter
+# and the alpha parameter of the FDM method.
+# We have two spirals, which are initially entangled and thus
+# indistinguishable to the machine.
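+# The construction below draws an Archimedean spiral (radius growing
+# linearly with the angle) and takes its reflection through the origin as
+# the second trajectory, so each spiral is the other rotated by 180 degrees.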
+#
+# Below is the code for the generation of the spiral data and its
+# functional equivalent, following the same approach as in the moons dataset.
+n_samples, n_grid_pts = 100, 50
+t = (np.pi / 4 + np.linspace(0, 4, n_samples)) * np.pi
+dx, dy = 10 * t * np.cos(t), 10 * t * np.sin(t)
+y = np.array([0] * n_samples + [1] * n_samples)
+data_spirals = np.column_stack((
+    np.concatenate((dx, -dx)), np.concatenate((dy, -dy)),
+))

+colors = ["yellow", "purple"]
+cmap = ListedColormap(colors)
+plt.scatter(data_spirals[:, 0], data_spirals[:, 1], c=y, cmap=cmap)
+plt.gca().set_aspect("equal", adjustable="box")
+plt.title("Spirals data")
+plt.show()

+# Define functional data object
+grid = np.linspace(-np.pi, np.pi, n_grid_pts)
+basis = np.array([grid * np.cos(grid) / 3, grid * np.sin(grid) / 3])
+fd_spirals = FDataGrid(
+    data_matrix=data_spirals @ basis,
+    grid_points=grid,
+    dataset_name="Functional spirals data",
+    argument_names=("x",),
+    coordinate_names=("f (x)",),
+)
+fd_spirals.plot(linewidth=0.5, group=y, group_colors=colors)
+plt.show()


+####################################################################
+# Once the functional data is ready, we will perform a grid search
+# over the following values of the parameters, as well as plot
+# the resulting embeddings for visual comparison.
+alpha_set = [0, 0.33, 0.66, 1]
+length_scale_set = [2.5, 3, 4.5, 7, 10, 11, 15]
+param_grid = product(alpha_set, length_scale_set)

+####################################################################
+fig, axes = plt.subplots(
+    len(alpha_set), len(length_scale_set), figsize=(16, 8),
+)

+for (alpha, length_scale), ax in zip(param_grid, axes.ravel()):
+    fdm = FDM(
+        n_components=2,
+        kernel=Gaussian(length_scale=length_scale),
+        alpha=alpha,
+        n_steps=1,
+    )
+    embedding = fdm.fit_transform(fd_spirals)
+
+    ax.scatter(embedding[:, 0], embedding[:, 1], c=y, cmap=cmap)
+    ax.set_xticklabels([])
+    ax.set_yticklabels([])

+for ax, alpha in zip(axes[:, 0], alpha_set):
+    ax.set_ylabel(f"$\\alpha$: {alpha}", size=20, rotation=0, ha="right")

+for ax, length_scale in zip(axes[0], length_scale_set):
+    ax.set_title(f"$len\_sc$: {length_scale}", size=20, va="bottom")

+plt.show()

+####################################################################
+# The first thing to notice is that the length scale parameter exerts
+# a greater influence on the resulting embedding than the alpha parameter.
+# In this sense, the figures within any given column are more similar to
+# one another than those within any given row.
+# Thus, we shall set alpha equal to 0 because, in theory, doing so is
+# equivalent to skipping a normalization step in the process.
+#
+# Moreover, we can see that the optimal choice of the length scale parameter
+# of the kernel is 4.5, because it visually presents the clearest separation
+# between the trajectories of both spirals.
+# Hence, for a kernel length scale of 4.5 the method is able
+# to capture the local geometry of the spirals dataset.
+# For a small value of the kernel parameter (for example 1), contiguous points
+# in the same arm of the spiral are not considered close because the kernel is
+# too narrow, resulting in apparently random diffusion coordinates.
+# For a large value of the kernel parameter (for example 15), the kernel is
+# wide enough that points in contiguous spiral arms, which belong to different
+# trajectories, are considered similar. Hence the diffusion coordinates
+# preserve these relations by keeping both trajectories entangled.
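+#
+# (For reference, the Gaussian kernel used here is
+# k(x, y) = variance * exp(-d(x, y) ** 2 / (2 * length_scale ** 2)),
+# where d is the L2 distance between curves, so length_scale sets the
+# distance beyond which two curves stop counting as neighbours.)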
+# In summary, for a length scale of 4.5 the kernel is wide enough that
+# points in the same arm of a trajectory are considered similar, but not
+# so wide that points in contiguous arms of the spiral are also
+# considered similar.

+####################################################################
+# For a reliable comparison between embeddings, it is advisable to use
+# the same scale on all axes.
+# To illustrate this idea, below is a re-execution of the row with alpha
+# equal to 0.
+alpha_set = [0]
+length_scale_set = [2.5, 3, 4.5, 7, 10, 11, 15]
+param_grid = product(alpha_set, length_scale_set)

+fig, axes = plt.subplots(
+    len(alpha_set),
+    len(length_scale_set),
+    figsize=(16, 4),
+)

+for (alpha, length_scale), ax in zip(param_grid, axes.ravel()):
+    fdm = FDM(
+        n_components=2,
+        kernel=Gaussian(length_scale=length_scale),
+        alpha=alpha,
+        n_steps=1,
+    )
+    embedding = fdm.fit_transform(fd_spirals)
+
+    ax.scatter(embedding[:, 0], embedding[:, 1], c=y, cmap=cmap)
+    ax.set_xlim((-0.4, 0.4))
+    ax.set_ylim((-0.4, 0.4))

+axes[0].set_ylabel(
+    f"$\\alpha$: {alpha_set[0]}", size=20, rotation=0, ha="right",
+)

+for ax, length_scale in zip(axes, length_scale_set):
+    ax.set_title(f"$len\_sc$: {length_scale}", size=20, va="bottom")

+plt.show()

+####################################################################
+# **SWISS ROLL DATASET EXAMPLE**
+#
+# So far, the above examples have been computed with a value of the
+# n_components parameter of 2. This implies that the resulting
+# diffusion coordinate points belong to a two-dimensional space and thus we
+# can provide a graphical representation.
+# The aim of this new section is to explore further possibilities
+# regarding n_components.
+#
+# We will now apply the method to a more complex example, the
+# Swiss roll dataset. This dataset consists of three-dimensional points
+# that lie on a manifold shaped like a Swiss roll.
+n_samples, n_grid_pts = 500, 100
+data_swiss, y = datasets.make_swiss_roll(
+    n_samples=n_samples,
+    noise=0,
+    random_state=random_state,
+)
+fig = plt.figure()
+axis = fig.add_subplot(111, projection="3d")
+axis.set_title("Swiss roll data")
+axis.scatter(data_swiss[:, 0], data_swiss[:, 1], data_swiss[:, 2], c=y)
+plt.show()

+####################################################################
+# Similarly to the previous examples, the functional data object is defined.
+# In this case a basis of three elements will be used, since the multivariate
+# data points belong to a three-dimensional space.
+# For clarity purposes, only the first fifty functional observations are
+# plotted.
+grid = np.linspace(-np.pi, np.pi, n_grid_pts)
+basis = np.array([np.sin(4 * grid), np.cos(8 * grid), np.sin(12 * grid)])
+data_matrix = np.array(data_swiss) @ basis
+fd_swiss = FDataGrid(
+    data_matrix=data_matrix,
+    grid_points=grid,
+    dataset_name="Functional Swiss roll data",
+    argument_names=("x",),
+    coordinate_names=("f (x)",),
+)
+fd_swiss[:50].plot(linewidth=0.5, group=y[:50])
+plt.show()


+####################################################################
+# Now, the FDM method will be applied for different values of the
+# parameters, again in the form of a grid search.
+# Note that the diffusion coordinates will now consist of three components.
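+# As a reminder of what is computed: each diffusion coordinate is a leading
+# non-trivial right eigenvector of the transition matrix scaled by its
+# eigenvalue, so n_components=3 keeps the three leading eigenpairs. A quick
+# sanity sketch, kept as a comment since it merely restates fit_transform
+# in terms of the fitted attributes:
+#
+#     fdm = FDM(n_components=3, kernel=Gaussian(length_scale=2.5))
+#     emb = fdm.fit_transform(fd_swiss)
+#     assert np.allclose(
+#         emb,
+#         np.real(fdm.eigenvectors_right_ * fdm.eigenvalues_ ** fdm.n_steps),
+#     )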
+alpha_set = [0, 0.5, 1]
+length_scale_set = [1.5, 2.5, 4, 5]
+param_grid = product(alpha_set, length_scale_set)

+####################################################################
+fig, axes = plt.subplots(
+    len(alpha_set),
+    len(length_scale_set),
+    figsize=(16, 8),
+    subplot_kw={"projection": "3d"},
+)

+for (alpha, length_scale), ax in zip(param_grid, axes.ravel()):
+    fdm = FDM(
+        n_components=3,
+        kernel=Gaussian(length_scale=length_scale),
+        alpha=alpha,
+        n_steps=1,
+    )
+    embedding = fdm.fit_transform(fd_swiss)
+
+    ax.scatter(embedding[:, 0], embedding[:, 1], embedding[:, 2], c=y)
+    ax.set_xticklabels([])
+    ax.set_yticklabels([])
+    ax.set_zticklabels([])
+    ax.set_title(f"$\\alpha$: {alpha} $len\_sc$: {length_scale}")

+plt.show()

+####################################################################
+# Let's take a closer look at the resulting embedding for values
+# of length_scale and alpha equal to 2.5 and 0, respectively.
+alpha, length_scale = 0, 2.5
+fdm = FDM(
+    n_components=3,
+    kernel=Gaussian(length_scale=length_scale),
+    alpha=alpha,
+    n_steps=1,
+)
+embedding = fdm.fit_transform(fd_swiss)

+fig = plt.figure()
+ax = fig.add_subplot(111, projection="3d")
+ax.scatter(embedding[:, 0], embedding[:, 1], embedding[:, 2], c=y)
+ax.set_title(
+    "Diffusion coordinates for \n"
+    f"$\\alpha$: {alpha} $len\_sc$: {length_scale}",
+)
+plt.show()

+####################################################################
+# The choice of optimal parameters depends on the problem at hand.
+# The goal behind choosing length_scale equal to 2.5 and alpha equal
+# to 0 is to obtain an unrolled version of the Swiss roll.
+# Note that in the roll there are pairs of points whose Euclidean
+# distance is small but whose shortest path contained in the
+# manifold is significantly larger, since it must complete an entire loop.
+#
+# In this sense, the process happens to have taken into account the
+# shortest path distance rather than the Euclidean one. Thus, one may argue
+# that the topological nature of the data has been respected.
+# These new diffusion coordinates could be useful to gain more insights into
+# the initial data through further analysis.

+#########################################################################
+# **REAL DATASET: PHONEME**
+#
+# The aim of this section is to provide an example of application of
+# the FDM method to a non-synthetic dataset.
+# Below is an example of execution using the phoneme dataset,
+# which consists of computed log-periodograms for five distinct
+# phonemes coming from recorded male speech in the TIMIT database.
+n_samples = 300
+colors = ["C0", "C1", "C2", "C3", "C4"]
+group_names = ["aa", "ao", "dcl", "iy", "sh"]

+# Fetch phoneme dataset
+fd_phoneme, y = fetch_phoneme(return_X_y=True)
+fd_phoneme, y = fd_phoneme[:n_samples], y[:n_samples]
+fd_phoneme.plot(
+    linewidth=0.7,
+    group=y,
+    group_colors=colors,
+    group_names=group_names,
+    legend=True,
+)
+plt.show()

+####################################################################
+# The resulting diffusion coordinates in three dimensions will be
+# plotted, using different views to better understand the plot.
+cmap = ListedColormap(colors)
+alpha, length_scale = 1, 10
+fdm = FDM(
+    n_components=3,
+    kernel=Gaussian(length_scale=length_scale),
+    alpha=alpha,
+    n_steps=1,
+)
+diffusion_coord = fdm.fit_transform(fd_phoneme)

+# Plot three views of the diffusion coordinates
+view_points = [(30, 70), (28, 0), (10, -120)]

+fig, axes = plt.subplots(
+    1, len(view_points), figsize=(18, 6), subplot_kw={"projection": "3d"},
+)

+for view, ax in zip(view_points, axes.ravel()):
+    ax.scatter(
+        diffusion_coord[:, 0], diffusion_coord[:, 1], diffusion_coord[:, 2],
+        c=y, cmap=cmap,
+    )
+    ax.view_init(*view)
+    ax.set_title(f"View {view}", fontsize=26)
+plt.show()

+####################################################################
+# We can see that the diffusion coordinates for the different phonemes
+# have been clustered in the 3D space. This representation enables a
+# clearer separation of the data into the different phoneme groups.
+# In this way, the phoneme groups that are similar to each other, namely /aa/
+# and /ao/, are closer in the space. In fact, these two groups partly overlap
+# (orange and blue).


+###############################################################################
+# **References:**
+# .. footbibliography::

diff --git a/skfda/preprocessing/dim_reduction/__init__.py b/skfda/preprocessing/dim_reduction/__init__.py
index 5c8ae8405..6aad2fca8 100644
--- a/skfda/preprocessing/dim_reduction/__init__.py
+++ b/skfda/preprocessing/dim_reduction/__init__.py
@@ -12,6 +12,7 @@
         "variable_selection",
     ],
     submod_attrs={
+        "_fdm": ["FDM"],
        "_fpca": ["FPCA"],
         "_fpls": ["FPLS"],
         "_neighbor_transforms": ["KNeighborsTransformer"],
@@ -19,6 +20,7 @@
 )
 
 if TYPE_CHECKING:
+    from ._fdm import FDM as FDM
     from ._fpca import FPCA as FPCA
     from ._fpls import FPLS as FPLS
     from ._neighbor_transforms import (

diff --git a/skfda/preprocessing/dim_reduction/_fdm.py b/skfda/preprocessing/dim_reduction/_fdm.py
new file mode 100644
index 000000000..490fbd604
--- /dev/null
+++ b/skfda/preprocessing/dim_reduction/_fdm.py
@@ -0,0 +1,230 @@
+"""Functional Diffusion Maps Module."""
+from __future__ import annotations

+from typing import Callable

+import numpy as np
+import scipy
+from sklearn.utils.extmath import svd_flip
+from typing_extensions import Self

+from ..._utils._sklearn_adapter import BaseEstimator, InductiveTransformerMixin
+from ...representation import FData
+from ...typing._numpy import NDArrayFloat

+KernelFunction = Callable[[FData, FData | None], NDArrayFloat]


+class FDM(
+    BaseEstimator,
+    InductiveTransformerMixin[FData, NDArrayFloat, object],
+):
+    r"""
+    Functional diffusion maps.

+    Class that implements functional diffusion maps for both
+    basis and grid representations of the data.

+    Parameters:
+        n_components: Dimension of the space to which the embedded
+            functional data belongs. For data visualization purposes,
+            a value of 2 or 3 shall be used.
+        kernel: kernel function used over the functional observations.
+            It serves as a measure of connectivity or similarity between
+            points, where a higher value means greater connectivity.
+        alpha: density parameter in the interval [0, 1] used in the
+            normalization step. A value of 0 means the data distribution
+            is not taken into account during the normalization step.
+            The opposite holds for a higher value of alpha.
+        n_steps: Number of steps in the random walk.


+    Attributes:
+        transition_matrix\_: transition matrix computed from the data.
+        eigenvalues\_: highest n_components eigenvalues of transition_matrix\_
+            in descending order, starting from the second highest.
+        eigenvectors\_right\_: right eigenvectors of transition\_matrix\_
+            corresponding to eigenvalues\_.
+        d\_alpha\_: vector of densities of the weighted graph.
+        training\_dataset\_: dataset used for training the method. It is needed
+            for the transform method.

+    Examples:
+        In this example we fetch the Canadian weather dataset and divide it
+        into train and test sets. We then obtain the diffusion coordinates
+        for the train set and predict these coordinates for the test set.

+        >>> from skfda.datasets import fetch_weather
+        >>> from skfda.representation import FDataGrid
+        >>> from skfda.misc.covariances import Gaussian
+        >>> X, y = fetch_weather(return_X_y=True, as_frame=True)
+        >>> fd : FDataGrid = X.iloc[:, 0].values
+        >>> fd_train = fd[:25]
+        >>> fd_test = fd[25:]
+        >>> fdm = FDM(
+        ...     n_components=2,
+        ...     kernel=Gaussian(variance=1, length_scale=1),
+        ...     alpha=1,
+        ...     n_steps=1
+        ... )
+        >>> embedding_train = fdm.fit_transform(X=fd_train)
+        >>> embedding_test = fdm.transform(X=fd_test)

+    Notes:
+        Performing fit and then transform is not equivalent to performing
+        fit_transform. In the former case an approximation of the diffusion
+        coordinates is computed via the Nyström method. In the latter, the
+        true diffusion coordinates are computed.
+    """

+    def __init__(
+        self,
+        *,
+        n_components: int = 2,
+        kernel: KernelFunction,
+        alpha: float = 0,
+        n_steps: int = 1,
+    ) -> None:
+        self.n_components = n_components
+        self.kernel = kernel
+        self.alpha = alpha
+        self.n_steps = n_steps

+    def fit(  # noqa: WPS238
+        self,
+        X: FData,
+        y: object = None,
+    ) -> Self:
+        """
+        Compute the transition matrix and save it.

+        Args:
+            X: Functional data for which to obtain diffusion coordinates.
+            y: Ignored.

+        Returns:
+            self

+        """
+        # Parameter validation
+        if self.n_components < 1:
+            raise ValueError(
+                f'Embedding dimension ({self.n_components}) cannot '
+                'be less than 1.',
+            )

+        if self.n_components >= X.n_samples:
+            raise ValueError(
+                f'Embedding dimension ({self.n_components}) cannot be '
+                f'greater than or equal to the number of samples '
+                f'({X.n_samples}).',
+            )

+        if self.alpha < 0 or self.alpha > 1:
+            raise ValueError(
+                f'Parameter alpha (= {self.alpha}) must '
+                'be in the interval [0, 1].',
+            )

+        if self.n_steps <= 0:
+            raise ValueError(
+                f'Parameter n_steps (= {self.n_steps}) must '
+                'be greater than 0.',
+            )

+        # Construct the weighted graph
+        weighted_graph = self.kernel(X, X)

+        # Compute the density of each vertex by adding up its row
+        self.d_alpha_ = np.sum(weighted_graph, axis=1) ** self.alpha

+        # Construct the normalized graph
+        norm_w_graph = weighted_graph / np.outer(self.d_alpha_, self.d_alpha_)

+        rows_sum = np.sum(norm_w_graph, axis=1)
+        self.transition_matrix_ = norm_w_graph / rows_sum[:, np.newaxis]

+        eigenvalues, eigenvectors_right_ = scipy.linalg.eig(
+            self.transition_matrix_,
+        )

+        # Remove the highest eigenvalue and take the n_components
+        # next highest eigenvalues in descending order
+        index_order = eigenvalues.argsort()[-2:-self.n_components - 2:-1]
+        eigenvalues = np.real(eigenvalues[index_order])
+        eigenvectors_right, _ = svd_flip(
+            eigenvectors_right_[:, index_order],
+            np.zeros_like(eigenvectors_right_[:, index_order]).T,
+        )
+        self.eigenvalues_ = eigenvalues
+        self.eigenvectors_right_ = eigenvectors_right

+        self.training_dataset_ = X

+        return self

+    def fit_transform(
+        self,
+        X: FData,
+        y: object = None,
+    ) -> NDArrayFloat:
+        """
+        Compute the diffusion coordinates for the functional data X.

+        The diffusion coordinate corresponding to the i-th curve is stored
+        in the i-th row of the returned matrix.
+        Note that fit_transform is not equivalent to applying fit and then
+        transform.

+        Args:
+            X: Functional data for which to obtain diffusion coordinates.
+            y: Ignored.

+        Returns:
+            Diffusion coordinates for the functional data X.

+        """
+        self.fit(X, y)

+        # Compute and return the diffusion map
+        return np.real(
+            self.eigenvectors_right_
+            * self.eigenvalues_ ** self.n_steps,
+        )

+    def transform(
+        self,
+        X: FData,
+    ) -> NDArrayFloat:
+        """
+        Compute the diffusion coordinates for the functional data X.

+        Compute the diffusion coordinates of out-of-sample data using the
+        eigenvectors and eigenvalues computed during training.

+        Args:
+            X: Functional data for which to predict diffusion coordinates.

+        Returns:
+            Diffusion coordinates for the functional data X.

+ + """ + # Construct the weighted graph crossing the training + # and out-of-sample data + weighted_graph_out = self.kernel(X, self.training_dataset_) + + # Compute density of each vertex by adding the rows + h_alpha = np.sum(weighted_graph_out, axis=1) ** self.alpha + + # Construct the normalized graph + norm_w_graph_out = ( + weighted_graph_out / np.outer(h_alpha, self.d_alpha_) + ) + + # Define transition matrix by normalizing by rows + rows_sum = np.sum(norm_w_graph_out, axis=1) + transition_matrix_out = norm_w_graph_out / rows_sum[:, np.newaxis] + + return transition_matrix_out @ ( # type: ignore[no-any-return] + self.eigenvectors_right_ + * self.eigenvalues_ ** (self.n_steps - 1) + ) diff --git a/skfda/tests/test_fdm.py b/skfda/tests/test_fdm.py new file mode 100644 index 000000000..566686bc5 --- /dev/null +++ b/skfda/tests/test_fdm.py @@ -0,0 +1,351 @@ +"""Tests for FDM.""" +from typing import Tuple + +import numpy as np +import pytest +from sklearn import datasets + +from skfda import FData, FDataBasis, FDataGrid +from skfda.misc.covariances import Gaussian +from skfda.misc.metrics import PairwiseMetric, l1_distance, l2_distance +from skfda.preprocessing.dim_reduction import FDM +from skfda.representation.basis import MonomialBasis +from skfda.typing._numpy import NDArrayFloat + + +def _discretize_fdatabasis(fd_basis: FDataBasis) -> FDataGrid: + return fd_basis.to_grid( + np.linspace(*fd_basis.basis.domain_range[0], 300), + ) + + +def rbf_kernel(fd: FData, sigma: float) -> NDArrayFloat: + """Calculate the radial basis function kernel. + + Args: + fd: Functional data object + sigma: locality parameter + + Returns: + Computed real value + """ + norm_matrix = PairwiseMetric(l2_distance)(fd) + return np.exp( # type: ignore[no-any-return] + - np.square(norm_matrix) / (2 * sigma ** 2), + ) + + +def laplacian_kernel(fd: FData, sigma: float) -> NDArrayFloat: + """Calculate the radial basis function kernel. + + Args: + fd: Functional data object + sigma: locality parameter + + Returns: + Computed real value + """ + norm_matrix = PairwiseMetric(l1_distance)(fd) + return np.exp(- norm_matrix / sigma) # type: ignore[no-any-return] + + +############################################################################## +# Fixtures +############################################################################## +dummy_fdata = FDataGrid(data_matrix=[[0.9]]) + + +@pytest.fixture( + params=[ + FDM(kernel=Gaussian(), alpha=-1), + FDM(kernel=Gaussian(), n_components=0), + FDM(kernel=Gaussian(), n_components=2), + FDM(kernel=Gaussian(), n_steps=0), + ], +) +def data_grid_param_check(request: FDM) -> Tuple[FDM, FDataGrid]: + """Fixture for testing parameter checks.""" + return request.param, dummy_fdata + + +@pytest.fixture( + params=[ + FDataGrid( + data_matrix=[ + [52, 93, 15], + [72, 61, 21], + [83, 87, 75], + [75, 88, 24], + ], + grid_points=[0, 1 / 2, 1], + ), + ], +) +def precalculated_fdatagrid_example( + request: FDataGrid, +) -> FDataGrid: + """Fixture for loading a precalculated FDataGrid example.""" + return request.param # type: ignore[no-any-return] + + +@pytest.fixture( + params=[ + ( + FDataBasis( + basis=MonomialBasis(domain_range=(0, 2), n_basis=3), + coefficients=[ + [0, 0, 1], + [1, 1, 0], + [0, 2, 0], + ], + ), + FDataBasis( + basis=MonomialBasis(domain_range=(0, 2), n_basis=3), + coefficients=[ + [-1, 1, 0], + ], + ), + ), + ], +) +def precalculated_fdatabasis_example( + request: FDataBasis, +) -> Tuple[FDataBasis, FDataGrid, FDataBasis]: + """Load FDataBasis example. 
+
+    Fixture for loading a precalculated FDataBasis example and discretizing
+    an FDataBasis into a FDataGrid object.
+    """
+    fd_basis: FDataBasis
+    fd_out: FDataBasis
+    fd_basis, fd_out = request.param
+    return (
+        fd_basis,
+        _discretize_fdatabasis(fd_basis),
+        fd_out,
+    )


+@pytest.fixture
+def moons_functional_dataset() -> FDataGrid:
+    """Fixture for loading moons dataset example."""
+    n_grid_pts, n_samples = 100, 20
+    data_moon, _ = datasets.make_moons(
+        n_samples=n_samples,
+        noise=0.0,
+        random_state=0,
+    )
+    grid = np.linspace(-np.pi, np.pi, n_grid_pts)

+    # Basis
+    basis_1 = np.sin(4 * grid)
+    basis_2 = (grid ** 2 + 2 * grid - 2)
+    basis = np.array([basis_1, basis_2])

+    # Generate functional data object
+    data_matrix = np.array(data_moon) @ basis
+    return FDataGrid(
+        data_matrix=data_matrix,
+        grid_points=grid,
+    )

+##############################################################################
+# Tests
+##############################################################################


+def test_param_check(data_grid_param_check: Tuple[FDM, FDataGrid]) -> None:
+    """Check that invalid parameters in fit raise exception."""
+    fdm, fd = data_grid_param_check

+    pytest.raises(
+        ValueError,
+        fdm.fit,
+        fd,
+    )


+def test_precalculated_grid_example(
+    precalculated_fdatagrid_example: FDataGrid,
+) -> None:
+    """Compare the embedding in grid against the fda package."""
+    fd = precalculated_fdatagrid_example
+    fdm = FDM(
+        n_components=2,
+        kernel=Gaussian(length_scale=10),
+        alpha=0.9,
+        n_steps=2,
+    )
+    embedding = fdm.fit_transform(fd)

+    expected_embedding = [
+        [-0.0513176, 0.4572292],
+        [0.6188756, -0.2537263],
+        [-0.5478710, -0.3488652],
+        [-0.0525144, 0.3176671],
+    ]

+    np.testing.assert_allclose(
+        embedding,
+        expected_embedding,
+        atol=1e-7,
+    )


+def test_precalculated_basis_example(
+    precalculated_fdatabasis_example: Tuple[FDataBasis, FDataGrid, FDataBasis],
+) -> None:
+    """Compare the embedding in basis and grid against the fda package."""
+    fd_basis, fd_grid, _ = precalculated_fdatabasis_example

+    fdm_basis = FDM(
+        n_components=2,
+        kernel=Gaussian(),
+        alpha=1,
+        n_steps=2,
+    )
+    embedding_basis = fdm_basis.fit_transform(fd_basis)

+    fdm_grid = FDM(
+        n_components=2,
+        kernel=Gaussian(),
+        alpha=1,
+        n_steps=2,
+    )
+    embedding_grid = fdm_grid.fit_transform(fd_grid)

+    expected_transition_matrix = [
+        [0.52466, 0.20713, 0.26821],
+        [0.21187, 0.47341, 0.31472],
+        [0.27529, 0.31581, 0.40891],
+    ]

+    # Check transition matrix
+    for tran_matrix in (
+        fdm_basis.transition_matrix_,
+        fdm_grid.transition_matrix_,
+    ):
+        np.testing.assert_allclose(
+            tran_matrix,
+            expected_transition_matrix,
+            atol=1e-5,
+        )

+    # Check diffusion coordinates
+    expected_embedding = [
+        [0.0687, -0.00285],
+        [-0.05426, -0.00648],
+        [-0.01607, 0.00944],
+    ]

+    for embedding in (embedding_basis, embedding_grid):
+        np.testing.assert_allclose(
+            embedding,
+            expected_embedding,
+            atol=1e-5,
+        )


+def test_nystrom(
+    precalculated_fdatabasis_example: Tuple[FDataBasis, FDataGrid, FDataBasis],
+) -> None:
+    """Test Nystrom method.

+    Compare the embedding of out-of-sample points in basis and grid
+    against the fda package via the Nystrom method.
+    """
+    fd_basis, fd_grid, fd_out = precalculated_fdatabasis_example
+
+    fdm_basis = FDM(
+        n_components=2,
+        kernel=Gaussian(),
+        alpha=1,
+        n_steps=2,
+    )
+    embedding_basis = fdm_basis.fit(fd_basis).transform(fd_out)
+
+    fdm_grid = FDM(
+        n_components=2,
+        kernel=Gaussian(),
+        alpha=1,
+        n_steps=2,
+    )
+    embedding_grid = fdm_grid.fit(fd_grid).transform(
+        _discretize_fdatabasis(fd_out),
+    )
+
+    expected_embedding = [
+        [0.156125, -0.021132],
+    ]
+
+    np.testing.assert_allclose(
+        embedding_basis,
+        expected_embedding,
+        atol=1e-3,
+    )
+
+    np.testing.assert_allclose(
+        embedding_grid,
+        expected_embedding,
+        atol=1e-3,
+    )
+
+
+def test_moons_dataset(
+    moons_functional_dataset: FDataGrid,
+) -> None:
+    """Test the embedding for a small version of the moons dataset example."""
+    fdata = moons_functional_dataset
+    alpha, sigma = (1.0, 2.5)
+    fdm = FDM(
+        n_components=2,
+        kernel=Gaussian(length_scale=sigma),
+        alpha=alpha,
+        n_steps=1,
+    )
+    embedding = fdm.fit_transform(fdata)
+
+    expected_embedding = [
+        [-0.07094005, -0.19475867],
+        [0.04054447, -0.21670899],
+        [0.09244529, -0.19569411],
+        [0.07094005, -0.19475867],
+        [0.13443593, -0.12554232],
+        [-0.21055562, 0.01815767],
+        [0.27732511, 0.1802182],
+        [-0.26962763, 0.16043396],
+        [0.29220798, 0.22086707],
+        [0.18339034, -0.04185513],
+        [0.29344042, 0.22420163],
+        [-0.29220798, 0.22086707],
+        [-0.09244529, -0.19569411],
+        [0.21055562, 0.01815767],
+        [-0.27732511, 0.1802182],
+        [-0.04054447, -0.21670899],
+        [0.26962763, 0.16043396],
+        [-0.13443593, -0.12554232],
+        [-0.29344042, 0.22420163],
+        [-0.18339034, -0.04185513],
+    ]
+
+    np.testing.assert_allclose(
+        embedding,
+        expected_embedding,
+        atol=1e-5,
+    )
+
+
+def test_gaussian_covariance(
+    moons_functional_dataset: FDataGrid,
+) -> None:
+    """Test Gaussian covariance in covariance module against RBF kernel."""
+    sigma = 0.1
+    fdata = moons_functional_dataset
+    gaussian_cov_matrix = Gaussian(length_scale=sigma)(fdata)
+    rbf_kernel_matrix = rbf_kernel(fdata, sigma)
+
+    np.testing.assert_allclose(
+        gaussian_cov_matrix,
+        rbf_kernel_matrix,
+        atol=1e-7,
+    )

From eb33f51bb0fce5ed64e08df459d880455baa2ab8 Mon Sep 17 00:00:00 2001
From: E105D104U125
Date: Tue, 9 Jul 2024 19:14:04 +0200
Subject: [PATCH 14/14] Changes in style.
--- examples/plot_fdm.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/examples/plot_fdm.py b/examples/plot_fdm.py index a75efdeef..10e9da21a 100644 --- a/examples/plot_fdm.py +++ b/examples/plot_fdm.py @@ -1,3 +1,5 @@ +# flake8: noqa: WPS440, WPS441 + """ Functional Diffusion Maps ============================================================================ @@ -166,7 +168,7 @@ ax.set_ylabel(f"$\\alpha$: {alpha}", size=20, rotation=0, ha="right") for ax, length_scale in zip(axes[0], length_scale_set): - ax.set_title(f"$len\_sc$: {length_scale}", size=20, va="bottom") + ax.set_title(f"$len-sc$: {length_scale}", size=20, va="bottom") plt.show() @@ -227,8 +229,8 @@ f"$\\alpha$: {alpha_set[0]}", size=20, rotation=0, ha="right", ) -for ax, length_scale in zip(axes, length_scale_set): - ax.set_title(f"$len\_sc$: {length_scale}", size=20, va="bottom") +for ax, length_scale in zip(axes, length_scale_set): + ax.set_title(f"$len-sc$: {length_scale}", size=20, va="bottom") plt.show() @@ -293,7 +295,7 @@ subplot_kw={"projection": "3d"}, ) -for (alpha, length_scale), ax in zip(param_grid, axes.ravel()): +for (alpha, length_scale), ax in zip(param_grid, axes.ravel()): fdm = FDM( n_components=3, kernel=Gaussian(length_scale=length_scale), @@ -306,7 +308,7 @@ ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_zticklabels([]) - ax.set_title(f"$\\alpha$: {alpha} $len\_sc$: {length_scale}") + ax.set_title(f"$\\alpha$: {alpha} $len-sc$: {length_scale}") plt.show() @@ -327,7 +329,7 @@ ax.scatter(embedding[:, 0], embedding[:, 1], embedding[:, 2], c=y) ax.set_title( "Diffusion coordinates for \n" - f"$\\alpha$: {alpha} $len\_sc$: {length_scale}", + f"$\\alpha$: {alpha} $len-sc$: {length_scale}", ) plt.show() @@ -391,8 +393,11 @@ for view, ax in zip(view_points, axes.ravel()): ax.scatter( - diffusion_coord[:, 0], diffusion_coord[:, 1], diffusion_coord[:, 2], - c=y, cmap=cmap, + diffusion_coord[:, 0], + diffusion_coord[:, 1], + diffusion_coord[:, 2], + c=y, + cmap=cmap, ) ax.view_init(*view) ax.set_title(f"View {view}", fontsize=26)
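
Note for reviewers: a minimal usage sketch of the API exercised by the tests above. This is illustrative only and not part of the patch; the toy curves and parameter values below are invented here, while the class, kernel, method, and parameter names (FDM, Gaussian, fit_transform, transform, n_components, alpha, n_steps) are the ones the tests pin down.

    import numpy as np

    from skfda import FDataGrid
    from skfda.misc.covariances import Gaussian
    from skfda.preprocessing.dim_reduction import FDM

    # Toy training sample: five noisy sine curves on a common grid
    grid = np.linspace(0, 1, 50)
    rng = np.random.default_rng(0)
    fd_train = FDataGrid(
        data_matrix=np.sin(2 * np.pi * grid) + 0.1 * rng.normal(size=(5, 50)),
        grid_points=grid,
    )

    # Same estimator parameters as in test_nystrom
    fdm = FDM(
        n_components=2,
        kernel=Gaussian(),
        alpha=1,
        n_steps=2,
    )
    coords = fdm.fit_transform(fd_train)  # diffusion coordinates, shape (5, 2)

    # Out-of-sample curve embedded via the Nystrom extension in transform()
    fd_new = FDataGrid(
        data_matrix=np.cos(2 * np.pi * grid)[np.newaxis],
        grid_points=grid,
    )
    new_coords = fdm.transform(fd_new)  # shape (1, 2)

The transform method shown at the top of this patch is what makes the last call work: the kernel rows between the new samples and the training set are density-normalized with the stored d_alpha_, row-normalized into a transition matrix, and projected onto eigenvectors_right_ scaled by eigenvalues_ ** (n_steps - 1).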