diff --git a/docs/modules/representation.rst b/docs/modules/representation.rst index 224f30bef..4f00b3fde 100644 --- a/docs/modules/representation.rst +++ b/docs/modules/representation.rst @@ -85,6 +85,29 @@ methods. skfda.representation.basis.Basis + +Irregular representation +------------------------ + +In practice, many functional datasets do not contain functions evaluated +uniformly over a fixed grid. In other words, it is paramount to be able +to represent irregular functional data. + +While the FDataGrid class could support these kinds of datasets by filling a +common grid with possibly empty (or nan) values, it is inefficient to store a +complete grid with low data density. Furthermore, there are specific methods +that can be applied to irregular data in order to obtain, among other things, +a better conversion to basis representation. + +The FDataIrregular class provides the functionality which suits these purposes. + + +.. autosummary:: + :toctree: autosummary + + skfda.representation.irregular.FDataIrregular + + Generic representation ---------------------- diff --git a/skfda/__init__.py b/skfda/__init__.py index 9aeae4099..eab04fe8d 100644 --- a/skfda/__init__.py +++ b/skfda/__init__.py @@ -17,7 +17,9 @@ "representation", ], submod_attrs={ - 'representation': ["FData", "FDataBasis", "FDataGrid"], + 'representation': [ + "FData", "FDataBasis", "FDataGrid", "FDataIrregular", + ], 'representation._functional_data': ['concatenate'], }, ) diff --git a/skfda/datasets/__init__.py b/skfda/datasets/__init__.py index 666495cef..8abd0dfda 100644 --- a/skfda/datasets/__init__.py +++ b/skfda/datasets/__init__.py @@ -20,6 +20,7 @@ "fetch_tecator", "fetch_ucr", "fetch_weather", + "fetch_bone_density", ], "_samples_generators": [ "make_gaussian", diff --git a/skfda/datasets/_real_datasets.py b/skfda/datasets/_real_datasets.py index dfc6f2651..f101fafa5 100644 --- a/skfda/datasets/_real_datasets.py +++ b/skfda/datasets/_real_datasets.py @@ -12,6 +12,7 @@ from
typing_extensions import Literal from ..representation import FDataGrid +from ..representation.irregular import FDataIrregular from ..typing._numpy import NDArrayFloat, NDArrayInt @@ -162,7 +163,7 @@ def fetch_ucr( return_X_y: bool = False, **kwargs: Any, ) -> Bunch | Tuple[FDataGrid, NDArrayInt]: - """ + r""" Fetch a dataset from the UCR/UEA repository. The UCR/UEA Time Series Classification repository, hosted at @@ -173,6 +174,7 @@ def fetch_ucr( Args: name: Dataset name. + return_X_y: Return tuple (data, target) kwargs: Additional parameters for the function :func:`skdatasets.repositories.ucr.fetch`. @@ -247,7 +249,7 @@ def _fetch_fda_usc(name: str) -> Any: Acoustic-Phonetic Continuous Speech Corpus, NTIS, US Dept of Commerce) which is a widely used resource for research in speech recognition. A dataset was formed by selecting five phonemes for - classification based on digitized speech from this database. + classification based on digitized speech from this database. phonemes are transcribed as follows: "sh" as in "she", "dcl" as in "dark", "iy" as the vowel in "she", "aa" as the vowel in "dark", and "ao" as the first vowel in "water". From continuous speech of 50 male @@ -1551,3 +1553,92 @@ def fetch_mco( cite=":footcite:p:`ruiz-meana++_2003_cariporide`", bibliography=".. footbibliography::", ) + _param_descr + + +def _fetch_loon_data(name: str) -> Any: + return _fetch_cran_no_encoding_warning( + name, + "loon.data", + version="0.1.3", + ) + + +_bone_density_descr = """ + The Bone Density dataset is a study of bone density + in boys and girls aged 8-17. It contains data from 423 + individuals, measured irregularly in different times, + with an average of ~3 points per individual. + + References: + https://cran.r-project.org/package=loon.data + Laura K. Bachrach, Trevor Hastie, May-Choo Wang, + Balasubramanian Narasimhan, and Robert Marcus (1999) + "Bone Mineral Acquisition in Healthy Asian, Hispanic, Black + and Caucasian Youth. 
A Longitudinal Study", + J Clin Endocrinol Metab, 84, 4702-12. + Trevor Hastie, Robert Tibshirani, and Jerome Friedman (2009) + "The Elements of Statistical Learning", + 2nd Edition, Springer New York + +""" + + +def fetch_bone_density( + return_X_y: bool = False, + as_frame: bool = False, +) -> Bunch | Tuple[FDataGrid, NDArrayInt] | Tuple[DataFrame, Series]: + """ + Load the Bone Density dataset. This is an irregular dataset. + + The data is obtained from the R package 'loon.data', which compiles several + irregular datasets. Sources to be determined. + """ + descr = _bone_density_descr + frame = None + + raw_dataset = _fetch_loon_data("bone_ext") + + data = raw_dataset["bone_ext"] + + curve_name = "idnum" + argument_name = "age" + target_name = "sex" + coordinate_name = "spnbmd" + + curves = FDataIrregular._from_dataframe( + data, + id_column=curve_name, + argument_columns=argument_name, + coordinate_columns=coordinate_name, + argument_names=[argument_name], + coordinate_names=[coordinate_name], + dataset_name="bone_ext", + ) + + target = pd.Series( + data.drop_duplicates(subset=["idnum"])[target_name], + name="group", + ) + + feature_name = curves.dataset_name.lower() + target_names = target.values.tolist() + + if as_frame: + curves = pd.DataFrame({feature_name: curves}) + target_as_frame = target.reset_index(drop=True).to_frame() + frame = pd.concat([curves, target_as_frame], axis=1) + else: + target = target.values.codes + + if return_X_y: + return curves, target + + return Bunch( + data=curves, + target=target, + frame=frame, + categories={}, + feature_names=[argument_name], + target_names=target_names, + DESCR=descr, + ) diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index 5bf81b3b9..b16f0c01e 100644 --- a/skfda/exploratory/stats/_stats.py +++ b/skfda/exploratory/stats/_stats.py @@ -6,13 +6,12 @@ from typing import Callable, TypeVar, Union import numpy as np -from scipy import integrate from scipy.stats import rankdata 
from skfda._utils.ndfunction import average_function_value from ...misc.metrics._lp_distances import l2_distance -from ...representation import FData, FDataBasis, FDataGrid +from ...representation import FData, FDataBasis, FDataGrid, FDataIrregular from ...typing._metric import Metric from ...typing._numpy import NDArrayFloat from ..depth import Depth, ModifiedBandDepth @@ -103,7 +102,7 @@ def cov( @functools.singledispatch -def std(X: F, correction: int = 1) -> F: +def std(X: F, correction: int = 0) -> F: r""" Compute the standard deviation of all the samples in a FData object. @@ -127,7 +126,7 @@ def std(X: F, correction: int = 1) -> F: @std.register -def std_fdatagrid(X: FDataGrid, correction: int = 1) -> FDataGrid: +def std_fdatagrid(X: FDataGrid, correction: int = 0) -> FDataGrid: """Compute the standard deviation of a FDataGrid.""" return X.copy( data_matrix=np.std( @@ -138,7 +137,25 @@ def std_fdatagrid(X: FDataGrid, correction: int = 1) -> FDataGrid: @std.register -def std_fdatabasis(X: FDataBasis, correction: int = 1) -> FDataBasis: +def std_fdatairregular( + X: FDataIrregular, correction: int = 0, +) -> FDataIrregular: + """Compute the standard deviation of a FDataIrregular.""" + common_points, common_values = X._get_common_points_and_values() + std_values = np.std( + common_values, axis=0, ddof=correction, + ) + + return FDataIrregular( + start_indices=np.array([0]), + points=common_points, + values=std_values, + sample_names=(None,), + ) + + +@std.register +def std_fdatabasis(X: FDataBasis, correction: int = 0) -> FDataBasis: """Compute the standard deviation of a FDataBasis.""" from ..._utils import function_to_fdatabasis diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py index 8a2abcf87..f54ca010d 100644 --- a/skfda/exploratory/visualization/representation.py +++ b/skfda/exploratory/visualization/representation.py @@ -8,7 +8,7 @@ """ from __future__ import annotations -from typing import 
Any, Dict, Sequence, Sized, Tuple, TypeVar +from typing import Any, Dict, Optional, Sequence, Sized, Tuple, TypeVar import matplotlib.cm import matplotlib.patches @@ -22,6 +22,7 @@ from ..._utils import _to_grid_points, constants from ...misc.validation import validate_domain_range from ...representation._functional_data import FData +from ...representation.irregular import FDataIrregular from ...typing._base import DomainRangeLike, GridPointsLike from ._baseplot import BasePlot from ._utils import ColorLike, _set_labels @@ -549,6 +550,217 @@ def _plot( _set_labels(self.fdata, fig, axes, self.patches) +class PlotIrregular(BasePlot): # noqa: WPS230 + """ + Class used to plot a FDataIrregular object. + + Args: + fdata: FDataIrregular object set that we want to plot. + chart: figure over + with the graphs are plotted or axis over where the graphs are + plotted. If None and ax is also None, the figure is + initialized. + fig: figure over with the graphs are + plotted in case ax is not specified. If None and ax is also + None, the figure is initialized. + axes: axis over where the graphs + are plotted. If None, see param fig. + n_rows: designates the number of rows of the figure + to plot the different dimensions of the image. Only specified + if fig and ax are None. + n_cols: designates the number of columns of the + figure to plot the different dimensions of the image. Only + specified if fig and ax are None. + domain_range: Range where the + function will be plotted. In objects with unidimensional domain + the domain range should be a tuple with the bounds of the + interval; in the case of surfaces a list with 2 tuples with + the ranges for each dimension. Default uses the domain range + of the functional object. + group: contains integers from [0 to number of + labels) indicating to which group each sample belongs to. Then, + the samples with the same label are plotted in the same color. 
+ If None, the default value, each sample is plotted in the color + assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. + group_colors: colors in which groups are + represented, there must be one for each group. If None, each + group is shown with distinct colors in the "Greys" colormap. + group_names: name of each of the groups which appear + in a legend, there must be one for each one. Defaults to None + and the legend is not shown. Implies `legend=True`. + legend: if `True`, show a legend with the groups. If + `group_names` is passed, it will be used for finding the names + to display in the legend. Otherwise, the values passed to + `group` will be used. + kwargs: if dim_domain is 1, keyword arguments to be passed to + the matplotlib.pyplot.plot function; if dim_domain is 2, + keyword arguments to be passed to the + matplotlib.pyplot.plot_surface function. + """ + + def __init__( # noqa: WPS211 + self, + fdata: FDataIrregular, + chart: Figure | Axes | None = None, + *, + fig: Figure | None = None, + axes: Axes | None = None, + n_rows: int | None = None, + n_cols: int | None = None, + domain_range: Tuple[int, int] | DomainRangeLike | None = None, + group: Sequence[K] | None = None, + group_colors: Indexable[K, ColorLike] | None = None, + group_names: Indexable[K, str] | None = None, + legend: bool = False, + marker: str | None = None, + **kwargs: Any, + ) -> None: + super().__init__( + chart, + fig=fig, + axes=axes, + n_rows=n_rows, + n_cols=n_cols, + ) + self.fdata = fdata + + # There may be different points for each function + self.grid_points = np.split( + self.fdata.points, self.fdata.start_indices[1:], + ) + self.evaluated_points = np.split( + self.fdata.values, self.fdata.start_indices[1:], + ) + + self.domain_range = domain_range + self.group = group + self.group_colors = group_colors + self.group_names = group_names + self.legend = legend + self.marker = marker + + if self.domain_range is None: + self.domain_range = self.fdata.domain_range + else: +
self.domain_range = validate_domain_range(self.domain_range) + + sample_colors, patches = _get_color_info( + self.fdata, + self.group, + self.group_names, + self.group_colors, + self.legend, + kwargs, + ) + self.sample_colors = sample_colors + self.patches = patches + + @property + def dim(self) -> int: + return self.fdata.dim_domain + 1 + + @property + def n_subplots(self) -> int: + return self.fdata.dim_codomain + + @property + def n_samples(self) -> int: + return self.fdata.n_samples + + def _plot( + self, + fig: Figure, + axes: Sequence[Axes], + ) -> None: + # Implement in subclasses + pass + + +class LinearPlotIrregular(PlotIrregular): + """ + Class used to plot the individual curves of a FDataIrregular object. + + It uses linear interpolation between the points of each curve. + """ + + def _plot( + self, + fig: Figure, + axes: Sequence[Axes], + ) -> None: + """ + Plot the individual curves of a FDataIrregular object. + + Returns: + fig: figure object in which the graphs are plotted. + """ + artists_shape = (self.n_samples, self.fdata.dim_codomain) + self.artists = np.zeros(artists_shape, dtype=Artist) + + color_dict: Dict[str, ColorLike | None] = {} + + if self.fdata.dim_domain == 1: + for j in range(self.fdata.n_samples): + + set_color_dict(self.sample_colors, j, color_dict) + + self.artists[j, 0] = axes[0].plot( + self.grid_points[j].flatten(), + self.evaluated_points[j].flatten(), + **color_dict, + picker=True, + pickradius=2, + marker=self.marker, + ) + else: + # TODO Implement for multidimensional domains. How to build the mesh? + raise NotImplementedError() + + _set_labels(self.fdata, fig, axes, self.patches) + + +class ScatterPlotIrregular(PlotIrregular): + """Class used to scatter plot a FDataIrregular object.""" + + def _plot( + self, + fig: Figure, + axes: Sequence[Axes], + ) -> None: + """ + Scatter FDataIrregular object. + + Returns: + fig: figure object in which the graphs are plotted.
+ """ + artists_shape = (self.n_samples, self.fdata.dim_codomain) + self.artists = np.zeros(artists_shape, dtype=Artist) + + color_dict: Dict[str, ColorLike | None] = {} + + if self.fdata.dim_domain == 1: + + for j in range(self.fdata.n_samples): + + set_color_dict(self.sample_colors, j, color_dict) + + self.artists[j, 0] = axes[0].scatter( + self.grid_points[j], + self.evaluated_points[j], + **color_dict, + picker=True, + pickradius=2, + marker=self.marker, + ) + + else: + + # TODO Implement for multidimensional + raise NotImplementedError() + + _set_labels(self.fdata, fig, axes, self.patches) + + def set_color_dict( sample_colors: Any, ind: int, diff --git a/skfda/misc/covariances.py b/skfda/misc/covariances.py index 43635eb02..298fcf64d 100644 --- a/skfda/misc/covariances.py +++ b/skfda/misc/covariances.py @@ -31,7 +31,7 @@ def _transform_to_2d(t: ArrayLike) -> NDArrayFloat: """Transform 1d arrays in column vectors.""" t = np.asfarray(t) - dim = len(t.shape) + dim = t.ndim assert dim <= 2 if dim < 2: diff --git a/skfda/ml/regression/_linear_regression.py b/skfda/ml/regression/_linear_regression.py index f672a2923..4cc45524a 100644 --- a/skfda/ml/regression/_linear_regression.py +++ b/skfda/ml/regression/_linear_regression.py @@ -607,7 +607,7 @@ def _check_and_convert( np.ndarray: numpy 2D array. 
""" new_X = np.asarray(X) - if len(new_X.shape) == 1: + if new_X.ndim == 1: new_X = new_X[:, np.newaxis] return new_X diff --git a/skfda/preprocessing/dim_reduction/variable_selection/recursive_maxima_hunting.py b/skfda/preprocessing/dim_reduction/variable_selection/recursive_maxima_hunting.py index 432b65bb8..2a66ffa19 100644 --- a/skfda/preprocessing/dim_reduction/variable_selection/recursive_maxima_hunting.py +++ b/skfda/preprocessing/dim_reduction/variable_selection/recursive_maxima_hunting.py @@ -46,7 +46,7 @@ def _transform_to_2d(t: ArrayLike) -> NDArrayFloat: t = np.asfarray(t) - dim = len(t.shape) + dim = t.ndim assert dim <= 2 if dim < 2: diff --git a/skfda/preprocessing/smoothing/_basis.py b/skfda/preprocessing/smoothing/_basis.py index 687ab16b1..9dcb7ca52 100644 --- a/skfda/preprocessing/smoothing/_basis.py +++ b/skfda/preprocessing/smoothing/_basis.py @@ -14,13 +14,17 @@ from ..._utils import _cartesian_product, _to_grid_points from ...misc.lstsq import LstsqMethod, solve_regularized_weighted_lstsq from ...misc.regularization import L2Regularization -from ...representation import FData, FDataBasis, FDataGrid +from ...representation import FData, FDataBasis, FDataGrid, FDataIrregular from ...representation.basis import Basis from ...typing._base import GridPointsLike from ...typing._numpy import NDArrayFloat from ._linear import _LinearSmoother +############################# +# BasisSmoother +############################# + class BasisSmoother(_LinearSmoother): r""" Transform raw data to a smooth functional form. 
@@ -225,15 +229,15 @@ def __init__( def _coef_matrix( self, - input_points: GridPointsLike, + eval_points: NDArrayFloat, *, - data_matrix: Optional[NDArrayFloat] = None, + function_values: NDArrayFloat | None = None, ) -> NDArrayFloat: """Get the matrix that gives the coefficients.""" from ...misc.regularization import compute_penalty_matrix basis_values_input = self.basis( - _cartesian_product(_to_grid_points(input_points)), + eval_points, ).reshape((self.basis.n_basis, -1)).T penalty_matrix = compute_penalty_matrix( @@ -243,13 +247,13 @@ def _coef_matrix( ) # Get the matrix for computing the coefficients if no - # data_matrix is passed - if data_matrix is None: - data_matrix = np.eye(basis_values_input.shape[0]) + # function_values is passed + if function_values is None: + function_values = np.eye(basis_values_input.shape[0]) return solve_regularized_weighted_lstsq( coefs=basis_values_input, - result=data_matrix, + result=function_values, weights=self.weights, penalty_matrix=penalty_matrix, lstsq_method=self.method, @@ -266,11 +270,13 @@ def _hat_matrix( ), ).reshape((self.basis.n_basis, -1)).T - return basis_values_output @ self._coef_matrix(input_points) + return basis_values_output @ self._coef_matrix( + _cartesian_product(_to_grid_points(input_points)), + ) def fit( self, - X: FDataGrid, + X: FDataGrid | FDataIrregular, y: object = None, ) -> BasisSmoother: """Compute the hat matrix for the desired output points. 
@@ -283,7 +289,7 @@ def fit( self """ - self.input_points_ = X.grid_points + self.input_points_ = X._get_input_points() self.output_points_ = ( _to_grid_points(self.output_points) if self.output_points is not None @@ -297,7 +303,7 @@ def fit( def transform( self, - X: FDataGrid, + X: FDataGrid | FDataIrregular, y: object = None, ) -> FData: """ @@ -314,14 +320,16 @@ def transform( assert all( np.array_equal(i, s) for i, s in zip( self.input_points_, - X.grid_points, + X._get_input_points(), ) ) + eval_points, function_values = X._get_points_and_values() + if self.return_basis: coefficients = self._coef_matrix( - input_points=X.grid_points, - data_matrix=X.data_matrix.reshape((X.n_samples, -1)).T, + eval_points=eval_points, + function_values=function_values, ).T return FDataBasis( diff --git a/skfda/representation/__init__.py b/skfda/representation/__init__.py index fd5afe3c5..9467213a3 100644 --- a/skfda/representation/__init__.py +++ b/skfda/representation/__init__.py @@ -10,11 +10,13 @@ "extrapolation", "grid", "interpolation", + "irregular", ], submod_attrs={ '_functional_data': ["FData", "concatenate"], 'basis': ["FDataBasis"], 'grid': ["FDataGrid"], + 'irregular': ["FDataIrregular"], }, ) @@ -22,3 +24,4 @@ from ._functional_data import FData as FData, concatenate as concatenate from .basis import FDataBasis as FDataBasis from .grid import FDataGrid as FDataGrid + from .irregular import FDataIrregular as FDataIrregular diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py index 945dda05d..b943f7932 100644 --- a/skfda/representation/basis/_fdatabasis.py +++ b/skfda/representation/basis/_fdatabasis.py @@ -55,7 +55,7 @@ class FDataBasis(FData): # noqa: WPS214 functional datum. domain_range: 2 dimension matrix where each row contains the bounds of the interval in which the functional data - is considered to exist for each one of the axies. + is considered to exist for each one of the axes. 
dataset_name: name of the dataset. argument_names: tuple containing the names of the different arguments. @@ -687,8 +687,8 @@ def _array_to_R( # noqa: N802 coefficients: NDArrayFloat, transpose: bool = False, ) -> str: - if len(coefficients.shape) == 1: - coefficients = coefficients.reshape((1, coefficients.shape[0])) + if coefficients.ndim == 1: + coefficients = coefficients[None] if transpose is True: coefficients = np.transpose(coefficients) diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py index bdd01c3e4..292062204 100644 --- a/skfda/representation/grid.py +++ b/skfda/representation/grid.py @@ -16,6 +16,7 @@ Callable, Optional, Sequence, + Tuple, Type, TypeVar, Union, @@ -30,7 +31,13 @@ import scipy.stats.mstats from matplotlib.figure import Figure -from .._utils import _check_array_key, _int_to_real, _to_grid_points, constants +from .._utils import ( + _cartesian_product, + _check_array_key, + _int_to_real, + _to_grid_points, + constants, +) from ..typing._base import ( DomainRange, DomainRangeLike, @@ -64,7 +71,7 @@ class FDataGrid(FData): # noqa: WPS214 contains the points of dicretisation for each axis of data_matrix. domain_range: 2 dimension matrix where each row contains the bounds of the interval in which the functional data - is considered to exist for each one of the axies. + is considered to exist for each one of the axes. dataset_name: name of the dataset. argument_names: tuple containing the names of the different arguments. 
@@ -529,6 +536,15 @@ def _check_same_dimensions(self: T, other: T) -> None: if not np.array_equal(self.grid_points, other.grid_points): raise ValueError("Grid points for both objects must be equal") + def _get_points_and_values(self: T) -> Tuple[NDArrayFloat, NDArrayFloat]: + return ( + _cartesian_product(_to_grid_points(self.grid_points)), + self.data_matrix.reshape((self.n_samples, -1)).T, + ) + + def _get_input_points(self: T) -> GridPoints: + return self.grid_points + def sum( # noqa: WPS125 self: T, *, diff --git a/skfda/representation/irregular.py b/skfda/representation/irregular.py new file mode 100644 index 000000000..1524a02fa --- /dev/null +++ b/skfda/representation/irregular.py @@ -0,0 +1,1630 @@ +"""Discretised functional data module. + +This module defines a class for representing discretized irregular data, +in which the observations may be made in different grid points in each +data function, and the overall density of the observations may be low + +""" +from __future__ import annotations + +import itertools +import numbers +from typing import ( + Any, + Callable, + Optional, + Sequence, + Tuple, + Type, + TypeVar, + Union, +) + +import numpy as np +import pandas.api.extensions +from matplotlib.figure import Figure + +from .._utils import _cartesian_product, _check_array_key, _to_grid_points +from ..typing._base import ( + DomainRange, + DomainRangeLike, + GridPoints, + GridPointsLike, + LabelTupleLike, +) +from ..typing._numpy import ( + ArrayLike, + DTypeLike, + NDArrayBool, + NDArrayFloat, + NDArrayInt, +) +from ._functional_data import FData +from .basis import Basis, FDataBasis +from .evaluator import Evaluator +from .extrapolation import ExtrapolationLike +from .grid import FDataGrid +from .interpolation import SplineInterpolation + +T = TypeVar("T", bound='FDataIrregular') + +###################### +# Auxiliary functions# +###################### + + +def _reduceat( + ufunc, + array: ArrayLike, + indices: ArrayLike, + axis: int = 0, + 
dtype=None, + out=None, + *, + value_empty +): + """ + Wrapped `np.ufunc.reduceat` to manage some edge cases. + + The edge cases are the ones described in the doc of + `np.ufunc.reduceat`. Different behaviours are the following: + - No exception is raised when `indices[i] < 0` or + `indices[i] >= len(array)`. Instead, the corresponding value + is `value_empty`. + - When not in the previous case, the result is `value_empty` if + `indices[i] == indices[i+1]` and otherwise, the same as + `ufunc.reduce(array[indices[i]:indices[i+1]])`. This means + that an exception is still raised if `indices[i] > + indices[i+1]`. + + Note: The `value_empty` must be convertible to the `dtype` (either + provided or inferred from the `ufunc` operations). + """ + array = np.asarray(array) + indices = np.asarray(indices) + + n = array.shape[axis] + good_axis_idx = ( + (indices >= 0) & (indices < n) & (np.diff(indices, append=n) > 0) + ) + + good_idx = [slice(None)] * array.ndim + good_idx[axis] = good_axis_idx + good_idx = tuple(good_idx) + + reduceat_out = ufunc.reduceat( + array, indices[good_axis_idx], axis=axis, dtype=dtype + ) + + out_shape = list(array.shape) + out_shape[axis] = len(indices) + out_dtype = dtype or reduceat_out.dtype + + if out is None: + out = np.full(out_shape, value_empty, dtype=out_dtype) + else: + out.astype(out_dtype, copy=False) + out.fill(value_empty) + + out[good_idx] = reduceat_out + + return out + + +def _get_sample_range_from_data( + start_indices: NDArrayInt, + points: NDArrayFloat, +) -> DomainRangeLike: + """Compute the domain ranges of each sample. + + Args: + start_indices: start_indices of the FDataIrregular object. + points: points of the FDataIrregular object. + + Returns: + DomainRange: (sample_range) a tuple of tuples of 2-tuples where + sample_range[f][d] = (min_point, max_point) is the domain range for + the function f in dimension d.
+ """ + return np.stack( + [ + _reduceat( + ufunc, + points, + start_indices, + value_empty=np.nan, + dtype=float, + ) + for ufunc in (np.fmin, np.fmax) + ], + axis=-1, + ) + + +def _get_domain_range_from_sample_range( + sample_range: DomainRangeLike, +) -> DomainRange: + """Compute the domain range of the whole dataset. + + Args: + sample_range: sample_range of the FDataIrregular object. + + Returns: + DomainRange: (domain_range) a tuple of 2-tuples where + domain_range[d] = (min_point, max_point) is the domain range for + the dimension d. + """ + sample_range_array = np.asarray(sample_range) + min_arguments = np.nanmin(sample_range_array[..., 0], axis=0) + max_arguments = np.nanmax(sample_range_array[..., 1], axis=0) + return tuple(zip(min_arguments, max_arguments)) + + +###################### +# FDataIrregular# +###################### + + +class FDataIrregular(FData): # noqa: WPS214 + r"""Represent discretised functional data of an irregular or sparse nature. + + Class for representing irregular functional data in a compact manner, + allowing basic operations, representation and conversion to basis format. + + Attributes: + start_indices: A unidimensional array which stores the index of + the `points` and `values` arrays where the data + of each individual curve of the sample begins. + points: An array of every argument of the domain for + every curve in the sample. Each row contains an observation. + values: An array of every value of the codomain for + every curve in the sample. Each row contains an observation. + domain_range: 2 dimension matrix where each row + contains the bounds of the interval in which the functional data + is considered to exist for each one of the axes. + dataset_name: Name of the dataset. + argument_names: Tuple containing the names of the different + arguments. + coordinate_names: Tuple containing the names of the different + coordinate functions. + extrapolation: Defines the default type of + extrapolation.
By default None, which does not apply any type of + extrapolation. See `Extrapolation` for detailed information about the + types of extrapolation. + interpolation: Defines the type of interpolation + applied in `evaluate`. + + Raises: + ValueError: + - if `points` and `values` lengths don't match + - if `start_indices` doesn't start with `0`, or is decreasing + somewhere, or ends with a value greater than + `len(points)`. + + Examples: + Representation of an irregular functional data object with 2 samples + representing a function :math:`f : \mathbb{R}\longmapsto\mathbb{R}`, + with 2 and 3 discretization points respectively. + + >>> indices = [0, 2] + >>> arguments = [[1], [2], [3], [4], [5]] + >>> values = [[1], [2], [3], [4], [5]] + >>> FDataIrregular(indices, arguments, values) + FDataIrregular( + start_indices=array([0, 2]), + points=array([[1], + [2], + [3], + [4], + [5]]), + values=array([[1], + [2], + [3], + [4], + [5]]), + domain_range=((1.0, 5.0),), + ...) + + The number of arguments and values must be the same. + + >>> indices = [0,2] + >>> arguments = np.arange(5).reshape(-1, 1) + >>> values = np.arange(6).reshape(-1, 1) + >>> FDataIrregular(indices, arguments, values) + Traceback (most recent call last): + .... + ValueError: Dimension mismatch ... + + The indices in start_indices must point to correct rows + in points and values. + + >>> indices = [0,7] + >>> arguments = np.arange(5).reshape(-1, 1) + >>> values = np.arange(5).reshape(-1, 1) + >>> FDataIrregular(indices, arguments, values) + Traceback (most recent call last): + .... + ValueError: Index in start_indices out of bounds... + + FDataIrregular supports higher dimensional data both in the domain + and in the codomain (image). + + Representation of a functional data object with 2 samples + representing a function :math:`f : \mathbb{R}\longmapsto\mathbb{R}^2`.
+ + >>> indices = [0, 2] + >>> arguments = [[1.], [2.], [3.], [4.], [5.]] + >>> values = [[1., 1.], [2., 2.], [3., 3.], [4., 4.], [5., 5.]] + >>> fd = FDataIrregular(indices, arguments, values) + >>> fd.dim_domain, fd.dim_codomain + (1, 2) + + Representation of a functional data object with 2 samples + representing a function :math:`f : \mathbb{R}^2\longmapsto\mathbb{R}`. + + >>> indices = [0, 2] + >>> arguments = [[1., 1.], [2., 2.], [3., 3.], [4., 4.], [5., 5.]] + >>> values = [[1.], [2.], [3.], [4.], [5.]] + >>> fd = FDataIrregular(indices, arguments, values) + >>> fd.dim_domain, fd.dim_codomain + (2, 1) + + """ + + def __init__( # noqa: WPS211 + self, + start_indices: ArrayLike, + points: ArrayLike, + values: ArrayLike, + *, + domain_range: Optional[DomainRangeLike] = None, + dataset_name: Optional[str] = None, + sample_names: Optional[LabelTupleLike] = None, + extrapolation: Optional[ExtrapolationLike] = None, + interpolation: Optional[Evaluator] = None, + argument_names: Optional[LabelTupleLike] = None, + coordinate_names: Optional[LabelTupleLike] = None, + ): + """Construct a FDataIrregular object.""" + self.start_indices = np.asarray(start_indices) + self.points = np.asarray(points) + if self.points.ndim == 1: + self.points = self.points.reshape(-1, 1) + self.values = np.asarray(values) + if self.values.ndim == 1: + self.values = self.values.reshape(-1, 1) + + if len(self.points) != len(self.values): + raise ValueError("Dimension mismatch in points and values") + + if self.start_indices[0] != 0: + raise ValueError("Array start_indices must start with 0") + + if np.any(np.diff(self.start_indices) < 0): + raise ValueError("Array start_indices must be non-decreasing") + + if self.start_indices[-1] > len(self.points): + raise ValueError("Index in start_indices out of bounds") + + # Ensure arguments are in order within each function + sorted_arguments, sorted_values = self._sort_by_arguments() + self.points = sorted_arguments + self.values = sorted_values + + 
self._sample_range = _get_sample_range_from_data( + self.start_indices, + self.points, + ) + + # Default value for sample_range is a list of tuples with + # the first and last arguments of each curve for each dimension + + if domain_range is None: + domain_range = _get_domain_range_from_sample_range( + self._sample_range, + ) + + # Default value for domain_range is a list of tuples with + # the minimum and maximum value of the arguments for each + # dimension + + from ..misc.validation import validate_domain_range + self._domain_range = validate_domain_range(domain_range) + + self.interpolation = interpolation + + super().__init__( + extrapolation=extrapolation, + dataset_name=dataset_name, + argument_names=argument_names, + coordinate_names=coordinate_names, + sample_names=sample_names, + ) + + @classmethod + def _from_dataframe( + cls, + dataframe: pandas.DataFrame, + id_column: str, + argument_columns: Sequence[str | None], + coordinate_columns: Sequence[str | None], + **kwargs: Any, + ) -> FDataIrregular: + """Create a FDataIrregular object from a pandas dataframe. + + The pandas dataframe should be in 'long' format: each row + containing the arguments and values of a given point of the + dataset, and an identifier which specifies which curve they + belong to. + + Args: + dataframe: Pandas dataframe containing the + irregular functional dataset. + id_column: Name of the column which contains the information + about which curve each row belongs to. + argument_columns: list of columns where + the arguments for each dimension of the domain can be found. + coordinate_columns: list of columns where + the values for each dimension of the image can be found. + kwargs: Arguments for the FDataIrregular constructor. + + Returns: + FDataIrregular: Returns a FDataIrregular object which contains + the irregular functional data of the dataset.
@classmethod
def from_fdatagrid(
    cls: Type[T],
    f_data: FDataGrid,
    **kwargs,
) -> FDataIrregular:
    """Build a FDataIrregular holding the observed values of a FDataGrid.

    Every sample starts with the full set of grid nodes. A node is
    dropped from a sample when all the coordinates of its value are NaN.

    Args:
        f_data (FDataGrid): Source functional data.
        kwargs: Extra arguments forwarded to the FDataIrregular
            constructor.

    Returns:
        FDataIrregular: Object with the same data as the source, stored
        in the irregular layout.
    """
    n_samples = f_data.n_samples
    dim_codomain = f_data.dim_codomain

    # All the nodes of the grid, repeated once per sample.
    grid_nodes = _cartesian_product(_to_grid_points(f_data.grid_points))
    repeated_nodes = np.tile(grid_nodes, (n_samples, 1))

    # Values arranged as (sample, node, coordinate).
    values_by_sample = f_data.data_matrix.reshape(
        (n_samples, -1, dim_codomain),
    )

    # True where at least one coordinate of the value is not NaN.
    observed = ~np.all(np.isnan(values_by_sample), axis=-1)
    observed_flat = observed.reshape((-1,))

    kept_values = values_by_sample.reshape((-1, dim_codomain))[observed_flat]
    kept_points = repeated_nodes[observed_flat]

    # Each sample begins where the previous ones end.
    kept_per_sample = np.sum(observed, axis=-1)
    first_rows = np.concatenate((
        np.zeros(1, np.int32),
        np.cumsum(kept_per_sample[:-1]),
    ))

    return cls(first_rows, kept_points, kept_values, **kwargs)
@property
def dim_domain(self) -> int:
    """Return the domain dimension: the number of columns of ``points``."""
    return self.points.shape[1]
def check_same_dimensions(self: T, other: T) -> None:
    """Verify that ``other`` has the same dimensions as this object.

    The codomain dimension is compared first, then the domain dimension.

    Args:
        other (T): FDataIrregular object whose dimensions are compared
            with the ones of this object.

    Raises:
        ValueError: Dimension mismatch in coordinates.
        ValueError: Dimension mismatch in arguments.
    """
    checks = (
        ("dim_codomain", "Dimension mismatch in coordinates"),
        ("dim_domain", "Dimension mismatch in arguments"),
    )
    for attribute, message in checks:
        if getattr(self, attribute) != getattr(other, attribute):
            raise ValueError(message)
def var(self: T, correction: int = 0) -> T:
    """Compute the pointwise variance across the samples.

    Only the points shared by every sample take part in the computation.

    Args:
        correction: degrees of freedom adjustment. The divisor used in the
            calculation is `N - correction`, where `N` represents the
            number of elements. Default: `0`.

    Returns:
        FDataIrregular object with a single curve, defined on the points
        common to all the samples of the original object, whose values
        are the variance of the samples at each of those points.
    """
    shared_points, shared_values = self._get_common_points_and_values()

    # The first axis of the shared values indexes the samples.
    pointwise_variance = np.var(shared_values, axis=0, ddof=correction)

    return FDataIrregular(
        start_indices=np.array([0]),
        points=shared_points,
        values=pointwise_variance,
        sample_names=(None,),
    )
def _eq_elemenwise(self: T, other: T) -> NDArrayBool:
    """Check that both objects store exactly the same raw arrays.

    ``start_indices``, ``points`` and ``values`` are compared with
    :func:`numpy.array_equal`, which requires equal shapes as well as
    equal elements. A plain ``==`` comparison is not enough: it
    broadcasts, so arrays of different shapes could be reported as equal,
    and it fails when the shapes cannot be broadcast together.

    Args:
        other: Object to compare with.

    Returns:
        True if the three arrays match in shape and content.
    """
    return np.all(
        [
            np.array_equal(self.start_indices, other.start_indices),
            np.array_equal(self.points, other.points),
            np.array_equal(self.values, other.values),
        ],
    )
def __add__(
    self: T,
    other: Union[T, NDArrayFloat, NDArrayInt, float],
) -> T:
    """Add ``other`` to the values of this object.

    Args:
        other: Operand, converted with ``_get_op_matrix``.

    Returns:
        Result built by ``_copy_op`` with the summed values, or
        ``NotImplemented`` when ``_get_op_matrix`` returns ``None``.
    """
    operand = self._get_op_matrix(other)
    return (
        NotImplemented
        if operand is None
        else self._copy_op(other, values=self.values + operand)
    )
def concatenate(self: T, *others: T, as_coordinates: bool = False) -> T:
    """Join samples from a similar FDataIrregular object.

    Joins samples from another FDataIrregular object if it has the same
    dimensions.

    Args:
        others: Objects to be concatenated.
        as_coordinates (bool): If False concatenates as
            new samples, else, concatenates the other functions as
            new components of the image. Defaults to false.

    Raises:
        NotImplementedError: Not implemented for as_coordinates = True
        ValueError: If no object to concatenate is given, or if the
            objects do not share the same dimensions.

    Returns:
        T: FDataIrregular object with the samples from the source objects.

    Examples:
        >>> indices = [0, 2]
        >>> arguments = values = np.arange(5.).reshape(-1, 1)
        >>> fd = FDataIrregular(indices, arguments, values)
        >>> arguments_2 = values_2 = np.arange(5, 10).reshape(-1, 1)
        >>> fd_2 = FDataIrregular(indices, arguments_2, values_2)
        >>> fd.concatenate(fd_2)
        FDataIrregular(
            start_indices=array([0, 2, 5, 7]),
            points=array([[ 0.],
                [ 1.],
                [ 2.],
                [ 3.],
                [ 4.],
                [ 5.],
                [ 6.],
                [ 7.],
                [ 8.],
                [ 9.]]),
            values=array([[ 0.],
                [ 1.],
                [ 2.],
                [ 3.],
                [ 4.],
                [ 5.],
                [ 6.],
                [ 7.],
                [ 8.],
                [ 9.]]),
            domain_range=((0.0, 9.0),),
            ...)
    """
    # TODO As coordinates
    if as_coordinates:
        raise NotImplementedError(
            "Not implemented for as_coordinates = True",
        )

    # Explicit check instead of ``assert``: assertions are removed when
    # Python runs with -O, which would silently skip this validation.
    if not others:
        raise ValueError("No objects to concatenate")

    all_objects = (self,) + others
    start_indices_split = []
    total_points = 0
    points_split = []
    values_split = []
    total_sample_names_split = []
    domain_range_split = []

    # Appending ``self`` at the end makes the pairs wrap around, so every
    # object is visited exactly once as ``x`` while its dimensions are
    # checked against the following object.
    for x, y in itertools.pairwise(all_objects + (self,)):
        x.check_same_dimensions(y)
        # Shift the indices by the number of points already collected.
        start_indices_split.append(x.start_indices + total_points)
        total_points += len(x.points)
        points_split.append(x.points)
        values_split.append(x.values)
        total_sample_names_split.append(x.sample_names)
        domain_range_split.append(x.domain_range)

    start_indices = np.concatenate(start_indices_split)
    points = np.concatenate(points_split)
    values = np.concatenate(values_split)
    total_sample_names = list(itertools.chain(*total_sample_names_split))

    # The joint domain range spans, in each dimension, from the smallest
    # lower limit to the largest upper limit of the objects.
    domain_range_stacked = np.stack(domain_range_split, axis=-1)
    domain_range = np.c_[
        domain_range_stacked[:, 0].min(axis=-1),
        domain_range_stacked[:, 1].max(axis=-1),
    ]

    return self.copy(
        start_indices,
        points,
        values,
        domain_range=domain_range,
        sample_names=total_sample_names,
    )
+ + Args: + args: Positional arguments to be passed to the class + :class:`~skfda.exploratory.visualization.representation.ScatterPlotIrregular`. + kwargs: Keyword arguments to be passed to the class + :class:`~skfda.exploratory.visualization.representation.ScatterPlotIrregular`. + + Returns: + Figure object in which the graphs are plotted. + """ + from ..exploratory.visualization.representation import ( + ScatterPlotIrregular, + ) + + return ScatterPlotIrregular(self, *args, **kwargs).plot() + + def to_basis(self, basis: Basis, **kwargs: Any) -> FDataBasis: + """Return the basis representation of the object. + + Args: + basis (Basis): basis object in which the functional data are + going to be represented. + kwargs: keyword arguments to be passed to + FDataBasis.from_data(). + + Raises: + ValueError: Incorrect domain dimension + ValueError: Incorrect codomain dimension + + Returns: + FDataBasis: Basis representation of the funtional data + object. + """ + from ..preprocessing.smoothing import BasisSmoother + + if self.dim_domain != basis.dim_domain: + raise ValueError( + f"The domain of the function has " + f"dimension {self.dim_domain} " + f"but the domain of the basis has " + f"dimension {basis.dim_domain}", + ) + elif self.dim_codomain != basis.dim_codomain: + raise ValueError( + f"The codomain of the function has " + f"dimension {self.dim_codomain} " + f"but the codomain of the basis has " + f"dimension {basis.dim_codomain}", + ) + + # Readjust the domain range if there was not an explicit one + if not basis.is_domain_range_fixed(): + basis = basis.copy(domain_range=self.domain_range) + + smoother = BasisSmoother( + basis=basis, + **kwargs, + return_basis=True, + ) + + # Only uses the available values for each curve + basis_coefficients = [ + smoother.fit_transform(curve).coefficients[0] + for curve in self + ] + + return FDataBasis( + basis, + basis_coefficients, + dataset_name=self.dataset_name, + argument_names=self.argument_names, + 
def _to_data_matrix(self) -> tuple[ArrayLike, list[ArrayLike]]:
    """Convert FDataIrregular values to numpy matrix.

    The grid is built, for every dimension of the domain, from the
    distinct coordinates present in ``points``. Undefined values in
    the grid will be represented with np.nan.

    Returns:
        ArrayLike: numpy array with the resulting matrix, indexed by
        sample, then by the position along each domain dimension and
        finally by coordinate of the image.
        list: numpy arrays representing grid_points.
    """
    # Find the common grid points: one sorted array of distinct
    # coordinates per dimension of the domain.
    grid_points = list(map(np.unique, self.points.T))

    # Start with every cell undefined.
    unified_matrix = np.full(
        (self.n_samples, *map(len, grid_points), self.dim_codomain), np.nan
    )

    # For each dimension, position of every measurement in that
    # dimension's grid.
    points_pos = tuple(
        np.searchsorted(*arg) for arg in zip(grid_points, self.points.T)
    )

    # Sample that owns each measurement: the last entry of
    # start_indices that is not greater than the row number.
    sample_idx = (
        np.searchsorted(
            self.start_indices, np.arange(len(self.points)), "right"
        )
        - 1
    )

    # Place each measured value in its (sample, grid position) cell.
    unified_matrix[(sample_idx,) + points_pos] = self.values

    return unified_matrix, grid_points
def copy(  # noqa: WPS211
    self: T,
    start_indices: Optional[ArrayLike] = None,
    points: Optional[ArrayLike] = None,
    values: Optional[ArrayLike] = None,
    deep: bool = False,  # For Pandas compatibility
    domain_range: Optional[DomainRangeLike] = None,
    dataset_name: Optional[str] = None,
    sample_names: Optional[LabelTupleLike] = None,
    extrapolation: Optional[ExtrapolationLike] = None,
    interpolation: Optional[Evaluator] = None,
    argument_names: Optional[LabelTupleLike] = None,
    coordinate_names: Optional[LabelTupleLike] = None,
) -> T:
    """
    Create a new FDataIrregular based on this one.

    Every argument left as ``None`` takes the value of the corresponding
    attribute of this object. Any other value replaces it in the new
    object. The ``deep`` argument is accepted but not used.

    """
    return FDataIrregular(
        self.start_indices if start_indices is None else start_indices,
        self.points if points is None else points,
        self.values if values is None else values,
        domain_range=(
            self.domain_range if domain_range is None else domain_range
        ),
        dataset_name=(
            self.dataset_name if dataset_name is None else dataset_name
        ),
        # The name attributes are immutable tuples, so they can be shared.
        argument_names=(
            self.argument_names if argument_names is None
            else argument_names
        ),
        coordinate_names=(
            self.coordinate_names if coordinate_names is None
            else coordinate_names
        ),
        sample_names=(
            self.sample_names if sample_names is None else sample_names
        ),
        extrapolation=(
            self.extrapolation if extrapolation is None else extrapolation
        ),
        interpolation=(
            self.interpolation if interpolation is None else interpolation
        ),
    )
WPS210 + self: T, + domain_range: DomainRangeLike, + *, + with_bounds: bool = False, + ) -> T: + """ + Restrict the functions to a new domain range. + + Args: + domain_range: New domain range. + with_bounds: Whether or not to ensure domain boundaries + appear in `grid_points`. + + Returns: + T: Restricted function. + + """ + if with_bounds: # To do + raise NotImplementedError('Not yet implemented for FDataIrregular') + + from ..misc.validation import validate_domain_range + + npdr = np.broadcast_to( + validate_domain_range(domain_range), + (self.dim_domain, 2), + ) + + mask = np.all( + (npdr[:, 0] <= self.points) & (self.points <= npdr[:, 1]), + axis=1, + ) + + num_points = _reduceat(np.add, mask, self.start_indices, value_empty=0) + start_indices = np.r_[[0], num_points[:-1].cumsum()] + + return self.copy( + start_indices=start_indices, + points=self.points[mask], + values=self.values[mask], + domain_range=npdr, + ) + + def shift( + self, + shifts: Union[ArrayLike, float], + *, + restrict_domain: bool = False, + extrapolation: Optional[ExtrapolationLike] = None, + ) -> FDataIrregular: + r""" + Perform a shift of the curves. + + The i-th shifted function :math:`y_i` has the form + + .. math:: + y_i(t) = x_i(t + \delta_i) + + where :math:`x_i` is the i-th original function and :math:`delta_i` is + the shift performed for that function, that must be a vector in the + domain space. + + Note that a positive shift moves the graph of the function in the + negative direction and vice versa. + + Args: + shifts: List with the shifts + corresponding for each sample or numeric with the shift to + apply to all samples. + restrict_domain: If True restricts the domain to avoid the + evaluation of points outside the domain using extrapolation. + Defaults uses extrapolation. + extrapolation: Controls the + extrapolation mode for elements outside the domain range. + By default uses the method defined in fd. See extrapolation to + more information. + + Returns: + Shifted functions. 
def __getitem__(
    self: T,
    key: Union[int, slice, NDArrayInt, NDArrayBool],
) -> T:
    """Return a new object with the samples selected by ``key``.

    Args:
        key: Selection of samples, normalized with ``_check_array_key``.

    Returns:
        FDataIrregular with the points, values and names of the selected
        samples, in the order given by ``key``.
    """
    key = _check_array_key(self.start_indices, key)

    # Positions of the requested samples. Resolving the key to explicit
    # integers lets the same positions index the sample names below,
    # whatever kind of key was given.
    selected = np.atleast_1d(np.arange(self.n_samples)[key])

    # Row at which each curve ends: the start of the next curve, or the
    # total number of points for the last one.
    stops = np.append(self.start_indices[1:], len(self.points))
    row_ranges = [
        slice(self.start_indices[i], stops[i])
        for i in selected
    ]

    points = np.concatenate([self.points[rows] for rows in row_ranges])
    values = np.concatenate([self.values[rows] for rows in row_ranges])

    chunk_sizes = np.array(
        [rows.stop - rows.start for rows in row_ranges],
    )

    # Each curve starts after all the previous ones, so its start is the
    # cumulative size minus its own size. Subtracting only the size of
    # the first curve is wrong when the curves differ in length.
    start_indices = np.cumsum(chunk_sizes) - chunk_sizes

    # ``sample_names`` is a tuple, which cannot be indexed with an array
    # key, so the names are picked through an object array.
    names = np.array(self.sample_names, dtype=object)

    return self.copy(
        start_indices=start_indices.astype(int),
        points=points,
        values=values,
        sample_names=tuple(names[selected]),
    )
class FDataIrregularDType(
    pandas.api.extensions.ExtensionDtype,  # type: ignore[misc]
):
    """DType corresponding to FDataIrregular in Pandas.

    The dtype stores the structure of the data (start indices and
    points), the domain range and the codomain dimension, but not the
    values.
    """

    name = 'FDataIrregular'
    kind = 'O'
    type = FDataIrregular  # noqa: WPS125
    na_value = pandas.NA

    def __init__(
        self,
        start_indices: ArrayLike,
        points: ArrayLike,
        dim_codomain: int,
        domain_range: Optional[DomainRangeLike] = None,
    ) -> None:
        """Create the dtype.

        Args:
            start_indices: Row at which each sample begins in ``points``.
            points: Arguments of the measurements, one row per
                measurement. Its second axis gives the domain dimension.
            dim_codomain: Dimension of the image.
            domain_range: Domain range. When ``None`` it is derived from
                the sample range of the data.
        """
        from ..misc.validation import validate_domain_range
        self.start_indices = start_indices
        self.points = points
        self.dim_domain = points.shape[1]

        if domain_range is None:
            sample_range = _get_sample_range_from_data(
                self.start_indices, self.points
            )
            domain_range = _get_domain_range_from_sample_range(sample_range)

        self.domain_range = validate_domain_range(domain_range)
        self.dim_codomain = dim_codomain

    @classmethod
    def construct_array_type(cls) -> Type[FDataIrregular]:  # noqa: D102
        return FDataIrregular

    def _na_repr(self) -> FDataIrregular:
        """Build an object with this structure and every value missing."""
        shape = (
            (len(self.points),)
            + (self.dim_codomain,)
        )

        values = np.full(shape=shape, fill_value=self.na_value)

        return FDataIrregular(
            start_indices=self.start_indices,
            points=self.points,
            values=values,
            domain_range=self.domain_range,
        )

    def __eq__(self, other: Any) -> bool:
        """
        Compare dtype equality.

        Rules for equality (similar to categorical):
        1) Any FDataIrregularDType is equal to the string
           'FDataIrregular' (its ``name``).
        2) Any FDataIrregularDType is equal to itself.
        3) Two FDataIrregularDType are equal if their start indices,
           points, domain range and codomain dimension are equal.
        4) Any other comparison returns False.
        """
        if isinstance(other, str):
            return other == self.name
        if other is self:
            return True
        if not isinstance(other, FDataIrregularDType):
            return False

        # The arrays are compared as a whole. Chaining their elementwise
        # ``==`` results with ``and`` would raise for any array with
        # more than one element.
        return bool(
            self.dim_codomain == other.dim_codomain
            and np.array_equal(self.domain_range, other.domain_range)
            and np.array_equal(self.start_indices, other.start_indices)
            and np.array_equal(self.points, other.points),
        )

    def __hash__(self) -> int:
        # Built only from data that is the same for any two dtypes
        # considered equal by __eq__ (equal arrays have equal shapes),
        # so that equal dtypes always share their hash.
        return hash(
            (
                np.shape(self.start_indices),
                np.shape(self.points),
                self.domain_range,
                self.dim_codomain,
            ),
        )
skfda.representation import FDataGrid, FDataIrregular +from skfda.representation.interpolation import SplineInterpolation + +from ..typing._numpy import ArrayLike + +############ +# FIXTURES +############ + +SEED = 2906198114 + +NUM_CURVES = 10 +DIMENSIONS = 2 +TEST_DECIMALS = range(10) +COPY_KWARGS = [ # noqa: WPS407 + {"domain_range": ((0, 10))}, + {"dataset_name": "test"}, + {"sample_names": ["test"] * NUM_CURVES}, + {"interpolation": SplineInterpolation(3)}, + {"argument_names": ("test",)}, + {"coordinate_names": ("test",)}, +] + +random_state = np.random.RandomState(seed=SEED) + + +@pytest.fixture() +def input_arrays( +) -> Tuple[ArrayLike, ArrayLike, ArrayLike]: + """Create unidimensional arrays describing a FDataIrregular structure.""" + num_values_per_curve = np.array(range(NUM_CURVES)) + 1 + + values_per_curve = [ + random_state.rand(num_values, 1) + for num_values in num_values_per_curve + ] + + args_per_curve = [ + random_state.rand(num_values, 1) + for num_values in num_values_per_curve + ] + + indices = np.cumsum(num_values_per_curve) - num_values_per_curve + values = np.concatenate(values_per_curve) + arguments = np.concatenate(args_per_curve) + + return indices, values, arguments + + +@pytest.fixture() +def input_arrays_multidimensional( +) -> Tuple[ArrayLike, ArrayLike, ArrayLike]: + """Create multidimensional arrays describing a FDataIrregular structure.""" + num_values_per_curve = np.array(range(NUM_CURVES)) + 1 + + values_per_curve = [ + random_state.rand(num_values, DIMENSIONS) + for num_values in num_values_per_curve + ] + + args_per_curve = [ + random_state.rand(num_values, DIMENSIONS) + for num_values in num_values_per_curve + ] + + indices = np.cumsum(num_values_per_curve) - num_values_per_curve + values = np.concatenate(values_per_curve) + arguments = np.concatenate(args_per_curve) + + return indices, values, arguments + + +@pytest.fixture( + params=[ + "input_arrays", + "input_arrays_multidimensional", + ], +) +def fdatairregular( + 
request: Any, + input_arrays: FDataIrregular, + input_arrays_multidimensional: FDataIrregular, +) -> FDataIrregular: + """Return 'input_arrays' or 'input_arrays_multidimensional'.""" + if request.param == "input_arrays": + return FDataIrregular(*input_arrays) + elif request.param == "input_arrays_multidimensional": + return FDataIrregular(*input_arrays_multidimensional) + + +@pytest.fixture() +def fdatagrid_unidimensional( +) -> FDataGrid: + """Generate FDataGrid.""" + num_values_per_curve = NUM_CURVES + + data_matrix = random_state.rand(NUM_CURVES, num_values_per_curve, 1) + # Grid points must be sorted + grid_points = np.sort(random_state.rand(num_values_per_curve)) + + return FDataGrid( + data_matrix=data_matrix, + grid_points=grid_points, + ) + + +@pytest.fixture() +def fdatagrid_multidimensional( +) -> FDataGrid: + """Generate multidimensional FDataGrid.""" + num_values_per_curve = NUM_CURVES + + data_matrix = random_state.rand( + NUM_CURVES, + num_values_per_curve, + DIMENSIONS, + ) + + # Grid points must be sorted + grid_points = np.sort(random_state.rand(num_values_per_curve)) + + return FDataGrid( + data_matrix=data_matrix, + grid_points=grid_points, + ) + + +@pytest.fixture( + params=[ + "fdatagrid_unidimensional", + "fdatagrid_multidimensional", + ], +) +def fdatagrid( + request: Any, + fdatagrid_unidimensional: FDataGrid, + fdatagrid_multidimensional: FDataGrid, +) -> FDataIrregular: + """Return 'fdatagrid_unidimensional' or 'fdatagrid_multidimensional'.""" + if request.param == "fdatagrid_unidimensional": + return fdatagrid_unidimensional + elif request.param == "fdatagrid_multidimensional": + return fdatagrid_multidimensional + + +@pytest.fixture() +def dataframe( +) -> pandas.DataFrame: + """Generate long dataframe for testing.""" + raw_dataset = _fetch_loon_data("bone_ext") + + return raw_dataset["bone_ext"] + +############ +# TESTS +############ + + +def test_fdatairregular_init( + fdatairregular: FDataIrregular, +) -> None: + """Tests creating a 
correct FDataIrregular object from arrays. + + Test both unidimensional and multidimensional. + + Args: + fdatairregular (FDataIrregular): FDataIrregular object + which can be unidimensional or multidimensional. + """ + arguments = fdatairregular.points + assert fdatairregular is not None + assert len(fdatairregular) == len(fdatairregular.start_indices) + assert len(arguments) == len(fdatairregular.values) + + +def test_fdatairregular_copy( + fdatairregular: FDataIrregular, +) -> None: + """Test the copy function for FDataIrregular for an exact copy. + + Args: + fdatairregular (FDataIrregular): FDataIrregular object + which can be unidimensional or multidimensional. + """ + assert np.all(fdatairregular == fdatairregular.copy()) + + +@pytest.mark.parametrize("kwargs", COPY_KWARGS) +def test_fdatairregular_copy_kwargs( + fdatairregular: FDataIrregular, + kwargs: dict, +) -> None: + """Test the copy function for FDataIrregular. + + Test with additional keyword arguments which replace + certain parameters of the object. + + Args: + fdatairregular (FDataIrregular): FDataIrregular object + which can be unidimensional or multidimensional. 
+ kwargs: Dict with the parameters for each iteration of the test + """ + changed_attribute = next(iter(kwargs)) + local_kwargs = kwargs.copy() + + if changed_attribute == "argument_names": + # Set correct dimensionality + dim = fdatairregular.dim_domain + local_kwargs[changed_attribute] = kwargs[changed_attribute] * dim + if changed_attribute == "coordinate_names": + # Set correct dimensionality + dim = fdatairregular.dim_codomain + local_kwargs[changed_attribute] = kwargs[changed_attribute] * dim + + f_data_copy = fdatairregular.copy(**local_kwargs) + + og_attribute = getattr(fdatairregular, changed_attribute) + copy_attribute = getattr(f_data_copy, changed_attribute) + + # Check everything equal except specified kwarg + assert len(f_data_copy) == len(fdatairregular) + assert len(f_data_copy.points) == len(fdatairregular.points) + assert f_data_copy.dim_domain == fdatairregular.dim_domain + assert f_data_copy.dim_domain == fdatairregular.dim_codomain + assert og_attribute != copy_attribute + + +def test_fdatairregular_from_fdatagrid( + fdatagrid: FDataGrid, +) -> None: + """Tests creating a correct FDataIrregular object from FDataGrid. + + Args: + fdatagrid (FDataGrid): FDataGrid object. Can be dense or sparse + (contain NaNs) + """ + f_data_irreg = FDataIrregular.from_fdatagrid(fdatagrid) + + assert f_data_irreg is not None + assert len(f_data_irreg) == len(fdatagrid) + + +def test_fdatairregular_from_dataframe( + dataframe: pandas.DataFrame, +) -> None: + """Test creating FDataIrregular from pandas DataFrame. + + Args: + dataframe (pandas:DataFrame): DataFrame object. + It should be in 'long' format. 
+ """ + curve_name = "idnum" + argument_name = "age" + coordinate_name = "spnbmd" + + f_irreg = FDataIrregular._from_dataframe( + dataframe, + id_column=curve_name, + argument_columns=argument_name, + coordinate_columns=coordinate_name, + argument_names=[argument_name], + coordinate_names=[coordinate_name], + dataset_name="bone_ext", + ) + + assert len(f_irreg) == 423 + assert len(f_irreg.values) == 1003 + + +def test_fdatairregular_getitem( + fdatairregular: FDataIrregular, +) -> None: + """Tests the getitem method of FDataIrregular. + + Use slices to get subsamples of a given FDataIrregular, + using the method __getitem__ of the class, and then + verify the length of the result is correct. + + Args: + fdatairregular (FDataIrregular): FDataIrregular object + which can be unidimensional or multidimensional. + """ + assert len(fdatairregular[0]) == len(fdatairregular[-1]) == 1 + assert len(fdatairregular[:]) == len(fdatairregular) + assert len(fdatairregular[:NUM_CURVES]) == NUM_CURVES + assert len(fdatairregular[:NUM_CURVES:2]) == NUM_CURVES / 2 + assert len(fdatairregular[:NUM_CURVES:2]) == NUM_CURVES / 2 + + +def test_fdatairregular_coordinates( + fdatairregular: FDataIrregular, +) -> None: + """Test the coordinates function. + + First obtain the different coordinates for a multidimensional + FDataGrid object by using the custom _IrregularCoordinateIterator. + + Then check that the coordinates are equal elementwise to the + original. + + Args: + fdatairregular (FDataIrregular): FDataIrregular object + which can be unidimensional or multidimensional. 
+ """ + for dim, f_data_coordinate in enumerate(fdatairregular.coordinates): + assert len(f_data_coordinate) == len(fdatairregular) + assert f_data_coordinate.dim_codomain == 1 + assert np.all( + f_data_coordinate.values[:, 0] == fdatairregular.values[:, dim], + ) + + +@pytest.mark.parametrize("decimals", TEST_DECIMALS) +def test_fdatairregular_round( + fdatairregular: FDataIrregular, + decimals: int, +) -> None: + """Test the round function for FDataIrregular. + + Args: + fdatairregular (FDataIrregular): FDataIrregular object + which can be unidimensional or multidimensional. + decimals (int): Number of decimal places to round. + """ + assert np.all( + fdatairregular.round(decimals).values + == np.round(fdatairregular.values, decimals), + ) + + +def test_fdatairregular_concatenate( + fdatairregular: FDataIrregular, +) -> None: + """Test concatenate FDataIrregular objects. + + Args: + fdatairregular (FDataIrregular): FDataIrregular object + which can be unidimensional or multidimensional. + """ + fd_concat = fdatairregular.concatenate(fdatairregular) + + start_indices_halves = np.split(fd_concat.start_indices, 2) + indices = fdatairregular.start_indices + second_half_indices = indices + len(fdatairregular.points) + + function_args_halves = np.split(fd_concat.points, 2) + values_halves = np.split(fd_concat.values, 2) + + assert len(fd_concat) == 2 * len(fdatairregular) + assert np.all(start_indices_halves[1] == second_half_indices) + assert len(fd_concat.points) == 2 * len(fdatairregular.points) + assert np.all(function_args_halves[1] == fdatairregular.points) + assert np.all(values_halves[1] == fdatairregular.values) + + +def test_fdatairregular_equals( + fdatairregular: FDataIrregular, +) -> None: + """Test for equals method. + + It uses _eq_elementwise to verify equality in every + index, argument and value. + + Args: + fdatairregular (FDataIrregular): FDataIrregular object + which can be unidimensional or multidimensional. 
+ """ + assert fdatairregular.equals(fdatairregular) + assert fdatairregular.equals(fdatairregular.copy()) + + +def test_fdatairregular_restrict( + fdatairregular: FDataIrregular, +) -> None: + """Test the restrict function for FDataIrregular. + + Args: + fdatairregular (FDataIrregular): FDataIrregular object + which can be unidimensional or multidimensional. + """ + restricted_domain = [ + (dr[0] + (dr[0] + dr[1]) / 4, dr[1] - (dr[0] + dr[1]) / 4) + for dr in fdatairregular.domain_range + ] + + restricted_fdata = fdatairregular.restrict(restricted_domain) + + samples_by_dim = [ + restricted_fdata.points[:, dim] + for dim in range(fdatairregular.dim_domain) + ] + + sample_ranges = [(np.min(args), np.max(args)) for args in samples_by_dim] + + # The min arg is larger than the domain min constraint + assert len(restricted_fdata) > 0 + assert all( + sr[0] > restricted_domain[i][0] + for i, sr in enumerate(sample_ranges) + ) + + # The max arg is lesser than the domain max constraint + assert all( + sr[1] < restricted_domain[i][1] + for i, sr in enumerate(sample_ranges) + ) + + +def test_fdatairregular_to_grid( + fdatairregular: FDataIrregular, + fdatagrid: FDataGrid, +) -> None: + """Test conversion of FDataIrregular to and from FDataGrid. + + Args: + fdatairregular (FDataIrregular): FDataIrregular object + which can be unidimensional or multidimensional. + fdatagrid (FDataGrid): FDataGrid object. + """ + f_data_grid = fdatairregular.to_grid() + + # FDataGrid -> FDataIrregular -> FDataGrid + assert fdatagrid.equals(FDataIrregular.from_fdatagrid(fdatagrid).to_grid()) + # FDataIrregular -> FDataGrid -> FDataIrregular + assert fdatairregular.equals(FDataIrregular.from_fdatagrid(f_data_grid)) + + +def test_fdatairregular_isna( + fdatairregular: FDataIrregular, +) -> None: + """Test the shape of isna function output for FDataIrregular. + + Args: + fdatairregular (FDataIrregular): FDataIrregular object + which can be unidimensional or multidimensional. 
+ """ + assert fdatairregular.isna().shape == (len(fdatairregular),) diff --git a/skfda/tests/test_irregular_operations.py b/skfda/tests/test_irregular_operations.py new file mode 100644 index 000000000..3c6b36ea3 --- /dev/null +++ b/skfda/tests/test_irregular_operations.py @@ -0,0 +1,995 @@ +"""Test the operations of the FDataIrregular structure.""" +from typing import Optional, Tuple + +import numpy as np +import pytest + +from skfda.representation import FDataGrid, FDataIrregular +from skfda.representation.basis import ( + Basis, + BSplineBasis, + FDataBasis, + FourierBasis, + TensorBasis, +) + +from ..typing._numpy import Any, ArrayLike + +############ +# MACROS +############ +SEED = 2906198114 + +NUM_CURVES = 100 +MAX_VALUES_PER_CURVE = 10 +DIMENSIONS = 2 +N_BASIS = 5 +DECIMALS = 4 + +random_state = np.random.default_rng(seed=SEED) + +############ +# FIXTURES +############ + + +@pytest.fixture() +def input_arrays( + num_curves: Optional[int] = NUM_CURVES, + max_values_per_curve: Optional[int] = MAX_VALUES_PER_CURVE, + dimensions: Optional[int] = 1, +) -> Tuple[ArrayLike, ArrayLike, ArrayLike]: + """Create undiimensional arrays for FDataIrregular. 
+ + Generate three unidimensional arrays describing a + FDataIrregular structure with fixed sizes given by + the parameters + """ + num_values_per_curve = max_values_per_curve * np.ones(num_curves) + num_values_per_curve = num_values_per_curve.astype(int) + + values_per_curve = [ + random_state.random((num_values, dimensions)) + for num_values in num_values_per_curve + ] + args_per_curve = [ + random_state.random((num_values, dimensions)) + for num_values in num_values_per_curve + ] + + indices = np.cumsum(num_values_per_curve) - num_values_per_curve + values = np.concatenate(values_per_curve) + arguments = np.concatenate(args_per_curve) + + return indices, values, arguments + + +@pytest.fixture() +def input_arrays_2d( + num_curves: Optional[int] = NUM_CURVES, + max_values_per_curve: Optional[int] = MAX_VALUES_PER_CURVE, + dimensions: Optional[int] = DIMENSIONS, +) -> Tuple[ArrayLike, ArrayLike, ArrayLike]: + """Create multidimensional arrays for FDataIrregular. + + Generate three unidimensional arrays describing a + FDataIrregular structure with fixed sizes given by + the parameters + """ + num_values_per_curve = max_values_per_curve * np.ones(num_curves) + num_values_per_curve = num_values_per_curve.astype(int) + + values_per_curve = [ + random_state.random((num_values, dimensions)) + for num_values in num_values_per_curve + ] + args_per_curve = [ + random_state.random((num_values, dimensions)) + for num_values in num_values_per_curve + ] + + indices = np.cumsum(num_values_per_curve) - num_values_per_curve + values = np.concatenate(values_per_curve) + arguments = np.concatenate(args_per_curve) + + return indices, values, arguments + + +@pytest.fixture() +def fdatagrid_1d( +) -> FDataGrid: + """Generate FDataGrid.""" + num_values_per_curve = NUM_CURVES + + data_matrix = random_state.random((NUM_CURVES, num_values_per_curve, 1)) + # Grid points must be sorted + grid_points = np.sort(random_state.random((num_values_per_curve,))) + + return FDataGrid( + 
data_matrix=data_matrix, + grid_points=grid_points, + ) + + +@pytest.fixture() +def fdatagrid_2d( +) -> FDataGrid: + """Generate multidimensional FDataGrid.""" + num_values_per_curve = NUM_CURVES + + data_matrix = random_state.random(( + NUM_CURVES, + num_values_per_curve, + DIMENSIONS, + )) + + # Grid points must be sorted + grid_points = np.sort(random_state.random((num_values_per_curve,))) + + return FDataGrid( + data_matrix=data_matrix, + grid_points=grid_points, + ) + + +@pytest.fixture( + params=[ + "fdatagrid_1d", + "fdatagrid_2d", + ], +) +def fdatagrid( + request: Any, + fdatagrid_1d: FDataGrid, + fdatagrid_2d: FDataGrid, +) -> FDataIrregular: + """Return 'fdatagrid_1d' or 'fdatagrid_2d'.""" + if request.param == "fdatagrid_1d": + return fdatagrid_1d + elif request.param == "fdatagrid_2d": + return fdatagrid_2d + + +@pytest.fixture(params=["single_curve", "multiple_curves"]) +def fdatairregular_1d( + request: Any, + input_arrays: Tuple[ArrayLike, ArrayLike, ArrayLike], +) -> FDataIrregular: + """Return FDataIrregular with only 1 curve or NUM_CURVES as requested.""" + indices, arguments, values = input_arrays + f_data_irreg = FDataIrregular( + start_indices=indices, + points=arguments, + values=values, + ) + + if request.param == "single_curve": + return f_data_irreg[0] + elif request.param == "multiple_curves": + return f_data_irreg + + +@pytest.fixture(params=["single_curve", "multiple_curves"]) +def fdatairregular_2d( + request: Any, + input_arrays_2d: Tuple[ArrayLike, ArrayLike, ArrayLike], +) -> FDataIrregular: + """Return FDataIrregular with only 1 curve or NUM_CURVES as requested.""" + indices, arguments, values = input_arrays_2d + f_data_irreg = FDataIrregular( + start_indices=indices, + points=arguments, + values=values, + ) + + if request.param == "single_curve": + return f_data_irreg[0] + elif request.param == "multiple_curves": + return f_data_irreg + + +@pytest.fixture(params=["fdatairregular_1d", "fdatairregular_2d"]) +def fdatairregular( + 
request: Any, + fdatairregular_1d: FDataIrregular, + fdatairregular_2d: FDataIrregular, +) -> FDataIrregular: + """Return 'fdatairregular_1d' or 'fdatairregular_2d'.""" + if request.param == "fdatairregular_1d": + return fdatairregular_1d + elif request.param == "fdatairregular_2d": + return fdatairregular_2d + + +@pytest.fixture( + params=[ + "unidimensional", + "multidimensional", + ], +) +def fdatairregular_and_sum(request: Any) -> FDataIrregular: + if request.param == "unidimensional": + return ( + FDataIrregular( + start_indices=[0, 3, 7], + points=[ + -9, -3, 3, -3, 3, 9, 15, -15, -9, -3, 3, 9, 17, 22, 29, + ], + values=[ + 548, 893, 657, 752, 459, 181, 434, 846, 1102, 801, 824, + 866, 704, 757, 726, + ], + ), + FDataIrregular( + start_indices=[0], + points=[-3, 3], + values=[2446, 1940], + ), + ) + if request.param == "multidimensional": + return ( + FDataIrregular( + start_indices=[0, 3, 5], + points=[ + [0, 0], [1, 2], [1, 1], + [0, 0], [1, 1], + [0, 0], [6, 2], [1, 1], + ], + values=[ + [0, 0, -1], [657, 752, 5], [10, 20, 30], + [-1, 0, 0], [1102, 801, 2], + [0, 1, 0], [704, 0, 757], [-11, -21, 31], + ], + ), + FDataIrregular( + start_indices=[0], + points=[[0, 0], [1, 1]], + values=[[-1, 1, -1], [1101, 800, 63]], + ), + ) + + +@pytest.fixture( + params=[ + "unidimensional", + "multidimensional", + ], +) +def fdatairregular_common_points(request: Any) -> FDataIrregular: + if request.param == "unidimensional": + return FDataIrregular( + start_indices=[0, 3, 7], + points=[ + -9, -3, 3, -3, 3, 9, 15, -15, -9, -3, 3, 9, 17, 22, 29, + ], + values=[ + 548, 893, 657, 752, 459, 181, 434, 846, 1102, 801, 824, + 866, 704, 757, 726, + ], + ) + if request.param == "multidimensional": + return FDataIrregular( + start_indices=[0, 3, 5], + points=[ + [0, 0], [1, 2], [1, 1], + [0, 0], [1, 1], + [0, 0], [6, 2], [1, 1], + ], + values=[ + [0, 0, -1], [657, 752, 5], [10, 20, 30], + [-1, 0, 0], [1102, 801, 2], + [0, 1, 0], [704, 0, 757], [-11, -21, 31], + ], + ) + + 
@pytest.fixture()
def fdatairregular_no_common_points() -> FDataIrregular:
    """Return three samples whose evaluation points never coincide.

    The domain is two-dimensional and the codomain three-dimensional.
    The samples hold 3, 2 and 3 points respectively (see ``start_indices``)
    and no point appears in more than one sample.
    """
    return FDataIrregular(
        start_indices=[0, 3, 5],
        points=[
            [0, 1], [1, 2], [1, 1],
            [0, -1], [1, 10],
            [0, -2], [6, 2], [10, 1],
        ],
        values=[
            [0, 0, -1], [657, 752, 5], [10, 20, 30],
            [-1, 0, 0], [1102, 801, 2],
            [0, 1, 0], [704, 0, 757], [-11, -21, 31],
        ],
    )


def _make_operand(
    kind: str,
    fdata: FDataIrregular,
    dim_codomain: int,
) -> Any:
    """Build the right-hand operand used by the arithmetic tests.

    Args:
        kind: One of ``"scalar"``, ``"vector"``, ``"matrix"`` or
            ``"fdatairregular"``.
        fdata: Object returned unchanged when ``kind`` is
            ``"fdatairregular"``.
        dim_codomain: Number of columns of the ``"matrix"`` operand.

    Returns:
        The scalar ``2``, an array filled with ``2`` or ``fdata``.

    Raises:
        ValueError: If ``kind`` is not one of the supported names.
    """
    if kind == "scalar":
        return 2
    if kind == "vector":
        return 2 * np.ones(NUM_CURVES)
    if kind == "matrix":
        return 2 * np.ones((NUM_CURVES, dim_codomain))
    if kind == "fdatairregular":
        return fdata

    # Fail loudly instead of handing ``None`` to the test as an operand.
    raise ValueError(f"Unknown operand kind: {kind!r}")


@pytest.fixture(params=["scalar", "vector", "matrix", "fdatairregular"])
def other_1d(
    request: Any,
    fdatairregular_1d: FDataIrregular,
) -> Any:
    """Return an operand for testing unidimensional operations.

    Depending on the parameter the operand is a scalar, a vector of
    length ``NUM_CURVES``, a ``(NUM_CURVES, 1)`` matrix or the
    ``fdatairregular_1d`` fixture itself.
    """
    return _make_operand(request.param, fdatairregular_1d, 1)


@pytest.fixture(params=["scalar", "vector", "matrix", "fdatairregular"])
def other_2d(
    request: Any,
    fdatairregular_2d: FDataIrregular,
) -> Any:
    """Return an operand for testing multidimensional operations.

    Depending on the parameter the operand is a scalar, a vector of
    length ``NUM_CURVES``, a ``(NUM_CURVES, DIMENSIONS)`` matrix or the
    ``fdatairregular_2d`` fixture itself.
    """
    return _make_operand(request.param, fdatairregular_2d, DIMENSIONS)


_all_numeric_reductions = [
    "sum",
    "var",
    "mean",
    # NOTE(review): "cov" is disabled; the reason is not visible here.
    # "cov",
]


@pytest.fixture(params=_all_numeric_reductions)
def all_numeric_reductions(request: Any) -> Any:
    """Fixture for numeric reduction names."""
    return request.param


_all_basis_operations = [
    "to_basis",
]


@pytest.fixture(params=_all_basis_operations)
def all_basis_operations(request: Any) -> Any:
    """Fixture for basis operation names."""
    return request.param


_all_basis = [
    FourierBasis,
    BSplineBasis,
]


@pytest.fixture(params=_all_basis)
def all_basis(request: Any) -> Any:
    """Fixture for basis classes."""
    return request.param
+################## +# TEST OPERATIONS +################## + + +class TestArithmeticOperations1D: + """Class for testing basic operations for unidimensional FDataIrregular.""" + + def _take_first( + self, + other, + ) -> float: + if isinstance(other, np.ndarray): + return other[0] + elif isinstance(other, FDataIrregular): + return other.values + return other + + def _single_curve( + self, + fdatairregular_1d, + other_1d, + ) -> np.ndarray: + if isinstance(other_1d, (np.ndarray, FDataIrregular)): + if len(fdatairregular_1d) == 1: + return other_1d[:1] + return other_1d + + def test_fdatairregular_arithmetic_sum( + self, + fdatairregular_1d: FDataIrregular, + other_1d: Any, + ) -> None: + """Tests the basic arithmetic operation fdatairregular + other. + + Args: + fdatairregular_1d (FDataIrregular): FDataIrregular object to test. + other_1d (Any): Scalar, vector, matrix or FDataIrregular. + """ + # Account for single curve test + other_1d = self._single_curve(fdatairregular_1d, other_1d) + + f_data_sum = fdatairregular_1d + other_1d + + result = fdatairregular_1d.values + self._take_first(other_1d) + + assert np.all(f_data_sum.values == result) + + def test_fdatairregular_arithmetic_rsum( + self, + fdatairregular_1d: FDataIrregular, + other_1d: Any, + ) -> None: + """Tests the basic arithmetic operation other + fdatairregular. + + Args: + fdatairregular_1d (FDataIrregular): FDataIrregular object to test. + other_1d (Any): Scalar, vector, matrix or FDataIrregular. + """ + # Account for single curve test + other_1d = self._single_curve(fdatairregular_1d, other_1d) + + f_data_sum = other_1d + fdatairregular_1d + + result = self._take_first(other_1d) + fdatairregular_1d.values + + assert np.all(f_data_sum.values == result) + + def test_fdatairregular_arithmetic_sum_commutative( # noqa: WPS118 + self, + fdatairregular_1d: FDataIrregular, + other_1d: Any, + ) -> None: + """Tests the basic arithmetic operation other + fdatairregular. 
+ + Args: + fdatairregular_1d (FDataIrregular): FDataIrregular object to test. + other_1d (Any): Scalar, vector, matrix or FDataIrregular. + """ + # Account for single curve test + other_1d = self._single_curve(fdatairregular_1d, other_1d) + + assert np.all( + (fdatairregular_1d + other_1d) == (other_1d + fdatairregular_1d), + ) + + def test_fdatairregular_arithmetic_sub( + self, + fdatairregular_1d: FDataIrregular, + other_1d: Any, + ) -> None: + """Tests the basic arithmetic operation fdatairregular - other. + + Args: + fdatairregular_1d (FDataIrregular): FDataIrregular object to test. + other_1d (Any): Scalar, vector, matrix or FDataIrregular. + """ + # Account for single curve test + other_1d = self._single_curve(fdatairregular_1d, other_1d) + + f_data_sub = fdatairregular_1d - other_1d + + result = fdatairregular_1d.values - self._take_first(other_1d) + + assert np.all(f_data_sub.values == result) + + def test_fdatairregular_arithmetic_rsub( + self, + fdatairregular_1d: FDataIrregular, + other_1d: Any, + ) -> None: + """Tests the basic arithmetic operation other - fdatairregular. + + Args: + fdatairregular_1d (FDataIrregular): FDataIrregular object to test. + other_1d (Any): Scalar, vector, matrix or FDataIrregular. + """ + # Account for single curve test + other_1d = self._single_curve(fdatairregular_1d, other_1d) + + f_data_sub = other_1d - fdatairregular_1d + + result = self._take_first(other_1d) - fdatairregular_1d.values + + assert np.all(f_data_sub.values == result) + + def test_fdatairregular_arithmetic_mul( + self, + fdatairregular_1d: FDataIrregular, + other_1d: Any, + ) -> None: + """Tests the basic arithmetic operation fdatairregular * other. + + Args: + fdatairregular_1d (FDataIrregular): FDataIrregular object to test. + other_1d (Any): Scalar, vector, matrix or FDataIrregular. 
+ """ + # Account for single curve test + other_1d = self._single_curve(fdatairregular_1d, other_1d) + + f_data_mul = fdatairregular_1d * other_1d + + result = fdatairregular_1d.values * self._take_first(other_1d) + + assert np.all(f_data_mul.values == result) + + def test_fdatairregular_arithmetic_rmul( + self, + fdatairregular_1d: FDataIrregular, + other_1d: Any, + ) -> None: + """Tests the basic arithmetic operation other * fdatairregular. + + Args: + fdatairregular_1d (FDataIrregular): FDataIrregular object to test. + other_1d (Any): Scalar, vector, matrix or FDataIrregular. + """ + # Account for single curve test + other_1d = self._single_curve(fdatairregular_1d, other_1d) + + f_data_mul = other_1d * fdatairregular_1d + + result = self._take_first(other_1d) * fdatairregular_1d.values + + assert np.all(f_data_mul.values == result) + + def test_fdatairregular_arithmetic_mul_commutative( # noqa: WPS118 + self, + fdatairregular_1d: FDataIrregular, + other_1d: Any, + ) -> None: + """Tests the basic arithmetic operation other * fdatairregular. + + Args: + fdatairregular_1d (FDataIrregular): FDataIrregular object to test. + other_1d (Any): Scalar, vector, matrix or FDataIrregular. + """ + # Account for single curve test + other_1d = self._single_curve(fdatairregular_1d, other_1d) + + assert np.all( + (fdatairregular_1d * other_1d) == (other_1d * fdatairregular_1d), + ) + + def test_fdatairregular_arithmetic_div( + self, + fdatairregular_1d: FDataIrregular, + other_1d: Any, + ) -> None: + """Tests the basic arithmetic operation fdatairregular / other. + + Args: + fdatairregular_1d (FDataIrregular): FDataIrregular object to test. + other_1d (Any): Scalar, vector, matrix or FDataIrregular. 
+ """ + # Account for single curve test + other_1d = self._single_curve(fdatairregular_1d, other_1d) + + f_data_div = fdatairregular_1d / other_1d + + result = fdatairregular_1d.values / self._take_first(other_1d) + + assert np.all(f_data_div.values == result) + + def test_fdatairregular_arithmetic_rdiv( + self, + fdatairregular_1d: FDataIrregular, + other_1d: Any, + ) -> None: + """Tests the basic arithmetic operation other / fdatairregular. + + Args: + fdatairregular_1d (FDataIrregular): FDataIrregular object to test. + other_1d (Any): Scalar, vector, matrix or FDataIrregular. + """ + # Account for single curve test + other_1d = self._single_curve(fdatairregular_1d, other_1d) + + f_data_div = other_1d / fdatairregular_1d + + result = self._take_first(other_1d) / fdatairregular_1d.values + + assert np.all(f_data_div.values == result) + + +class TestArithmeticOperations2D: + """Test basic operations for multidimensional FDataIrregular.""" + + def _take_first( + self, + other, + ) -> float: + if isinstance(other, np.ndarray): + return other[0] + elif isinstance(other, FDataIrregular): + return other.values + return other + + def _single_curve( + self, + fdatairregular_2d, + other_2d, + ) -> np.ndarray: + if isinstance(other_2d, (np.ndarray, FDataIrregular)): + if len(fdatairregular_2d) == 1: + return other_2d[:1] + return other_2d + + def test_fdatairregular_arithmetic_sum( + self, + fdatairregular_2d: FDataIrregular, + other_2d: Any, + ) -> None: + """Tests the basic arithmetic operation fdatairregular + other. + + Args: + fdatairregular_2d (FDataIrregular): FDataIrregular object to test. + other_2d (Any): Scalar, vector, matrix or FDataIrregular. 
+ """ + # Account for single curve test + other_2d = self._single_curve(fdatairregular_2d, other_2d) + + f_data_sum = fdatairregular_2d + other_2d + + result = fdatairregular_2d.values + self._take_first(other_2d) + + assert np.all(f_data_sum.values == result) + + def test_fdatairregular_arithmetic_rsum( + self, + fdatairregular_2d: FDataIrregular, + other_2d: Any, + ) -> None: + """Tests the basic arithmetic operation other + fdatairregular. + + Args: + fdatairregular_2d (FDataIrregular): FDataIrregular object to test. + other_2d (Any): Scalar, vector, matrix or FDataIrregular. + """ + # Account for single curve test + other_2d = self._single_curve(fdatairregular_2d, other_2d) + + f_data_sum = other_2d + fdatairregular_2d + + result = self._take_first(other_2d) + fdatairregular_2d.values + + assert np.all(f_data_sum.values == result) + + def test_fdatairregular_arithmetic_sum_commutative( # noqa: WPS118 + self, + fdatairregular_2d: FDataIrregular, + other_2d: Any, + ) -> None: + """Tests the basic arithmetic operation other + fdatairregular. + + Args: + fdatairregular_2d (FDataIrregular): FDataIrregular object to test. + other_2d (Any): Scalar, vector, matrix or FDataIrregular. + """ + # Account for single curve test + other_2d = self._single_curve(fdatairregular_2d, other_2d) + + assert np.all( + (fdatairregular_2d + other_2d) == (other_2d + fdatairregular_2d), + ) + + def test_fdatairregular_arithmetic_sub( + self, + fdatairregular_2d: FDataIrregular, + other_2d: Any, + ) -> None: + """Tests the basic arithmetic operation fdatairregular - other. + + Args: + fdatairregular_2d (FDataIrregular): FDataIrregular object to test. + other_2d (Any): Scalar, vector, matrix or FDataIrregular. 
+ """ + # Account for single curve test + other_2d = self._single_curve(fdatairregular_2d, other_2d) + + f_data_sub = fdatairregular_2d - other_2d + + result = fdatairregular_2d.values - self._take_first(other_2d) + + assert np.all(f_data_sub.values == result) + + def test_fdatairregular_arithmetic_rsub( + self, + fdatairregular_2d: FDataIrregular, + other_2d: Any, + ) -> None: + """Tests the basic arithmetic operation other - fdatairregular. + + Args: + fdatairregular_2d (FDataIrregular): FDataIrregular object to test. + other_2d (Any): Scalar, vector, matrix or FDataIrregular. + """ + # Account for single curve test + other_2d = self._single_curve(fdatairregular_2d, other_2d) + + f_data_sub = other_2d - fdatairregular_2d + + result = self._take_first(other_2d) - fdatairregular_2d.values + + assert np.all(f_data_sub.values == result) + + def test_fdatairregular_arithmetic_mul( + self, + fdatairregular_2d: FDataIrregular, + other_2d: Any, + ) -> None: + """Tests the basic arithmetic operation fdatairregular * other. + + Args: + fdatairregular_2d (FDataIrregular): FDataIrregular object to test. + other_2d (Any): Scalar, vector, matrix or FDataIrregular. + """ + # Account for single curve test + other_2d = self._single_curve(fdatairregular_2d, other_2d) + + f_data_mul = fdatairregular_2d * other_2d + + result = fdatairregular_2d.values * self._take_first(other_2d) + + assert np.all(f_data_mul.values == result) + + def test_fdatairregular_arithmetic_rmul( + self, + fdatairregular_2d: FDataIrregular, + other_2d: Any, + ) -> None: + """Tests the basic arithmetic operation other * fdatairregular. + + Args: + fdatairregular_2d (FDataIrregular): FDataIrregular object to test. + other_2d (Any): Scalar, vector, matrix or FDataIrregular. 
+ """ + # Account for single curve test + other_2d = self._single_curve(fdatairregular_2d, other_2d) + + f_data_mul = other_2d * fdatairregular_2d + + result = self._take_first(other_2d) * fdatairregular_2d.values + + assert np.all(f_data_mul.values == result) + + def test_fdatairregular_arithmetic_mul_commutative( # noqa: WPS118 + self, + fdatairregular_2d: FDataIrregular, + other_2d: Any, + ) -> None: + """Tests that fdatairregular * other equals other * fdatairregular. + + Args: + fdatairregular_2d (FDataIrregular): FDataIrregular object to test. + other_2d (Any): Scalar, vector, matrix or FDataIrregular. + """ + # Account for single curve test + other_2d = self._single_curve(fdatairregular_2d, other_2d) + + assert np.all( + (fdatairregular_2d * other_2d) == (other_2d * fdatairregular_2d), + ) + + def test_fdatairregular_arithmetic_div( + self, + fdatairregular_2d: FDataIrregular, + other_2d: Any, + ) -> None: + """Tests the basic arithmetic operation fdatairregular / other. + + Args: + fdatairregular_2d (FDataIrregular): FDataIrregular object to test. + other_2d (Any): Scalar, vector, matrix or FDataIrregular. + """ + # Account for single curve test + other_2d = self._single_curve(fdatairregular_2d, other_2d) + + f_data_div = fdatairregular_2d / other_2d + + result = fdatairregular_2d.values / self._take_first(other_2d) + + assert np.all(f_data_div.values == result) + + def test_fdatairregular_arithmetic_rdiv( + self, + fdatairregular_2d: FDataIrregular, + other_2d: Any, + ) -> None: + """Tests the basic arithmetic operation other / fdatairregular. + + Args: + fdatairregular_2d (FDataIrregular): FDataIrregular object to test. + other_2d (Any): Scalar, vector, matrix or FDataIrregular.
+ """ + # Account for single curve test + other_2d = self._single_curve(fdatairregular_2d, other_2d) + + f_data_div = other_2d / fdatairregular_2d + + result = self._take_first(other_2d) / fdatairregular_2d.values + + assert np.all(f_data_div.values == result) + + +########################## +# TEST NUMERIC REDUCTIONS +########################## + +class TestNumericReductions: + """Class for testing numeric reductions (mean, std) for FDataIrregular.""" + + def test_fdatairregular_numeric_reduction( + self, + fdatairregular_common_points: FDataIrregular, + all_numeric_reductions: str, + ) -> None: + """Test FDataIrregular numeric statistical operations. + + All conversion methods will be tested with multiple + dimensions of codomain and domain. + + Args: + fdatairregular_common_points (FDataIrregular): FDataIrregular + object with points common to all samples. + all_numeric_reductions (str): Method of the class + FDataIrregular to be tested. + """ + reduction = getattr( + fdatairregular_common_points, all_numeric_reductions, + )() + assert isinstance(reduction, FDataIrregular) + + def test_fdatairregular_sum( + self, + fdatairregular_and_sum: Tuple[FDataIrregular, FDataIrregular], + ) -> None: + """Test the sum function for FDataIrregular. + + Test both unidimensional and multidimensional. + + Args: + fdatairregular_and_sum: FDataIrregular object and expected sum. + """ + fdatairregular, expected_sum = fdatairregular_and_sum + actual_sum = fdatairregular.sum() + assert actual_sum.equals(expected_sum) + + def test_fdatairregular_mean( + self, + fdatairregular_and_sum: Tuple[FDataIrregular, FDataIrregular], + ) -> None: + """Test the mean function for FDataIrregular. + + Test both unidimensional and multidimensional. + + Args: + fdatairregular_and_sum: FDataIrregular object and expected sum.
+ """ + fdatairregular, expected_sum = fdatairregular_and_sum + actual_mean = fdatairregular.mean() + assert actual_mean.equals(expected_sum / fdatairregular.n_samples) + + def test_fdatairregular_sum_invalid( + self, + fdatairregular_no_common_points: FDataIrregular, + ) -> None: + """Test the sum function for FDataIrregular. + + Args: + fdatairregular_no_common_points: FDataIrregular object with no + common points. + """ + with pytest.raises(ValueError): + fdatairregular_no_common_points.sum() + + +######################## +# TEST BASIS OPERATIONS +######################## + + +class TestBasisOperations: + """Class for testing the basis operations of FDataIrregular objects.""" + + def test_fdatairregular_basis_operation( + self, + fdatairregular: FDataIrregular, + all_basis: Basis, + all_basis_operations: str, + ) -> None: + """Test FDataIrregular conversion to FDataBasis. + + All conversion methods will be tested with multiple + dimensions of codomain and domain, as well as with + different types of Basis. + + Args: + fdatairregular (FDataIrregular): FDataIrregular + object to be transformed to basis. + all_basis (Basis): Basis to use (Spline, Fourier, ..). + all_basis_operations (str): Method of the class + FDataIrregular to be tested.
+ """ + # Create Tensor basis for higher dimensions + if fdatairregular.dim_domain == 1: + basis = all_basis( + domain_range=fdatairregular.domain_range, + n_basis=N_BASIS, + ) + else: + basis_by_dim = [ + all_basis( + domain_range=fdatairregular.domain_range[dim: dim + 1], + n_basis=N_BASIS, + ) + for dim in range(fdatairregular.dim_domain) + ] + basis = TensorBasis(basis_by_dim) + + fd_basis_coords = [ + getattr(coordinate, all_basis_operations)(basis) + for coordinate in fdatairregular.coordinates + ] + + assert all( + isinstance(fd_basis, FDataBasis) for fd_basis in fd_basis_coords + ) + + +def test_fdatairregular_to_basis_consistency( + fdatagrid: FDataGrid, + all_basis: Basis, +) -> None: + """Test that irregular to_basis is consistent with FDataGrid. + + FDataGrid is used as source because FDataIrregular can support + regular data, but the reverse is not necessarily true. The + to_basis method specifically does not allow NaN values. + + Args: + fdatagrid (FDataGrid): FDataGrid object + all_basis (Basis): Basis type used for the conversion.
+ """ + fd_irregular = FDataIrregular.from_fdatagrid(fdatagrid) + + if fd_irregular.dim_domain == 1: + basis = all_basis( + domain_range=fd_irregular.domain_range, + n_basis=N_BASIS, + ) + else: + basis_by_dim = [ + all_basis( + domain_range=fd_irregular.domain_range[dim: dim + 1], + n_basis=N_BASIS, + ) + for dim in range(fd_irregular.dim_domain) + ] + basis = TensorBasis(basis_by_dim) + + irregular_basis = [ + coord.to_basis(basis) + for coord in fd_irregular.coordinates + ] + + grid_basis = [ + coord.to_basis(basis) + for coord in fdatagrid.coordinates + ] + + irregular_coefs = [ + b.coefficients.round(DECIMALS) + for b in irregular_basis + ] + + grid_coefs = [ + b.coefficients.round(DECIMALS) + for b in grid_basis + ] + + assert all( + np.all(irregular_coefs[i] == g_coef) + for i, g_coef in enumerate(grid_coefs) + ) diff --git a/skfda/tests/test_stats_std.py b/skfda/tests/test_stats_std.py index 24f6687af..8bbc64d5b 100644 --- a/skfda/tests/test_stats_std.py +++ b/skfda/tests/test_stats_std.py @@ -7,7 +7,7 @@ import numpy as np import pytest -from skfda import FDataBasis, FDataGrid +from skfda import FDataBasis, FDataGrid, FDataIrregular from skfda.exploratory.stats import std from skfda.representation.basis import ( Basis, @@ -66,6 +66,22 @@ def t_basis2(request: Any, t_n_basis2: int = 5) -> Basis: # Tests +def test_std_fdatairregular_1d_to_1d() -> None: + """Test std_fdatairregular with R to R functions.""" + fd = FDataIrregular( + start_indices=[0, 3, 7], + points=[0, 1, 10, 0, 1, 2, 10, 0, 1, 4, 10], + values=[0, 0, 10, 1, 1, 6, 10, 2, 2, 9, 10], + ) + expected_std = FDataIrregular( + start_indices=[0], + points=[0, 1, 10], + values=[np.sqrt(2 / 3), np.sqrt(2 / 3), 0], + ) + actual_std = std(fd) + assert actual_std.equals(expected_std), actual_std + + def test_std_fdatagrid_1d_to_2d() -> None: """Test std_fdatagrid with R to R^2 functions.""" fd = FDataGrid( @@ -78,7 +94,7 @@ def test_std_fdatagrid_1d_to_2d() -> None: [0, 1, 2, 3, 4, 5], ], ) - 
expected_std_data_matrix = np.full((1, 2, 6, 1), np.sqrt(2)) + expected_std_data_matrix = np.full((1, 2, 6, 1), 1) np.testing.assert_allclose( std(fd).data_matrix, expected_std_data_matrix, @@ -103,7 +119,7 @@ def test_std_fdatagrid_2d_to_2d() -> None: [0, 1, 2], ], ) - expected_std_data_matrix = np.full((1, 2, 3, 2), np.sqrt(1 / 2)) + expected_std_data_matrix = np.full((1, 2, 3, 2), np.sqrt(1 / 4)) np.testing.assert_allclose( std(fd).data_matrix, expected_std_data_matrix, @@ -129,7 +145,7 @@ def test_std_fdatabasis_vector_valued_basis( ) np.testing.assert_allclose( - std(fd).coefficients, + std(fd, correction=1).coefficients, np.array([np.sqrt(1 / 2) * one_coefficients]), rtol=1e-7, atol=1e-7, @@ -152,7 +168,7 @@ def test_std_fdatabasis_tensor_basis( ) np.testing.assert_allclose( - std(fd).coefficients, + std(fd, correction=1).coefficients, np.array([np.sqrt(1 / 2) * one_coefficients]), rtol=1e-7, atol=1e-7, @@ -181,7 +197,7 @@ def test_std_fdatabasis_2d_to_2d() -> None: expected_coefficients = np.array([[np.sqrt(1 / 2), 0, 0, 0] * 2]) np.testing.assert_allclose( - std(fd).coefficients, + std(fd, correction=1).coefficients, expected_coefficients, rtol=1e-7, atol=1e-7, diff --git a/skfda/typing/_numpy.py b/skfda/typing/_numpy.py index 774511cc4..b870c2bc4 100644 --- a/skfda/typing/_numpy.py +++ b/skfda/typing/_numpy.py @@ -4,10 +4,11 @@ import numpy as np -try: - from numpy.typing import ArrayLike as ArrayLike # noqa: WPS113 +try: # noqa: WPS113 + from numpy.typing import ArrayLike as ArrayLike, DTypeLike as DTypeLike except ImportError: ArrayLike = np.ndarray # type:ignore[misc] # noqa: WPS440 + DTypeLike = np.dtype # type:ignore[misc] try: # noqa: WPS229 from numpy.typing import NDArray