diff --git a/.gitignore b/.gitignore index 765aa793c..53f5bcc28 100644 --- a/.gitignore +++ b/.gitignore @@ -105,3 +105,8 @@ ENV/ #IDE metadata .idea/ + +pip-wheel-metadata/ + +# macOS DS_Store +.DS_Store diff --git a/.travis.yml b/.travis.yml index 8b445a1df..e48c60eda 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,7 +16,8 @@ matrix: - name: "Python 3.7.3 on Windows" os: windows # Windows 10.0.17134 N/A Build 17134 language: shell # 'language: python' is an error on Travis CI Windows - before_install: choco install python + before_install: + - choco install python3 --version=3.7.3 env: PATH=/c/Python37:/c/Python37/Scripts:$PATH - name: "Coverage and pep 8 tests on Python 3.7.1 on Xenial Linux" python: 3.7 # this works for Linux but is ignored on macOS or Windows diff --git a/README.rst b/README.rst index 07a5a7017..aef40ddda 100644 --- a/README.rst +++ b/README.rst @@ -4,7 +4,7 @@ scikit-fda: Functional Data Analysis in Python =================================================== -|python|_ |build-status| |docs| |Codecov|_ |PyPIBadge|_ |license|_ +|python|_ |build-status| |docs| |Codecov|_ |PyPIBadge|_ |license|_ |doi| Functional Data Analysis, or FDA, is the field of Statistics that analyses data that depend on a continuous parameter. @@ -44,37 +44,35 @@ Installation from source It is possible to install the latest version of the package, available in the develop branch, by cloning this repository and doing a manual installation. -.. code:: +.. code:: bash git clone https://github.com/GAA-UAM/scikit-fda.git - cd scikit-fda/ - pip install -r requirements.txt # Install dependencies - python setup.py install + pip install ./scikit-fda Make sure that your default Python version is currently supported, or change the python and pip commands by specifying a version, such as ``python3.6``: -.. code:: +.. code:: bash git clone https://github.com/GAA-UAM/scikit-fda.git - cd scikit-fda/ - python3.6 -m pip install -r requirements.txt # Install dependencies - python3.6 setup.py install + python3.6 -m pip install ./scikit-fda Requirements ------------ *scikit-fda* depends on the following packages: -* `setuptools `_ - Python Packaging * `cython `_ - Python to C compiler -* `numpy `_ - The fundamental package for scientific computing with Python -* `pandas `_ - Powerful Python data analysis toolkit -* `scipy `_ - Scientific computation in Python -* `scikit-learn `_ - Machine learning in Python +* `findiff `_ - Finite differences * `matplotlib `_ - Plotting with Python * `mpldatacursor `_ - Interactive data cursors for matplotlib +* `multimethod `_ - Multiple dispatch +* `numpy `_ - The fundamental package for scientific computing with Python +* `pandas `_ - Powerful Python data analysis toolkit * `rdata `_ - Reader of R datasets in .rda format in Python * `scikit-datasets `_ - Scikit-learn compatible datasets +* `scikit-learn `_ - Machine learning in Python +* `scipy `_ - Scientific computation in Python +* `setuptools `_ - Python Packaging The dependencies are automatically installed. @@ -88,11 +86,11 @@ The people involved at some point in the development of the package can be found in the `contributors file `_. -Citation -======== -If you find this project useful, please cite: +.. Citation + ======== + If you find this project useful, please cite: -.. todo:: Include citation to scikit-fda paper. + .. todo:: Include citation to scikit-fda paper. License ======= @@ -109,7 +107,7 @@ license_ can be found along with the code. .. 
|build-status| image:: https://travis-ci.org/GAA-UAM/scikit-fda.svg?branch=develop :alt: build status :scale: 100% - :target: https://travis-ci.org/GAA-UAM/scikit-fda + :target: https://travis-ci.com/GAA-UAM/scikit-fda .. |docs| image:: https://readthedocs.org/projects/fda/badge/?version=latest :alt: Documentation Status @@ -124,3 +122,6 @@ license_ can be found along with the code. .. |license| image:: https://img.shields.io/badge/License-BSD%203--Clause-blue.svg .. _license: https://github.com/GAA-UAM/scikit-fda/blob/master/LICENSE.txt + +.. |doi| image:: https://zenodo.org/badge/DOI/10.5281/zenodo.3468127.svg + :target: https://doi.org/10.5281/zenodo.3468127 diff --git a/VERSION b/VERSION index be5863417..bd73f4707 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.3 +0.4 diff --git a/conftest.py b/conftest.py index 889066c3a..0bf55fc27 100644 --- a/conftest.py +++ b/conftest.py @@ -7,6 +7,6 @@ except TypeError: pass -collect_ignore = ['setup.py'] +collect_ignore = ['setup.py', 'docs/conf.py'] pytest.register_assert_rewrite("skfda") diff --git a/docs/Makefile b/docs/Makefile index 09cb3d000..e5d3c5645 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -60,6 +60,7 @@ clean: rm -rf modules/misc/autosummary rm -rf modules/ml/autosummary rm -rf modules/ml/clustering/autosummary + rm -rf modules/inference/autosummary rm -rf backreferences .PHONY: html diff --git a/docs/_templates/autosummary/base.rst b/docs/_templates/autosummary/base.rst index 27f71e506..38fba4a8b 100644 --- a/docs/_templates/autosummary/base.rst +++ b/docs/_templates/autosummary/base.rst @@ -1,4 +1,4 @@ -{{ fullname | escape | underline}} +{{ objname | escape | underline}} .. currentmodule:: {{ module }} diff --git a/docs/_templates/autosummary/class.rst b/docs/_templates/autosummary/class.rst index 5d4fff393..c97621a73 100644 --- a/docs/_templates/autosummary/class.rst +++ b/docs/_templates/autosummary/class.rst @@ -1,12 +1,10 @@ -{{ fullname | escape | underline}} +{{ objname | escape | underline}} .. currentmodule:: {{ module }} .. autoclass:: {{ objname }} {% block methods %} - .. automethod:: __init__ - {% if methods %} .. rubric:: Methods @@ -15,6 +13,8 @@ ~{{ name }}.{{ item }} {%- endfor %} {% endif %} + + .. automethod:: __init__ {% endblock %} {% block attributes %} diff --git a/docs/apilist.rst b/docs/apilist.rst index e443d49ce..b7cc7d15a 100644 --- a/docs/apilist.rst +++ b/docs/apilist.rst @@ -13,3 +13,4 @@ API Reference modules/datasets modules/misc modules/ml + modules/inference diff --git a/docs/conf.py b/docs/conf.py index 18a8d6170..a5b3f2d07 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -17,9 +17,6 @@ # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # -# import os -# import sys -# sys.path.insert(0, '/home/miguel/Desktop/fda/fda') import os import sys @@ -55,7 +52,8 @@ 'sphinx_rtd_theme', 'sphinx_gallery.gen_gallery', 'sphinx.ext.intersphinx', - 'sphinx.ext.doctest'] + 'sphinx.ext.doctest', + 'jupyter_sphinx'] autodoc_default_flags = ['members', 'inherited-members'] @@ -79,7 +77,8 @@ # General information about the project. project = 'scikit-fda' -copyright = '2017, Author' +copyright = ('2019, Grupo de Aprendizaje Automático - ' + + 'Universidad Autónoma de Madrid') author = 'Author' # The language for content autogenerated by Sphinx. 
Refer to documentation
@@ -238,3 +237,47 @@ } autosummary_generate = True + + +# Napoleon fix for attributes +# Taken from +# https://michaelgoerz.net/notes/extending-sphinx-napoleon-docstring-sections.html + +# -- Extensions to the Napoleon GoogleDocstring class --------------------- +from sphinx.ext.napoleon.docstring import GoogleDocstring + +# first, we define new methods for any new sections and add them to the class + + +def parse_keys_section(self, section): + return self._format_fields('Keys', self._consume_fields()) + + +GoogleDocstring._parse_keys_section = parse_keys_section + + +def parse_attributes_section(self, section): + return self._format_fields('Attributes', self._consume_fields()) + + +GoogleDocstring._parse_attributes_section = parse_attributes_section + + +def parse_class_attributes_section(self, section): + return self._format_fields('Class Attributes', self._consume_fields()) + + +GoogleDocstring._parse_class_attributes_section = parse_class_attributes_section + +# we now patch the parse method to guarantee that the above methods are +# assigned to the _sections dict + + +def patched_parse(self): + self._sections['keys'] = self._parse_keys_section + self._sections['class attributes'] = self._parse_class_attributes_section + self._unpatched_parse() + + +GoogleDocstring._unpatched_parse = GoogleDocstring._parse +GoogleDocstring._parse = patched_parse
diff --git a/docs/index.rst b/docs/index.rst index f5f999aff..f451e872b 100644 --- a/docs/index.rst +++ b/docs/index.rst
@@ -5,18 +5,78 @@ Welcome to scikit-fda's documentation! ====================================== +This package offers classes, methods and functions to give support to +Functional Data Analysis in Python. It includes a wide range of utilities to +work with functional data and its representation, exploratory analysis and +preprocessing, as well as other tasks such as inference, classification, +regression or clustering of functional data. + +In the `project page `_, hosted on +GitHub, you can find more information related to the development of the +package. + + .. toctree:: - :includehidden: - :maxdepth: 4 + :maxdepth: 2 :caption: Contents: :titlesonly: apilist + + +.. toctree:: + :maxdepth: 1 + :titlesonly: + auto_examples/index -Indices and tables -================== +An exhaustive list of all the contents of the package can be found in the +:ref:`genindex`. + +Installation +------------ + +Currently, scikit-fda is available in Python 3.6 and 3.7, regardless of the +platform. The stable version can be installed via +`PyPI `_: + +.. code-block:: bash + + pip install scikit-fda + + +It is possible to install the latest version of the package, available in +the develop branch, by cloning this repository and doing a manual installation. + +.. code-block:: bash + + git clone https://github.com/GAA-UAM/scikit-fda.git + pip install ./scikit-fda + + +In this type of installation, make sure that your default Python version is +currently supported, or change the python and pip commands by specifying a +version, such as python3.6.
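+Once installed, the package can be used right away. As a minimal usage
+sketch (the dataset fetcher and the plot method are the same ones used in
+the examples of this documentation), the following code fetches the Berkeley
+Growth Study data and plots its curves:
+
+.. code-block:: python
+
+   import skfda
+
+   # Fetch a functional dataset and plot its trajectories
+   dataset = skfda.datasets.fetch_growth()
+   fd = dataset['data']
+   fd.plot()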
+ + +Contributions +------------- + +All contributions are welcome. You can help this project grow in multiple ways, +from creating an issue, reporting an improvement or a bug, to forking the +repository and creating a pull request to the development branch. +The people involved at some point in the development of the package can be +found in the `contributors file +`_. + +.. Citation + -------- + If you find this project useful, please cite: + + .. todo:: Include citation to scikit-fda paper. + +License +------- -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` +The package is licensed under the BSD 3-Clause License. A copy of the +`license `_ +can be found along with the code or in the project page.
diff --git a/docs/make.bat b/docs/make.bat index aaea5cb30..820e54269 100644 --- a/docs/make.bat +++ b/docs/make.bat
@@ -5,7 +5,7 @@ REM Command file for Sphinx documentation pushd %~dp0 if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=python -msphinx + set SPHINXBUILD=sphinx-build ) set BUILDDIR=_build set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
@@ -54,7 +54,7 @@ if "%1" == "clean" ( REM Check if sphinx-build is available %SPHINXBUILD% 1>NUL 2>NUL -if errorlevel 1 ( +if errorlevel 9009 ( echo. echo.The Sphinx module was not found. Make sure you have Sphinx installed, echo.then set the SPHINXBUILD environment variable to point to the full
diff --git a/docs/modules/datasets.rst b/docs/modules/datasets.rst index 6946376c9..fc09bb486 100644 --- a/docs/modules/datasets.rst +++ b/docs/modules/datasets.rst
@@ -17,6 +17,8 @@ The following functions are used to retrieve specific functional datasets: skfda.datasets.fetch_medflies skfda.datasets.fetch_weather skfda.datasets.fetch_aemet + skfda.datasets.fetch_octane + skfda.datasets.fetch_gait Those functions return a dictionary with at least a "data" field containing the instance data, and a "target" field containing the class labels or regression values,
diff --git a/docs/modules/exploratory.rst b/docs/modules/exploratory.rst index 45f048bfa..832b93193 100644 --- a/docs/modules/exploratory.rst +++ b/docs/modules/exploratory.rst
@@ -10,4 +10,4 @@ and visualize functional data. exploratory/visualization exploratory/depth - exploratory/outliers \ No newline at end of file + exploratory/outliers
diff --git a/docs/modules/exploratory/outliers.rst b/docs/modules/exploratory/outliers.rst index 290a1e377..0adba3291 100644 --- a/docs/modules/exploratory/outliers.rst +++ b/docs/modules/exploratory/outliers.rst
@@ -4,21 +4,28 @@ Outlier detection Functional outlier detection is the identification of functions that do not seem to behave like the others in the dataset. There are several ways in which a function may be different from the others. For example, a function may have a different shape than the others, or its values could be more extreme. Thus, outlyingness is difficult to -categorize exactly as each outlier detection method looks at different features of the functions in order to +categorize exactly as each outlier detection method looks at different features of the functions in order to identify the outliers. Each of the outlier detection methods in scikit-fda has the same API as the outlier detection methods of `scikit-learn `_. +Interquartile Range Outlier Detector +------------------------------------ + One of the most common ways of outlier detection is given by the functional data boxplot. An observation is marked as an outlier if it has points more than :math:`1.5 \cdot IQR` outside the region containing the deepest 50% of the curves -(the central region), where :math:`IQR` is the interquartilic range. +(the central region), where :math:`IQR` is the interquartile range. .. autosummary:: :toctree: autosummary skfda.exploratory.outliers.IQROutlierDetector - + + +DirectionalOutlierDetector +-------------------------- + Another, more novel, way of outlier detection takes into account the magnitude and shape of the curves. Curves which have a very different shape or magnitude are considered outliers.
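+Since every detector shares the scikit-learn outlier detection API, a single
+``fit_predict`` call is enough. The following minimal sketch applies the
+boxplot-based detector described above to one coordinate of the weather
+dataset (the dataset choice and the labeling convention are our assumptions,
+for illustration only):
+
+.. code-block:: python
+
+   import skfda
+   from skfda.exploratory.outliers import IQROutlierDetector
+
+   fd = skfda.datasets.fetch_weather()['data'].coordinates[0]
+
+   # Following the scikit-learn convention, inliers are labeled 1
+   # and outliers -1.
+   labels = IQROutlierDetector().fit_predict(fd)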
@@ -26,11 +33,11 @@ a very different shape or magnitude are considered outliers. :toctree: autosummary skfda.exploratory.outliers.DirectionalOutlierDetector - + For this method, it is necessary to compute the mean and variation of the directional outlyingness, which can be done with the following function. .. autosummary:: :toctree: autosummary - skfda.exploratory.outliers.directional_outlyingness_stats \ No newline at end of file + skfda.exploratory.outliers.directional_outlyingness_stats
diff --git a/docs/modules/exploratory/visualization.rst b/docs/modules/exploratory/visualization.rst index cb701b337..a2de8fb3a 100644 --- a/docs/modules/exploratory/visualization.rst +++ b/docs/modules/exploratory/visualization.rst
@@ -10,4 +10,5 @@ the functional data, that highlight several important aspects of it. visualization/boxplot visualization/magnitude_shape_plot - visualization/clustering \ No newline at end of file + visualization/clustering + visualization/fpca \ No newline at end of file
diff --git a/docs/modules/exploratory/visualization/fpca.rst b/docs/modules/exploratory/visualization/fpca.rst new file mode 100644 index 000000000..8f22e884e --- /dev/null +++ b/docs/modules/exploratory/visualization/fpca.rst
@@ -0,0 +1,14 @@ +Functional Principal Component Analysis plots +============================================= +In order to show the modes of variation that the principal components represent, +the following function is implemented: + +.. autosummary:: + :toctree: autosummary + + skfda.exploratory.visualization.fpca.plot_fpca_perturbation_graphs + +See the example :ref:`sphx_glr_auto_examples_plot_fpca.py` for a detailed +explanation. + +
diff --git a/docs/modules/inference.rst b/docs/modules/inference.rst new file mode 100644 index 000000000..ad751703c --- /dev/null +++ b/docs/modules/inference.rst
@@ -0,0 +1,13 @@ +Inference +============= + +This module provides functions and utilities to analyze functional data in +order to draw conclusions about a population from a sample, together with +the degree of reliability of these results. + +.. toctree:: + :maxdepth: 3 + :caption: Modules: + + inference/anova + inference/hotelling
diff --git a/docs/modules/inference/anova.rst b/docs/modules/inference/anova.rst new file mode 100644 index 000000000..9aad0fc3f --- /dev/null +++ b/docs/modules/inference/anova.rst
@@ -0,0 +1,27 @@ +ANOVA +============== +This package groups a collection of statistical models, useful for analyzing +equality of means for different subsets of a sample. + +One-way functional ANOVA +------------------------ +Functionality to perform one-way ANOVA analysis, to compare means among +different samples. One-way stands for one functional response variable and +a single input variable. + +.. autosummary:: + :toctree: autosummary + + skfda.inference.anova.oneway_anova + +Statistics +---------- +Statistics that measure the internal and external variability between +groups, used in the models above. + +.. autosummary:: + :toctree: autosummary + + skfda.inference.anova.v_sample_stat + skfda.inference.anova.v_asymptotic_stat +
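+As a short usage sketch, the groups to compare are passed directly to
+:func:`oneway_anova`, which is assumed here to return the value of the
+statistic together with the p-value of the test (the dataset and the
+grouping are our choice, for illustration):
+
+.. code-block:: python
+
+   import skfda
+   from skfda.inference.anova import oneway_anova
+
+   # Split the Berkeley Growth data by sex and test equality of means
+   dataset = skfda.datasets.fetch_growth()
+   fd = dataset['data']
+   y = dataset['target']
+
+   stat, p_val = oneway_anova(fd[y == 0], fd[y == 1])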
diff --git a/docs/modules/inference/hotelling.rst b/docs/modules/inference/hotelling.rst new file mode 100644 index 000000000..85ec04f1c --- /dev/null +++ b/docs/modules/inference/hotelling.rst
@@ -0,0 +1,12 @@ +Hotelling +============== +This package groups a collection of statistical tests based on Hotelling's +statistic. + +.. autosummary:: + :toctree: autosummary + + skfda.inference.hotelling.hotelling_t2 + skfda.inference.hotelling.hotelling_test_ind + +
diff --git a/docs/modules/misc.rst b/docs/modules/misc.rst index 66654d9c6..e72660025 100644 --- a/docs/modules/misc.rst +++ b/docs/modules/misc.rst
@@ -7,4 +7,7 @@ Miscellaneous functions and objects. :maxdepth: 4 :caption: Modules: + misc/covariances misc/metrics + misc/operators + misc/regularization
diff --git a/docs/modules/misc/covariances.rst b/docs/modules/misc/covariances.rst new file mode 100644 index 000000000..f27137ef8 --- /dev/null +++ b/docs/modules/misc/covariances.rst
@@ -0,0 +1,17 @@ +Covariance functions +==================== + +This module contains several common covariance functions of Gaussian +processes. These functions can be used as covariances in +:func:`make_gaussian_process`. + +.. autosummary:: + :toctree: autosummary + + skfda.misc.covariances.Brownian + skfda.misc.covariances.Covariance + skfda.misc.covariances.Exponential + skfda.misc.covariances.Gaussian + skfda.misc.covariances.Linear + skfda.misc.covariances.Polynomial + skfda.misc.covariances.WhiteNoise \ No newline at end of file
diff --git a/docs/modules/misc/metrics.rst b/docs/modules/misc/metrics.rst index e6ca52f93..a7ac7c7e7 100644 --- a/docs/modules/misc/metrics.rst +++ b/docs/modules/misc/metrics.rst
@@ -12,7 +12,7 @@ The following functions computes the norms and distances used in Lp spaces. .. autosummary:: :toctree: autosummary - skfda.misc.metrics.norm_lp + skfda.misc.metrics.lp_norm skfda.misc.metrics.lp_distance
@@ -38,6 +38,5 @@ Utils .. autosummary:: :toctree: autosummary - skfda.misc.metrics.vectorial_norm skfda.misc.metrics.distance_from_norm skfda.misc.metrics.pairwise_distance
diff --git a/docs/modules/misc/operators.rst b/docs/modules/misc/operators.rst new file mode 100644 index 000000000..d2a877a2e --- /dev/null +++ b/docs/modules/misc/operators.rst
@@ -0,0 +1,14 @@ +Operators +========= + +This module contains several useful operators that can be applied to +functional data, and sometimes to multivariate data. + +The operators that are linear can also be used in the context of +:doc:`regularization`. + +.. autosummary:: + :toctree: autosummary + + skfda.misc.operators.Identity + skfda.misc.operators.LinearDifferentialOperator \ No newline at end of file
diff --git a/docs/modules/misc/regularization.rst b/docs/modules/misc/regularization.rst new file mode 100644 index 000000000..e68aeb7c2 --- /dev/null +++ b/docs/modules/misc/regularization.rst
@@ -0,0 +1,36 @@ +Regularization +============== + +This module contains several regularization techniques that can be applied +in several situations, such as regression, PCA or basis smoothing. + +These regularization methods are useful to obtain simple solutions and to +introduce known hypotheses into the model, such as periodicity or smoothness, +reducing the effects caused by noise in the observations. + +In functional data analysis it is also common to have ill-posed problems, +because of the infinite nature of the data and the finite sample size. The +application of regularization techniques in this kind of problem is then +necessary to obtain reasonable solutions. + +When dealing with multivariate data, a common choice for the regularization +is to penalize the squared Euclidean norm, or :math:`L_2` norm, of the vectors +in order to obtain simpler solutions. This can be done in scikit-fda for +both multivariate and functional data using the :class:`L2Regularization` +class.
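+As an illustration, the following sketch shows how such a penalty could be
+plugged into a functional linear regression. Note that the no-argument
+constructor and the ``regularization`` parameter of
+:class:`~skfda.ml.regression.LinearRegression` are assumptions made here
+only for the sake of the example:
+
+.. code-block:: python
+
+   from skfda.misc.regularization import L2Regularization
+   from skfda.ml.regression import LinearRegression
+
+   # Penalize the squared L2 norm of the estimated coefficient
+   # function, favoring smaller and simpler solutions.
+   regression = LinearRegression(regularization=L2Regularization())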
+ +A more flexible generalization of this approach is the so-called Tikhonov +regularization, available as :class:`TikhonovRegularization`, in which the +squared :math:`L_2` norm is penalized after a particular linear operator is +applied. This allows, for example, penalizing the second derivative of a curve, +which is a measure of its curvature, because the differential operator +is linear. As arbitrary Python callables can be used as operators (provided +that they correspond to a linear transformation), it is possible to penalize +the evaluation at a point, the difference between points or other arbitrary +linear operations. + +.. autosummary:: + :toctree: autosummary + + skfda.misc.regularization.L2Regularization + skfda.misc.regularization.TikhonovRegularization \ No newline at end of file
diff --git a/docs/modules/ml/classification.rst b/docs/modules/ml/classification.rst index 9524a4aea..e4c2d0a77 100644 --- a/docs/modules/ml/classification.rst +++ b/docs/modules/ml/classification.rst
@@ -21,4 +21,4 @@ it is explained the basic usage of these estimators. skfda.ml.classification.KNeighborsClassifier skfda.ml.classification.RadiusNeighborsClassifier - skfda.ml.classification.NearestCentroids + skfda.ml.classification.NearestCentroid
diff --git a/docs/modules/ml/clustering.rst b/docs/modules/ml/clustering.rst index ce07f534b..3bdb59647 100644 --- a/docs/modules/ml/clustering.rst +++ b/docs/modules/ml/clustering.rst
@@ -20,7 +20,7 @@ detailed explanation. :toctree: autosummary skfda.ml.clustering.KMeans - skfda.ml.clustering.FuzzyKMeans + skfda.ml.clustering.FuzzyCMeans Nearest Neighbors
diff --git a/docs/modules/ml/regression.rst b/docs/modules/ml/regression.rst index 72ba60f4b..ce416a58a 100644 --- a/docs/modules/ml/regression.rst +++ b/docs/modules/ml/regression.rst
@@ -8,12 +8,14 @@ Module with classes to perform regression of functional data. Linear regression ----------------- -Todo: Add documentation of linear regression models. +A linear regression model is one in which the response variable can be +expressed as a linear combination of the covariates (which could be +multivariate or functional). .. autosummary:: :toctree: autosummary - skfda.ml.regression.LinearScalarRegression + skfda.ml.regression.LinearRegression Nearest Neighbors -----------------
diff --git a/docs/modules/preprocessing.rst b/docs/modules/preprocessing.rst index 06f3eb6da..ae14a2938 100644 --- a/docs/modules/preprocessing.rst +++ b/docs/modules/preprocessing.rst
@@ -12,6 +12,7 @@ this category deal with this problem. preprocessing/smoothing preprocessing/registration + preprocessing/dim_reduction Smoothing ---------
@@ -28,4 +29,14 @@ Sometimes, the functional data may be misaligned, or the phase variation should be ignored in the analysis. To align the data and eliminate the phase variation, we need to use *registration* methods. :doc:`Here ` you can learn more about the -registration methods available in the library. \ No newline at end of file +registration methods available in the library. + +Dimensionality Reduction +------------------------ + +Functional data may have too many features to analyse them with clarity. +To better understand the data, we can use *dimensionality reduction* methods +that reduce the number of features while still preserving the most relevant +information. +:doc:`Here ` you can learn more about the +dimensionality reduction methods available in the library.
\ No newline at end of file
diff --git a/docs/modules/preprocessing/dim_reduction.rst b/docs/modules/preprocessing/dim_reduction.rst new file mode 100644 index 000000000..ded6b831f --- /dev/null +++ b/docs/modules/preprocessing/dim_reduction.rst
@@ -0,0 +1,18 @@ +Dimensionality Reduction +======================== + +When dealing with data samples with high dimensionality, we often need to +reduce the dimensions so we can better observe the data. + +Projection +---------- +One way to reduce the dimension is through projection. For example, in +functional principal component analysis, we project the data samples +into a smaller sample of functions that preserve the maximum sample +variance. + +.. toctree:: + :maxdepth: 4 + :caption: Modules: + + dim_reduction/fpca \ No newline at end of file
diff --git a/docs/modules/preprocessing/dim_reduction/fpca.rst b/docs/modules/preprocessing/dim_reduction/fpca.rst new file mode 100644 index 000000000..c6cc9bfd8 --- /dev/null +++ b/docs/modules/preprocessing/dim_reduction/fpca.rst
@@ -0,0 +1,24 @@ +Functional Principal Component Analysis (FPCA) +============================================== + +This module provides tools to analyse functional data using FPCA. FPCA is +a common tool used to reduce dimensionality. It can be applied to a functional +data object in either a basis representation or a discretized representation. +The output of FPCA is the projection of the original sample functions onto the +directions (principal components) in which most of the variance is conserved. +In multivariate PCA those directions are vectors. However, in FPCA we seek +functions that maximize the sample variance operator, and then project our data +samples into those principal components. The number of principal components is +at most the number of original features. + +For a detailed example please view :ref:`sphx_glr_auto_examples_plot_fpca.py`, +where the process is applied to several datasets in both discretized and basis +forms. + +FPCA for functional data in both representations +---------------------------------------------------------------- + +.. autosummary:: + :toctree: autosummary + + skfda.preprocessing.dim_reduction.projection.FPCA
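+The estimator can be used along the lines of the following minimal sketch,
+assuming it follows the scikit-learn transformer API (the dataset choice is
+ours; see the example referenced above for a complete walkthrough):
+
+.. code-block:: python
+
+   import skfda
+   from skfda.preprocessing.dim_reduction.projection import FPCA
+
+   fd = skfda.datasets.fetch_growth()['data']
+
+   # Keep the two principal components that explain most of the variance
+   fpca = FPCA(n_components=2)
+   scores = fpca.fit_transform(fd)  # projection of each sample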
diff --git a/docs/modules/preprocessing/registration.rst b/docs/modules/preprocessing/registration.rst index 941b063f6..abf7db45a 100644 --- a/docs/modules/preprocessing/registration.rst +++ b/docs/modules/preprocessing/registration.rst
@@ -14,14 +14,13 @@ Many of the issues involved in registration can be solved by considering the simplest case, a simple shift in the time scale. This often happens because the time at which the recording process begins is arbitrary, and is unrelated to the beginning of the interesting segment of the data. In the -`Shift Registration Example <../auto_examples/plot_shift_registration_basis.html>`_ -it is shown the basic usage of this methods applied to periodic data. +:ref:`sphx_glr_auto_examples_plot_shift_registration.py` example +the basic usage of this method is shown. .. autosummary:: :toctree: autosummary - skfda.preprocessing.registration.shift_registration - skfda.preprocessing.registration.shift_registration_deltas + skfda.preprocessing.registration.ShiftRegistration Landmark Registration --------------------- takes all the times of a given feature into a common value. The simplest case in which each sample presents a unique landmark can be solved by performing a translation in the time scale. See the +:ref:`sphx_glr_auto_examples_plot_landmark_shift.py` example. .. autosummary:: :toctree: autosummary
@@ -43,8 +42,7 @@ The general case of landmark registration may present multiple landmarks for each sample and a non-linear transformation in the time scale should be applied. -See the `Landmark Registration Example -<../auto_examples/plot_landmark_registration.html>`_ +See the :ref:`sphx_glr_auto_examples_plot_landmark_registration.py` example. .. autosummary:: :toctree: autosummary
@@ -58,16 +56,15 @@ Elastic Registration The elastic registration is a novel approach to this problem that uses the properties of the Fisher-Rao metric to perform the alignment of the curves. -In the examples of `pairwise alignment -<../auto_examples/plot_pairwise_alignment.html>`_ and `elastic registration -<../auto_examples/plot_elastic_registration.html>`_ is shown a brief +In the examples of +:ref:`sphx_glr_auto_examples_plot_pairwise_alignment.py` and +:ref:`sphx_glr_auto_examples_plot_elastic_registration.py` a brief introduction to this topic is shown, along with the usage of the corresponding functions. .. autosummary:: :toctree: autosummary - skfda.preprocessing.registration.elastic_registration - skfda.preprocessing.registration.elastic_registration_warping + skfda.preprocessing.registration.ElasticRegistration The module contains some routines related to the elastic registration, making
@@ -77,31 +74,32 @@ on the elastic framework. .. autosummary:: :toctree: autosummary - skfda.preprocessing.registration.elastic_mean - skfda.preprocessing.registration.warping_mean - skfda.preprocessing.registration.to_srsf - skfda.preprocessing.registration.from_srsf + skfda.preprocessing.registration.elastic.elastic_mean + skfda.preprocessing.registration.elastic.warping_mean + skfda.preprocessing.registration.elastic.SRSF +Validation +---------- -Amplitude and Phase Decomposition ---------------------------------- - -The amplitude and phase variation may be quantified by comparing a sample before -and after registration. The package contains an implementation of the -decomposition procedure developed by *Kneip and Ramsay (2008)*. +This module contains several classes and methods for the quantification and +validation of the registration procedure. .. autosummary:: :toctree: autosummary + + skfda.preprocessing.registration.validation.AmplitudePhaseDecomposition + skfda.preprocessing.registration.validation.LeastSquares + skfda.preprocessing.registration.validation.SobolevLeastSquares + skfda.preprocessing.registration.validation.PairwiseCorrelation - skfda.preprocessing.registration.mse_decomposition +Warping utils ----------------- -There are some other method related with the registration problem in this -module. +This module contains some functions related to the warping of functional +data. .. autosummary:: :toctree: autosummary
diff --git a/docs/modules/representation.rst b/docs/modules/representation.rst index f5c8719ad..83efe532a 100644 --- a/docs/modules/representation.rst +++ b/docs/modules/representation.rst
@@ -30,7 +30,7 @@ following class allows interpolation with different splines. .. autosummary:: :toctree: autosummary - skfda.representation.interpolation.SplineInterpolator + skfda.representation.interpolation.SplineInterpolation Basis representation
@@ -45,7 +45,8 @@ of elements of a basis function system. skfda.representation.basis.FDataBasis -The following classes are used to define different basis systems.
+The following classes are used to define different bases for +:math:`\mathbb{R} \to \mathbb{R}` functions. .. autosummary:: :toctree: autosummary
@@ -54,6 +55,24 @@ The following classes are used to define different basis systems. skfda.representation.basis.Fourier skfda.representation.basis.Monomial skfda.representation.basis.Constant + +The following class allows the construction of a basis for +:math:`\mathbb{R}^n \to \mathbb{R}` functions from +several :math:`\mathbb{R} \to \mathbb{R}` bases. + +.. autosummary:: + :toctree: autosummary + + skfda.representation.basis.Tensor + +The following class allows the construction of a basis for +:math:`\mathbb{R}^n \to \mathbb{R}^m` functions from +several :math:`\mathbb{R}^n \to \mathbb{R}` bases. + +.. autosummary:: + :toctree: autosummary + + skfda.representation.basis.VectorValued Generic representation ----------------------
diff --git a/docs/modules/representation/extrapolation.rst b/docs/modules/representation/extrapolation.rst index d1ab136e1..0736e883a 100644 --- a/docs/modules/representation/extrapolation.rst +++ b/docs/modules/representation/extrapolation.rst
@@ -22,11 +22,10 @@ The following classes are used to define common methods of extrapolation. Custom Extrapolation -------------------- -Custom extrapolators could be done subclassing :class:`EvaluatorConstructor -`. +Custom extrapolators can be defined by subclassing :class:`Evaluator +`. .. autosummary:: :toctree: autosummary - skfda.representation.evaluator.EvaluatorConstructor skfda.representation.evaluator.Evaluator
diff --git a/examples/plot_boxplot.py b/examples/plot_boxplot.py index 0e37b0186..2130824d2 100644 --- a/examples/plot_boxplot.py +++ b/examples/plot_boxplot.py
@@ -38,9 +38,9 @@ nlabels = len(label_names) label_colors = colormap(np.arange(nlabels) / (nlabels - 1)) -fd_temperatures.plot(sample_labels=dataset["target"], - label_colors=label_colors, - label_names=label_names) +fd_temperatures.plot(group=dataset["target"], + group_colors=label_colors, + group_names=label_names) ##############################################################################
@@ -70,9 +70,9 @@ color = 0.3 outliercol = 0.7 -fd_temperatures.plot(sample_labels=fdBoxplot.outliers.astype(int), - label_colors=colormap([color, outliercol]), - label_names=["nonoutliers", "outliers"]) +fd_temperatures.plot(group=fdBoxplot.outliers.astype(int), + group_colors=colormap([color, outliercol]), + group_names=["nonoutliers", "outliers"]) ############################################################################## # The curves pointed as outliers are those curves with significantly lower
diff --git a/examples/plot_clustering.py b/examples/plot_clustering.py index 537c1e264..a4f87b57c 100644 --- a/examples/plot_clustering.py +++ b/examples/plot_clustering.py
@@ -17,7 +17,7 @@ from skfda import datasets from skfda.exploratory.visualization.clustering import ( plot_clusters, plot_cluster_lines, plot_cluster_bars) -from skfda.ml.clustering.base_kmeans import KMeans, FuzzyKMeans +from skfda.ml.clustering import KMeans, FuzzyCMeans ##############################################################################
@@ -58,8 +58,8 @@ n_climates = len(climates) climate_colors = colormap(np.arange(n_climates) / (n_climates - 1)) -fd.plot(sample_labels=indexer, label_colors=climate_colors, - label_names=climates) +fd.plot(group=indexer, group_colors=climate_colors, + group_names=climates) ############################################################################## # The number of clusters is set with the number 
of climates, in order to see
@@ -96,17 +96,17 @@ ############################################################################## # Another clustering algorithm implemented is the Fuzzy C-Means, found in the -# class :class:`~skfda.ml.clustering.FuzzyKMeans`. Following the +# class :class:`~skfda.ml.clustering.FuzzyCMeans`. Following the # above procedure, an object of this type is instantiated with the desired # data and then, the -# :func:`~skfda.ml.clustering.FuzzyKMeans.fit` method is called. +# :func:`~skfda.ml.clustering.FuzzyCMeans.fit` method is called. # Internally, the attribute ``labels_`` is calculated, which contains # ``n_clusters`` elements for each sample and dimension, denoting the degree of # membership of each sample to each cluster. They are obtained calling the -# method :func:`~skfda.ml.clustering.FuzzyKMeans.predict`. Also, the centroids +# method :func:`~skfda.ml.clustering.FuzzyCMeans.predict`. Also, the centroids # of each cluster are obtained. -fuzzy_kmeans = FuzzyKMeans(n_clusters=n_clusters, random_state=seed) +fuzzy_kmeans = FuzzyCMeans(n_clusters=n_clusters, random_state=seed) fuzzy_kmeans.fit(fd) print(fuzzy_kmeans.predict(fd))
@@ -121,7 +121,7 @@ ############################################################################## # Another plot implemented to show the results in the class -# :class:`~skfda.ml.clustering.FuzzyKMeans` is +# :class:`~skfda.ml.clustering.FuzzyCMeans` is # :func:`~skfda.exploratory.visualization.clustering_plots.plot_cluster_lines` # which is similar to parallel coordinates. It is recommended to assign colors # to each of the samples in order to identify them. In this example, the
diff --git a/examples/plot_composition.py b/examples/plot_composition.py index b390bfc70..ff9b33566 100644 --- a/examples/plot_composition.py +++ b/examples/plot_composition.py
@@ -10,10 +10,11 @@ # sphinx_gallery_thumbnail_number = 3 +import skfda + from mpl_toolkits.mplot3d import axes3d import numpy as np -import skfda ##############################################################################
@@ -42,7 +43,7 @@ g = skfda.FDataGrid(data_matrix, sample_points) # Sets cubic interpolation -g.interpolator = skfda.representation.interpolation.SplineInterpolator( +g.interpolation = skfda.representation.interpolation.SplineInterpolation( interpolation_order=3) # Plots the surface
@@ -77,7 +78,7 @@ # Plots path along the surface path = f(t)[0] -fig.axes[0].plot(path[:, 0], path[:, 1], gof(t)[0], color="orange") +fig.axes[0].plot(path[:, 0], path[:, 1], gof(t)[0, ..., 0], color="orange") fig
diff --git a/examples/plot_discrete_representation.py b/examples/plot_discrete_representation.py index 143eb4644..47e6afb80 100644 --- a/examples/plot_discrete_representation.py +++ b/examples/plot_discrete_representation.py
@@ -10,9 +10,10 @@ # sphinx_gallery_thumbnail_number = 2 -import numpy as np from skfda import FDataGrid +import numpy as np + ############################################################################## # We will construct a dataset containing several sinusoidal functions with
@@ -28,8 +29,9 @@ # that are measured at the same points. 
fd = FDataGrid(data, sample_points, - dataset_label='Sinusoidal curves', - axes_labels=['t', 'x(t)']) + dataset_name='Sinusoidal curves', + argument_names=['t'], + coordinate_names=['x(t)']) fd = fd[:5]
diff --git a/examples/plot_elastic_registration.py b/examples/plot_elastic_registration.py index 222c6be65..1688126ad 100644 --- a/examples/plot_elastic_registration.py +++ b/examples/plot_elastic_registration.py
@@ -10,13 +10,17 @@ # sphinx_gallery_thumbnail_number = 5 -import numpy as np import skfda +from skfda.datasets import make_multimodal_samples, fetch_growth +from skfda.preprocessing.registration import ElasticRegistration +from skfda.preprocessing.registration.elastic import elastic_mean + +import numpy as np ############################################################################## # In the pairwise alignment example it was shown how to use -# :func:`~skfda.preprocessing.registration.elastic_registration` to align +# :class:`~skfda.preprocessing.registration.ElasticRegistration` to align # a set of functional observations to a given template or a set of templates. # # In the groupwise alignment all the samples are aligned to the same template,
@@ -28,12 +32,12 @@ # We will create a synthetic dataset to show the basic usage of the # registration. # -fd = skfda.datasets.make_multimodal_samples(n_modes=2, stop=4, random_state=1) +fd = make_multimodal_samples(n_modes=2, stop=4, random_state=1) fd.plot() ############################################################################### # The following figure shows the -# :func:`~skfda.preprocessing.registration.elastic_mean` of the +# :func:`~skfda.preprocessing.registration.elastic.elastic_mean` of the # dataset and the cross-sectional mean, which corresponds to the Karcher mean # under the :math:`\mathbb{L}^2` distance. #
@@ -41,17 +45,18 @@ # curves compared to the standard mean, since it is not affected by the # deformations of the curves. + fig = fd.mean().plot(label="L2 mean") -skfda.preprocessing.registration.elastic_mean( - fd).plot(fig=fig, label="Elastic mean") +elastic_mean(fd).plot(fig=fig, label="Elastic mean") fig.legend() -fig ############################################################################## # In this case, the alignment completely reduces the amplitude variability # between the samples, aligning the maximum points correctly. 
-fd_align = skfda.preprocessing.registration.elastic_registration(fd) +elastic_registration = ElasticRegistration() + +fd_align = elastic_registration.fit_transform(fd) fd_align.plot() @@ -66,13 +71,13 @@ # # First we show the original curves: -growth = skfda.datasets.fetch_growth() +growth = fetch_growth() # Select only one sex fd = growth['data'][growth['target'] == 0] # Obtain velocity curves -fd.interpolator = skfda.representation.interpolation.SplineInterpolator(3) +fd.interpolation = skfda.representation.interpolation.SplineInterpolation(3) fd = fd.to_grid(np.linspace(*fd.domain_range[0], 200)).derivative() fd = fd.to_grid(np.linspace(*fd.domain_range[0], 50)) fd.plot() @@ -80,8 +85,8 @@ ############################################################################## # We now show the aligned curves: -fd_align = skfda.preprocessing.registration.elastic_registration(fd) -fd_align.dataset_label += " - aligned" +fd_align = elastic_registration.fit_transform(fd) +fd_align.dataset_name += " - aligned" fd_align.plot() diff --git a/examples/plot_explore.py b/examples/plot_explore.py index 0b86c21db..035d502b5 100644 --- a/examples/plot_explore.py +++ b/examples/plot_explore.py @@ -9,9 +9,10 @@ # Author: Miguel Carbajo Berrocal # License: MIT -import numpy as np import skfda +import numpy as np + ############################################################################## # In this example we are going to explore the functional properties of the @@ -32,12 +33,13 @@ # the rest. low_fat = fat < 20 -labels = np.zeros(fd.n_samples, dtype=int) -labels[low_fat] = 1 -colors = ['red', 'blue'] +labels = np.full(fd.n_samples, 'high fat') +labels[low_fat] = 'low fat' +colors = {'high fat': 'red', + 'low fat': 'blue'} -fig = fd.plot(sample_labels=labels, label_colors=colors, - linewidth=0.5, alpha=0.7) +fig = fd.plot(group=labels, group_colors=colors, + linewidth=0.5, alpha=0.7, legend=True) ############################################################################## # The means of each group are the following ones. 
@@ -47,9 +49,9 @@ means = mean_high.concatenate(mean_low) -means.dataset_label = fd.dataset_label + ' - means' -means.plot(sample_labels=[0, 1], label_colors=colors, - linewidth=0.5) +means.dataset_name = fd.dataset_name + ' - means' +means.plot(group=['high fat', 'low fat'], group_colors=colors, + linewidth=0.5, legend=True) ############################################################################## # In this dataset, the vertical shift in the original trajectories is not
@@ -59,12 +61,12 @@ # # The first derivative is shown below: -fdd = fd.derivative(1) -fig = fdd.plot(sample_labels=labels, label_colors=colors, - linewidth=0.5, alpha=0.7) +fdd = fd.derivative() +fig = fdd.plot(group=labels, group_colors=colors, + linewidth=0.5, alpha=0.7, legend=True) ############################################################################## # We now show the second derivative: -fdd = fd.derivative(2) -fig = fdd.plot(sample_labels=labels, label_colors=colors, - linewidth=0.5, alpha=0.7) +fdd = fd.derivative(order=2) +fig = fdd.plot(group=labels, group_colors=colors, + linewidth=0.5, alpha=0.7, legend=True)
diff --git a/examples/plot_extrapolation.py b/examples/plot_extrapolation.py index 1bde81622..afab1caf4 100644 --- a/examples/plot_extrapolation.py +++ b/examples/plot_extrapolation.py
@@ -10,11 +10,12 @@ # sphinx_gallery_thumbnail_number = 2 +import skfda + import mpl_toolkits.mplot3d import matplotlib.pyplot as plt import numpy as np -import skfda ##############################################################################
@@ -41,16 +42,16 @@ # fdgrid = skfda.datasets.make_sinusoidal_process( n_samples=2, error_std=0, random_state=0) -fdgrid.dataset_label = "Grid" +fdgrid.dataset_name = "Grid" fd_fourier = fdgrid.to_basis(skfda.representation.basis.Fourier()) -fd_fourier.dataset_label = "Fourier Basis" +fd_fourier.dataset_name = "Fourier Basis" fd_monomial = fdgrid.to_basis(skfda.representation.basis.Monomial(n_basis=5)) -fd_monomial.dataset_label = "Monomial Basis" +fd_monomial.dataset_name = "Monomial Basis" fd_bspline = fdgrid.to_basis(skfda.representation.basis.BSpline(n_basis=5)) -fd_bspline.dataset_label = "BSpline Basis" +fd_bspline.dataset_name = "BSpline Basis" # Plot of different representations
@@ -65,7 +66,7 @@ ax[0][1].set_xticks([]) # Clear title for next plots -fdgrid.dataset_label = "" +fdgrid.dataset_name = "" ##############################################################################
@@ -120,11 +121,11 @@ t = np.linspace(*domain_extended) fig = plt.figure() -fdgrid.dataset_label = "Periodic extrapolation" +fdgrid.dataset_name = "Periodic extrapolation" # Evaluation of the grid # Extrapolation supplied in the evaluation -values = fdgrid(t, extrapolation="periodic") +values = fdgrid(t, extrapolation="periodic")[..., 0] plt.plot(t, values.T, linestyle='--')
@@ -140,13 +141,13 @@ # fig = plt.figure() -fdgrid.dataset_label = "Boundary extrapolation" +fdgrid.dataset_name = "Boundary extrapolation" # Other way to call the extrapolation, changing the default value fdgrid.extrapolation = "bounds" # Evaluation of the grid -values = fdgrid(t) +values = fdgrid(t)[..., 0] plt.plot(t, values.T, linestyle='--') plt.gca().set_prop_cycle(None) # Reset color cycle
@@ -162,7 +163,7 @@ # ``extrapolation=FillExtrapolation(0)``. 
# -fdgrid.dataset_label = "Fill with zeros" +fdgrid.dataset_name = "Fill with zeros" # Evaluation of the grid filling with zeros fdgrid.extrapolation = "zeros"
@@ -218,7 +219,7 @@ T, S = np.meshgrid(t, t) -ax.plot_wireframe(T, S, values[0], alpha=.3, color="C0") +ax.plot_wireframe(T, S, values[0, ..., 0], alpha=.3, color="C0") ax.plot_surface(X, Y, Z, color="C0") ###############################################################################
@@ -231,7 +232,7 @@ fig = plt.figure() ax = fig.add_subplot(111, projection='3d') -ax.plot_wireframe(T, S, values[0], alpha=.3, color="C0") +ax.plot_wireframe(T, S, values[0, ..., 0], alpha=.3, color="C0") ax.plot_surface(X, Y, Z, color="C0") ###############################################################################
@@ -243,5 +244,5 @@ fig = plt.figure() ax = fig.add_subplot(111, projection='3d') -ax.plot_wireframe(T, S, values[0], alpha=.3, color="C0") +ax.plot_wireframe(T, S, values[0, ..., 0], alpha=.3, color="C0") ax.plot_surface(X, Y, Z, color="C0")
diff --git a/examples/plot_fpca.py b/examples/plot_fpca.py new file mode 100644 index 000000000..460a1db7c --- /dev/null +++ b/examples/plot_fpca.py
@@ -0,0 +1,108 @@ +""" +Functional Principal Component Analysis +======================================= + +Explores the two possible ways to do functional principal component analysis. +""" + +# Author: Yujian Hong +# License: MIT + +import skfda +from skfda.datasets import fetch_growth +from skfda.exploratory.visualization import plot_fpca_perturbation_graphs +from skfda.preprocessing.dim_reduction.projection import FPCA +from skfda.representation.basis import BSpline, Fourier, Monomial + +import matplotlib.pyplot as plt +import numpy as np + + +############################################################################## +# In this example we are going to use functional principal component analysis +# to explore a dataset and obtain conclusions about it using this +# technique. +# +# First we are going to fetch the Berkeley Growth Study data. This dataset +# corresponds to the height of several boys and girls measured from birth to +# when they are 18 years old. The number and time of the measurements are the +# same for each individual. To better understand the data we plot it. +dataset = skfda.datasets.fetch_growth() +fd = dataset['data'] +y = dataset['target'] +fd.plot() + +############################################################################## +# FPCA can be done in two ways. The first way is to operate directly with the +# raw data. We call it discretized FPCA as the functional data in this case +# consists of finite values dispersed over points in a domain range. +# We initialize and set up the FPCA object and run the fit method to +# obtain the first two components. By default, if we do not specify the number +# of components, it is 3. Other parameters are weights and centering. For more +# information please visit the documentation. +fpca_discretized = FPCA(n_components=2) +fpca_discretized.fit(fd) +fpca_discretized.components_.plot() + +############################################################################## +# In the second case, the data is first converted to use a basis representation +# and the FPCA is done with the basis representation of the original data. +# We obtain the same dataset again and transform the data to a basis +# representation. This is because the FPCA module modifies the original data. +# We also plot the data for better visual representation. 
+dataset = fetch_growth() +fd = dataset['data'] +basis = skfda.representation.basis.BSpline(n_basis=7) +basis_fd = fd.to_basis(basis) +basis_fd.plot() + +############################################################################## +# We initialize the FPCA object and run the fit function to obtain the +# first 2 principal components. By default the principal components are +# expressed in the same basis as the data. We can see that the obtained result +# is similar to the discretized case. +fpca = FPCA(n_components=2) +fpca.fit(basis_fd) +fpca.components_.plot() + +############################################################################## +# To better illustrate the effects of the obtained two principal components, +# we add and subtract a multiple of the components to the mean function. +# We can now observe that the first principal component represents the +# variation in the mean growth between the children. +# The second component is more interesting. The most appropriate explanation is +# that it represents the differences between girls and boys. Girls tend to grow +# faster at an early age and boys tend to start puberty later, therefore, their +# growth is more significant later. Girls also stop growing earlier. + +plot_fpca_perturbation_graphs(basis_fd.mean(), + fpca.components_, + 30, + fig=plt.figure(figsize=(6, 2 * 4))) + +############################################################################## +# We can also specify another basis for the principal components as an argument +# when creating the FPCA object. For example, if we use the Fourier basis +# for the obtained principal components we can see that the components are +# periodic. This example is only to illustrate the effect. In this dataset, as +# the functions are not periodic it does not make sense to use the Fourier +# basis. +dataset = fetch_growth() +fd = dataset['data'] +basis_fd = fd.to_basis(BSpline(n_basis=7)) +fpca = FPCA(n_components=2, components_basis=Fourier(n_basis=7)) +fpca.fit(basis_fd) +fpca.components_.plot() + +############################################################################## +# We can observe that if we switch to the Monomial basis, we also lose the +# key features of the first principal components because it distorts the +# principal components, adding extra maxima and minima. Therefore, in this +# case the best option is to use the BSpline basis as the basis for the +# principal components. +dataset = fetch_growth() +fd = dataset['data'] +basis_fd = fd.to_basis(BSpline(n_basis=7)) +fpca = FPCA(n_components=2, components_basis=Monomial(n_basis=4)) +fpca.fit(basis_fd) +fpca.components_.plot()
diff --git a/examples/plot_interpolation.py b/examples/plot_interpolation.py index 686c3f627..1c3abf7fc 100644 --- a/examples/plot_interpolation.py +++ b/examples/plot_interpolation.py
@@ -11,12 +11,13 @@ # sphinx_gallery_thumbnail_number = 3 +import skfda +from skfda.representation.interpolation import SplineInterpolation + from mpl_toolkits.mplot3d import axes3d import matplotlib.pyplot as plt import numpy as np -import skfda -from skfda.representation.interpolation import SplineInterpolator ##############################################################################
@@ -44,14 +45,14 @@ ############################################################################## # The interpolation method of the FDataGrid could be changed setting the -# attribute ``interpolator``. Once we have set an interpolator it is used for +# attribute ``interpolation``. 
Once we have set an interpolation it is used for # the evaluation of the object. # -# Polynomial spline interpolation could be performed using the interpolator -# :class:`~skfda.representation.interpolation.SplineInterpolator`. In the -# following example a cubic interpolator is set. +# Polynomial spline interpolation can be performed using the interpolation +# :class:`~skfda.representation.interpolation.SplineInterpolation`. In the +# following example a cubic interpolation is set. -fd.interpolator = SplineInterpolator(interpolation_order=3) +fd.interpolation = SplineInterpolation(interpolation_order=3) fig = fd.plot() fd.scatter(fig=fig)
@@ -59,63 +60,27 @@ ############################################################################## # Smooth interpolation could be performed with the attribute -# ``smoothness_parameter`` of the spline interpolator. +# ``smoothness_parameter`` of the spline interpolation. # # Sample with noise fd_smooth = skfda.datasets.make_sinusoidal_process(n_samples=1, n_features=30, random_state=1, error_std=.3) -# Cubic interpolator -fd_smooth.interpolator = SplineInterpolator(interpolation_order=3) +# Cubic interpolation +fd_smooth.interpolation = SplineInterpolation(interpolation_order=3) fig = fd_smooth.plot(label="Cubic") # Smooth interpolation -fd_smooth.interpolator = SplineInterpolator(interpolation_order=3, - smoothness_parameter=1.5) +fd_smooth.interpolation = SplineInterpolation(interpolation_order=3, + smoothness_parameter=1.5) fd_smooth.plot(fig=fig, label="Cubic smoothed") fd_smooth.scatter(fig=fig) fig.legend() - -############################################################################## -# It is possible to evaluate derivatives of the FDatagrid, -# but due to the fact that interpolation is performed first, the interpolation -# loses one degree for each order of derivation. In the next example, it is -# shown the first derivative of a sample using interpolation with different -# degrees. -# - -fd = fd[1] - -fig = plt.figure() -fig.add_subplot(1, 1, 1) - -for i in range(1, 4): - fd.interpolator = SplineInterpolator(interpolation_order=i) - fd.plot(fig=fig, derivative=1, label=f"Degree {i}") - -fig.legend() - -############################################################################## -# FDataGrids can be differentiate using lagged differences with the -# method :func:`~skfda.representation.grid.FDataGrid.derivative`, creating -# another FDataGrid which could be interpolated in order to avoid -# interpolating before differentiating. -# - -fd_derivative = fd.derivative() - -fig = fd_derivative.plot(label="Differentiation first") -fd_derivative.scatter(fig=fig) - -fd.plot(fig=fig, derivative=1, label="Interpolation first") - -fig.legend() - ############################################################################## # Sometimes our samples are required to be monotone, in these cases it is # possible to use monotone cubic interpolation with the attribute
@@ -123,6 +88,7 @@ # will be used. 
# +fd = fd[1] fd_monotone = fd.copy(data_matrix=np.sort(fd.data_matrix, axis=1))
@@ -130,15 +96,15 @@ fig = fd_monotone.plot(linestyle='--', label="cubic") -fd_monotone.interpolator = SplineInterpolator(interpolation_order=3, - monotone=True) +fd_monotone.interpolation = SplineInterpolation(interpolation_order=3, + monotone=True) fd_monotone.plot(fig=fig, label="PCHIP") fd_monotone.scatter(fig=fig, c='C1') fig.legend() ############################################################################## -# All the interpolators will work regardless of the dimension of the image, but +# All the interpolation methods will work regardless of the dimension of the image, but # depending on the domain dimension some methods will not be available. # # For the next examples a surface is constructed, :math:`x_i: \mathbb{R}^2
@@ -160,38 +126,28 @@ # The following figure shows the result of the cubic interpolation # applied to the surface. # -# The degree of the interpolator polynomial does not have to coincide in both +# The degree of the interpolation polynomial does not have to coincide in both # directions, for example, cubic interpolation in the first # component and quadratic in the second one could be defined using a tuple with # the values (3,2). # -fd.interpolator = SplineInterpolator(interpolation_order=3) +fd.interpolation = SplineInterpolation(interpolation_order=3) fig = fd.plot() fd.scatter(fig=fig) -############################################################################## -# In case of surface derivatives could be taked in two directions, for this -# reason a tuple with the order of derivates in each direction could be passed. -# Let :math:`x(t,s)` be the surface, in the following example it is shown the -# derivative with respect to the second coordinate, :math:`\frac{\partial} -# {\partial s}x(t,s)`. - -fd.plot(derivative=(0, 1)) - ############################################################################## # The following table shows the interpolation methods available by the class -# :class:`SplineInterpolator` depending on the domain dimension. +# :class:`SplineInterpolation` depending on the domain dimension. 
# -# +------------------+--------+----------------+----------+-------------+-------------+ -# | Domain dimension | Linear | Up to degree 5 | Monotone | Derivatives | Smoothing | -# +==================+========+================+==========+=============+=============+ -# | 1 | ✔ | ✔ | ✔ | ✔ | ✔ | -# +------------------+--------+----------------+----------+-------------+-------------+ -# | 2 | ✔ | ✔ | ✖ | ✔ | ✔ | -# +------------------+--------+----------------+----------+-------------+-------------+ -# | 3 or more | ✔ | ✖ | ✖ | ✖ | ✖ | -# +------------------+--------+----------------+----------+-------------+-------------+ +# +------------------+--------+----------------+----------+-------------+ +# | Domain dimension | Linear | Up to degree 5 | Monotone | Smoothing | +# +==================+========+================+==========+=============+ +# | 1 | ✔ | ✔ | ✔ | ✔ | +# +------------------+--------+----------------+----------+-------------+ +# | 2 | ✔ | ✔ | ✖ | ✔ | +# +------------------+--------+----------------+----------+-------------+ +# | 3 or more | ✔ | ✖ | ✖ | ✖ | +# +------------------+--------+----------------+----------+-------------+ # diff --git a/examples/plot_k_neighbors_classification.py b/examples/plot_k_neighbors_classification.py index 1b4d2d415..296273928 100644 --- a/examples/plot_k_neighbors_classification.py +++ b/examples/plot_k_neighbors_classification.py @@ -37,7 +37,7 @@ class_names = data['target_names'] # Plot samples grouped by sex -X.plot(sample_labels=y, label_names=class_names, label_colors=['C0', 'C1']) +X.plot(group=y, group_names=class_names) ############################################################################## diff --git a/examples/plot_landmark_shift.py b/examples/plot_landmark_shift.py index c38961244..bc1d47f84 100644 --- a/examples/plot_landmark_shift.py +++ b/examples/plot_landmark_shift.py @@ -34,7 +34,7 @@ # associate with a specific argument value t. These are typically maxima, # minima, or zero crossings of curves, and may be identified at the level of # some derivatives as well as at the level of the curves themselves -# [RaSi2005]_. +# [RaSi2005-2]_. # # For alignment we need to know in advance the location of the landmark of # each of the samples, in our case it will correspond to the maxima of each @@ -126,6 +126,6 @@ plt.show() ############################################################################### -# .. [RaSi2005] Ramsay, J., Silverman, B. W. (2005). Functional Data Analysis. +# .. [RaSi2005-2] Ramsay, J., Silverman, B. W. (2005). Functional Data Analysis. # Springer. # diff --git a/examples/plot_magnitude_shape.py b/examples/plot_magnitude_shape.py index 15c791c61..bda1e6f04 100644 --- a/examples/plot_magnitude_shape.py +++ b/examples/plot_magnitude_shape.py @@ -37,9 +37,9 @@ nlabels = len(label_names) label_colors = colormap(np.arange(nlabels) / (nlabels - 1)) -fd_temperatures.plot(sample_labels=dataset["target"], - label_colors=label_colors, - label_names=label_names) +fd_temperatures.plot(group=dataset["target"], + group_colors=label_colors, + group_names=label_names) ############################################################################## # The MS-Plot is generated. In order to show the results, the @@ -62,9 +62,9 @@ # To show the utility of the plot, the curves are plotted according to the # distinction made by the MS-Plot (outliers or not) with the same colors. 
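# A minimal sketch of the renamed grouping keywords used in the hunks above
# and below (``group``, ``group_names`` and ``group_colors`` come from this
# patch; the dataset, the four climate-zone labels and the colors here are
# only illustrative):

import skfda

dataset = skfda.datasets.fetch_weather()
fd_temp = dataset['data'].coordinates[0]

# One label per sample; names and colors are indexed by the label values
fd_temp.plot(group=dataset['target'],
             group_names=dataset['target_names'],
             group_colors=['C0', 'C1', 'C2', 'C3'])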
-fd_temperatures.plot(sample_labels=msplot.outliers.astype(int),
-                     label_colors=msplot.colormap([color, outliercol]),
-                     label_names=['nonoutliers', 'outliers'])
+fd_temperatures.plot(group=msplot.outliers.astype(int),
+                     group_colors=msplot.colormap([color, outliercol]),
+                     group_names=['nonoutliers', 'outliers'])

##############################################################################
# We can observe that most of the curves pointed as outliers belong either to
@@ -118,5 +118,5 @@

##############################################################################
# We now plot the curves with their corresponding color:
-fd_temperatures.plot(sample_labels=labels,
-                     label_colors=colormap([color, outliercol, 0.9]))
+fd_temperatures.plot(group=labels,
+                     group_colors=colormap([color, outliercol, 0.9]))
diff --git a/examples/plot_magnitude_shape_synthetic.py b/examples/plot_magnitude_shape_synthetic.py
index 7f2b18725..4242bc9f5 100644
--- a/examples/plot_magnitude_shape_synthetic.py
+++ b/examples/plot_magnitude_shape_synthetic.py
@@ -76,8 +76,8 @@
# The data is plotted to show the curves we are working with.
labels = [0] * n_samples + [1] * 6

-fd.plot(sample_labels=labels,
-        label_colors=['lightgrey', 'black'])
+fd.plot(group=labels,
+        group_colors=['lightgrey', 'black'])

##############################################################################
# The MS-Plot is generated. In order to show the results, the
@@ -96,8 +96,8 @@
colors = ['lightgrey', 'orange', 'blue', 'black',
          'green', 'brown', 'lightblue']

-fd.plot(sample_labels=labels,
-        label_colors=colors)
+fd.plot(group=labels,
+        group_colors=colors)

##############################################################################
# We now show the points in the MS-plot using the same colors
diff --git a/examples/plot_oneway.py b/examples/plot_oneway.py
new file mode 100644
index 000000000..06d8b68b1
--- /dev/null
+++ b/examples/plot_oneway.py
@@ -0,0 +1,118 @@
+"""
+One-way functional ANOVA with real data
+=======================================
+
+This example shows how to perform a functional one-way ANOVA test using a
+real dataset.
+"""
+
+# Author: David García Fernández
+# License: MIT
+
+# sphinx_gallery_thumbnail_number = 4
+
+import skfda
+from skfda.inference.anova import oneway_anova
+from skfda.representation import FDataGrid, FDataBasis
+from skfda.representation.basis import Fourier
+
+################################################################################
+# *One-way ANOVA* (analysis of variance) is a test that can be used to
+# compare the means of different samples of data.
+# Let :math:`X_{ij}(t), j=1, \dots, n_i` be trajectories corresponding to
+# :math:`k` independent samples :math:`(i=1,\dots,k)` and let :math:`E(X_i(t)) =
+# m_i(t)`. Thus, the null hypothesis in the statistical test is:
+#
+# .. math::
+#     H_0: m_1(t) = \dots = m_k(t)
+#
+# To illustrate this functionality we are going to explore the data available
+# in the GAIT dataset from the *fda* R library. This dataset compiles a set of
+# angles of hips and knees from 39 different boys in a 20-point movement cycle.
+dataset = skfda.datasets.fetch_gait()
+fd_hip = dataset['data'].coordinates[0]
+fd_knee = dataset['data'].coordinates[1].to_basis(Fourier(n_basis=10))
+
+################################################################################
+# Let's start with the first feature, the angle of the hip. The sample
+# consists of 39 different trajectories, each representing the movement of the
+# hip of each of the boys studied.
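# As a side note, the basis representation built above can be evaluated back
# onto a grid whenever a discretized version is needed; a short sketch (the
# evaluation grid below is simply the one of the hip curves):

# Back to a discretized representation, evaluated on the original points
fd_knee_grid = fd_knee.to_grid(fd_hip.sample_points[0])

# And again to a (smoother) basis representation with fewer basis functions
fd_knee_smooth = fd_knee_grid.to_basis(Fourier(n_basis=5))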
+fig = fd_hip.plot()
+
+###############################################################################
+# The example is going to be divided into three different groups. Then we are
+# going to apply the ANOVA procedure to these groups to test whether the means
+# of these three groups are equal or not.
+
+fd_hip1 = fd_hip[0:13]
+fd_hip2 = fd_hip[13:26]
+fd_hip3 = fd_hip[26:39]
+fd_hip.plot(group=[0 if i < 13 else 1 if i < 26 else 2 for i in range(39)])
+
+means = [fd_hip1.mean(), fd_hip2.mean(), fd_hip3.mean()]
+fd_means = skfda.concatenate(means)
+fig = fd_means.plot()
+
+###############################################################################
+# At this point it is time to perform the *ANOVA* test. This functionality is
+# implemented in the function :func:`~skfda.inference.anova.oneway_anova`. As
+# it is an asymptotic method, it is possible to set the number of simulations
+# used to approximate the distribution of the statistic. It is also possible
+# to set the :math:`p` of the :math:`L_p` norm used in the calculations
+# (defaults to 2).
+
+v_n, p_val = oneway_anova(fd_hip1, fd_hip2, fd_hip3)
+
+################################################################################
+# The function returns first the statistic
+# :func:`~skfda.inference.anova.v_sample_stat`, used to measure the variability
+# between groups, and second the *p-value* of the test. For further information
+# visit :func:`~skfda.inference.anova.oneway_anova` and [1].
+
+print('Statistic: ', v_n)
+print('p-value: ', p_val)
+
+################################################################################
+# This was the simplest way to call this function. Let's see another example,
+# this time using the knee angles, with the data in basis representation.
+fig = fd_knee.plot()
+
+################################################################################
+# The same procedure as before is followed to prepare the data.
+
+fd_knee1 = fd_knee[0:13]
+fd_knee2 = fd_knee[13:26]
+fd_knee3 = fd_knee[26:39]
+fd_knee.plot(group=[0 if i < 13 else 1 if i < 26 else 2 for i in range(39)])
+
+means = [fd_knee1.mean(), fd_knee2.mean(), fd_knee3.mean()]
+fd_means = skfda.concatenate(means)
+fig = fd_means.plot()
+
+################################################################################
+# In this case the optional arguments of the function are going to be set.
+# First, there is an `n_reps` parameter, which allows the user to select the
+# number of simulations to perform in the asymptotic procedure of the test
+# (see :func:`~skfda.inference.anova.oneway_anova`); it defaults to 2000.
+#
+# Also there is a `p` parameter to choose the :math:`p` of the
+# :math:`L_p` norm used in the calculations (defaults to 2).
+#
+# Finally we can set to True the flag `return_dist`, which allows the function
+# to return a third value. This third return value corresponds to the
+# sampling distribution of the statistic, which is compared with the first
+# return value to get the *p-value*.
+
+v_n, p_val, dist = oneway_anova(fd_knee1, fd_knee2, fd_knee3, n_reps=1500,
+                                return_dist=True)
+
+print('Statistic: ', v_n)
+print('p-value: ', p_val)
+print('Distribution: ', dist)
+
+################################################################################
+# **References:**
+#
+# [1] Antonio Cuevas, Manuel Febrero-Bande, and Ricardo Fraiman. "An anova test
+# for functional data".
*Computational Statistics & Data Analysis*,
+# 47:111-122, 02 2004
diff --git a/examples/plot_oneway_synthetic.py b/examples/plot_oneway_synthetic.py
new file mode 100644
index 000000000..2d210d08a
--- /dev/null
+++ b/examples/plot_oneway_synthetic.py
@@ -0,0 +1,135 @@
+"""
+One-way functional ANOVA with synthetic data
+============================================
+
+This example shows how to perform a functional one-way ANOVA test with
+synthetic data.
+"""
+
+# Author: David García Fernández
+# License: MIT
+
+
+from skfda.datasets import make_gaussian_process
+from skfda.inference.anova import oneway_anova
+from skfda.misc.covariances import WhiteNoise
+from skfda.representation import FDataGrid
+
+import numpy as np
+
+
+##########################################################################
+# *One-way ANOVA* (analysis of variance) is a test that can be used to
+# compare the means of different samples of data.
+# Let :math:`X_{ij}(t), j=1, \dots, n_i` be trajectories corresponding to
+# :math:`k` independent samples :math:`(i=1,\dots,k)` and let :math:`E(X_i(t)) =
+# m_i(t)`. Thus, the null hypothesis in the statistical test is:
+#
+# .. math::
+#     H_0: m_1(t) = \dots = m_k(t)
+#
+# In this example we will explain the nature of the ANOVA method and its
+# behavior under certain conditions, simulating data. Specifically, we will
+# generate three different trajectories and, for each one, we will simulate a
+# stochastic process by adding white noise to it. The main objective of the
+# example is to illustrate the differences in the results of the ANOVA method
+# when the covariance function of the Gaussian processes changes.
+##########################################################################
+# First, the means for the future processes are drawn.
+n_samples = 10
+n_features = 100
+n_groups = 3
+start = 0
+stop = 1
+
+t = np.linspace(start, stop, n_features)
+
+m1 = t * (1 - t) ** 5
+m2 = t ** 2 * (1 - t) ** 4
+m3 = t ** 3 * (1 - t) ** 3
+
+_ = FDataGrid([m1, m2, m3],
+              dataset_name="Means to be used in the simulation").plot()
+
+##########################################################################
+# A total of `n_samples` trajectories will be created for each mean, so an
+# array of labels is created to identify them when plotting.
+
+groups = np.full(n_samples * n_groups, 'Sample 1')
+groups[10:20] = 'Sample 2'
+groups[20:] = 'Sample 3'
+
+###############################################################################
+# The first simulation uses a low value, :math:`\sigma^2 = 0.01`. In this case
+# the differences between the means of each group should be clear, and the
+# p-value for the test should be close to zero.
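# The WhiteNoise covariance used below amounts to adding independent Gaussian
# noise at each grid point; a hand-rolled equivalent (illustrative only, not
# the library implementation) could be sketched as:

def make_noisy_process(mean, sigma2, n_samples, random_state):
    # mean + white noise of variance sigma2, one simulated sample per row
    rng = np.random.RandomState(random_state)
    noise = rng.normal(scale=np.sqrt(sigma2), size=(n_samples, len(mean)))
    return FDataGrid(mean + noise, t)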
+
+sigma2 = 0.01
+cov = WhiteNoise(variance=sigma2)
+
+fd1 = make_gaussian_process(n_samples, mean=m1, cov=cov,
+                            n_features=n_features, random_state=1, start=start,
+                            stop=stop)
+fd2 = make_gaussian_process(n_samples, mean=m2, cov=cov,
+                            n_features=n_features, random_state=2, start=start,
+                            stop=stop)
+fd3 = make_gaussian_process(n_samples, mean=m3, cov=cov,
+                            n_features=n_features, random_state=3, start=start,
+                            stop=stop)
+stat, p_val = oneway_anova(fd1, fd2, fd3, random_state=4)
+print("Statistic: {:.3f}".format(stat))
+print("p-value: {:.3f}".format(p_val))
+
+
+##########################################################################
+# In the following, the same process will be repeated, increasing the value of
+# sigma; this way the differences between the averages of each group will be
+# smaller and the p-values will increase (the null hypothesis will be harder
+# to reject).
+
+##########################################################################
+# Results for :math:`\sigma^2 = 0.1`:
+sigma2 = 0.1
+cov = WhiteNoise(variance=sigma2)
+
+fd1 = make_gaussian_process(n_samples, mean=m1, cov=cov,
+                            n_features=n_features, random_state=1, start=t[0],
+                            stop=t[-1])
+fd2 = make_gaussian_process(n_samples, mean=m2, cov=cov,
+                            n_features=n_features, random_state=2, start=t[0],
+                            stop=t[-1])
+fd3 = make_gaussian_process(n_samples, mean=m3, cov=cov,
+                            n_features=n_features, random_state=3, start=t[0],
+                            stop=t[-1])
+
+stat, p_val = oneway_anova(fd1, fd2, fd3, random_state=4)
+print("Statistic: {:.3f}".format(stat))
+print("p-value: {:.3f}".format(p_val))
+
+
+##########################################################################
+# Results for :math:`\sigma^2 = 1`:
+
+sigma2 = 1
+cov = WhiteNoise(variance=sigma2)
+
+fd1 = make_gaussian_process(n_samples, mean=m1, cov=cov,
+                            n_features=n_features, random_state=1, start=t[0],
+                            stop=t[-1])
+fd2 = make_gaussian_process(n_samples, mean=m2, cov=cov,
+                            n_features=n_features, random_state=2, start=t[0],
+                            stop=t[-1])
+fd3 = make_gaussian_process(n_samples, mean=m3, cov=cov,
+                            n_features=n_features, random_state=3, start=t[0],
+                            stop=t[-1])
+
+stat, p_val = oneway_anova(fd1, fd2, fd3, random_state=4)
+print("Statistic: {:.3f}".format(stat))
+print("p-value: {:.3f}".format(p_val))
+
+##########################################################################
+# **References:**
+#
+# [1] Antonio Cuevas, Manuel Febrero-Bande, and Ricardo Fraiman. "An anova test
+# for functional data". *Computational Statistics & Data Analysis*,
+# 47:111-122, 02 2004
diff --git a/examples/plot_pairwise_alignment.py b/examples/plot_pairwise_alignment.py
index 1bf27c482..63f919b98 100644
--- a/examples/plot_pairwise_alignment.py
+++ b/examples/plot_pairwise_alignment.py
@@ -16,6 +16,8 @@
import numpy as np

import skfda
+from skfda.preprocessing.registration import ElasticRegistration, invert_warping
+from skfda.datasets import make_multimodal_samples

##############################################################################
# Given any two functions :math:`f` and :math:`g`, we define their
@@ -38,47 +40,45 @@
# Due to the similarity of these curves can be aligned almost perfectly
# between them.
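# Before aligning real curves, the effect of a warping can be sketched by
# composing a curve with a hand-made warping function (a toy example; the
# quadratic warping below is only illustrative):

import numpy as np
import skfda

grid = np.linspace(0, 1, 101)
curve = skfda.FDataGrid([np.sin(2 * np.pi * grid)], grid)

# A valid warping maps the domain onto itself and is strictly increasing
gamma = skfda.FDataGrid([grid ** 2], grid)

warped = curve.compose(gamma)  # evaluates curve(gamma(t))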
# + # Samples with modes in 1/3 and 2/3 -fd = skfda.datasets.make_multimodal_samples( - n_samples=2, modes_location=[1 / 3, 2 / 3], - random_state=1, start=0, mode_std=.01) +fd = make_multimodal_samples(n_samples=2, modes_location=[1 / 3, 2 / 3], + random_state=1, start=0, mode_std=.01) fig = fd.plot() fig.axes[0].legend(['$f$', '$g$']) -fig ############################################################################## # In this example :math:`g` will be used as template and :math:`f` will be # aligned to it. In the following figure it is shown the result of the # registration process, wich can be computed using -# :func:`~skfda.preprocessing.registration.elastic_registration`. +# :class:`~skfda.preprocessing.registration.ElasticRegistration`. # f, g = fd[0], fd[1] +elastic_registration = ElasticRegistration(template=g) + + # Aligns f to g -fd_align = skfda.preprocessing.registration.elastic_registration(f, g) +f_align = elastic_registration.fit_transform(f) fig = fd.plot() -fd_align.plot(fig=fig, color='C0', linestyle='--') +f_align.plot(fig=fig, color='C0', linestyle='--') # Legend fig.axes[0].legend(['$f$', '$g$', '$f \\circ \\gamma $']) -fig ############################################################################## # The non-linear transformation :math:`\gamma` applied to :math:`f` in -# the alignment can be obtained using -# :func:`~skfda.preprocessing.registration.elastic_registration_warping`. +# the alignment is stored in the attribute `warping_`. # -# Warping to align f to g -warping = skfda.preprocessing.registration.elastic_registration_warping(f, g) - -# Warping used +# Warping used in the last transformation +warping = elastic_registration.warping_ fig = warping.plot() # Plot identity @@ -97,7 +97,7 @@ # function. # -warping_inverse = skfda.preprocessing.registration.invert_warping(warping) +warping_inverse = invert_warping(warping) fig = fd.plot(label='$f$') g.compose(warping_inverse).plot(fig=fig, color='C1', linestyle='--') @@ -106,9 +106,6 @@ # Legend fig.axes[0].legend(['$f$', '$g$', '$g \\circ \\gamma^{-1} $']) -fig - - ############################################################################## # The amount of deformation used in the registration can be controlled by # using a variation of the metric with a penalty term @@ -120,19 +117,20 @@ # # Values of lambda -lambdas = np.linspace(0, .2, 20) +penalties = np.linspace(0, .2, 20) # Creation of a color gradient cmap = clr.LinearSegmentedColormap.from_list('custom cmap', ['C1', 'C0']) -color = cmap(.2 + 3 * lambdas) +color = cmap(.2 + 3 * penalties) fig = plt.figure() ax = fig.add_subplot(1, 1, 1) -for lam, c in zip(lambdas, color): - # Plots result of alignment - skfda.preprocessing.registration.elastic_registration( - f, g, lam=lam).plot(fig=fig, color=c) + +for penalty, c in zip(penalties, color): + + elastic_registration.set_params(penalty=penalty) + elastic_registration.transform(f).plot(fig, color=c) f.plot(fig=fig, color='C0', linewidth=2., label='$f$') @@ -142,7 +140,6 @@ fig.axes[0].legend() - ############################################################################## # This phenomenon of loss of elasticity is clearly observed in # the warpings used, since as the term of penalty increases, the functions @@ -152,9 +149,10 @@ fig = plt.figure() ax = fig.add_subplot(1, 1, 1) -for lam, c in zip(lambdas, color): - skfda.preprocessing.registration.elastic_registration_warping( - f, g, lam=lam).plot(fig=fig, color=c) +for penalty, c in zip(penalties, color): + elastic_registration.set_params(penalty=penalty) + 
elastic_registration.transform(f) + elastic_registration.warping_.plot(fig, color=c) # Plots identity fig.axes[0].plot(t, t, color='C0', linestyle="--") @@ -198,7 +196,9 @@ # # Registration of the sets -fd_registered = skfda.preprocessing.registration.elastic_registration(fd, g) +elastic_registration = ElasticRegistration(template=g) + +fd_registered = elastic_registration.fit_transform(fd) # Plot of the curves fig = fd.plot(color="C0", label="$f_i$") diff --git a/examples/plot_radius_neighbors_classification.py b/examples/plot_radius_neighbors_classification.py index 8591fe39f..4c07289d1 100644 --- a/examples/plot_radius_neighbors_classification.py +++ b/examples/plot_radius_neighbors_classification.py @@ -40,7 +40,7 @@ y = np.array(15 * [0] + 15 * [1]) # Plot toy dataset -X.plot(sample_labels=y, label_colors=['C0', 'C1']) +X.plot(group=y, group_colors=['C0', 'C1']) ############################################################################## # @@ -69,7 +69,7 @@ radius = 0.3 sample = X_test[0] # Center of the ball -fig = X_train.plot(sample_labels=y_train, label_colors=['C0', 'C1']) +fig = X_train.plot(group=y_train, group_colors=['C0', 'C1']) # Plot ball sample.plot(fig=fig, color='red', linewidth=3) diff --git a/examples/plot_representation.py b/examples/plot_representation.py index 1aa2de55f..1763c4887 100644 --- a/examples/plot_representation.py +++ b/examples/plot_representation.py @@ -9,8 +9,9 @@ # License: MIT import skfda +from skfda.representation.interpolation import SplineInterpolation + import skfda.representation.basis as basis -from skfda.representation.interpolation import SplineInterpolator ############################################################################## @@ -21,14 +22,13 @@ # Growth Study. This dataset correspond to the height of several boys and # girls measured until the 18 years of age. The number and times of the # measurements are the same for each individual. - dataset = skfda.datasets.fetch_growth() fd = dataset['data'] y = dataset['target'] print(repr(fd)) -fd.plot(sample_labels=y, label_colors=['red', 'blue']) +fd.plot(group=y, group_colors=['red', 'blue']) ############################################################################## # This kind of representation is a discretized representation, in which the @@ -51,7 +51,7 @@ ############################################################################## # The interpolation used can however be changed. Here, we will use an # interpolation with degree 3 splines. -first_curve.interpolator = SplineInterpolator(3) +first_curve.interpolation = SplineInterpolation(3) first_curve.plot() ############################################################################## diff --git a/examples/plot_shift_registration_basis.py b/examples/plot_shift_registration.py similarity index 64% rename from examples/plot_shift_registration_basis.py rename to examples/plot_shift_registration.py index 79dd8bdff..e4838186f 100644 --- a/examples/plot_shift_registration_basis.py +++ b/examples/plot_shift_registration.py @@ -1,6 +1,6 @@ """ -Shift Registration of basis -=========================== +Shift Registration +================== Shows the use of shift registration applied to a sinusoidal process represented in a Fourier basis. 
@@ -12,8 +12,10 @@
# sphinx_gallery_thumbnail_number = 3

import matplotlib.pyplot as plt

-import skfda
+from skfda.datasets import make_sinusoidal_process
+from skfda.preprocessing.registration import ShiftRegistration
+from skfda.representation.basis import Fourier

##############################################################################
# In this example we will use a
@@ -24,7 +26,7 @@
#
# In this example we want to register the curves using a translation
# and remove the phase variation to perform further analysis.
-fd = skfda.datasets.make_sinusoidal_process(random_state=1)
+fd = make_sinusoidal_process(random_state=1)

fd.plot()

@@ -32,26 +34,23 @@
# We will smooth the curves using a basis representation, which will help us
# to remove the gaussian noise. Smoothing before registration
# is essential due to the use of derivatives in the optimization process.
-#
# Because of their sinusoidal nature we will use a Fourier basis.
-basis = skfda.representation.basis.Fourier(n_basis=11)
-fd_basis = fd.to_basis(basis)
-
+fd_basis = fd.to_basis(Fourier(n_basis=11))
fd_basis.plot()

##############################################################################
-# We will apply the
-# :func:`~skfda.preprocessing.registration.shift_registration`,
+# We will use the
+# :class:`~skfda.preprocessing.registration.ShiftRegistration` transformer,
# which is suitable due to the periodicity of the dataset and the small
# amount of amplitude variation.
-
-fd_registered = skfda.preprocessing.registration.shift_registration(fd_basis)
-
-##############################################################################
+#
# We can observe how the sinusoidal pattern is easily distinguishable
# once the alignment has been made.
+shift_registration = ShiftRegistration()
+fd_registered = shift_registration.fit_transform(fd_basis)
+
fd_registered.plot()

##############################################################################
@@ -63,28 +62,23 @@
# curves varying their amplitude with respect to the original process,
# however, this effect is mitigated after the registration.

-fig = fd_basis.mean().plot()
-fd_registered.mean().plot(fig=fig)
-
# sinusoidal process without variation and noise
-sine = skfda.datasets.make_sinusoidal_process(n_samples=1, phase_std=0,
-                                              amplitude_std=0, error_std=0)
+sine = make_sinusoidal_process(n_samples=1, phase_std=0,
+                               amplitude_std=0, error_std=0)

-sine.plot(fig=fig, linestyle='dashed')
+fig = fd_basis.mean().plot()
+fd_registered.mean().plot(fig)
+sine.plot(fig, linestyle='dashed')

fig.axes[0].legend(['original mean', 'registered mean', 'sine'])

##############################################################################
-# The values of the shifts :math:`\delta_i` may be relevant for further
-# analysis, as they may be considered as nuisance or random effects.
+# The values of the shifts :math:`\delta_i`, stored in the attribute
+# `deltas_`, may be relevant for further analysis, as they may be considered
+# as nuisance or random effects.
#

-deltas = skfda.preprocessing.registration.shift_registration_deltas(fd_basis)
-print(deltas)
+print(shift_registration.deltas_)

-##############################################################################
-# The aligned functions can be obtained from the :math:`\delta_i` list
-# using the `shift` method.
-# -fd_basis.shift(deltas).plot() +plt.show() diff --git a/examples/plot_surface_boxplot.py b/examples/plot_surface_boxplot.py index c15bc7223..d64dbb6a3 100644 --- a/examples/plot_surface_boxplot.py +++ b/examples/plot_surface_boxplot.py @@ -11,12 +11,13 @@ # sphinx_gallery_thumbnail_number = 3 -import matplotlib.pyplot as plt -import numpy as np from skfda import FDataGrid from skfda.datasets import make_gaussian_process from skfda.exploratory.visualization import SurfaceBoxplot, Boxplot +import matplotlib.pyplot as plt +import numpy as np + ############################################################################## # In order to instantiate a @@ -35,7 +36,7 @@ fd = make_gaussian_process(n_samples=n_samples, n_features=n_features, random_state=1) -fd.dataset_label = "Brownian process" +fd.dataset_name = "Brownian process" ############################################################################## # After, those values generated for one dimension on the domain are extruded @@ -50,7 +51,7 @@ fd_2 = FDataGrid(data_matrix=cube, sample_points=np.tile(fd.sample_points, (2, 1)), - dataset_label="Extruded Brownian process") + dataset_name="Extruded Brownian process") fd_2.plot() diff --git a/readthedocs-requirements.txt b/readthedocs-requirements.txt index be6a2f754..3562a73ad 100644 --- a/readthedocs-requirements.txt +++ b/readthedocs-requirements.txt @@ -1,7 +1,15 @@ --r requirements.txt +matplotlib +numpy +scipy +Cython +sklearn Sphinx sphinx_rtd_theme sphinx-gallery pillow matplotlib -mpldatacursor \ No newline at end of file +mpldatacursor +setuptools>=41.2 +multimethod>=1.2 +findiff +jupyter-sphinx \ No newline at end of file diff --git a/readthedocs.yml b/readthedocs.yml index 232af1b0b..08775369f 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -1,6 +1,25 @@ -build: - image: latest +# .readthedocs.yml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details -python: - version: 3.6 +# Required +version: 2 + +# Build documentation in the docs/ directory with Sphinx +sphinx: + builder: html + configuration: docs/conf.py + +# Build documentation with MkDocs +#mkdocs: +# configuration: mkdocs.yml +# Optionally build your docs in additional formats such as PDF + +# Optionally set the version of Python and requirements required to build your docs +python: + version: 3.7 + install: + - requirements: readthedocs-requirements.txt + - method: pip + path: . 
\ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 074553ffe..29588726f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,5 @@ setuptools Cython sklearn mpldatacursor - +multimethod>=1.2 +findiff diff --git a/setup.py b/setup.py index 82ebce2ac..6d2ca1f3f 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,6 @@ import sys from Cython.Build import cythonize -from Cython.Distutils import build_ext from setuptools import setup, find_packages from setuptools.extension import Extension @@ -80,15 +79,18 @@ 'Topic :: Scientific/Engineering :: Mathematics', 'Topic :: Software Development :: Libraries :: Python Modules', ], - install_requires=['numpy', + install_requires=['numpy>=1.16', 'scipy>=1.3.0', - 'scikit-learn', + 'scikit-learn>=0.20', + 'pandas', 'matplotlib', 'scikit-datasets[cran]>=0.1.24', 'rdata', - 'mpldatacursor'], + 'cython', + 'mpldatacursor', + 'multimethod>=1.2', + 'findiff'], setup_requires=pytest_runner, - tests_require=['pytest', - 'numpy>=1.14'], + tests_require=['pytest'], test_suite='tests', zip_safe=False) diff --git a/skfda/__init__.py b/skfda/__init__.py index f1354bf93..eecd71b6c 100644 --- a/skfda/__init__.py +++ b/skfda/__init__.py @@ -32,8 +32,10 @@ from .representation import FData from .representation import FDataBasis from .representation import FDataGrid +from .representation._functional_data import concatenate -from . import representation, datasets, preprocessing, exploratory, misc, ml +from . import representation, datasets, preprocessing, exploratory, misc, ml, \ + inference import os as _os diff --git a/skfda/_neighbors/__init__.py b/skfda/_neighbors/__init__.py index 58316566d..22047b996 100644 --- a/skfda/_neighbors/__init__.py +++ b/skfda/_neighbors/__init__.py @@ -3,7 +3,7 @@ - NearestNeighbors - KNeighborsClassifier - RadiusNeighborsClassifier - - NearestCentroids + - NearestCentroid - KNeighborsRegressor - RadiusNeighborsRegressor @@ -11,4 +11,4 @@ from .unsupervised import NearestNeighbors from .regression import KNeighborsRegressor, RadiusNeighborsRegressor from .classification import (KNeighborsClassifier, RadiusNeighborsClassifier, - NearestCentroids) + NearestCentroid) diff --git a/skfda/_neighbors/base.py b/skfda/_neighbors/base.py index 5e73364cd..8b2ffc76d 100644 --- a/skfda/_neighbors/base.py +++ b/skfda/_neighbors/base.py @@ -3,8 +3,8 @@ from abc import ABC, abstractmethod from sklearn.base import BaseEstimator -from sklearn.utils.validation import check_is_fitted as sklearn_check_is_fitted from sklearn.base import RegressorMixin +from sklearn.utils.validation import check_is_fitted as sklearn_check_is_fitted import numpy as np @@ -73,13 +73,13 @@ def _to_multivariate_metric(metric, sample_points): >>> fd = FDataGrid([np.ones(len(x))], x) >>> fd2 = FDataGrid([np.zeros(len(x))], x) >>> lp_distance(fd, fd2).round(2) - 1.0 + array([ 1.]) Creation of the sklearn-style metric. 
>>> sklearn_lp_distance = _to_multivariate_metric(lp_distance, [x]) >>> sklearn_lp_distance(np.ones(len(x)), np.zeros(len(x))).round(2) - 1.0 + array([ 1.]) """ # Shape -> (n_samples = 1, domain_dims...., image_dimension (-1)) @@ -97,11 +97,11 @@ def multivariate_metric(x, y, _check=False, **kwargs): class NeighborsBase(ABC, BaseEstimator): """Base class for nearest neighbors estimators.""" - @abstractmethod def __init__(self, n_neighbors=None, radius=None, weights='uniform', algorithm='auto', leaf_size=30, metric='l2', metric_params=None, n_jobs=None, multivariate_metric=False): + """Initializes the nearest neighbors estimator""" self.n_neighbors = n_neighbors self.radius = radius @@ -166,6 +166,7 @@ def fit(self, X, y=None): metric = lp_distance else: metric = self.metric + sklearn_metric = _to_multivariate_metric(metric, self._sample_points) else: @@ -203,7 +204,7 @@ def kneighbors(self, X=None, n_neighbors=None, return_distance=True): Indices of the nearest points in the population matrix. Examples: - Firstly, we will create a toy dataset with 2 classes + Firstly, we will create a toy dataset. >>> from skfda.datasets import make_sinusoidal_process >>> fd1 = make_sinusoidal_process(phase_std=.25, random_state=0) @@ -216,7 +217,7 @@ def kneighbors(self, X=None, n_neighbors=None, return_distance=True): >>> from skfda.ml.clustering import NearestNeighbors >>> neigh = NearestNeighbors() >>> neigh.fit(fd) - NearestNeighbors(algorithm='auto', leaf_size=30,...) + NearestNeighbors(...) Now we can query the k-nearest neighbors. @@ -260,7 +261,7 @@ def kneighbors_graph(self, X=None, n_neighbors=None, mode='connectivity'): A[i, j] is assigned the weight of edge that connects i to j. Examples: - Firstly, we will create a toy dataset with 2 classes. + Firstly, we will create a toy dataset. >>> from skfda.datasets import make_sinusoidal_process >>> fd1 = make_sinusoidal_process(phase_std=.25, random_state=0) @@ -273,7 +274,7 @@ def kneighbors_graph(self, X=None, n_neighbors=None, mode='connectivity'): >>> from skfda.ml.clustering import NearestNeighbors >>> neigh = NearestNeighbors() >>> neigh.fit(fd) - NearestNeighbors(algorithm='auto', leaf_size=30,...) + NearestNeighbors(...) Now we can obtain the graph of k-neighbors of a sample. @@ -329,7 +330,7 @@ def radius_neighbors(self, X=None, radius=None, return_distance=True): within a ball of size ``radius`` around the query points. Examples: - Firstly, we will create a toy dataset with 2 classes. + Firstly, we will create a toy dataset. >>> from skfda.datasets import make_sinusoidal_process >>> fd1 = make_sinusoidal_process(phase_std=.25, random_state=0) @@ -342,7 +343,7 @@ def radius_neighbors(self, X=None, radius=None, return_distance=True): >>> from skfda.ml.clustering import NearestNeighbors >>> neigh = NearestNeighbors(radius=.3) >>> neigh.fit(fd) - NearestNeighbors(algorithm='auto', leaf_size=30,...) + NearestNeighbors(...radius=0.3...) Now we can query the neighbors in the radius. @@ -542,15 +543,15 @@ def _weighted_local_regression(self, neighbors, distance): def predict(self, X): """Predict the target for the provided data - Parameters - ---------- - X (:class:`FDataGrid` or array-like): FDataGrid with the test - samples or array (n_query, n_indexed) if metric == - 'precomputed'. - Returns - ------- - y : array of shape = [n_samples] or [n_samples, n_outputs] - or :class:`FData` containing as many samples as X. 
+ + Args: + X (:class:`FDataGrid` or array-like): FDataGrid with the test + samples or array (n_query, n_indexed) if metric == + 'precomputed'. + + Returns: + y : array of shape = [n_samples] or [n_samples, n_outputs] + or :class:`FData` containing as many samples as X. """ self._check_is_fitted() diff --git a/skfda/_neighbors/classification.py b/skfda/_neighbors/classification.py index c8f63482d..169fbf911 100644 --- a/skfda/_neighbors/classification.py +++ b/skfda/_neighbors/classification.py @@ -1,13 +1,13 @@ """Neighbor models for supervised classification.""" -from sklearn.utils.multiclass import check_classification_targets -from sklearn.preprocessing import LabelEncoder from sklearn.base import ClassifierMixin, BaseEstimator +from sklearn.preprocessing import LabelEncoder +from sklearn.utils.multiclass import check_classification_targets from sklearn.utils.validation import check_is_fitted as sklearn_check_is_fitted -from ..misc.metrics import lp_distance, pairwise_distance from ..exploratory.stats import mean as l2_mean +from ..misc.metrics import lp_distance, pairwise_distance from .base import (NeighborsBase, NeighborsMixin, KNeighborsMixin, NeighborsClassifierMixin, RadiusNeighborsMixin) @@ -59,8 +59,9 @@ class KNeighborsClassifier(NeighborsBase, NeighborsMixin, KNeighborsMixin, Doesn't affect :meth:`fit` method. multivariate_metric : boolean, optional (default = False) Indicates if the metric used is a sklearn distance between vectors (see - :class:`sklearn.neighbors.DistanceMetric`) or a functional metric of - the module :mod:`skfda.misc.metrics`. + :class:`~sklearn.neighbors.DistanceMetric`) or a functional metric of + the module `skfda.misc.metrics` if ``False``. + Examples -------- Firstly, we will create a toy dataset with 2 classes @@ -77,7 +78,7 @@ class KNeighborsClassifier(NeighborsBase, NeighborsMixin, KNeighborsMixin, >>> from skfda.ml.classification import KNeighborsClassifier >>> neigh = KNeighborsClassifier() >>> neigh.fit(fd, y) - KNeighborsClassifier(algorithm='auto', leaf_size=30,...) + KNeighborsClassifier(...) We can predict the class of new samples @@ -92,11 +93,12 @@ class KNeighborsClassifier(NeighborsBase, NeighborsMixin, KNeighborsMixin, See also -------- :class:`~skfda.ml.classification.RadiusNeighborsClassifier` - :class:`~skfda.ml.classification.NearestCentroids` + :class:`~skfda.ml.classification.NearestCentroid` :class:`~skfda.ml.regression.KNeighborsRegressor` :class:`~skfda.ml.regression.RadiusNeighborsRegressor` :class:`~skfda.ml.clustering.NearestNeighbors` + Notes ----- See Nearest Neighbors in the sklearn online documentation for a discussion @@ -239,7 +241,7 @@ class RadiusNeighborsClassifier(NeighborsBase, NeighborsMixin, >>> from skfda.ml.classification import RadiusNeighborsClassifier >>> neigh = RadiusNeighborsClassifier(radius=.3) >>> neigh.fit(fd, y) - RadiusNeighborsClassifier(algorithm='auto', leaf_size=30,...) + RadiusNeighborsClassifier(...radius=0.3...) We can predict the class of new samples. 
@@ -249,11 +251,12 @@ class RadiusNeighborsClassifier(NeighborsBase, NeighborsMixin, See also -------- :class:`~skfda.ml.classification.KNeighborsClassifier` - :class:`~skfda.ml.classification.NearestCentroids` + :class:`~skfda.ml.classification.NearestCentroid` :class:`~skfda.ml.regression.KNeighborsRegressor` :class:`~skfda.ml.regression.RadiusNeighborsRegressor` :class:`~skfda.ml.clustering.NearestNeighbors` + Notes ----- See Nearest Neighbors in the sklearn online documentation for a discussion @@ -300,7 +303,7 @@ def _init_estimator(self, sklearn_metric): outlier_label=self.outlier_label, n_jobs=self.n_jobs) -class NearestCentroids(BaseEstimator, ClassifierMixin): +class NearestCentroid(BaseEstimator, ClassifierMixin): """Nearest centroid classifier for functional data. Each class is represented by its centroid, with test samples classified to @@ -340,10 +343,10 @@ class and return a :class:`FData` object with only one sample We will fit a Nearest centroids classifier - >>> from skfda.ml.classification import NearestCentroids - >>> neigh = NearestCentroids() + >>> from skfda.ml.classification import NearestCentroid + >>> neigh = NearestCentroid() >>> neigh.fit(fd, y) - NearestCentroids(...) + NearestCentroid(...) We can predict the class of new samples @@ -358,6 +361,7 @@ class and return a :class:`FData` object with only one sample :class:`~skfda.ml.regression.RadiusNeighborsRegressor` :class:`~skfda.ml.clustering.NearestNeighbors` + """ def __init__(self, metric='l2', mean='mean'): diff --git a/skfda/_neighbors/outlier.py b/skfda/_neighbors/outlier.py new file mode 100644 index 000000000..9b844575d --- /dev/null +++ b/skfda/_neighbors/outlier.py @@ -0,0 +1,361 @@ + + +from sklearn.base import OutlierMixin + +from ..misc.metrics import lp_distance +from .base import (NeighborsBase, NeighborsMixin, KNeighborsMixin, + _to_multivariate_metric) + + +class LocalOutlierFactor(NeighborsBase, NeighborsMixin, KNeighborsMixin, + OutlierMixin): + """Unsupervised Outlier Detection. + + Unsupervised Outlier Detection using Local Outlier Factor (LOF). + + The anomaly score of each sample is called Local Outlier Factor. + It measures the local deviation of density of a given sample with + respect to its neighbors. + + It is local in that the anomaly score depends on how isolated the object + is with respect to the surrounding neighborhood. + + More precisely, locality is given by k-nearest neighbors, whose distance + is used to estimate the local density. + + By comparing the local density of a sample to the local densities of + its neighbors, one can identify samples that have a substantially lower + density than their neighbors. These are considered outliers. + + Parameters + ---------- + n_neighbors : int, optional (default=20) + Number of neighbors to use by default for :meth:`kneighbors` queries. + If n_neighbors is larger than the number of samples provided, + all samples will be used. + algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional + Algorithm used to compute the nearest neighbors: + + - 'ball_tree' will use :class:`BallTree` + - 'kd_tree' will use :class:`KDTree` + - 'brute' will use a brute-force search. + - 'auto' will attempt to decide the most appropriate algorithm + based on the values passed to :meth:`fit` method. + + leaf_size : int, optional (default=30) + Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can + affect the speed of the construction and query, as well as the memory + required to store the tree. 
The optimal value depends on the
+        nature of the problem.
+    metric : string or callable, (default
+        :func:`lp_distance <skfda.misc.metrics.lp_distance>`)
+        the distance metric to use for the tree. The default metric is
+        the L2 distance. See the documentation of the metrics module
+        for a list of available metrics.
+    metric_params : dict, optional (default=None)
+        Additional keyword arguments for the metric function.
+    contamination : float in (0., 0.5), optional (default='auto')
+        The amount of contamination of the data set, i.e. the proportion
+        of outliers in the data set. When fitting this is used to define the
+        threshold on the decision function. If "auto", the decision function
+        threshold is determined as in the original paper [BKNS2000]_.
+    novelty : boolean, default False
+        By default, LocalOutlierFactor is only meant to be used for outlier
+        detection (novelty=False). Set novelty to True if you want to use
+        LocalOutlierFactor for novelty detection. In this case be aware
+        that you should only use predict, decision_function and score_samples
+        on new unseen data and not on the training set.
+    n_jobs : int or None, optional (default=None)
+        The number of parallel jobs to run for neighbors search.
+        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
+        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
+        for more details.
+        Affects only :meth:`kneighbors` and :meth:`kneighbors_graph` methods.
+    multivariate_metric : boolean, optional (default = False)
+        Indicates if the metric used is a sklearn distance between vectors (see
+        :class:`~sklearn.neighbors.DistanceMetric`) or a functional metric of
+        the module `skfda.misc.metrics` if ``False``.
+
+    Attributes
+    ----------
+    negative_outlier_factor_ : numpy array, shape (n_samples,)
+        The opposite LOF of the training samples. The higher, the more normal.
+        Inliers tend to have a LOF score close to 1
+        (``negative_outlier_factor_`` close to -1), while outliers tend to have
+        a larger LOF score.
+        The local outlier factor (LOF) of a sample captures its
+        supposed 'degree of abnormality'.
+        It is the average of the ratio of the local reachability density of
+        a sample and those of its k-nearest neighbors.
+    n_neighbors_ : integer
+        The actual number of neighbors used for :meth:`kneighbors` queries.
+    offset_ : float
+        Offset used to obtain binary labels from the raw scores.
+        Observations having a negative_outlier_factor smaller than `offset_`
+        are detected as abnormal.
+        The offset is set to -1.5 (inliers score around -1), except when a
+        contamination parameter different than "auto" is provided. In that
+        case, the offset is defined in such a way we obtain the expected
+        number of outliers in training.
+
+    Examples:
+
+        **Local Outlier Factor (LOF) for outlier detection**.
+
+        >>> from skfda._neighbors.outlier import LocalOutlierFactor
+
+        Creation of simulated dataset with 2 outliers to be used with LOF.
+
+        >>> from skfda.datasets import make_sinusoidal_process
+        >>> fd_clean = make_sinusoidal_process(n_samples=25, error_std=0,
+        ...                                    phase_std=0.1, random_state=0)
+        >>> fd_outliers = make_sinusoidal_process(
+        ...     n_samples=2, error_std=0, phase_mean=0.5, random_state=5)
+        >>> fd = fd_outliers.concatenate(fd_clean)  # Dataset with 2 outliers
+
+        Detection of outliers with LOF.
+
+        >>> lof = LocalOutlierFactor()
+        >>> is_outlier = lof.fit_predict(fd)
+        >>> is_outlier  # -1 for anomalies/outliers and +1 for inliers
+        array([-1, -1, 1, 1, 1, 1, 1, 1, ..., 1, 1, 1, 1])
+
+        The negative outlier factor stored.
+
+        >>> lof.negative_outlier_factor_.round(2)
+        array([-7.11, -1.54, -1.  , -0.99, ..., -0.97, -1.  , -0.99])
+
+        **Novelty detection with LOF**.
+
+        Creation of a dataset without outliers.
+
+        >>> fd_train = make_sinusoidal_process(n_samples=25, error_std=0,
+        ...                                    phase_std=0.1, random_state=9)
+
+        Fit of LOF using the dataset without outliers.
+
+        >>> lof = LocalOutlierFactor(novelty=True)
+        >>> lof.fit(fd_train)
+        LocalOutlierFactor(...novelty=True)
+
+        Detection of anomalies for new samples.
+
+        >>> lof.predict(fd)  # Predict with samples not used in fit
+        array([-1, -1, 1, 1, 1, 1, 1, 1, ..., 1, 1, 1, 1])
+
+
+    References
+    ----------
+    .. [BKNS2000] Breunig, M. M., Kriegel, H. P., Ng, R. T., & Sander,
+       J. (2000, May). LOF: identifying density-based local outliers. In ACM
+       SIGMOD Record.
+
+    Notes
+    -----
+    This estimator wraps the scikit-learn class
+    :class:`~sklearn.neighbors.LocalOutlierFactor` employing functional
+    metrics and data instead of the multivariate ones.
+
+    See also
+    --------
+    :class:`~skfda.ml.classification.KNeighborsClassifier`
+    :class:`~skfda.ml.classification.RadiusNeighborsClassifier`
+    :class:`~skfda.ml.classification.NearestCentroid`
+    :class:`~skfda.ml.regression.KNeighborsRegressor`
+    :class:`~skfda.ml.regression.RadiusNeighborsRegressor`
+    :class:`~skfda.ml.clustering.NearestNeighbors`
+    """
+
+    def __init__(self, n_neighbors=20, algorithm='auto',
+                 leaf_size=30, metric='l2', metric_params=None,
+                 contamination='auto', novelty=False,
+                 n_jobs=1, multivariate_metric=False):
+        """Initialize the Local Outlier Factor estimator."""
+
+        super().__init__(n_neighbors=n_neighbors, algorithm=algorithm,
+                         leaf_size=leaf_size, metric=metric,
+                         metric_params=metric_params, n_jobs=n_jobs,
+                         multivariate_metric=multivariate_metric)
+        self.contamination = contamination
+        self.novelty = novelty
+
+    def _init_estimator(self, sklearn_metric):
+        """Initialize the sklearn nearest neighbors estimator.
+
+        Args:
+            sklearn_metric: (pyfunc or 'precomputed'): Metric compatible with
+                sklearn API or matrix (n_samples, n_samples) with precomputed
+                distances.
+
+        Returns:
+            Sklearn LocalOutlierFactor estimator initialized.
+
+        """
+        from sklearn.neighbors import LocalOutlierFactor as _LocalOutlierFactor
+
+        return _LocalOutlierFactor(
+            n_neighbors=self.n_neighbors, algorithm=self.algorithm,
+            leaf_size=self.leaf_size, metric=sklearn_metric,
+            metric_params=self.metric_params, contamination=self.contamination,
+            novelty=self.novelty, n_jobs=self.n_jobs)
+
+    def _store_fit_data(self):
+        """Store the parameters created during the fit."""
+        self.negative_outlier_factor_ = self.estimator_.negative_outlier_factor_
+        self.n_neighbors_ = self.estimator_.n_neighbors_
+        self.offset_ = self.estimator_.offset_
+
+    def fit(self, X, y=None):
+        """Fit the model using X as training data.
+
+        Parameters
+        ----------
+        X : :class:`~skfda.FDataGrid` or array_like
+            Training data. FDataGrid containing the samples,
+            or array with shape [n_samples, n_samples] if metric='precomputed'.
+        y : Ignored
+            not used, present for API consistency by convention.
+        Returns
+        -------
+        self : object
+        """
+
+        super().fit(X, y)
+        self._store_fit_data()
+
+        return self
+
+    def predict(self, X=None):
+        """Predict the labels (1 inlier, -1 outlier) of X according to LOF.
+
+        This method makes it possible to generalize prediction to *new
+        observations* (not in the training set). Only available for novelty
+        detection (when novelty is set to True).
+
+        If X is None, returns the same as fit_predict(X_train).
+
+        Parameters
+        ----------
+        X : :class:`~skfda.FDataGrid` or array_like
+            FDataGrid containing the query sample or samples to compute the
+            Local Outlier Factor w.r.t. the training samples, or array with
+            the distances to the training samples if metric='precomputed'.
+
+        Returns
+        -------
+        is_inlier : array, shape (n_samples,)
+            Returns -1 for anomalies/outliers and +1 for inliers.
+        """
+
+        self._check_is_fitted()
+        X_multivariate = self._transform_to_multivariate(X)
+
+        return self.estimator_.predict(X_multivariate)
+
+    def fit_predict(self, X, y=None):
+        """Fits the model to the training set X and returns the labels.
+
+        Label is 1 for an inlier and -1 for an outlier according to the LOF
+        score and the contamination parameter.
+
+        Parameters
+        ----------
+        X : :class:`~skfda.FDataGrid` or array_like
+            Training data. FDataGrid containing the samples,
+            or array with shape [n_samples, n_samples] if metric='precomputed'.
+        y : Ignored
+            not used, present for API consistency by convention.
+        Returns
+        -------
+        is_inlier : array, shape (n_samples,)
+            Returns -1 for anomalies/outliers and 1 for inliers.
+        """
+
+        # In this estimator fit_predict cannot be wrapped as fit().predict()
+
+        if self.metric == 'precomputed':
+            self.estimator_ = self._init_estimator(self.metric)
+            res = self.estimator_.fit_predict(X, y)
+        else:
+            self._sample_points = X.sample_points
+            self._shape = X.data_matrix.shape[1:]
+
+            if not self.multivariate_metric:
+                # Construct the sklearn metric to manage vectors
+                if self.metric == 'l2':
+                    metric = lp_distance
+                else:
+                    metric = self.metric
+                sklearn_metric = _to_multivariate_metric(metric,
+                                                         self._sample_points)
+            else:
+                sklearn_metric = self.metric
+
+            self.estimator_ = self._init_estimator(sklearn_metric)
+            X_multivariate = self._transform_to_multivariate(X)
+            res = self.estimator_.fit_predict(X_multivariate, y)
+
+        self._store_fit_data()
+
+        return res
+
+    def decision_function(self, X):
+        """Shifted opposite of the Local Outlier Factor of X.
+
+        Bigger is better, i.e. large values correspond to inliers.
+        The shift offset allows a zero threshold for being an outlier.
+        Only available for novelty detection (when novelty is set to True).
+        The argument X is supposed to contain *new data*: if X contains a
+        point from training, it considers the latter in its own neighborhood.
+        Also, the samples in X are not considered in the neighborhood of any
+        point.
+
+        Parameters
+        ----------
+        X : :class:`~skfda.FDataGrid` or array_like
+            FDataGrid containing the query sample or samples to compute the
+            Local Outlier Factor w.r.t. the training samples.
+
+        Returns
+        -------
+        shifted_opposite_lof_scores : array, shape (n_samples,)
+            The shifted opposite of the Local Outlier Factor of each input
+            sample. The lower, the more abnormal. Negative scores represent
+            outliers, positive scores represent inliers.
+        """
+        self._check_is_fitted()
+        X_multivariate = self._transform_to_multivariate(X)
+
+        return self.estimator_.decision_function(X_multivariate)
+
+    def score_samples(self, X):
+        """Opposite of the Local Outlier Factor of X.
+
+        It is the opposite, as bigger is better, i.e. large values correspond
+        to inliers.
+
+        Only available for novelty detection (when novelty is set to True).
+        The argument X is supposed to contain *new data*: if X contains a
+        point from training, it considers the latter in its own neighborhood.
+        Also, the samples in X are not considered in the neighborhood of any
+        point.
+
+        The score_samples on training data is available by considering
+        the ``negative_outlier_factor_`` attribute.
+
+        Parameters
+        ----------
+        X : :class:`~skfda.FDataGrid` or array_like
+            FDataGrid containing the query sample or samples to compute the
+            Local Outlier Factor w.r.t. the training samples.
+
+        Returns
+        -------
+        opposite_lof_scores : array, shape (n_samples,)
+            The opposite of the Local Outlier Factor of each input sample.
+            The lower, the more abnormal.
+        """
+        self._check_is_fitted()
+        X_multivariate = self._transform_to_multivariate(X)
+
+        return self.estimator_.score_samples(X_multivariate)
diff --git a/skfda/_neighbors/regression.py b/skfda/_neighbors/regression.py
index 8300215ee..69878cbf3 100644
--- a/skfda/_neighbors/regression.py
+++ b/skfda/_neighbors/regression.py
@@ -79,7 +79,7 @@ class KNeighborsRegressor(NeighborsBase, NeighborsRegressorMixin,

        >>> neigh = KNeighborsRegressor()
        >>> neigh.fit(X_train, y_train)
-        KNeighborsRegressor(algorithm='auto', leaf_size=30,...)
+        KNeighborsRegressor(...)

        We can predict the modes of new samples

@@ -96,7 +96,7 @@
        We train the estimator with the functional response

        >>> neigh.fit(X_train, y_train)
-        KNeighborsRegressor(algorithm='auto', leaf_size=30,...)
+        KNeighborsRegressor(...)

        And predict the responses as in the first case.

@@ -111,6 +111,7 @@
    :class:`~skfda.ml.regression.RadiusNeighborsRegressor`
    :class:`~skfda.ml.clustering.NearestNeighbors`

+
    Notes
    -----
    See Nearest Neighbors in the sklearn online documentation for a discussion
@@ -248,7 +249,7 @@ class RadiusNeighborsRegressor(NeighborsBase, NeighborsRegressorMixin,

        >>> neigh = RadiusNeighborsRegressor(radius=0.2)
        >>> neigh.fit(X_train, y_train)
-        RadiusNeighborsRegressor(algorithm='auto', leaf_size=30,...)
+        RadiusNeighborsRegressor(...radius=0.2...)

        We can predict the modes of new samples

@@ -265,7 +266,7 @@
        We train the estimator with the functional response

        >>> neigh.fit(X_train, y_train)
-        RadiusNeighborsRegressor(algorithm='auto', leaf_size=30,...)
+        RadiusNeighborsRegressor(...radius=0.2...)

        And predict the responses as in the first case.

@@ -280,6 +281,7 @@
    :class:`~skfda.ml.regression.KNeighborsRegressor`
    :class:`~skfda.ml.clustering.NearestNeighbors`

+
    Notes
    -----
    See Nearest Neighbors in the sklearn online documentation for a discussion
diff --git a/skfda/_neighbors/unsupervised.py b/skfda/_neighbors/unsupervised.py
index 9e2fbee1a..dcc067ead 100644
--- a/skfda/_neighbors/unsupervised.py
+++ b/skfda/_neighbors/unsupervised.py
@@ -59,7 +59,7 @@ class NearestNeighbors(NeighborsBase, NeighborsMixin, KNeighborsMixin,

        >>> from skfda.ml.clustering import NearestNeighbors
        >>> neigh = NearestNeighbors(radius=.3)
        >>> neigh.fit(fd)
-        NearestNeighbors(algorithm='auto', leaf_size=30,...)
+        NearestNeighbors(...radius=0.3...)

        Now we can query the k-nearest neighbors.
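# A consolidated sketch of the novelty-detection workflow documented in the
# docstrings above (the dataset parameters are illustrative and follow the
# doctests of LocalOutlierFactor):

from skfda.datasets import make_sinusoidal_process
from skfda._neighbors.outlier import LocalOutlierFactor

fd_train = make_sinusoidal_process(n_samples=25, error_std=0,
                                   phase_std=0.1, random_state=9)
fd_test = make_sinusoidal_process(n_samples=2, error_std=0,
                                  phase_mean=0.5, random_state=5)

# Fit on clean data, then score new, unseen samples
lof = LocalOutlierFactor(novelty=True).fit(fd_train)

labels = lof.predict(fd_test)             # -1 outlier, +1 inlier
scores = lof.score_samples(fd_test)       # opposite LOF; lower = more abnormal
margins = lof.decision_function(fd_test)  # shifted scores; negative = outlier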
@@ -88,6 +88,7 @@ class NearestNeighbors(NeighborsBase, NeighborsMixin, KNeighborsMixin, :class:`~skfda.ml.regression.KNeighborsRegressor` :class:`~skfda.ml.regression.RadiusNeighborsRegressor` + Notes ----- See Nearest Neighbors in the sklearn online documentation for a discussion diff --git a/skfda/_utils/__init__.py b/skfda/_utils/__init__.py index 6d7d7e221..c58ce4023 100644 --- a/skfda/_utils/__init__.py +++ b/skfda/_utils/__init__.py @@ -1,4 +1,9 @@ from . import constants -from ._utils import (_list_of_arrays, _coordinate_list, - _check_estimator, parameter_aliases) +from ._utils import (_list_of_arrays, _cartesian_product, + _check_estimator, parameter_aliases, + _to_grid, check_is_univariate, + _same_domain, _to_array_maybe_ragged, + _reshape_eval_points, + _evaluate_grid, nquad_vec, + _FDataCallable, _pairwise_commutative) diff --git a/skfda/_utils/_utils.py b/skfda/_utils/_utils.py index 29142d6fb..b2332041a 100644 --- a/skfda/_utils/_utils.py +++ b/skfda/_utils/_utils.py @@ -1,12 +1,85 @@ """Module with generic methods""" import functools - import types +import scipy.integrate + import numpy as np +class _FDataCallable(): + + def __init__(self, function, *, domain_range, n_samples=1): + + self.function = function + self.domain_range = domain_range + self.n_samples = n_samples + + def __call__(self, *args, **kwargs): + + return self.function(*args, **kwargs) + + def __len__(self): + + return self.n_samples + + def __getitem__(self, key): + + def new_function(*args, **kwargs): + return self.function(*args, **kwargs)[key] + + tmp = np.empty(self.n_samples) + new_nsamples = len(tmp[key]) + + return _FDataCallable(new_function, + domain_range=self.domain_range, + n_samples=new_nsamples) + + +def check_is_univariate(fd): + """Checks if an FData is univariate and raises an error + + Args: + fd (:class:`~skfda.FData`): Functional object to check if is + univariate. + + Raises: + ValueError: If it is not univariate, i.e., `fd.dim_domain != 1` or + `fd.dim_codomain != 1`. + + """ + if fd.dim_domain != 1 or fd.dim_codomain != 1: + raise ValueError(f"The functional data must be univariate, i.e., " + + f"with dim_domain=1 " + + (f"" if fd.dim_domain == 1 + else f"(currently is {fd.dim_domain}) ") + + f"and dim_codomain=1 " + + (f"" if fd.dim_codomain == 1 else + f"(currently is {fd.dim_codomain})")) + + +def _to_grid(X, y, eval_points=None): + """Transform a pair of FDatas in grids to perform calculations.""" + + from .. import FDataGrid + x_is_grid = isinstance(X, FDataGrid) + y_is_grid = isinstance(y, FDataGrid) + + if eval_points is not None: + X = X.to_grid(eval_points) + y = y.to_grid(eval_points) + elif x_is_grid and not y_is_grid: + y = y.to_grid(X.sample_points[0]) + elif not x_is_grid and y_is_grid: + X = X.to_grid(y.sample_points[0]) + elif not x_is_grid and not y_is_grid: + X = X.to_grid() + y = y.to_grid() + + return X, y + + def _list_of_arrays(original_array): """Convert to a list of arrays. @@ -15,24 +88,62 @@ def _list_of_arrays(original_array): If the original list is two-dimensional (e.g. [[1, 2, 3], [4, 5]]), return a list containing other one-dimensional arrays (in this case - [array([1, 2, 3]), array([4, 5, 6])]). + [array([1, 2, 3]), array([4, 5])]). In any other case the behaviour is unespecified. 
""" - new_array = np.array([np.asarray(i) for i in - np.atleast_1d(original_array)]) - # Special case: Only one array, expand dimension - if len(new_array.shape) == 1 and not any(isinstance(s, np.ndarray) - for s in new_array): - new_array = np.atleast_2d(new_array) + unidimensional = False - return list(new_array) + try: + iter(original_array) + except TypeError: + original_array = [original_array] + try: + iter(original_array[0]) + except TypeError: + unidimensional = True -def _coordinate_list(axes): - """Convert a list with axes in a list with coordinates. + if unidimensional: + return [np.asarray(original_array)] + else: + return [np.asarray(i) for i in original_array] + + +def _to_array_maybe_ragged(array, *, row_shape=None): + """ + Convert to an array where each element may or may not be of equal length. + + If each element is of equal length the array is multidimensional. + Otherwise it is a ragged array. + + """ + def convert_row(row): + r = np.array(row) + + if row_shape is not None: + r = r.reshape(row_shape) + + return r + + array_list = [convert_row(a) for a in array] + shapes = [a.shape for a in array_list] + + if all(s == shapes[0] for s in shapes): + return np.array(array_list) + else: + res = np.empty(len(array_list), dtype=np.object_) + + for i, a in enumerate(array_list): + res[i] = a + + return res + + +def _cartesian_product(axes, flatten=True, return_shape=False): + """Computes the cartesian product of the axes. Computes the cartesian product of the axes and returns a numpy array of 1 dimension with all the possible combinations, for an arbitrary number of @@ -47,28 +158,244 @@ def _coordinate_list(axes): Examples: - >>> from skfda.representation._functional_data import _coordinate_list + >>> from skfda._utils import _cartesian_product >>> axes = [[0,1],[2,3]] - >>> _coordinate_list(axes) + >>> _cartesian_product(axes) array([[0, 2], [0, 3], [1, 2], [1, 3]]) >>> axes = [[0,1],[2,3],[4]] - >>> _coordinate_list(axes) + >>> _cartesian_product(axes) array([[0, 2, 4], [0, 3, 4], [1, 2, 4], [1, 3, 4]]) >>> axes = [[0,1]] - >>> _coordinate_list(axes) + >>> _cartesian_product(axes) array([[0], [1]]) """ - return np.vstack(list(map(np.ravel, np.meshgrid(*axes, indexing='ij')))).T + cartesian = np.stack(np.meshgrid(*axes, indexing='ij'), -1) + + shape = cartesian.shape + + if flatten: + cartesian = cartesian.reshape(-1, len(axes)) + + if return_shape: + return cartesian, shape + else: + return cartesian + + +def _same_domain(fd, fd2): + """Check if the domain range of two objects is the same.""" + return np.array_equal(fd.domain_range, fd2.domain_range) + + +def _reshape_eval_points(eval_points, *, aligned, n_samples, dim_domain): + """Convert and reshape the eval_points to ndarray with the + corresponding shape. + + Args: + eval_points (array_like): Evaluation points to be reshaped. + aligned (bool): Boolean flag. True if all the samples + will be evaluated at the same evaluation_points. + dim_domain (int): Dimension of the domain. + + Returns: + (np.ndarray): Numpy array with the eval_points, if + evaluation_aligned is True with shape `number of evaluation points` + x `dim_domain`. If the points are not aligned the shape of the + points will be `n_samples` x `number of evaluation points` + x `dim_domain`. 
+
+    """
+
+    if aligned:
+        eval_points = np.asarray(eval_points)
+    else:
+        eval_points = _to_array_maybe_ragged(
+            eval_points, row_shape=(-1, dim_domain))
+
+    # Case evaluation of a single value, i.e., f(0)
+    # Only allowed for aligned evaluation
+    if aligned and (eval_points.shape == (dim_domain,)
+                    or (eval_points.ndim == 0 and dim_domain == 1)):
+        eval_points = np.array([eval_points])
+
+    if aligned:  # Samples evaluated at same eval points
+
+        eval_points = eval_points.reshape((eval_points.shape[0],
+                                           dim_domain))
+
+    else:  # Different eval_points for each sample
+
+        if eval_points.shape[0] != n_samples:
+
+            raise ValueError(f"eval_points should be a list "
+                             f"of length {n_samples} with the "
+                             f"evaluation points for each sample.")
+
+    return eval_points
+
+
+def _one_grid_to_points(axes, *, dim_domain):
+    """
+    Convert a list of ndarrays, one per domain dimension, into points.
+
+    Also returns the shape containing the information of how each point
+    is formed.
+    """
+    axes = _list_of_arrays(axes)
+
+    if len(axes) != dim_domain:
+        raise ValueError(f"Length of axes should be "
+                         f"{dim_domain}")
+
+    cartesian, shape = _cartesian_product(axes, return_shape=True)
+
+    # Drop domain size dimension, as it is not needed to reshape the output
+    shape = shape[:-1]
+
+    return cartesian, shape
+
+
+def _evaluate_grid(axes, *, evaluate_method,
+                   n_samples, dim_domain, dim_codomain,
+                   extrapolation=None,
+                   aligned=True):
+    """Evaluate the functional object in the cartesian grid.
+
+    This method is called internally by :meth:`evaluate` when the argument
+    `grid` is True.
+
+    Evaluates the functional object in the grid generated by the cartesian
+    product of the axes. The length of the list of axes should be equal
+    to the domain dimension of the object.
+
+    If the list of axes has lengths :math:`n_1, n_2, ..., n_m`, where
+    :math:`m` is equal to the dimension of the domain, the result of the
+    evaluation in the grid will be a matrix with :math:`m+1` dimensions and
+    shape :math:`n_{samples} x n_1 x n_2 x ... x n_m`.
+
+    If `aligned` is false each sample is evaluated in a
+    different grid, and the list of axes should contain a list of axes for
+    each sample.
+
+    If the domain dimension is 1, the result of the evaluation will be the
+    same as calling :meth:`evaluate` without the grid option, but with
+    worse performance.
+
+    Args:
+        axes (array_like): List of axes to generate the grid where the
+            object will be evaluated.
+        extrapolation (str or Extrapolation, optional): Controls the
+            extrapolation mode for elements outside the domain range. By
+            default, the mode defined when the object was instantiated
+            is used.
+        aligned (bool, optional): If False evaluates each sample
+            in a different grid.
+
+    Returns:
+        (numpy.ndarray): Numpy array with dim_domain + 1 dimensions with
+            the result of the evaluation.
+
+    Raises:
+        ValueError: If the number of axes is different from the domain
+            dimension.
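+
+    Examples:
+        A sketch for a hypothetical functional object ``fd`` with a
+        two-dimensional domain (the public entry point is
+        ``fd(axes, grid=True)``, which delegates to this function):
+
+        >>> import numpy as np
+        >>> axes = [np.linspace(0, 1, 10), np.linspace(0, 1, 20)]
+        >>> values = fd(axes, grid=True)  # doctest: +SKIP
+        >>> # values.shape == (fd.n_samples, 10, 20, fd.dim_codomain)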
+ + """ + + # Compute intersection points and resulting shapes + if aligned: + + eval_points, shape = _one_grid_to_points(axes, dim_domain=dim_domain) + + else: + + axes = list(axes) + + if len(axes) != n_samples: + raise ValueError("Should be provided a list of axis per " + "sample") + + eval_points, shape = zip( + *[_one_grid_to_points(a, dim_domain=dim_domain) for a in axes]) + + eval_points = np.array(eval_points) + + # Evaluate the points + res = evaluate_method(eval_points, + extrapolation=extrapolation, + aligned=aligned) + + # Reshape the result + if aligned: + + res = res.reshape([n_samples] + + list(shape) + [dim_codomain]) + + else: + + res = _to_array_maybe_ragged([ + r.reshape(list(s) + [dim_codomain]) + for r, s in zip(res, shape)]) + + return res + + +def nquad_vec(func, ranges): + + initial_depth = len(ranges) - 1 + + def integrate(*args, depth): + + if depth == 0: + f = functools.partial(func, *args) + else: + f = functools.partial(integrate, *args, depth=depth - 1) + + return scipy.integrate.quad_vec(f, *ranges[initial_depth - depth])[0] + + return integrate(depth=initial_depth) + + +def _pairwise_commutative(function, arg1, arg2=None, **kwargs): + """ + Compute pairwise a commutative function. + + """ + if arg2 is None: + + indices = np.triu_indices(len(arg1)) + + matrix = np.empty((len(arg1), len(arg1))) + + triang_vec = function( + arg1[indices[0]], arg1[indices[1]], + **kwargs) + + # Set upper matrix + matrix[indices] = triang_vec + + # Set lower matrix + matrix[(indices[1], indices[0])] = triang_vec + + return matrix + + else: + + indices = np.indices((len(arg1), len(arg2))) + + return function( + arg1[indices[0].ravel()], arg2[indices[1].ravel()], + **kwargs).reshape( + (len(arg1), len(arg2))) def parameter_aliases(**alias_assignments): diff --git a/skfda/datasets/__init__.py b/skfda/datasets/__init__.py index ec3dcc9ab..48ead6663 100644 --- a/skfda/datasets/__init__.py +++ b/skfda/datasets/__init__.py @@ -2,7 +2,8 @@ fetch_ucr, fetch_phoneme, fetch_growth, fetch_tecator, fetch_medflies, - fetch_weather, fetch_aemet) + fetch_weather, fetch_aemet, + fetch_octane, fetch_gait) from ._samples_generators import (make_gaussian_process, make_sinusoidal_process, make_multimodal_samples, diff --git a/skfda/datasets/_real_datasets.py b/skfda/datasets/_real_datasets.py index ca5767837..2b3f362a2 100644 --- a/skfda/datasets/_real_datasets.py +++ b/skfda/datasets/_real_datasets.py @@ -23,8 +23,9 @@ def fdata_constructor(obj, attrs): return FDataGrid(data_matrix=obj["data"], sample_points=obj["argvals"], domain_range=obj["rangeval"], - dataset_label=names['main'][0], - axes_labels=[names['xlab'][0], names['ylab'][0]]) + dataset_name=names['main'][0], + argument_names=(names['xlab'][0],), + coordinate_names=(names['ylab'][0],)) def functional_constructor(obj, attrs): @@ -51,8 +52,9 @@ def functional_constructor(obj, attrs): return (FDataGrid(data_matrix=data_matrix, sample_points=sample_points, domain_range=(args_init, args_end), - dataset_label=name[0], - axes_labels=[args_label[0], values_label[0]]), target) + dataset_name=name[0], + argument_names=(args_label[0],), + coordinate_names=(values_label[0],)), target) def fetch_cran(name, package_name, *, converter=None, @@ -218,9 +220,11 @@ def fetch_phoneme(return_X_y: bool = False): speaker = data["speaker"].values curves = FDataGrid(data_matrix=curve_data.values, - sample_points=range(0, 256), - dataset_label="Phoneme", - axes_labels=["frequency", "log-periodogram"]) + sample_points=np.linspace(0, 8, 256), + domain_range=[0, 8], 
+ dataset_name="Phoneme", + argument_names=("frequency (kHz)",), + coordinate_names=("log-periodogram",)) if return_X_y: return curves, sound @@ -272,8 +276,9 @@ def fetch_growth(return_X_y: bool = False): curves = FDataGrid(data_matrix=np.concatenate((males, females), axis=0), sample_points=ages, - dataset_label="Berkeley Growth Study", - axes_labels=["age", "height"]) + dataset_name="Berkeley Growth Study", + argument_names=("age",), + coordinate_names=("height",)) sex = np.array([0] * males.shape[0] + [1] * females.shape[0]) @@ -466,9 +471,10 @@ def fetch_weather(return_X_y: bool = False): curves = FDataGrid(data_matrix=temp_prec_daily, sample_points=range(1, 366), - dataset_label="Canadian Weather", - axes_labels=["day", "temperature (ºC)", - "precipitation (mm.)"]) + dataset_name="Canadian Weather", + argument_names=("day",), + coordinate_names=("temperature (ºC)", + "precipitation (mm.)")) target_names, target = np.unique(data["region"], return_inverse=True) @@ -479,6 +485,16 @@ def fetch_weather(return_X_y: bool = False): "target": target, "target_names": target_names, "target_feature_names": ["region"], + "meta": list(zip(data["place"], data["province"], + np.asarray(data["coordinates"])[0], + np.asarray(data["coordinates"])[1], + data["geogindex"], + np.asarray(data["monthlyTemp"]), + np.asarray(data["monthlyPrecip"]))), + + "meta_names": ["place", "province", "latitude", "longitude", + "ind", "monthlyTemp", "monthlyPrecip"], + "meta_feature_names": ["location"], "DESCR": DESCR} @@ -513,17 +529,25 @@ def fetch_aemet(return_X_y: bool = False): data = raw_dataset["aemet"] - fd_temp = data["temp"] - fd_logprec = data["logprec"] - fd_wind = data["wind.speed"] + data_matrix = np.empty((73, 365, 3)) + data_matrix[:, :, 0] = data["temp"].data_matrix[:, :, 0] + data_matrix[:, :, 1] = data["logprec"].data_matrix[:, :, 0] + data_matrix[:, :, 2] = data["wind.speed"].data_matrix[:, :, 0] + + curves = data["temp"].copy(data_matrix=data_matrix, + dataset_name="AEMET", + argument_names=("day",), + coordinate_names=("temperature (ºC)", + "logprecipitation", + "wind speed (m/s)")) if return_X_y: - return fd_temp, fd_logprec, fd_wind + return curves, None else: - return {"data": (fd_temp, fd_logprec, fd_wind), + return {"data": curves, "meta": np.asarray(data["df"])[:, np.array([0, 1, 2, 3, 6, 7])], - "meta_names": ["ind", "name", "province", "altitude", + "meta_names": ["ind", "place", "province", "altitude", "longitude", "latitude"], "meta_feature_names": ["location"], "DESCR": DESCR} @@ -531,3 +555,125 @@ def fetch_aemet(return_X_y: bool = False): if hasattr(fetch_aemet, "__doc__"): # docstrings can be stripped off fetch_aemet.__doc__ += _aemet_descr + _param_descr + + +_octane_descr = """ + Near infrared (NIR) spectra of gasoline samples, with wavelengths ranging + from 1102nm to 1552nm with measurements every two nm. + This dataset contains six outliers to which ethanol was added, which is + required in some states. See [RDEH2006]_ and [HuRS2015]_ for further + details. + + The data is labeled according to this different composition. + + Source: + Esbensen K. (2001). Multivariate data analysis in practice. 5th edn. + Camo Software, Trondheim, Norway. + + References: + .. [RDEH2006] Rousseeuw, Peter & Debruyne, Michiel & Engelen, Sanne & + Hubert, Mia. (2006). Robustness and Outlier Detection in + Chemometrics. Critical Reviews in Analytical Chemistry. 36. + 221-242. 10.1080/10408340600969403. + .. [HuRS2015] Hubert, Mia & Rousseeuw, Peter & Segaert, Pieter. (2015). 
+        Multivariate functional outlier detection. Statistical Methods and
+        Applications. 24. 177-202. 10.1007/s10260-015-0297-8.
+
+"""
+
+
+def fetch_octane(return_X_y: bool = False):
+    """Load near infrared spectra of gasoline samples.
+
+    This function fetches the octane dataset from the R package 'mrfDepth'
+    from CRAN.
+
+    """
+    DESCR = _octane_descr
+
+    # octane file from mrfDepth R package
+    raw_dataset = fetch_cran("octane", "mrfDepth", version="1.0.11")
+    data = raw_dataset['octane'][..., 0].T
+
+    # The R package only stores the values of the curves, but the paper
+    # describes the rest of the data. According to [RDEH2006], Section 5.4:
+
+    # "wavelengths ranging from 1102nm to 1552nm with measurements every two
+    # nm."
+    sample_points = np.linspace(1102, 1552, 226)
+
+    # "The octane data set contains six outliers (25, 26, 36–39) to which
+    # alcohol was added".
+    target = np.zeros(len(data), dtype=int)
+    target[24] = target[25] = target[35:39] = 1  # Label the outliers with 1
+
+    curves = FDataGrid(data,
+                       sample_points=sample_points,
+                       dataset_name="Octane",
+                       argument_names=("wavelength (nm)",),
+                       coordinate_names=("absorbances",))
+
+    if return_X_y:
+        return curves, target
+    else:
+        return {"data": curves,
+                "target": target,
+                "target_names": ['inlier', 'outlier'],
+                "DESCR": DESCR}
+
+
+if hasattr(fetch_octane, "__doc__"):  # docstrings can be stripped off
+    fetch_octane.__doc__ += _octane_descr + _param_descr
+
+_gait_descr = """
+    Angles formed by the hip and knee of each of 39 boys over each gait
+    cycle.
+
+    References:
+        Ramsay, James O., and Silverman, Bernard W. (2006),
+        Functional Data Analysis, 2nd ed., Springer, New York.
+
+        Ramsay, James O., and Silverman, Bernard W. (2002),
+        Applied Functional Data Analysis, Springer, New York.
+"""
+
+
+def fetch_gait(return_X_y: bool = False):
+    """
+    Load the GAIT dataset.
+
+    The data is obtained from the R package 'fda' from CRAN.
+
+    """
+    DESCR = _gait_descr
+
+    raw_data = _fetch_fda("gait")
+
+    data = raw_data["gait"]
+
+    data_matrix = np.asarray(data)
+    data_matrix = np.transpose(data_matrix, axes=(1, 0, 2))
+    sample_points = np.asarray(data.coords.get('dim_0'), np.float64)
+
+    curves = FDataGrid(data_matrix=data_matrix,
+                       sample_points=sample_points,
+                       dataset_name="GAIT",
+                       argument_names=("Time (proportion of gait cycle)",),
+                       coordinate_names=("Hip angle (degrees)",
+                                         "Knee angle (degrees)"))
+
+    meta_names, meta = np.unique(np.asarray(data.coords.get('dim_1')),
+                                 return_inverse=True)
+
+    if return_X_y:
+        return curves, None
+    else:
+        return {"data": curves,
+                "meta": meta,
+                "meta_names": meta_names,
+                "meta_feature_names": ["boys"],
+                "DESCR": DESCR}
+
+
+if hasattr(fetch_gait, "__doc__"):  # docstrings can be stripped off
+    fetch_gait.__doc__ += _gait_descr + _param_descr
diff --git a/skfda/datasets/_samples_generators.py b/skfda/datasets/_samples_generators.py
index ac24b104d..059cc3489 100644
--- a/skfda/datasets/_samples_generators.py
+++ b/skfda/datasets/_samples_generators.py
@@ -7,7 +7,7 @@
 from .. import FDataGrid
 from ..misc import covariances
 from ..preprocessing.registration import normalize_warping
-from ..representation.interpolation import SplineInterpolator
+from ..representation.interpolation import SplineInterpolation
 
 
 def make_gaussian_process(n_samples: int = 100, n_features: int = 100, *,
@@ -18,7 +18,7 @@ def make_gaussian_process(n_samples: int = 100, n_features: int = 100, *,
 
     Args:
         n_samples: The total number of trajectories.
-        n_features: The total number of trajectories.
+ n_features: The total number of features (points of evaluation). start: Starting point of the trajectories. stop: Ending point of the trajectories. mean: The mean function of the process. Can be a callable accepting @@ -348,7 +348,7 @@ def make_random_warping(n_samples: int = 15, n_features: int = 100, *, axis=0) warping = FDataGrid(data_matrix.T, sample_points=time[:, 0]) warping = normalize_warping(warping, domain_range=(start, stop)) - warping.interpolator = SplineInterpolator(interpolation_order=3, - monotone=True) + warping.interpolation = SplineInterpolation(interpolation_order=3, + monotone=True) return warping diff --git a/skfda/exploratory/depth/multivariate.py b/skfda/exploratory/depth/multivariate.py index 2d12cc6d4..2fb9f6a2e 100644 --- a/skfda/exploratory/depth/multivariate.py +++ b/skfda/exploratory/depth/multivariate.py @@ -15,7 +15,7 @@ def _stagel_donoho_outlyingness(X, *, pointwise=False): m = X.data_matrix[..., 0] return (np.abs(m - np.median(m, axis=0)) / - scipy.stats.median_absolute_deviation(m, axis=0)) + scipy.stats.median_abs_deviation(m, axis=0, scale=1 / 1.4826)) else: raise NotImplementedError("Only implemented for one dimension") diff --git a/skfda/exploratory/outliers/_iqr.py b/skfda/exploratory/outliers/_iqr.py index d5a7ac6da..d48d41cf1 100644 --- a/skfda/exploratory/outliers/_iqr.py +++ b/skfda/exploratory/outliers/_iqr.py @@ -5,10 +5,10 @@ class IQROutlierDetector(BaseEstimator, OutlierMixin): - r"""Outlier detector using the interquartilic range. + r"""Outlier detector using the interquartile range. Detects as outliers functions that have one or more points outside - ``factor`` times the interquartilic range plus or minus the central + ``factor`` times the interquartile range plus or minus the central envelope, given a functional depth measure. This corresponds to the points selected as outliers by the functional boxplot. diff --git a/skfda/exploratory/stats/__init__.py b/skfda/exploratory/stats/__init__.py index 611d8fbbe..d81e0ade3 100644 --- a/skfda/exploratory/stats/__init__.py +++ b/skfda/exploratory/stats/__init__.py @@ -1 +1 @@ -from ._stats import mean, var, gmean, cov +from ._stats import mean, var, gmean, cov, depth_based_median, trim_mean diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index 55dfd7c2c..d84fece80 100644 --- a/skfda/exploratory/stats/_stats.py +++ b/skfda/exploratory/stats/_stats.py @@ -1,6 +1,6 @@ """Functional data descriptive statistics. """ - +from ..depth import modified_band_depth def mean(fdata, weights=None): """Compute the mean of all the samples in a FData object. @@ -67,3 +67,66 @@ def cov(fdatagrid): """ return fdatagrid.cov() + + +def depth_based_median(fdatagrid, depth_method=modified_band_depth): + """Compute the median based on a depth measure. + + The depth based median is the deepest curve given a certain + depth measure + + Args: + fdatagrid (FDataGrid): Object containing different samples of a + functional variable. + depth_method (:ref:`depth measure `, optional): + Method used to order the data. Defaults to :func:`modified + band depth `. + + Returns: + FDataGrid: object containing the computed depth_based median. + + """ + depth = depth_method(fdatagrid) + indices_descending_depth = (-depth).argsort(axis=0) + + # The median is the deepest curve + return fdatagrid[indices_descending_depth[0]] + + +def trim_mean(fdatagrid, + proportiontocut, + depth_method=modified_band_depth): + """Compute the trimmed means based on a depth measure. 
+
+    The trimmed mean consists of computing the mean function after removing
+    a proportion of the least deep curves. That is, we first remove the
+    least deep curves and then we compute the mean as usual.
+
+    Note that in scipy the leftmost and rightmost proportiontocut data are
+    removed. In this case, as we order the data by the depth, we only remove
+    those that have the least depth values.
+
+    Args:
+        fdatagrid (FDataGrid): Object containing different samples of a
+            functional variable.
+        proportiontocut (float): indicates the proportion of functions to
+            remove. It is not easy to determine as it varies from dataset to
+            dataset.
+        depth_method (:ref:`depth measure `, optional):
+            Method used to order the data. Defaults to :func:`modified
+            band depth <skfda.exploratory.depth.modified_band_depth>`.
+
+    Returns:
+        FDataGrid: object containing the computed trimmed mean.
+
+    """
+    n_samples_to_keep = (fdatagrid.n_samples -
+                         int(fdatagrid.n_samples * proportiontocut))
+
+    # Compute the depth of each curve and store the indices in
+    # descending order
+    depth = depth_method(fdatagrid)
+    indices_descending_depth = (-depth).argsort(axis=0)
+
+    trimmed_curves = fdatagrid[indices_descending_depth[:n_samples_to_keep]]
+
+    return trimmed_curves.mean()
diff --git a/skfda/exploratory/visualization/__init__.py b/skfda/exploratory/visualization/__init__.py
index 8f135ae5f..838c653f2 100644
--- a/skfda/exploratory/visualization/__init__.py
+++ b/skfda/exploratory/visualization/__init__.py
@@ -1,3 +1,4 @@
 from . import clustering, representation
 from ._boxplot import Boxplot, SurfaceBoxplot
 from ._magnitude_shape_plot import MagnitudeShapePlot
+from .fpca import plot_fpca_perturbation_graphs
diff --git a/skfda/exploratory/visualization/_boxplot.py b/skfda/exploratory/visualization/_boxplot.py
index 67bf52609..90e1f0ab9 100644
--- a/skfda/exploratory/visualization/_boxplot.py
+++ b/skfda/exploratory/visualization/_boxplot.py
@@ -78,6 +78,8 @@ def plot(self, chart=None, *, fig=None, axes=None,
 
     def _repr_svg_(self):
         fig = self.plot()
+        plt.close(fig)
+
         return _figure_to_svg(fig)
 
 
@@ -96,7 +98,21 @@ class Boxplot(FDataBoxplot):
     detected in a functional boxplot by the 1.5 times the 50% central region
     empirical rule, analogous to the rule for classical boxplots.
 
+    Args:
+
+        fdatagrid (FDataGrid): Object containing the data.
+        depth_method (:ref:`depth measure `, optional):
+            Method used to order the data. Defaults to :func:`modified
+            band depth
+            <skfda.exploratory.depth.modified_band_depth>`.
+        prob (list of float, optional): List with float numbers (in the
+            range from 0 to 1) that indicate which central regions to
+            represent.
+            Defaults to [0.5] which represents the 50% central region.
+        factor (double): Number used to calculate the outlying envelope.
+
     Attributes:
+
         fdatagrid (FDataGrid): Object containing the data.
         median (array, (fdatagrid.dim_codomain, nsample_points)): contains
             the median/s.
@@ -118,17 +134,35 @@ class Boxplot(FDataBoxplot):
         outside the box is plotted. If True, complete outling curves are
         plotted.
 
-    Example:
+    Representation in a Jupyter notebook:
+
+    .. jupyter-execute::
+
+        from skfda.datasets import make_gaussian_process
+        from skfda.misc.covariances import Exponential
+        from skfda.exploratory.visualization import Boxplot
+
+        fd = make_gaussian_process(
+                n_samples=20, cov=Exponential(), random_state=3)
+
+        Boxplot(fd)
+
+
+    Examples:
+
         Function :math:`f : \mathbb{R}\longmapsto\mathbb{R}`.
 
         >>> from skfda import FDataGrid
+        >>> from skfda.exploratory.visualization import Boxplot
+        >>>
         >>> data_matrix = [[1, 1, 2, 3, 2.5, 2],
         ...                [0.5, 0.5, 1, 2, 1.5, 1],
         ...
[-1, -1, -0.5, 1, 1, 0.5], ... [-0.5, -0.5, -0.5, -1, -1, -1]] >>> sample_points = [0, 2, 4, 6, 8, 10] - >>> fd = FDataGrid(data_matrix, sample_points, dataset_label="dataset", - ... axes_labels=["x_label", "y_label"]) + >>> fd = FDataGrid(data_matrix, sample_points, dataset_name="dataset", + ... argument_names=["x_label"], + ... coordinate_names=["y_label"]) >>> Boxplot(fd) Boxplot( FDataGrid=FDataGrid( @@ -158,12 +192,10 @@ class Boxplot(FDataBoxplot): [-1. ]]]), sample_points=[array([ 0, 2, 4, 6, 8, 10])], domain_range=array([[ 0, 10]]), - dataset_label='dataset', - axes_labels=['x_label', 'y_label'], - extrapolation=None, - interpolator=SplineInterpolator(interpolation_order=1, - smoothness_parameter=0.0, monotone=False), - keepdims=False), + dataset_name='dataset', + argument_names=('x_label',), + coordinate_names=('y_label',), + ...), median=array([[ 0.5], [ 0.5], [ 1. ], @@ -205,6 +237,13 @@ class Boxplot(FDataBoxplot): [ 1. ]]))], outliers=array([ True, False, False, True])) + References: + + Sun, Y., & Genton, M. G. (2011). Functional Boxplots. Journal of + Computational and Graphical Statistics, 20(2), 316-334. + https://doi.org/10.1198/jcgs.2011.09224 + + """ def __init__(self, fdatagrid, depth_method=modified_band_depth, prob=[0.5], @@ -422,7 +461,20 @@ class SurfaceBoxplot(FDataBoxplot): 50% central region, the median curve, and the maximum non-outlying envelope. + Args: + + fdatagrid (FDataGrid): Object containing the data. + method (:ref:`depth measure `, optional): Method + used to order the data. Defaults to :func:`modified band depth + `. + prob (list of float, optional): List with float numbers (in the + range from 1 to 0) that indicate which central regions to + represent. + Defaults to [0.5] which represents the 50% central region. + factor (double): Number used to calculate the outlying envelope. + Attributes: + fdatagrid (FDataGrid): Object containing the data. median (array, (fdatagrid.dim_codomain, lx, ly)): contains the median/s. @@ -436,7 +488,8 @@ class SurfaceBoxplot(FDataBoxplot): envelope. outcol (string): Color of the outlying envelope. - Example: + Examples: + Function :math:`f : \mathbb{R^2}\longmapsto\mathbb{R}`. >>> from skfda import FDataGrid @@ -445,8 +498,9 @@ class SurfaceBoxplot(FDataBoxplot): ... [[[2], [0.5], [2]], ... [[3], [0.6], [3]]]] >>> sample_points = [[2, 4], [3, 6, 8]] - >>> fd = FDataGrid(data_matrix, sample_points, dataset_label="dataset", - ... axes_labels=["x1_label", "x2_label", "y_label"]) + >>> fd = FDataGrid(data_matrix, sample_points, dataset_name="dataset", + ... argument_names=["x1_label", "x2_label"], + ... coordinate_names=["y_label"]) >>> SurfaceBoxplot(fd) SurfaceBoxplot( FDataGrid=FDataGrid( @@ -465,12 +519,11 @@ class SurfaceBoxplot(FDataBoxplot): sample_points=[array([2, 4]), array([3, 6, 8])], domain_range=array([[2, 4], [3, 8]]), - dataset_label='dataset', - axes_labels=['x1_label', 'x2_label', 'y_label'], + dataset_name='dataset', + argument_names=('x1_label', 'x2_label'), + coordinate_names=('y_label',), extrapolation=None, - interpolator=SplineInterpolator(interpolation_order=1, - smoothness_parameter=0.0, monotone=False), - keepdims=False), + ...), median=array([[[ 1. ], [ 0.7], [ 1. ]], @@ -502,6 +555,12 @@ class SurfaceBoxplot(FDataBoxplot): [ 0.4], [ 5. ]]]))) + References: + + Sun, Y., & Genton, M. G. (2011). Functional Boxplots. Journal of + Computational and Graphical Statistics, 20(2), 316-334. 
+        https://doi.org/10.1198/jcgs.2011.09224
+
     """
 
     def __init__(self, fdatagrid, method=modified_band_depth, factor=1.5):
diff --git a/skfda/exploratory/visualization/_magnitude_shape_plot.py b/skfda/exploratory/visualization/_magnitude_shape_plot.py
index 345e6457f..5b21f60f4 100644
--- a/skfda/exploratory/visualization/_magnitude_shape_plot.py
+++ b/skfda/exploratory/visualization/_magnitude_shape_plot.py
@@ -35,7 +35,39 @@ class MagnitudeShapePlot:
     The outliers are detected using an instance of
     :class:`DirectionalOutlierDetector`.
 
+    Args:
+
+        fdatagrid (FDataGrid): Object containing the data.
+        depth_method (:ref:`depth measure `, optional):
+            Method used to order the data. Defaults to :func:`projection
+            depth <skfda.exploratory.depth.projection_depth>`.
+        pointwise_weights (array_like, optional): an array containing the
+            weights of each point of discretisation where values have
+            been recorded.
+        alpha (float, optional): Denotes the quantile to choose the cutoff
+            value for detecting outliers. Defaults to 0.993, which is used
+            in the classical boxplot.
+        assume_centered (boolean, optional): If True, the support of the
+            robust location and the covariance estimates is computed, and a
+            covariance estimate is recomputed from it, without centering
+            the data. Useful to work with data whose mean is significantly
+            equal to zero but is not exactly zero. If False, default value,
+            the robust location and covariance are directly computed with
+            the FastMCD algorithm without additional treatment.
+        support_fraction (float, 0 < support_fraction < 1, optional): The
+            proportion of points to be included in the support of the
+            raw MCD estimate.
+            Default is None, which implies that the minimum value of
+            support_fraction will be used within the algorithm:
+            [n_sample + n_features + 1] / 2
+        random_state (int, RandomState instance or None, optional): If int,
+            random_state is the seed used by the random number generator;
+            If RandomState instance, random_state is the random number
+            generator; If None, the random number generator is the
+            RandomState instance used by np.random. By default, it is 0.
+
     Attributes:
+
         fdatagrid (FDataGrid): Object to be visualized.
         depth_method (:ref:`depth measure `, optional):
             Method used to order the data. Defaults to :func:`modified band depth
@@ -63,6 +95,19 @@ class MagnitudeShapePlot:
             variation of the directional outlyingness.
         title (string, optional): Title of the plot. defaults to 'MS-Plot'.
 
+    Representation in a Jupyter notebook:
+
+    .. jupyter-execute::
+
+        from skfda.datasets import make_gaussian_process
+        from skfda.misc.covariances import Exponential
+        from skfda.exploratory.visualization import MagnitudeShapePlot
+
+        fd = make_gaussian_process(
+                n_samples=20, cov=Exponential(), random_state=1)
+
+        MagnitudeShapePlot(fd)
+
     Example:
 
         >>> import skfda
@@ -102,12 +147,7 @@ class MagnitudeShapePlot:
                [-1. ]]]),
             sample_points=[array([ 0,  2,  4,  6,  8, 10])],
             domain_range=array([[ 0, 10]]),
-            dataset_label=None,
-            axes_labels=None,
-            extrapolation=None,
-            interpolator=SplineInterpolator(interpolation_order=1,
-            smoothness_parameter=0.0, monotone=False),
-            keepdims=False),
+            ...),
         depth_method=projection_depth,
         pointwise_weights=None,
         alpha=0.993,
@@ -122,6 +162,14 @@ class MagnitudeShapePlot:
         xlabel='MO',
         ylabel='VO',
         title='MS-Plot')
+
+    References:
+
+        Dai, W., & Genton, M. G. (2018). Multivariate Functional Data
+        Visualization and Outlier Detection. Journal of Computational
+        and Graphical Statistics, 27(4), 923-934.
+        https://doi.org/10.1080/10618600.2018.1473781
+
     """
 
     def __init__(self, fdatagrid, **kwargs):
@@ -283,4 +331,5 @@ def __repr__(self):
 
     def _repr_svg_(self):
         fig = self.plot()
+        plt.close(fig)
         return _figure_to_svg(fig)
diff --git a/skfda/exploratory/visualization/_utils.py b/skfda/exploratory/visualization/_utils.py
index 9fd0d6198..021f11832 100644
--- a/skfda/exploratory/visualization/_utils.py
+++ b/skfda/exploratory/visualization/_utils.py
@@ -1,5 +1,6 @@
 import io
 import math
+import re
 
 import matplotlib.axes
 import matplotlib.backends.backend_svg
@@ -7,6 +8,14 @@
 import matplotlib.pyplot as plt
 
 
+non_close_text = '[^>]*?'
+svg_width_regex = re.compile(
+    f'(<svg {non_close_text}width="){non_close_text}("{non_close_text}>)')
+svg_width_replacement = r'\g<1>100%\g<2>'
+svg_height_regex = re.compile(
+    f'(<svg {non_close_text})height="{non_close_text}" ({non_close_text}>)')
+svg_height_replacement = r'\g<1>\g<2>'
+
 
 def _create_figure():
     """Create figure using the default backend."""
@@ -24,7 +33,14 @@ def _figure_to_svg(figure):
     figure.savefig(output, format='svg')
     figure.set_canvas(old_canvas)
     data = output.getvalue()
-    return data.decode('utf-8')
+    decoded_data = data.decode('utf-8')
+
+    new_data = svg_width_regex.sub(
+        svg_width_replacement, decoded_data, count=1)
+    new_data = svg_height_regex.sub(
+        svg_height_replacement, new_data, count=1)
+
+    return new_data
 
 
 def _get_figure_and_axes(chart=None, fig=None, axes=None):
@@ -192,8 +208,8 @@ def _set_labels(fdata, fig=None, axes=None, patches=None):
     """
 
     # Dataset name
-    if fdata.dataset_label is not None:
-        fig.suptitle(fdata.dataset_label)
+    if fdata.dataset_name is not None:
+        fig.suptitle(fdata.dataset_name)
 
     # Legend
     if patches is not None:
@@ -202,21 +218,20 @@ def _set_labels(fdata, fig=None, axes=None, patches=None):
         axes[0].legend(handles=patches)
 
     # Axis labels
-    if fdata.axes_labels is not None:
-        if axes[0].name == '3d':
-            for i in range(fdata.dim_codomain):
-                if fdata.axes_labels[0] is not None:
-                    axes[i].set_xlabel(fdata.axes_labels[0])
-                if fdata.axes_labels[1] is not None:
-                    axes[i].set_ylabel(fdata.axes_labels[1])
-                if fdata.axes_labels[i + 2] is not None:
-                    axes[i].set_zlabel(fdata.axes_labels[i + 2])
-        else:
-            for i in range(fdata.dim_codomain):
-                if fdata.axes_labels[0] is not None:
-                    axes[i].set_xlabel(fdata.axes_labels[0])
-                if fdata.axes_labels[i + 1] is not None:
-                    axes[i].set_ylabel(fdata.axes_labels[i + 1])
+    if axes[0].name == '3d':
+        for i in range(fdata.dim_codomain):
+            if fdata.argument_names[0] is not None:
+                axes[i].set_xlabel(fdata.argument_names[0])
+            if fdata.argument_names[1] is not None:
+                axes[i].set_ylabel(fdata.argument_names[1])
+            if fdata.coordinate_names[i] is not None:
+                axes[i].set_zlabel(fdata.coordinate_names[i])
+    else:
+        for i in range(fdata.dim_codomain):
+            if fdata.argument_names[0] is not None:
+                axes[i].set_xlabel(fdata.argument_names[0])
+            if fdata.coordinate_names[i] is not None:
+                axes[i].set_ylabel(fdata.coordinate_names[i])
 
 
 def _change_luminosity(color, amount=0.5):
diff --git a/skfda/exploratory/visualization/clustering.py b/skfda/exploratory/visualization/clustering.py
index a266da294..c945e02ef 100644
--- a/skfda/exploratory/visualization/clustering.py
+++ b/skfda/exploratory/visualization/clustering.py
@@ -4,11 +4,12 @@
 
 from mpldatacursor import datacursor
 from sklearn.exceptions import NotFittedError
+from sklearn.utils.validation import check_is_fitted
 
 import matplotlib.patches as mpatches
 import matplotlib.pyplot as plt
 import numpy as np
 
-from ...ml.clustering.base_kmeans import FuzzyKMeans
+from ...ml.clustering import FuzzyCMeans
 from ._utils import (_darken, _get_figure_and_axes,
                     _set_figure_layout_for_fdata, _set_figure_layout,
                     _set_labels)
@@ -249,12 +250,12 @@ def plot_clusters(estimator, X, chart=None, fig=None, axes=None,
     """
     _check_if_estimator(estimator)
     try:
-        estimator._check_is_fitted()
+        check_is_fitted(estimator)
         estimator._check_test_data(X)
     except NotFittedError:
         estimator.fit(X)
 
-    if isinstance(estimator, FuzzyKMeans):
+    if isinstance(estimator, FuzzyCMeans):
         labels = np.argmax(estimator.labels_, axis=1)
     else:
         labels = estimator.labels_
@@ -355,11 +356,11 @@ def plot_cluster_lines(estimator, X, chart=None, fig=None, axes=None,
         fdata = X
     _check_if_estimator(estimator)
 
-    if not isinstance(estimator, FuzzyKMeans):
-        raise ValueError("The estimator must be a FuzzyKMeans object.")
+    if not isinstance(estimator, FuzzyCMeans):
+        raise ValueError("The estimator must be a FuzzyCMeans object.")
 
     try:
-        estimator._check_is_fitted()
+        check_is_fitted(estimator)
         estimator._check_test_data(X)
     except NotFittedError:
         estimator.fit(X)
@@ -456,11 +457,11 @@ def plot_cluster_bars(estimator, X, chart=None, fig=None, axes=None, sort=-1,
         fdata = X
     _check_if_estimator(estimator)
 
-    if not isinstance(estimator, FuzzyKMeans):
-        raise ValueError("The estimator must be a FuzzyKMeans object.")
+    if not isinstance(estimator, FuzzyCMeans):
+        raise ValueError("The estimator must be a FuzzyCMeans object.")
 
     try:
-        estimator._check_is_fitted()
+        check_is_fitted(estimator)
         estimator._check_test_data(X)
     except NotFittedError:
         estimator.fit(X)
diff --git a/skfda/exploratory/visualization/fpca.py b/skfda/exploratory/visualization/fpca.py
new file mode 100644
index 000000000..5edbc7fa8
--- /dev/null
+++ b/skfda/exploratory/visualization/fpca.py
@@ -0,0 +1,79 @@
+from matplotlib import pyplot as plt
+from skfda.representation import FDataGrid, FDataBasis, FData
+from skfda.exploratory.visualization._utils import _get_figure_and_axes
+
+
+def plot_fpca_perturbation_graphs(mean, components, multiple,
+                                  chart=None,
+                                  fig=None,
+                                  axes=None,
+                                  **kwargs):
+    """Plots the perturbation graphs for the principal components.
+
+    The perturbations are defined as variations over the mean. Adding a
+    multiple of the principal component curve to the mean function results
+    in the positive perturbation and subtracting a multiple of the
+    principal component curve results in the negative perturbation. For
+    each principal component curve passed, a subplot with the mean and the
+    perturbations is shown.
+
+    Args:
+        mean (FDataGrid or FDataBasis):
+            the functional data object containing the mean function.
+            If len(mean) > 1, the mean is computed.
+        components (FDataGrid or FDataBasis):
+            the principal components
+        multiple (float):
+            multiple of the principal component curve to be added or
+            subtracted.
+        fig (figure object, optional):
+            figure over which the graph is plotted. If not specified it will
+            be initialized
+        axes (axes object, optional): axes over which the graph is plotted.
+            If None, see param fig.
+
+    Returns:
+        (matplotlib.figure.Figure): figure containing the mean function
+        and the positive and negative perturbations for each principal
+        component.
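+
+    Examples:
+        A minimal usage sketch (the FPCA import path is an assumption and
+        may differ between versions):
+
+        >>> from skfda.datasets import fetch_growth
+        >>> from skfda.preprocessing.dim_reduction.projection import (
+        ...     FPCA)  # doctest: +SKIP
+        >>> fd = fetch_growth()["data"]  # doctest: +SKIP
+        >>> fpca = FPCA(n_components=2).fit(fd)  # doctest: +SKIP
+        >>> fig = plot_fpca_perturbation_graphs(
+        ...     fd.mean(), fpca.components_, multiple=30)  # doctest: +SKIP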
+    """
+
+    if len(mean) > 1:
+        mean = mean.mean()
+
+    fig, axes = _get_figure_and_axes(chart, fig, axes)
+
+    if not axes:
+        axes = fig.subplots(nrows=len(components))
+
+    for i in range(len(axes)):
+        aux = _get_component_perturbations(mean, components, i, multiple)
+        aux.plot(axes[i], **kwargs)
+        axes[i].set_title('Principal component ' + str(i + 1))
+
+    return fig
+
+
+def _get_component_perturbations(mean, components, index=0, multiple=30):
+    """Computes the perturbations over the mean function of a principal
+    component at a certain index.
+
+    Args:
+        mean (FDataGrid or FDataBasis):
+            the functional data object containing the mean function
+        index (int):
+            index of the component for which we want to compute the
+            perturbations
+        multiple (float):
+            multiple of the principal component curve to be added or
+            subtracted.
+
+    Returns:
+        (FDataGrid or FDataBasis): this contains the mean function followed
+        by the positive perturbation and the negative perturbation.
+    """
+    if not isinstance(mean, FData):
+        raise AttributeError("mean must be a FData object")
+    perturbations = mean.copy()
+    perturbations = perturbations.concatenate(
+        perturbations[0] + multiple * components[index])
+    perturbations = perturbations.concatenate(
+        perturbations[0] - multiple * components[index])
+    return perturbations
diff --git a/skfda/exploratory/visualization/representation.py b/skfda/exploratory/visualization/representation.py
index 18a6c6772..739fbbb57 100644
--- a/skfda/exploratory/visualization/representation.py
+++ b/skfda/exploratory/visualization/representation.py
@@ -9,50 +9,55 @@
                      _set_labels)
 
 
-def _get_label_colors(n_labels, label_colors=None):
+def _get_label_colors(n_labels, group_colors=None):
     """Get the colors of each label"""
-    if label_colors is not None:
-        if len(label_colors) != n_labels:
-            raise ValueError("There must be a color in label_colors "
+    if group_colors is not None:
+        if len(group_colors) != n_labels:
+            raise ValueError("There must be a color in group_colors "
                              "for each of the labels that appear in "
-                             "sample_labels.")
+                             "group.")
     else:
         colormap = matplotlib.cm.get_cmap()
-        label_colors = colormap(np.arange(n_labels) / (n_labels - 1))
+        group_colors = colormap(np.arange(n_labels) / (n_labels - 1))
 
-    return label_colors
+    return group_colors
 
 
-def _get_color_info(fdata, sample_labels, label_names, label_colors, kwargs):
+def _get_color_info(fdata, group, group_names, group_colors, legend, kwargs):
 
     patches = None
 
-    if sample_labels is not None:
+    if group is not None:
         # In this case, each curve has a label, and all curves with the same
         # label should have the same color
 
-        sample_labels = np.asarray(sample_labels)
+        group_unique, group_indexes = np.unique(group, return_inverse=True)
+        n_labels = len(group_unique)
 
-        n_labels = np.max(sample_labels) + 1
+        if group_colors is not None:
+            group_colors_array = np.array(
+                [group_colors[g] for g in group_unique])
+        else:
+            prop_cycle = matplotlib.rcParams['axes.prop_cycle']
+            cycle_colors = prop_cycle.by_key()['color']
+
+            group_colors_array = np.take(
+                cycle_colors, np.arange(n_labels), mode='wrap')
 
-        if np.any((sample_labels < 0) | (sample_labels >= n_labels)) or \
-                not np.all(np.isin(range(n_labels), sample_labels)):
-            raise ValueError("Sample_labels must contain at least an "
-                             "occurence of numbers between 0 and number "
-                             "of distint sample labels.")
+        sample_colors = group_colors_array[group_indexes]
 
-        label_colors = _get_label_colors(n_labels, label_colors)
-        sample_colors = np.asarray(label_colors)[sample_labels]
+        group_names_array = 
None - if label_names is not None: - if len(label_names) != n_labels: - raise ValueError("There must be a name in label_names " - "for each of the labels that appear in " - "sample_labels.") + if group_names is not None: + group_names_array = np.array( + [group_names[g] for g in group_unique]) + elif legend is True: + group_names_array = group_unique + if group_names_array is not None: patches = [matplotlib.patches.Patch(color=c, label=l) - for c, l in zip(label_colors, label_names)] + for c, l in zip(group_colors_array, group_names_array)] else: # In this case, each curve has a different color unless specified @@ -72,10 +77,11 @@ def _get_color_info(fdata, sample_labels, label_names, label_colors, kwargs): return sample_colors, patches -def plot_graph(fdata, chart=None, *, derivative=0, fig=None, axes=None, +def plot_graph(fdata, chart=None, *, fig=None, axes=None, n_rows=None, n_cols=None, n_points=None, domain_range=None, - sample_labels=None, label_colors=None, label_names=None, + group=None, group_colors=None, group_names=None, + legend: bool = False, **kwargs): """Plot the FDatGrid object graph as hypersurfaces. @@ -87,10 +93,6 @@ def plot_graph(fdata, chart=None, *, derivative=0, fig=None, axes=None, with the graphs are plotted or axis over where the graphs are plotted. If None and ax is also None, the figure is initialized. - derivative (int or tuple, optional): Order of derivative to be - plotted. In case of surfaces a tuple with the order of - derivation in each direction can be passed. See - :func:`evaluate` to obtain more information. Defaults 0. fig (figure object, optional): figure over with the graphs are plotted in case ax is not specified. If None and ax is also None, the figure is initialized. @@ -115,17 +117,21 @@ def plot_graph(fdata, chart=None, *, derivative=0, fig=None, axes=None, interval; in the case of surfaces a list with 2 tuples with the ranges for each dimension. Default uses the domain range of the functional object. - sample_labels (list of int): contains integers from [0 to number of + group (list of int): contains integers from [0 to number of labels) indicating to which group each sample belongs to. Then, the samples with the same label are plotted in the same color. If None, the default value, each sample is plotted in the color assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. - label_colors (list of colors): colors in which groups are + group_colors (list of colors): colors in which groups are represented, there must be one for each group. If None, each group is shown with distict colors in the "Greys" colormap. - label_names (list of str): name of each of the groups which appear + group_names (list of str): name of each of the groups which appear in a legend, there must be one for each one. Defaults to None - and the legend is not shown. + and the legend is not shown. Implies `legend=True`. + legend (bool): if `True`, show a legend with the groups. If + `group_names` is passed, it will be used for finding the names + to display in the legend. Otherwise, the values passed to + `group` will be used. 
**kwargs: if dim_domain is 1, keyword arguments to be passed to the matplotlib.pyplot.plot function; if dim_domain is 2, keyword arguments to be passed to the @@ -145,7 +151,7 @@ def plot_graph(fdata, chart=None, *, derivative=0, fig=None, axes=None, domain_range = _list_of_arrays(domain_range) sample_colors, patches = _get_color_info( - fdata, sample_labels, label_names, label_colors, kwargs) + fdata, group, group_names, group_colors, legend, kwargs) if fdata.dim_domain == 1: @@ -154,7 +160,7 @@ def plot_graph(fdata, chart=None, *, derivative=0, fig=None, axes=None, # Evaluates the object in a linspace eval_points = np.linspace(*domain_range[0], n_points) - mat = fdata(eval_points, derivative=derivative, keepdims=True) + mat = fdata(eval_points) color_dict = {} @@ -183,7 +189,7 @@ def plot_graph(fdata, chart=None, *, derivative=0, fig=None, axes=None, y = np.linspace(*domain_range[1], npoints[1]) # Evaluation of the functional object - Z = fdata((x, y), derivative=derivative, grid=True, keepdims=True) + Z = fdata((x, y), grid=True) X, Y = np.meshgrid(x, y, indexing='ij') @@ -203,10 +209,11 @@ def plot_graph(fdata, chart=None, *, derivative=0, fig=None, axes=None, return fig -def plot_scatter(fdata, chart=None, *, sample_points=None, derivative=0, +def plot_scatter(fdata, chart=None, *, sample_points=None, fig=None, axes=None, - n_rows=None, n_cols=None, n_points=None, domain_range=None, - sample_labels=None, label_colors=None, label_names=None, + n_rows=None, n_cols=None, domain_range=None, + group=None, group_colors=None, group_names=None, + legend: bool = False, **kwargs): """Plot the FDatGrid object. @@ -216,10 +223,6 @@ def plot_scatter(fdata, chart=None, *, sample_points=None, derivative=0, plotted. If None and ax is also None, the figure is initialized. sample_points (ndarray): points to plot. - derivative (int or tuple, optional): Order of derivative to be - plotted. In case of surfaces a tuple with the order of - derivation in each direction can be passed. See - :func:`evaluate` to obtain more information. Defaults 0. fig (figure object, optional): figure over with the graphs are plotted in case ax is not specified. If None and ax is also None, the figure is initialized. @@ -231,30 +234,27 @@ def plot_scatter(fdata, chart=None, *, sample_points=None, derivative=0, n_cols(int, optional): designates the number of columns of the figure to plot the different dimensions of the image. Only specified if fig and ax are None. - n_points (int or tuple, optional): Number of points to evaluate in - the plot. In case of surfaces a tuple of length 2 can be pased - with the number of points to plot in each axis, otherwise the - same number of points will be used in the two axes. By default - in unidimensional plots will be used 501 points; in surfaces - will be used 30 points per axis, wich makes a grid with 900 - points. domain_range (tuple or list of tuples, optional): Range where the function will be plotted. In objects with unidimensional domain the domain range should be a tuple with the bounds of the interval; in the case of surfaces a list with 2 tuples with the ranges for each dimension. Default uses the domain range of the functional object. - sample_labels (list of int): contains integers from [0 to number of + group (list of int): contains integers from [0 to number of labels) indicating to which group each sample belongs to. Then, the samples with the same label are plotted in the same color. 
If None, the default value, each sample is plotted in the color assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. - label_colors (list of colors): colors in which groups are + group_colors (list of colors): colors in which groups are represented, there must be one for each group. If None, each group is shown with distict colors in the "Greys" colormap. - label_names (list of str): name of each of the groups which appear + group_names (list of str): name of each of the groups which appear in a legend, there must be one for each one. Defaults to None - and the legend is not shown. + and the legend is not shown. Implies `legend=True`. + legend (bool): if `True`, show a legend with the groups. If + `group_names` is passed, it will be used for finding the names + to display in the legend. Otherwise, the values passed to + `group` will be used. **kwargs: if dim_domain is 1, keyword arguments to be passed to the matplotlib.pyplot.plot function; if dim_domain is 2, keyword arguments to be passed to the @@ -265,12 +265,16 @@ def plot_scatter(fdata, chart=None, *, sample_points=None, derivative=0, """ + evaluated_points = None + if sample_points is None: # This can only be done for FDataGrid sample_points = fdata.sample_points evaluated_points = fdata.data_matrix - else: - evaluated_points = fdata(sample_points, grid=True) + + if evaluated_points is None: + evaluated_points = fdata( + sample_points, grid=True) fig, axes = _get_figure_and_axes(chart, fig, axes) fig, axes = _set_figure_layout_for_fdata(fdata, fig, axes, n_rows, n_cols) @@ -281,7 +285,7 @@ def plot_scatter(fdata, chart=None, *, sample_points=None, derivative=0, domain_range = _list_of_arrays(domain_range) sample_colors, patches = _get_color_info( - fdata, sample_labels, label_names, label_colors, kwargs) + fdata, group, group_names, group_colors, legend, kwargs) if fdata.dim_domain == 1: diff --git a/skfda/inference/__init__.py b/skfda/inference/__init__.py index e69de29bb..73a2e789d 100644 --- a/skfda/inference/__init__.py +++ b/skfda/inference/__init__.py @@ -0,0 +1 @@ +from . import anova, hotelling diff --git a/skfda/inference/anova/__init__.py b/skfda/inference/anova/__init__.py new file mode 100644 index 000000000..516031100 --- /dev/null +++ b/skfda/inference/anova/__init__.py @@ -0,0 +1,2 @@ +from . import anova_oneway +from .anova_oneway import v_sample_stat, v_asymptotic_stat, oneway_anova diff --git a/skfda/inference/anova/anova_oneway.py b/skfda/inference/anova/anova_oneway.py new file mode 100644 index 000000000..9432f4daa --- /dev/null +++ b/skfda/inference/anova/anova_oneway.py @@ -0,0 +1,335 @@ +import numpy as np +from sklearn.utils import check_random_state + +from skfda import concatenate +from skfda.misc.metrics import lp_distance +from skfda.representation import FData, FDataGrid +from skfda.datasets import make_gaussian_process + + +def v_sample_stat(fd, weights, p=2): + r""" + Calculates a statistic that measures the variability between groups of + samples in a :class:`skfda.representation.FData` object. + + The statistic defined as below is calculated between all the samples in a + :class:`skfda.representation.FData` object with a given set of + weights. + + Let :math:`\{f_i\}_{i=1}^k` be a set of samples in a FData object. + Let :math:`\{w_j\}_{j=1}^k` be a set of weights, where :math:`w_i` is + related to the sample :math:`f_i` for :math:`i=1,\dots,k`. + The statistic is defined as: + + .. 
math::
+        V_n = \sum_{i<j} w_i \|f_i - f_j\|^2
+
+    Examples:
+
+        >>> from skfda.inference.anova import v_sample_stat
+        >>> from skfda.representation.grid import FDataGrid
+        >>> import numpy as np
+
+        We create different trajectories to be applied in the statistic and a
+        set of weights.
+
+        >>> t = np.linspace(0, 1, 50)
+        >>> x1 = t * (1 - t) ** 5
+        >>> x2 = t ** 2 * (1 - t) ** 4
+        >>> x3 = t ** 3 * (1 - t) ** 3
+        >>> fd = FDataGrid([x1, x2, x3], sample_points=t)
+        >>> weights = [10, 20, 30]
+
+        Finally the value of the statistic is calculated:
+
+        >>> v_sample_stat(fd, weights)
+        0.01649448843348894
+
+    References:
+        [1] Antonio Cuevas, Manuel Febrero-Bande, and Ricardo Fraiman. "An
+        anova test for functional data". *Computational Statistics & Data
+        Analysis*, 47:111-122, 02 2004
+    """
+
+    weights = np.asarray(weights)
+    if not isinstance(fd, FData):
+        raise ValueError("Argument type must inherit FData.")
+    if len(weights) != fd.n_samples:
+        raise ValueError("Number of weights must match number of samples.")
+
+    t_ind = np.tril_indices(fd.n_samples, -1)
+    coef = weights[t_ind[1]]
+    return np.sum(coef * lp_distance(fd[t_ind[0]], fd[t_ind[1]], p=p) ** p)
+
+
+def v_asymptotic_stat(fd, weights, p=2):
+    r"""
+    Calculates a statistic that measures the variability between groups of
+    samples in a :class:`skfda.representation.FData` object.
+
+    The statistic defined as below is calculated between all the samples in a
+    :class:`skfda.representation.FData` object with a given set of
+    weights.
+
+    Let :math:`\{f_i\}_{i=1}^k` be a set of samples in a FData object.
+    Let :math:`\{w_j\}_{j=1}^k` be a set of weights, where :math:`w_i` is
+    related to the sample :math:`f_i` for :math:`i=1,\dots,k`.
+    The statistic is defined as:
+
+    .. math::
+        \sum_{i<j} \left\| f_i - \sqrt{\frac{w_i}{w_j}} f_j \right\|^2
+
+    Examples:
+
+        >>> from skfda.inference.anova import v_asymptotic_stat
+        >>> from skfda.representation.grid import FDataGrid
+        >>> import numpy as np
+
+        We create different trajectories to be applied in the statistic and a
+        set of weights.
+
+        >>> t = np.linspace(0, 1, 50)
+        >>> x1 = t * (1 - t) ** 5
+        >>> x2 = t ** 2 * (1 - t) ** 4
+        >>> x3 = t ** 3 * (1 - t) ** 3
+        >>> fd = FDataGrid([x1, x2, x3], sample_points=t)
+        >>> weights = [10, 20, 30]
+
+        Finally the value of the statistic is calculated:
+
+        >>> v_asymptotic_stat(fd, weights)
+        0.0018159320335885969
+
+    References:
+        [1] Antonio Cuevas, Manuel Febrero-Bande, and Ricardo Fraiman. "An
+        anova test for functional data". *Computational Statistics & Data
+        Analysis*, 47:111-122, 02 2004
+    """
+    weights = np.asarray(weights)
+    if not isinstance(fd, FData):
+        raise ValueError("Argument type must inherit FData.")
+    if len(weights) != fd.n_samples:
+        raise ValueError("Number of weights must match number of samples.")
+    if np.count_nonzero(weights) != len(weights):
+        raise ValueError("All weights must be non-zero.")
+
+    t_ind = np.tril_indices(fd.n_samples, -1)
+    coef = np.sqrt(weights[t_ind[1]] / weights[t_ind[0]])
+    left_fd = fd[t_ind[1]]
+    if isinstance(fd, FDataGrid):
+        right_fd = coef[:, None, np.newaxis] * fd[t_ind[0]]
+    else:
+        right_fd = fd[t_ind[0]].times(coef)
+    return np.sum(lp_distance(left_fd, right_fd, p=p) ** p)
+
+
+def _anova_bootstrap(fd_grouped, n_reps, random_state=None, p=2,
+                     equal_var=True):
+
+    n_groups = len(fd_grouped)
+    if n_groups < 2:
+        raise ValueError("At least two groups must be passed in fd_grouped.")
+
+    for fd in fd_grouped[1:]:
+        if not np.array_equal(fd.domain_range, fd_grouped[0].domain_range):
+            raise ValueError("Domain range must match for every FData in "
+                             "fd_grouped.")
+
+    start, stop = fd_grouped[0].domain_range[0]
+
+    # List with the sizes of each group
+    sizes = [fd.n_samples for fd in fd_grouped]
+
+    # Instance a random state object in case random_state is an int
+    random_state = check_random_state(random_state)
+
+    if equal_var:
+        k_est = concatenate(fd_grouped).cov().data_matrix[0, ..., 0]
+        k_est = [k_est] * len(fd_grouped)
+    else:
+        # Estimating covariances for each group
+        k_est = [fd.cov().data_matrix[0, ..., 0] for fd in fd_grouped]
+
+    # The number of sample points of the gaussian processes has to match
+    # the features of the covariances.
+    n_features = k_est[0].shape[0]
+
+    # Simulating n_reps observations for each of the n_groups gaussian
+    # processes
+    sim = [make_gaussian_process(n_reps, n_features=n_features, start=start,
+                                 stop=stop, cov=k_est[i],
+                                 random_state=random_state)
+           for i in range(n_groups)]
+
+    v_samples = np.empty(n_reps)
+    for i in range(n_reps):
+        fd = FDataGrid([s.data_matrix[i, ..., 0] for s in sim])
+        v_samples[i] = v_asymptotic_stat(fd, sizes, p=p)
+    return v_samples
+
+
+def oneway_anova(*args, n_reps=2000, return_dist=False, random_state=None,
+                 p=2, equal_var=True):
+    r"""
+    Performs one-way functional ANOVA.
+
+    This function implements an asymptotic method to test the following
+    null hypothesis:
+
+    Let :math:`\{X_i\}_{i=1}^k` be a set of :math:`k` independent samples
+    each one with :math:`n_i` trajectories, and let :math:`E(X_i) = m_i(t)`.
+    The null hypothesis is defined as:
+
+    .. math::
+        H_0: m_1(t) = \dots = m_k(t)
+
+    This function calculates the value of the statistic
+    :func:`~skfda.inference.anova.v_sample_stat` :math:`V_n` with the means
+    of the given samples. Under the null hypothesis this statistic is
+    asymptotically equivalent to
+    :func:`~skfda.inference.anova.v_asymptotic_stat`, where each sample
+    is replaced by a gaussian process, with mean zero and the same
+    covariance function as the original.
+
+    The simulation of the distribution of the asymptotic statistic
+    :math:`V` is implemented using a bootstrap procedure. One observation
+    of the :math:`k` different gaussian processes defined above is
+    simulated, and the value of
+    :func:`~skfda.inference.anova.v_asymptotic_stat` is calculated. This
+    procedure is repeated `n_reps` times, creating a sampling distribution
+    of the statistic.
+
+    This procedure is from Cuevas[1].
+
+    Args:
+        fd1, fd2, ... (FDataGrid): The sample measurements for each group.
+
+
+def oneway_anova(*args, n_reps=2000, return_dist=False, random_state=None,
+                 p=2, equal_var=True):
+    r"""
+    Performs one-way functional ANOVA.
+
+    This function implements an asymptotic method to test the following
+    null hypothesis:
+
+    Let :math:`\{X_i\}_{i=1}^k` be a set of :math:`k` independent samples
+    each one with :math:`n_i` trajectories, and let :math:`E(X_i) = m_i(t)`.
+    The null hypothesis is defined as:
+
+    .. math::
+        H_0: m_1(t) = \dots = m_k(t)
+
+    This function calculates the value of the statistic
+    :func:`~skfda.inference.anova.v_sample_stat` :math:`V_n` with the means
+    of the given samples. Under the null hypothesis this statistic is
+    asymptotically equivalent to
+    :func:`~skfda.inference.anova.v_asymptotic_stat`, where each sample
+    is replaced by a gaussian process, with mean zero and the same
+    covariance function as the original.
+
+    The simulation of the distribution of the asymptotic statistic
+    :math:`V` is implemented using a bootstrap procedure. One observation
+    of the :math:`k` different gaussian processes defined above is
+    simulated, and the value of
+    :func:`~skfda.inference.anova.v_asymptotic_stat` is calculated. This
+    procedure is repeated `n_reps` times, creating a sampling distribution
+    of the statistic.
+
+    This procedure is from Cuevas[1].
+
+    Args:
+        fd1,fd2,.... (FDataGrid): The sample measurements for each group.
+
+        n_reps (int, optional): Number of simulations for the bootstrap
+            procedure. Defaults to 2000 (this value may change in future
+            versions).
+
+        return_dist (bool, optional): Flag to indicate if the function should
+            return a numpy.array with the sampling distribution simulated.
+
+        random_state (optional): Random state.
+
+        p (int, optional): p of the lp norm. Must be greater than or equal
+            to 1. If p='inf' or p=np.inf the L-infinity metric is used.
+            Defaults to 2.
+
+        equal_var (bool, optional): If True (default), perform a one-way
+            ANOVA assuming the same covariance operator for all the groups,
+            else consider an independent covariance operator for each group.
+
+    Returns:
+        Value of the sample statistic, p-value and sampling distribution of
+        the simulated asymptotic statistic.
+
+    Return type:
+        (float, float, numpy.array)
+
+    Raises:
+        ValueError: In case of bad arguments.
+
+    Examples:
+        >>> from skfda.inference.anova import oneway_anova
+        >>> from skfda.datasets import fetch_gait
+        >>> from numpy.random import RandomState
+        >>> from numpy import printoptions
+
+        >>> fd = fetch_gait()["data"].coordinates[1]
+        >>> fd1, fd2, fd3 = fd[:13], fd[13:26], fd[26:]
+        >>> oneway_anova(fd1, fd2, fd3, random_state=RandomState(42))
+        (179.52499999999998, 0.5945)
+        >>> _, _, dist = oneway_anova(fd1, fd2, fd3, n_reps=3,
+        ...                           random_state=RandomState(42),
+        ...                           return_dist=True)
+        >>> with printoptions(precision=4):
+        ...     print(dist)
+        [ 184.0698  212.7395  195.3663]
+
+    References:
+        [1] Antonio Cuevas, Manuel Febrero-Bande, and Ricardo Fraiman. "An
+        anova test for functional data". *Computational Statistics & Data
+        Analysis*, 47:111-122, 02 2004
+    """
+
+    if len(args) < 2:
+        raise ValueError("At least two groups must be passed as parameter.")
+    if not all(isinstance(fd, FData) for fd in args):
+        raise ValueError("Argument type must inherit FData.")
+    if n_reps < 1:
+        raise ValueError("Number of simulations must be positive.")
+
+    fd_groups = args
+    if not all(isinstance(fd, type(fd_groups[0])) for fd in fd_groups[1:]):
+        raise TypeError('Found mixed FData types in arguments.')
+
+    for fd in fd_groups[1:]:
+        if not np.array_equal(fd.domain_range, fd_groups[0].domain_range):
+            raise ValueError("Domain range must match for every FData passed.")
+
+    if isinstance(fd_groups[0], FDataGrid):
+        # Creating list with all the sample points
+        list_sample = [fd.sample_points[0].tolist() for fd in fd_groups]
+        # Checking that all the entries in the list are the same
+        if not list_sample.count(list_sample[0]) == len(list_sample):
+            raise ValueError("All FDataGrid passed must have the same sample "
+                             "points.")
+    else:  # If type is FDataBasis, check same basis
+        list_basis = [fd.basis for fd in fd_groups]
+        if not list_basis.count(list_basis[0]) == len(list_basis):
+            raise NotImplementedError("Not implemented for FDataBasis with "
+                                      "different basis.")
+
+    # FData where each sample is the mean of each group
+    fd_means = concatenate([fd.mean() for fd in fd_groups])
+
+    # Base statistic
+    vn = v_sample_stat(fd_means, [fd.n_samples for fd in fd_groups], p=p)
+
+    # Computing sampling distribution
+    simulation = _anova_bootstrap(fd_groups, n_reps,
+                                  random_state=random_state, p=p,
+                                  equal_var=equal_var)
+
+    p_value = np.sum(simulation > vn) / len(simulation)
+
+    if return_dist:
+        return vn, p_value, simulation
+
+    return vn, p_value
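The simulated null distribution returned with ``return_dist=True`` can be inspected directly against the observed statistic. The following sketch is not part of the patch; the synthetic groups built with ``make_gaussian_process`` are an assumption for illustration only.

.. code:: python

    import matplotlib.pyplot as plt
    from numpy.random import RandomState
    from skfda.datasets import make_gaussian_process
    from skfda.inference.anova import oneway_anova

    # Three groups drawn from the same process, so H0 holds
    groups = [make_gaussian_process(n_samples=10, random_state=i)
              for i in range(3)]

    vn, p_value, dist = oneway_anova(*groups, n_reps=500,
                                     return_dist=True,
                                     random_state=RandomState(0))

    # The p-value is the fraction of simulated statistics above vn
    plt.hist(dist, bins=30)
    plt.axvline(vn, color="red")
    plt.show()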
b/skfda/inference/hotelling/__init__.py
@@ -0,0 +1,2 @@
+from . import hotelling
+from .hotelling import hotelling_t2, hotelling_test_ind
diff --git a/skfda/inference/hotelling/hotelling.py b/skfda/inference/hotelling/hotelling.py
new file mode 100644
index 000000000..f5fde264a
--- /dev/null
+++ b/skfda/inference/hotelling/hotelling.py
@@ -0,0 +1,206 @@
+import itertools
+
+import numpy as np
+import scipy.linalg
+import scipy.special
+from sklearn.utils import check_random_state
+
+from skfda.representation import FDataBasis, FData
+
+
+def hotelling_t2(fd1, fd2):
+    r"""
+    Calculates Hotelling's :math:`T^2` over two samples in
+    :class:`skfda.representation.FData` objects with sizes :math:`n_1`
+    and :math:`n_2`.
+
+    .. math::
+        T^2 = \frac{n_1 n_2}{n} (\mathbf{m}_1 - \mathbf{m}_2)^\top
+        \mathbf{W}^{1/2}
+        (\mathbf{W}^{1/2} \mathbf{K}_{\operatorname{pooled}}
+        \mathbf{W}^{1/2})^{+}
+        \mathbf{W}^{1/2} (\mathbf{m}_1 - \mathbf{m}_2),
+
+    where :math:`(\cdot)^{+}` indicates the Moore-Penrose pseudo-inverse
+    operator, :math:`n = n_1 + n_2`, :math:`\mathbf{W}` is the Gram matrix
+    (the identity in case of discretized data),
+    :math:`\mathbf{m}_1, \mathbf{m}_2` are the means of each sample and
+    the :math:`\mathbf{K}_{\operatorname{pooled}}` matrix is defined as
+
+    .. math::
+        \mathbf{K}_{\operatorname{pooled}} :=
+        \cfrac{n_1 - 1}{n_1 + n_2 - 2} \mathbf{K}_{n_1} +
+        \cfrac{n_2 - 1}{n_1 + n_2 - 2} \mathbf{K}_{n_2},
+
+    where :math:`\mathbf{K}_{n_1}`, :math:`\mathbf{K}_{n_2}` are the sample
+    covariance matrices, computed with the basis coefficients or using
+    the discrete representation, depending on the input.
+
+    This statistic is defined in Pini, Stamm and Vantini[1].
+
+    Args:
+        fd1 (FData): Object with the first sample.
+        fd2 (FData): Object containing the second sample.
+
+    Returns:
+        The value of the statistic.
+
+    Raises:
+        TypeError: In case of bad arguments.
+
+    Examples:
+
+        >>> from skfda.inference.hotelling import hotelling_t2
+        >>> from skfda.representation import FDataGrid, basis
+
+        >>> fd1 = FDataGrid([[1, 1, 1], [3, 3, 3]])
+        >>> fd2 = FDataGrid([[3, 3, 3], [5, 5, 5]])
+        >>> '%.2f' % hotelling_t2(fd1, fd2)
+        '2.00'
+        >>> fd1 = fd1.to_basis(basis.Fourier(n_basis=3))
+        >>> fd2 = fd2.to_basis(basis.Fourier(n_basis=3))
+        >>> '%.2f' % hotelling_t2(fd1, fd2)
+        '2.00'
+
+    References:
+        [1] A. Pini, A. Stamm and S. Vantini, "Hotelling's T2 in
+        separable Hilbert spaces", *Journal of Multivariate Analysis*,
+        167 (2018), pp.284-305.
+
+    """
+    if not isinstance(fd1, FData):
+        raise TypeError("Argument type must inherit FData.")
+
+    if not isinstance(fd2, type(fd1)):
+        raise TypeError("Both samples must be instances of the same type.")
+
+    n1, n2 = fd1.n_samples, fd2.n_samples  # Size of each sample
+    n = n1 + n2  # Size of full sample
+    m = fd1.mean() - fd2.mean()  # Delta mean
+
+    if isinstance(fd1, FDataBasis):
+        if fd1.basis != fd2.basis:
+            raise ValueError("Both FDataBasis objects must share the same "
+                             "basis.")
+        # When working on basis representation we use the coefficients
+        m = m.coefficients[0]
+        k1 = np.cov(fd1.coefficients, rowvar=False)
+        k2 = np.cov(fd2.coefficients, rowvar=False)
+        # If no weight matrix is passed, then we compute the Gram matrix
+        weights = fd1.basis.gram_matrix()
+        # Matrix square root of the Gram matrix (an element-wise sqrt
+        # would only be correct for a diagonal Gram matrix)
+        weights = scipy.linalg.sqrtm(weights)
+    else:
+        # Working with standard discretized data
+        m = m.data_matrix[0, ..., 0]
+        k1 = fd1.cov().data_matrix[0, ..., 0]
+        k2 = fd2.cov().data_matrix[0, ..., 0]
+
+    m = m.reshape((-1, 1))  # Reshaping the mean for a proper matrix product
+    k_pool = ((n1 - 1) * k1 + (n2 - 1) * k2) / (n - 2)  # Pooled covariance
+
+    if isinstance(fd1, FDataBasis):
+        # Product of the pooled covariance with the weights and
+        # Moore-Penrose inverse.
+        k_inv = np.linalg.pinv(np.linalg.multi_dot([weights, k_pool,
+                                                    weights]))
+        k_inv = weights.dot(k_inv).dot(weights)
+    else:
+        # If data is discrete no weights are needed
+        k_inv = np.linalg.pinv(k_pool)
+
+    return n1 * n2 / n * m.T.dot(k_inv).dot(m)[0][0]
+
+
+def hotelling_test_ind(fd1, fd2, *, n_reps=None, random_state=None,
+                       return_dist=False):
+    r"""
+    Calculate the :math:`T^2`-test for the means of two independent samples of
+    functional data.
+
+    This is a two-sided test for the null hypothesis that two independent
+    samples have identical average (expected) values. This test assumes that
+    the populations have identical variances by default.
+
+    The p-value of the test is calculated using a permutation test over the
+    statistic :func:`~skfda.inference.hotelling.hotelling_t2`. If a maximum
+    number of repetitions of the algorithm is provided then the permutations
+    tested are generated randomly.
+
+    This procedure is from Pini, Stamm and Vantini[1].
+
+    Args:
+        fd1,fd2 (FData): Samples of data. The FData objects must have the same
+            type.
+
+        n_reps (int, optional): Maximum number of repetitions to compute
+            the p-value. Default value is None.
+
+        random_state (optional): Random state.
+
+        return_dist (bool, optional): Flag to indicate if the function should
+            return a numpy.array with the values of the statistic computed
+            over each permutation.
+
+    Returns:
+        Value of the sample statistic, the permutation p-value and a
+        collection of statistic values from the permutations of the sample.
+
+    Return type:
+        (float, float, numpy.array)
+
+    Raises:
+        TypeError: In case of bad arguments.
+
+    Examples:
+        >>> from skfda.inference.hotelling import hotelling_test_ind
+        >>> from skfda.representation import FDataGrid, basis
+        >>> from numpy import printoptions
+
+        >>> fd1 = FDataGrid([[1, 1, 1], [3, 3, 3]])
+        >>> fd2 = FDataGrid([[3, 3, 3], [5, 5, 5]])
+        >>> t2n, pval, dist = hotelling_test_ind(fd1, fd2, return_dist=True)
+        >>> '%.2f' % t2n
+        '2.00'
+        >>> '%.2f' % pval
+        '0.00'
+        >>> with printoptions(precision=4):
+        ...     print(dist)
+        [ 2.  2.  0.  0.  2.  2.]
+
+    References:
+        [1] A. Pini, A. Stamm and S. Vantini, "Hotelling's T2 in
+        separable Hilbert spaces", *Journal of Multivariate Analysis*,
+        167 (2018), pp.284-305.
+    """
+    if not isinstance(fd1, FData):
+        raise TypeError("Argument type must inherit FData.")
+
+    if not isinstance(fd2, type(fd1)):
+        raise TypeError("Both samples must be instances of the same type.")
+
+    if n_reps is not None and n_reps < 1:
+        raise ValueError("Number of repetitions must be positive.")
+
+    n1, n2 = fd1.n_samples, fd2.n_samples
+    t2_0 = hotelling_t2(fd1, fd2)
+    n = n1 + n2
+    sample = fd1.concatenate(fd2)
+    indices = np.arange(n)
+
+    if n_reps is not None:  # Computing n_reps random permutations
+        random_state = check_random_state(random_state)
+        dist = np.empty(n_reps)
+        for i in range(n_reps):
+            random_state.shuffle(indices)
+            dist[i] = hotelling_t2(sample[indices[:n1]], sample[indices[n1:]])
+
+    else:  # Full permutation test
+        combinations = itertools.combinations(indices, n1)
+        # exact=True keeps the count an integer, avoiding float rounding
+        dist = np.empty(scipy.special.comb(n, n1, exact=True))
+        for i, comb in enumerate(combinations):
+            sample1_i = np.asarray(comb)  # comb is a selection of n1 indices
+            sample2_i = np.setdiff1d(indices, sample1_i)  # Remaining n2 ind.
+            sample1, sample2 = sample[sample1_i], sample[sample2_i]
+            dist[i] = hotelling_t2(sample1, sample2)
+
+    p_value = np.sum(dist > t2_0) / len(dist)
+
+    if return_dist:
+        return t2_0, p_value, dist
+
+    return t2_0, p_value
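The full permutation test is exact, but its cost grows combinatorially with the sample sizes, so ``n_reps`` trades exactness for speed. A minimal sketch of both modes follows; it is not part of the patch, and the synthetic data built with ``make_gaussian_process`` is an assumption for illustration.

.. code:: python

    from numpy.random import RandomState
    from skfda.datasets import make_gaussian_process
    from skfda.inference.hotelling import hotelling_test_ind

    fd1 = make_gaussian_process(n_samples=5, random_state=0)
    fd2 = make_gaussian_process(n_samples=5, random_state=1)

    # Exact test: all C(10, 5) = 252 splits are evaluated
    t2, p_exact = hotelling_test_ind(fd1, fd2)

    # Monte Carlo approximation with 500 random permutations
    t2, p_approx = hotelling_test_ind(fd1, fd2, n_reps=500,
                                      random_state=RandomState(0))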
diff --git a/skfda/misc/__init__.py b/skfda/misc/__init__.py
index f06f03ee0..f3ef87ad1 100644
--- a/skfda/misc/__init__.py
+++ b/skfda/misc/__init__.py
@@ -1,3 +1,5 @@
-from ._math import log, log2, log10, exp, sqrt, cumsum, inner_product
 from . import covariances, kernels, metrics
-from ._lfd import LinearDifferentialOperator
+from . import operators
+from . import regularization
+from ._math import (log, log2, log10, exp, sqrt, cumsum,
+                    inner_product, inner_product_matrix)
diff --git a/skfda/misc/_lfd.py b/skfda/misc/_lfd.py
deleted file mode 100644
index 80e1d2308..000000000
--- a/skfda/misc/_lfd.py
+++ /dev/null
@@ -1,103 +0,0 @@
-import numpy as np
-
-
-__author__ = "Pablo Pérez Manso"
-__email__ = "92manso@gmail.com"
-
-
-class LinearDifferentialOperator:
-    """Defines the structure of a linear differential operator function system
-
-    .. math::
-        Lx(t) = b_0(t) x(t) + b_1(t) x'(x) +
-        \\dots + b_{n-1}(t) d^{n-1}(x(t)) + b_n(t) d^n(x(t))
-
-    Attributes:
-        order (int): the order of the operator. It's the n coefficient in the
-            equation above.
-
-        weights (list): A FDataBasis objects list of length order + 1
-
-    """
-
-    def __init__(self, order=None, weights=None, domain_range=(0, 1)):
-        """Lfd Constructor. You have to provide one of the two first
-        parameters. It both are provided, it will raise an error
-
-        Args:
-            order (int, optional): the order of the operator. It's the highest
-                derivative order of the operator
-
-            weights (list, optional): A FDataBasis objects list of length
-                order + 1 items
-
-            domain_range (tuple or list of tuples, optional): Definition
-                of the interval where the weight functions are
-                defined. Defaults to (0,1).
-        """
-
-        from ..representation.basis import (FDataBasis, Constant,
-                                            _same_domain)
-
-        if order is not None and weights is not None:
-            raise ValueError("You have to provide the order or the weights, "
-                             "not both")
-
-        self.domain_range = domain_range
-
-        if order is None and weights is None:
-            self.order = 0
-            self.weights = []
-
-        elif weights is None:
-            if order < 0:
-                raise ValueError("Order should be an non-negative integer")
-
-            self.order = order
-            self.weights = [
-                FDataBasis(Constant(domain_range), 0 if (i < order) else 1) for
-                i in range(order + 1)]
-
-        else:
-            if len(weights) == 0:
-                raise ValueError("You have to provide one weight at least")
-
-            if all(isinstance(n, int) for n in weights):
-                self.order = len(weights) - 1
-                self.weights = (FDataBasis(Constant(domain_range),
-                                           np.array(weights)
-                                           .reshape(-1, 1)).to_list())
-
-            elif all(isinstance(n, FDataBasis) for n in weights):
-                if all([_same_domain(weights[0].domain_range,
-                                     x.domain_range) and x.n_samples == 1 for x
-                        in weights]):
-                    self.order = len(weights) - 1
-                    self.weights = weights
-                    self.domain_range = weights[0].domain_range
-
-                else:
-                    raise ValueError("FDataBasis objects in the list has "
-                                     "not the same domain_range")
-
-            else:
-                raise ValueError("The elements of the list are neither "
-                                 "integers or FDataBasis objects")
-
-    def __repr__(self):
-        """Representation of Lfd object."""
-
-        bwtliststr = ""
-        for i in range(self.order + 1):
-            bwtliststr = bwtliststr + "\n" + self.weights[i].__repr__() + ","
-
-        return (f"{self.__class__.__name__}("
-                f"\nnderiv={self.order},"
-                f"\nbwtlist=[{bwtliststr[:-1]}]"
-                f"\n)").replace('\n', '\n    ')
-
-    def __eq__(self, other):
-        """Equality of Lfd objects"""
-        return (self.order == other.nderic and
-                all(self.weights[i] == other.bwtlist[i]
-                    for i in range(self.order)))
diff --git a/skfda/misc/_math.py b/skfda/misc/_math.py
index 22cd635fc..fbf9b5af9 100644
--- a/skfda/misc/_math.py
+++ b/skfda/misc/_math.py
@@ -4,10 +4,17 @@
 package. FDataBasis and FDataGrid.
 
 """
+from typing import Union
+
+import multimethod
 import scipy.integrate
 
 import numpy as np
 
+from .._utils import _same_domain, nquad_vec, _pairwise_commutative
+from ..representation import FDataGrid, FDataBasis
+from ..representation.basis import Basis
+
 __author__ = "Miguel Carbajo Berrocal"
 __license__ = "GPL3"
@@ -135,68 +143,211 @@ def cumsum(fdatagrid):
                                          axis=0))
 
 
-def inner_product(fdatagrid, fdatagrid2):
-    r"""Return inner product for FDataGrid.
+@multimethod.multidispatch
+def inner_product(arg1, arg2, **kwargs):
+    r"""Return the usual (:math:`L_2`) inner product.
 
-    Calculates the inner product amongst all the samples in two
+    Calculates the inner product between matching samples in two
     FDataGrid objects.
 
-    For each pair of samples f and g the inner product is defined as:
+    For two samples x and y the inner product is defined as:
+
+    .. math::
+        <x, y> = \sum_i x_i y_i
+
+    for multivariate data and
 
     .. math::
-        <f, g> = \int_a^b f(x)g(x)dx
+        <x, y> = \int_a^b x(t)y(t)dt
+
+    for functional data.
 
-    The integral is approximated using Simpson's rule.
+    The two arguments must have the same number of samples, or one should
+    contain only one sample (and will be broadcasted).
 
     Args:
-        fdatagrid (FDataGrid): First FDataGrid object.
-        fdatagrid2 (FDataGrid): Second FDataGrid object.
+
+        arg1: First sample.
+        arg2: Second sample.
 
     Returns:
-        numpy.darray: Matrix with as many rows as samples in the first
-        object and as many columns as samples in the second one. Each
-        element (i, j) of the matrix is the inner product of the ith sample
-        of the first object and the jth sample of the second one.
+
+        numpy.darray: Vector with the inner products of each pair of
+        samples.
 
     Examples:
+
+        This function can compute the multivariate inner product.
+
+        >>> import numpy as np
+        >>> from skfda.misc import inner_product
+        >>>
+        >>> array1 = np.array([1, 2, 3])
+        >>> array2 = np.array([4, 5, 6])
+        >>> inner_product(array1, array2)
+        32
+
+        If the arrays contain more than one sample
+
+        >>> array1 = np.array([[1, 2, 3], [2, 3, 4]])
+        >>> array2 = np.array([[4, 5, 6], [1, 1, 1]])
+        >>> inner_product(array1, array2)
+        array([32,  9])
+
         The inner product of :math:`f(x) = x` and the constant :math:`y=1`
         defined over the interval [0,1] is the area of the triangle
         delimited by the lines y = 0, x = 1 and y = x; that is, 0.5.
 
         >>> import skfda
-        >>> x = np.linspace(0,1,1001)
+        >>>
+        >>> x = np.linspace(0,1,1000)
+        >>>
         >>> fd1 = skfda.FDataGrid(x,x)
         >>> fd2 = skfda.FDataGrid(np.ones(len(x)),x)
         >>> inner_product(fd1, fd2)
-        array([[ 0.5]])
+        array([ 0.5])
 
         If the FDataGrid object contains more than one sample
 
         >>> fd1 = skfda.FDataGrid([x, np.ones(len(x))], x)
         >>> fd2 = skfda.FDataGrid([np.ones(len(x)), x] ,x)
         >>> inner_product(fd1, fd2).round(2)
-        array([[ 0.5 ,  0.33],
-               [ 1.  ,  0.5 ]])
+        array([ 0.5,  0.5])
+
+        If one argument contains only one sample it is
+        broadcasted.
+
+        >>> fd1 = skfda.FDataGrid([x, np.ones(len(x))], x)
+        >>> fd2 = skfda.FDataGrid([np.ones(len(x))] ,x)
+        >>> inner_product(fd1, fd2).round(2)
+        array([ 0.5,  1. ])
+
+        It also works with basis objects
+
+        >>> basis = skfda.representation.basis.Monomial(n_basis=3)
+        >>>
+        >>> fd1 = skfda.FDataBasis(basis, [0, 1, 0])
+        >>> fd2 = skfda.FDataBasis(basis, [1, 0, 0])
+        >>> inner_product(fd1, fd2)
+        array([ 0.5])
+
+        >>> basis = skfda.representation.basis.Monomial(n_basis=3)
+        >>>
+        >>> fd1 = skfda.FDataBasis(basis, [[0, 1, 0], [0, 0, 1]])
+        >>> fd2 = skfda.FDataBasis(basis, [1, 0, 0])
+        >>> inner_product(fd1, fd2)
+        array([ 0.5       ,  0.33333333])
+
+        >>> basis = skfda.representation.basis.Monomial(n_basis=3)
+        >>>
+        >>> fd1 = skfda.FDataBasis(basis, [[0, 1, 0], [0, 0, 1]])
+        >>> fd2 = skfda.FDataBasis(basis, [[1, 0, 0], [0, 1, 0]])
+        >>> inner_product(fd1, fd2)
+        array([ 0.5 ,  0.25])
 
     """
-    if fdatagrid.dim_domain != 1:
-        raise NotImplementedError("This method only works when the dimension "
-                                  "of the domain of the FDatagrid object is "
-                                  "one.")
-    # Checks
-    if not np.array_equal(fdatagrid.sample_points,
-                          fdatagrid2.sample_points):
-        raise ValueError("Sample points for both objects must be equal")
-
-    # Creates an empty matrix with the desired size to store the results.
-    matrix = np.empty([fdatagrid.n_samples, fdatagrid2.n_samples])
-    # Iterates over the different samples of both objects.
-    for i in range(fdatagrid.n_samples):
-        for j in range(fdatagrid2.n_samples):
-            # Calculates the inner product using Simpson's rule.
-            matrix[i, j] = (scipy.integrate.simps(
-                fdatagrid.data_matrix[i, ..., 0] *
-                fdatagrid2.data_matrix[j, ..., 0],
-                x=fdatagrid.sample_points[0]
-            ))
-    return matrix
+
+    if callable(arg1):
+        return _inner_product_integrate(arg1, arg2)
+    else:
+        return (arg1 * arg2).sum(axis=-1)
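The dispatch mechanism above comes from the ``multimethod`` package: the undecorated function is the generic fallback, and implementations registered with ``register`` are selected from the argument types. A minimal sketch of the same pattern with toy types (not library code):

.. code:: python

    import multimethod

    @multimethod.multidispatch
    def combine(x, y):
        return "generic fallback"

    @combine.register
    def _(x: int, y: int):
        # Chosen whenever both arguments are ints
        return x + y

    print(combine(1, 2))      # 3, dispatched on (int, int)
    print(combine("a", 2.0))  # 'generic fallback'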
+
+
+@inner_product.register
+def inner_product_fdatagrid(arg1: FDataGrid, arg2: FDataGrid):
+
+    if not np.array_equal(arg1.sample_points,
+                          arg2.sample_points):
+        raise ValueError("Sample points for both objects must be equal")
+
+    integrand = arg1.data_matrix * arg2.data_matrix
+
+    for s in arg1.sample_points:
+        integrand = scipy.integrate.simps(integrand,
+                                          x=s,
+                                          axis=1)
+
+    return np.sum(integrand, axis=-1)
+
+
+@inner_product.register(FDataBasis, FDataBasis)
+@inner_product.register(FDataBasis, Basis)
+@inner_product.register(Basis, FDataBasis)
+@inner_product.register(Basis, Basis)
+def inner_product_fdatabasis(arg1: Union[FDataBasis, Basis],
+                             arg2: Union[FDataBasis, Basis],
+                             *,
+                             inner_product_matrix=None,
+                             force_numerical=False):
+
+    if not _same_domain(arg1, arg2):
+        raise ValueError("Both objects should have the same domain_range")
+
+    if isinstance(arg1, Basis):
+        arg1 = arg1.to_basis()
+
+    if isinstance(arg2, Basis):
+        arg2 = arg2.to_basis()
+
+    # Now several cases where computing the matrix is preferable
+    #
+    # First, if force_numerical is True, the matrix is NOT used
+    # Otherwise, if the matrix is given, it is used
+    # Two other cases follow
+
+    # The basis is the same: most bases can optimize this case,
+    # and also the Gram matrix is cached the first time, so computing
+    # it is usually worthwhile
+    same_basis = arg1.basis == arg2.basis
+
+    # The number of operations is lower using the matrix
+    n_ops_best_with_matrix = max(
+        arg1.n_samples, arg2.n_samples) > arg1.n_basis * arg2.n_basis
+
+    if not force_numerical and (
+            inner_product_matrix is not None
+            or same_basis
+            or n_ops_best_with_matrix):
+
+        if inner_product_matrix is None:
+            inner_product_matrix = arg1.basis.inner_product_matrix(arg2.basis)
+
+        return ((arg1.coefficients @
+                 inner_product_matrix) *
+                arg2.coefficients).sum(axis=-1)
+    else:
+        return _inner_product_integrate(arg1, arg2)
+
+
+def _inner_product_integrate(arg1, arg2):
+
+    if not np.array_equal(arg1.domain_range,
+                          arg2.domain_range):
+        raise ValueError("Domain range for both objects must be equal")
+
+    integral = nquad_vec(
+        lambda *args: arg1([*args])[:, 0, :] * arg2([*args])[:, 0, :],
+        arg1.domain_range)
+
+    return np.sum(integral, axis=-1)
+
+
+def inner_product_matrix(arg1, arg2=None, **kwargs):
+    """
+    Returns the inner product matrix between its arguments.
+
+    If arg2 is ``None`` returns the Gram matrix.
+
+    Args:
+
+        arg1: First sample.
+        arg2: Second sample.
+
+    """
+
+    if isinstance(arg1, Basis):
+        arg1 = arg1.to_basis()
+    if isinstance(arg2, Basis):
+        arg2 = arg2.to_basis()
+
+    return _pairwise_commutative(inner_product, arg1, arg2, **kwargs)
diff --git a/skfda/misc/covariances.py b/skfda/misc/covariances.py
index f433a38a3..1ba97f2c2 100644
--- a/skfda/misc/covariances.py
+++ b/skfda/misc/covariances.py
@@ -1,7 +1,7 @@
 import abc
 import numbers
 
-import matplotlib
+import matplotlib.pyplot as plt
 import numpy as np
 import sklearn.gaussian_process.kernels as sklearn_kern
 
@@ -37,6 +37,8 @@ def _execute_covariance(covariance, x, y):
     else:
         if callable(covariance):
             result = covariance(x, y)
+        elif hasattr(covariance, "shape"):
+            result = covariance
         else:
             # GPy kernel
             result = covariance.K(x, y)
@@ -53,22 +55,29 @@ class Covariance(abc.ABC):
     def __call__(self, x, y):
         pass
 
-    def heatmap(self):
-        x = np.linspace(-1, 1, 1000)
+    def heatmap(self, limits=(-1, 1)):
+        """
+        Return a heatmap plot of the covariance function.
+ + """ + + x = np.linspace(*limits, 1000) cov_matrix = self(x, x) fig = _create_figure() ax = fig.add_subplot(1, 1, 1) - ax.imshow(cov_matrix, extent=[-1, 1, 1, -1]) - ax.set_title("Covariance function in [-1, 1]") + ax.imshow(cov_matrix, extent=[limits[0], limits[1], + limits[1], limits[0]]) + ax.set_title(f"Covariance function in [{limits[0]}, {limits[1]}]") return fig def _sample_trajectories_plot(self): from ..datasets import make_gaussian_process - fd = make_gaussian_process(start=-1, cov=self) + fd = make_gaussian_process( + start=-1, n_samples=10, cov=self, random_state=0) fig = fd.plot() fig.axes[0].set_title("Sample trajectories") return fig @@ -98,27 +107,33 @@ def _repr_latex_(self): def _repr_html_(self): fig = self.heatmap() heatmap = _figure_to_svg(fig) + plt.close(fig) fig = self._sample_trajectories_plot() sample_trajectories = _figure_to_svg(fig) + plt.close(fig) - row_style = 'style="position:relative; display:table-row"' + row_style = '' - def column_style(percent): - return (f'style="width: {percent}%; display: table-cell; ' + def column_style(percent, margin_top=0): + return (f'style="display: inline-block; ' + f'margin:0; ' + f'margin-top: {margin_top}; ' + f'width:{percent}%; ' + f'height:auto;' f'vertical-align: middle"') html = f"""
-            <div {column_style(100)}>
+            <div {column_style(100, '25px')}>
                 \\[{self._latex_content()}\\]
             </div>
-            <div {column_style(50)}>
+            <div {column_style(48)}>
                 {sample_trajectories}
             </div>
-            <div {column_style(50)}>
+            <div {column_style(48)}>
                 {heatmap}
             </div>
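Besides the plotting helpers, the covariance classes expose ``to_sklearn``, so the same object can parameterize a scikit-learn Gaussian process. A short sketch follows; the keyword parameters are assumed from the ``_parameters`` lists in the classes below.

.. code:: python

    from sklearn.gaussian_process import GaussianProcessRegressor

    from skfda.misc.covariances import Gaussian

    # Convert the skfda covariance into a scikit-learn kernel
    kernel = Gaussian(variance=1., length_scale=0.2).to_sklearn()
    regressor = GaussianProcessRegressor(kernel=kernel)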
@@ -133,10 +148,57 @@ def to_sklearn(self): class Brownian(Covariance): - """Brownian covariance function.""" + r""" + Brownian covariance function. + + The covariance function is + + .. math:: + K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + |x' - \mathcal{O}| + - |x - x'|}{2} + + where :math:`\sigma^2` is the variance at distance 1 from + :math:`\mathcal{O}` and :math:`\mathcal{O}` is the origin point. + If :math:`\mathcal{O} = 0` (the default) and we only + consider positive values, the formula can be simplified as + + .. math:: + K(x, y) = \sigma^2 \min(x, y). + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import Brownian + import matplotlib.pyplot as plt + + Brownian().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import Brownian + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt - _latex_formula = (r"K(x, y) = \sigma^2 \frac{|x - \mathcal{O}| + " - r"|y - \mathcal{O}| - |x-y|}{2}") + gp = make_gaussian_process( + n_samples=10, cov=Brownian(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. jupyter-execute:: + + from skfda.misc.covariances import Brownian + + Brownian() + + """ + _latex_formula = (r"K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + " + r"|x' - \mathcal{O}| - |x - x'|}{2}") _parameters = [("variance", r"\sigma^2"), ("origin", r"\mathcal{O}")] @@ -153,9 +215,50 @@ def __call__(self, x, y): class Linear(Covariance): - """Linear covariance function.""" + r""" + Linear covariance function. + + The covariance function is + + .. math:: + K(x, x') = \sigma^2 (x^T x' + c) + + where :math:`\sigma^2` is the scale of the variance and + :math:`c` is the intercept. + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import Linear + import matplotlib.pyplot as plt + + Linear().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: - _latex_formula = r"K(x, y) = \sigma^2 (x^T y + c)" + .. jupyter-execute:: + + from skfda.misc.covariances import Linear + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=Linear(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. jupyter-execute:: + + from skfda.misc.covariances import Linear + + Linear() + + """ + _latex_formula = r"K(x, x') = \sigma^2 (x^T x' + c)" _parameters = [("variance", r"\sigma^2"), ("intercept", r"c")] @@ -177,9 +280,51 @@ def to_sklearn(self): class Polynomial(Covariance): - """Polynomial covariance function.""" + r""" + Polynomial covariance function. + + The covariance function is + + .. math:: + K(x, x') = \sigma^2 (\alpha x^T x' + c)^d + + where :math:`\sigma^2` is the scale of the variance, + :math:`\alpha` is the slope, :math:`d` the degree of the + polynomial and :math:`c` is the intercept. + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import Polynomial + import matplotlib.pyplot as plt + + Polynomial().heatmap(limits=(0, 1)) + plt.show() - _latex_formula = r"K(x, y) = \sigma^2 (\alpha x^T y + c)^d" + Example of Gaussian process trajectories using this covariance: + + .. 
jupyter-execute::
 
         from skfda.misc.covariances import Polynomial
         from skfda.datasets import make_gaussian_process
         import matplotlib.pyplot as plt
 
         gp = make_gaussian_process(
                 n_samples=10, cov=Polynomial(), random_state=0)
         gp.plot()
         plt.show()
 
+    Default representation in a Jupyter notebook:
+
+    .. jupyter-execute::
+
+        from skfda.misc.covariances import Polynomial
+
+        Polynomial()
+
+    """
+    _latex_formula = r"K(x, x') = \sigma^2 (\alpha x^T x' + c)^d"
 
     _parameters = [("variance", r"\sigma^2"),
                    ("intercept", r"c"),
@@ -209,9 +354,49 @@ def to_sklearn(self):
 
 
 class Gaussian(Covariance):
-    """Gaussian covariance function."""
+    r"""
+    Gaussian covariance function.
+
+    The covariance function is
+
+    .. math::
+        K(x, x') = \sigma^2 \exp\left(-\frac{||x - x'||^2}{2l^2}\right)
+
+    where :math:`\sigma^2` is the variance and :math:`l` is the length scale.
+
+    Heatmap plot of the covariance function:
+
+    .. jupyter-execute::
 
-    _latex_formula = (r"K(x, y) = \sigma^2 \exp\left(\frac{||x - y||^2}{2l^2}"
+        from skfda.misc.covariances import Gaussian
+        import matplotlib.pyplot as plt
+
+        Gaussian().heatmap(limits=(0, 1))
+        plt.show()
+
+    Example of Gaussian process trajectories using this covariance:
+
+    .. jupyter-execute::
+
+        from skfda.misc.covariances import Gaussian
+        from skfda.datasets import make_gaussian_process
+        import matplotlib.pyplot as plt
+
+        gp = make_gaussian_process(
+                n_samples=10, cov=Gaussian(), random_state=0)
+        gp.plot()
+        plt.show()
+
+    Default representation in a Jupyter notebook:
+
+    .. jupyter-execute::
+
+        from skfda.misc.covariances import Gaussian
+
+        Gaussian()
+
+    """
+    _latex_formula = (r"K(x, x') = \sigma^2 \exp\left(-\frac{\|x - x'\|^2}"
                       r"{2l^2}\right)")
 
     _parameters = [("variance", r"\sigma^2"),
@@ -236,9 +421,49 @@ def to_sklearn(self):
 
 
 class Exponential(Covariance):
-    """Exponential covariance function."""
+    r"""
+    Exponential covariance function.
+
+    The covariance function is
+
+    .. math::
+        K(x, x') = \sigma^2 \exp\left(-\frac{\|x - x'\|}{l}\right)
+
+    where :math:`\sigma^2` is the variance and :math:`l` is the length scale.
+
+    Heatmap plot of the covariance function:
 
-    _latex_formula = (r"K(x, y) = \sigma^2 \exp\left(\frac{||x - y||}{l}"
+    .. jupyter-execute::
+
+        from skfda.misc.covariances import Exponential
+        import matplotlib.pyplot as plt
+
+        Exponential().heatmap(limits=(0, 1))
+        plt.show()
+
+    Example of Gaussian process trajectories using this covariance:
+
+    .. jupyter-execute::
+
+        from skfda.misc.covariances import Exponential
+        from skfda.datasets import make_gaussian_process
+        import matplotlib.pyplot as plt
+
+        gp = make_gaussian_process(
+                n_samples=10, cov=Exponential(), random_state=0)
+        gp.plot()
+        plt.show()
+
+    Default representation in a Jupyter notebook:
+
+    .. jupyter-execute::
+
+        from skfda.misc.covariances import Exponential
+
+        Exponential()
+
+    """
+    _latex_formula = (r"K(x, x') = \sigma^2 \exp\left(-\frac{||x - x'||}{l}"
                       r"\right)")
 
     _parameters = [("variance", r"\sigma^2"),
@@ -253,10 +478,73 @@ class Exponential(Covariance):
     def __call__(self, x, y):
         x = _transform_to_2d(x)
         y = _transform_to_2d(y)
 
         x_y = _squared_norms(x, y)
-
         return self.variance * np.exp(-np.sqrt(x_y) / (self.length_scale))
 
     def to_sklearn(self):
         """Convert it to a sklearn kernel, if there is one"""
         return (self.variance *
                 sklearn_kern.Matern(length_scale=self.length_scale, nu=0.5))
+
+
+class WhiteNoise(Covariance):
+    r"""
+    White noise covariance function.
+
+    The covariance function is
+
+    ..
math:: + K(x, x')= \begin{cases} + \sigma^2, \quad x = x' \\ + 0, \quad x \neq x'\\ + \end{cases} + + where :math:`\sigma^2` is the variance. + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import WhiteNoise + import matplotlib.pyplot as plt + + WhiteNoise().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import WhiteNoise + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=WhiteNoise(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. jupyter-execute:: + + from skfda.misc.covariances import WhiteNoise + + WhiteNoise() + + """ + + _latex_formula = (r"K(x, x')= \begin{cases} \sigma^2, \quad x = x' \\" + r"0, \quad x \neq x'\\ \end{cases}") + + _parameters = [("variance", r"\sigma^2")] + + def __init__(self, *, variance: float = 1.): + self.variance = variance + + def __call__(self, x, y): + x = _transform_to_2d(x) + return self.variance * np.eye(x.shape[0]) + + def to_sklearn(self): + """Convert it to a sklearn kernel, if there is one""" + return sklearn_kern.WhiteKernel(noise_level=self.variance) diff --git a/skfda/misc/metrics.py b/skfda/misc/metrics.py index 9ab25b97c..18188c40a 100644 --- a/skfda/misc/metrics.py +++ b/skfda/misc/metrics.py @@ -2,11 +2,21 @@ import numpy as np -from ..preprocessing.registration import ( - normalize_warping, _normalize_scale, to_srsf, - elastic_registration_warping) -from ..representation import FData -from ..representation import FDataGrid +from .._utils import _pairwise_commutative +from ..preprocessing.registration import normalize_warping, ElasticRegistration +from ..preprocessing.registration._warping import _normalize_scale +from ..preprocessing.registration.elastic import SRSF +from ..representation import FDataGrid, FDataBasis + + +def _check_compatible(fdata1, fdata2): + + if (fdata2.dim_codomain != fdata1.dim_codomain or + fdata2.dim_domain != fdata1.dim_domain): + raise ValueError("Objects should have the same dimensions") + + if not np.array_equal(fdata1.domain_range, fdata2.domain_range): + raise ValueError("Domain ranges for both objects must be equal") def _cast_to_grid(fdata1, fdata2, eval_points=None, _check=True, **kwargs): @@ -25,16 +35,10 @@ def _cast_to_grid(fdata1, fdata2, eval_points=None, _check=True, **kwargs): if not _check: return fdata1, fdata2 - elif (fdata2.dim_codomain != fdata1.dim_codomain or - fdata2.dim_domain != fdata1.dim_domain): - raise ValueError("Objects should have the same dimensions") - - # Case different domain ranges - elif not np.array_equal(fdata1.domain_range, fdata2.domain_range): - raise ValueError("Domain ranges for both objects must be equal") + _check_compatible(fdata1, fdata2) # Case new evaluation points specified - elif eval_points is not None: + if eval_points is not None: fdata1 = fdata1.to_grid(eval_points) fdata2 = fdata2.to_grid(eval_points) @@ -59,65 +63,6 @@ def _cast_to_grid(fdata1, fdata2, eval_points=None, _check=True, **kwargs): return fdata1, fdata2 -def vectorial_norm(fdatagrid, p=2): - r"""Apply a vectorial norm to a multivariate function. - - Given a multivariate function :math:`f:\mathbb{R}^n\rightarrow - \mathbb{R}^d` applies a vectorial norm :math:`\| \cdot \|` to produce a - function :math:`\|f\|:\mathbb{R}^n\rightarrow \mathbb{R}`. 
-
-    For example, let :math:`f:\mathbb{R} \rightarrow \mathbb{R}^2` be
-    :math:`f(t)=(f_1(t), f_2(t))` and :math:`\| \cdot \|_2` the euclidian norm.
-
-    .. math::
-        \|f\|_2(t) = \sqrt { |f_1(t)|^2 + |f_2(t)|^2 }
-
-    In general if :math:`p \neq \pm \infty` and :math:`f:\mathbb{R}^n
-    \rightarrow \mathbb{R}^d`
-
-    .. math::
-        \|f\|_p(x_1, ... x_n) = \left ( \sum_{k=1}^{d} |f_k(x_1, ..., x_n)|^p
-        \right )^{(1/p)}
-
-    Args:
-        fdatagrid (:class:`FDatagrid`): Functional object to be transformed.
-        p (int, optional): Exponent in the lp norm. If p is a number then
-            it is applied sum(abs(x)**p)**(1./p), if p is inf then max(abs(x)),
-            and if p is -inf it is applied min(abs(x)). See numpy.linalg.norm
-            to more information. Defaults to 2.
-
-    Returns:
-        (:class:`FDatagrid`): FDatagrid with image dimension equal to 1.
-
-    Examples:
-
-        >>> from skfda.datasets import make_multimodal_samples
-        >>> from skfda.misc.metrics import vectorial_norm
-
-        First we will construct an example dataset with curves in
-        :math:`\mathbb{R}^2`.
-
-        >>> fd = make_multimodal_samples(dim_codomain=2, random_state=0)
-        >>> fd.dim_codomain
-        2
-
-        We will apply the euclidean norm
-
-        >>> fd = vectorial_norm(fd, p=2)
-        >>> fd.dim_codomain
-        1
-
-    """
-
-    if p == 'inf':
-        p = np.inf
-
-    data_matrix = np.linalg.norm(fdatagrid.data_matrix, ord=p, axis=-1,
-                                 keepdims=True)
-
-    return fdatagrid.copy(data_matrix=data_matrix)
-
-
 def distance_from_norm(norm, **kwargs):
@@ -151,7 +96,7 @@ def distance_from_norm(norm, **kwargs):
     To construct the :math:`\mathbb{L}^2` distance it is used the
     :math:`\mathbb{L}^2` norm which is used to compute the distance.
 
-    >>> l2_distance = distance_from_norm(norm_lp, p=2)
+    >>> l2_distance = distance_from_norm(lp_norm, p=2)
     >>> d = l2_distance(fd, fd2)
     >>> float('%.3f'% d)
     0.289
@@ -189,27 +134,16 @@ def pairwise_distance(distance, **kwargs):
         :obj:`Function`: Pairwise distance function, which accepts two
             functional data objects and returns the pairwise distance matrix.
     """
-    def pairwise(fdata1, fdata2):
+    def pairwise(fdata1, fdata2=None):
 
-        fdata1, fdata2 = _cast_to_grid(fdata1, fdata2, **kwargs)
-
-        # Creates an empty matrix with the desired size to store the results.
-        matrix = np.empty((fdata1.n_samples, fdata2.n_samples))
-
-        # Iterates over the different samples of both objects.
-        for i in range(fdata1.n_samples):
-            for j in range(fdata2.n_samples):
-                matrix[i, j] = distance(fdata1[i], fdata2[j], _check=False,
-                                        **kwargs)
-        # Computes the metric between all piars of x and y.
-        return matrix
+        return _pairwise_commutative(distance, fdata1, fdata2, **kwargs)
 
     pairwise.__name__ = f"pairwise_{distance.__name__}"
 
     return pairwise
 
 
-def norm_lp(fdatagrid, p=2, p2=2):
-    r"""Calculate the norm of all the samples in a FDataGrid object.
+def lp_norm(fdata, p=2, p2=None):
+    r"""Calculate the norm of all the samples in a FData object.
 
-    For each sample sample f the Lp norm is defined as:
+    For each sample f the Lp norm is defined as:
@@ -244,7 +178,7 @@ def lp_norm(fdata, p=2, p2=None):
 
     Args:
-        fdatagrid (FDataGrid): FDataGrid object.
+        fdata (FData): FData object.
         p (int, optional): p of the lp norm. Must be greater or equal
             than 1. If p='inf' or p=np.inf it is used the L infinity metric.
             Defaults to 2.
@@ -264,45 +198,65 @@ def lp_norm(fdata, p=2, p2=None):
 
         >>> x = np.linspace(0,1,1001)
         >>> fd = FDataGrid([np.ones(len(x)), x] ,x)
-        >>> norm_lp(fd).round(2)
+        >>> lp_norm(fd).round(2)
         array([ 1.  ,  0.58])
 
         The lp norm is only defined if p >= 1.
 
-        >>> norm_lp(fd, p = 0.5)
+        >>> lp_norm(fd, p = 0.5)
         Traceback (most recent call last):
            ....
         ValueError: p must be equal or greater than 1.
 
     """
+    from ..misc import inner_product
+
+    if p2 is None:
+        p2 = p
+
+    # Special case, the inner product is heavily optimized
+    if p == p2 == 2:
+        return np.sqrt(inner_product(fdata, fdata))
+
+    # Checks that the lp norm is well defined
+    if not (p == 'inf' or np.isinf(p)) and p < 1:
+        raise ValueError("p must be equal or greater than 1.")
 
-    if fdatagrid.dim_codomain > 1:
-        if p2 == 'inf':
-            p2 = np.inf
-        data_matrix = np.linalg.norm(fdatagrid.data_matrix, ord=p2, axis=-1,
-                                     keepdims=True)
-    else:
-        data_matrix = np.abs(fdatagrid.data_matrix)
+    if isinstance(fdata, FDataBasis):
+        if fdata.dim_codomain > 1 or p != 2:
+            raise NotImplementedError
 
-    if p == 'inf' or np.isinf(p):
+        start, end = fdata.domain_range[0]
+        integral = scipy.integrate.quad_vec(
+            lambda x: np.power(np.abs(fdata(x)), p), start, end)
+        res = np.sqrt(integral[0]).flatten()
 
-        if fdatagrid.dim_domain == 1:
-            res = np.max(data_matrix[..., 0], axis=1)
-        else:
-            res = np.array([np.max(sample) for sample in data_matrix])
+    else:
+        if fdata.dim_codomain > 1:
+            if p2 == 'inf':
+                p2 = np.inf
+            data_matrix = np.linalg.norm(fdata.data_matrix, ord=p2, axis=-1,
+                                         keepdims=True)
+        else:
+            data_matrix = np.abs(fdata.data_matrix)
 
-    elif fdatagrid.dim_domain == 1:
+        if p == 'inf' or np.isinf(p):
 
-        # Computes the norm, approximating the integral with Simpson's rule.
-        res = scipy.integrate.simps(data_matrix[..., 0] ** p,
-                                    x=fdatagrid.sample_points) ** (1 / p)
+            if fdata.dim_domain == 1:
+                res = np.max(data_matrix[..., 0], axis=1)
+            else:
+                res = np.array([np.max(sample) for sample in data_matrix])
 
-    else:
-        # Needed to perform surface integration
-        return NotImplemented
+        elif fdata.dim_domain == 1:
+
+            # Computes the norm, approximating the integral with
+            # Simpson's rule.
+            res = scipy.integrate.simps(data_matrix[..., 0] ** p,
+                                        x=fdata.sample_points) ** (1 / p)
+
+        else:
+            # Needed to perform surface integration
+            return NotImplemented
 
     if len(res) == 1:
         return res[0]
@@ -329,7 +283,7 @@ def lp_distance(fdata1, fdata2, p=2, p2=2, *, eval_points=None, _check=True):
             than 1. If p='inf' or p=np.inf it is used the L infinity metric.
             Defaults to 2.
         p2 (int, optional): p index of the vectorial norm applied in case of
-            multivariate objects. Defaults to 2. See :func:`norm_lp`.
+            multivariate objects. Defaults to 2. See :func:`lp_norm`.
 
     Examples:
         Computes the distances between an object containing functional data
         corresponding to the function :math:`y = 1` defined over the
         interval [0, 1] and another one containing data of the function
         :math:`y = 0` defined over the same interval.
 
         >>> import skfda
         >>> import numpy as np
         >>>
         >>> x = np.linspace(0, 1, 1001)
         >>> fd = FDataGrid([np.ones(len(x))], x)
         >>> fd2 = FDataGrid([np.zeros(len(x))], x)
         >>> lp_distance(fd, fd2).round(2)
-        1.0
+        array([ 1.])
 
         If the functional data are defined over a different set of points of
         discretization, an exception is raised:
 
         >>> fd2 = FDataGrid([np.zeros(len(x))], x + 1)
         >>> lp_distance(fd, fd2)
         Traceback (most recent call last):
            ....
-        ValueError: Domain ranges for both objects must be equal
+        ValueError: ...
""" - # Checks + _check_compatible(fdata1, fdata2) - fdata1, fdata2 = _cast_to_grid(fdata1, fdata2, eval_points=eval_points, - _check=_check) - - return norm_lp(fdata1 - fdata2, p=p, p2=p2) + return lp_norm(fdata1 - fdata2, p=p, p2=p2) def fisher_rao_distance(fdata1, fdata2, *, eval_points=None, _check=True): @@ -369,7 +320,7 @@ def fisher_rao_distance(fdata1, fdata2, *, eval_points=None, _check=True): Let :math:`f_i` and :math:`f_j` be two functional observations, and let :math:`q_i` and :math:`q_j` be the corresponding SRSF - (see :func:`to_srsf`), the fisher rao distance is defined as + (see :class:`SRSF`), the fisher rao distance is defined as .. math:: d_{FR}(f_i, f_j) = \| q_i - q_j \|_2 = @@ -413,8 +364,9 @@ def fisher_rao_distance(fdata1, fdata2, *, eval_points=None, _check=True): fdata2 = fdata2.copy(sample_points=eval_points_normalized, domain_range=(0, 1)) - fdata1_srsf = to_srsf(fdata1) - fdata2_srsf = to_srsf(fdata2) + srsf = SRSF(initial_value=0) + fdata1_srsf = srsf.fit_transform(fdata1) + fdata2_srsf = srsf.transform(fdata2) # Return the L2 distance of the SRSF return lp_distance(fdata1_srsf, fdata2_srsf, p=2) @@ -426,7 +378,7 @@ def amplitude_distance(fdata1, fdata2, *, lam=0., eval_points=None, Let :math:`f_i` and :math:`f_j` be two functional observations, and let :math:`q_i` and :math:`q_j` be the corresponding SRSF - (see :func:`to_srsf`), the amplitude distance is defined as + (see :class:`SRSF`), the amplitude distance is defined as .. math:: d_{A}(f_i, f_j)=min_{\gamma \in \Gamma}d_{FR}(f_i \circ \gamma,f_j) @@ -482,25 +434,23 @@ def amplitude_distance(fdata1, fdata2, *, lam=0., eval_points=None, fdata2 = fdata2.copy(sample_points=eval_points_normalized, domain_range=(0, 1)) - fdata1_srsf = to_srsf(fdata1) - fdata2_srsf = to_srsf(fdata2) - - warping = elastic_registration_warping(fdata1, - template=fdata2, - lam=lam, - val_points=eval_points_normalized, - fdatagrid_srsf=fdata1_srsf, - template_srsf=fdata2_srsf, - **kwargs) + elastic_registration = ElasticRegistration( + template=fdata2, + penalty=lam, + output_points=eval_points_normalized, + **kwargs) - fdata1_reg = fdata1.compose(warping) + fdata1_reg = elastic_registration.fit_transform(fdata1) - distance = lp_distance(to_srsf(fdata1_reg), fdata2_srsf) + srsf = SRSF(initial_value=0) + fdata1_reg_srsf = srsf.fit_transform(fdata1_reg) + fdata2_srsf = srsf.transform(fdata2) + distance = lp_distance(fdata1_reg_srsf, fdata2_srsf) if lam != 0.0: # L2 norm || sqrt(Dh) - 1 ||^2 - penalty = warping(eval_points_normalized, derivative=1, - keepdims=False)[0] + warping_deriv = elastic_registration.warping_.derivative() + penalty = warping_deriv(eval_points_normalized)[0, ..., 0] penalty = np.sqrt(penalty, out=penalty) penalty -= 1 penalty = np.square(penalty, out=penalty) @@ -564,18 +514,19 @@ def phase_distance(fdata1, fdata2, *, lam=0., eval_points=None, _check=True, fdata2 = fdata2.copy(sample_points=eval_points_normalized, domain_range=(0, 1)) - warping = elastic_registration_warping(fdata1, - template=fdata2, - lam=lam, - eval_points=eval_points_normalized, - **kwargs) + elastic_registration = ElasticRegistration( + penalty=lam, template=fdata2, + output_points=eval_points_normalized) - derivative_warping = warping(eval_points_normalized, keepdims=False, - derivative=1)[0] + elastic_registration.fit_transform(fdata1) + + warping_deriv = elastic_registration.warping_.derivative() + derivative_warping = warping_deriv(eval_points_normalized)[0, ..., 0] derivative_warping = np.sqrt(derivative_warping, 
out=derivative_warping)
 
     d = scipy.integrate.simps(derivative_warping,
                               x=eval_points_normalized)
+    d = np.clip(d, -1, 1)
 
     return np.arccos(d)
 
@@ -626,11 +577,18 @@ def warping_distance(warping1, warping2, *, eval_points=None, _check=True):
     warping1_data = warping1.derivative().data_matrix[0, ..., 0]
     warping2_data = warping2.derivative().data_matrix[0, ..., 0]
 
+    # Derivative approximations can have negative values, especially at the
+    # boundaries.
+    warping1_data[warping1_data < 0] = 0
+    warping2_data[warping2_data < 0] = 0
+
     # In this case the srsf is the sqrt(gamma')
     srsf_warping1 = np.sqrt(warping1_data, out=warping1_data)
     srsf_warping2 = np.sqrt(warping2_data, out=warping2_data)
 
     product = np.multiply(srsf_warping1, srsf_warping2, out=srsf_warping1)
+
     d = scipy.integrate.simps(product, x=warping1.sample_points[0])
+    d = np.clip(d, -1, 1)
 
     return np.arccos(d)
diff --git a/skfda/misc/operators/__init__.py b/skfda/misc/operators/__init__.py
new file mode 100644
index 000000000..62d78e994
--- /dev/null
+++ b/skfda/misc/operators/__init__.py
@@ -0,0 +1,6 @@
+from ._identity import Identity
+from ._integral_transform import IntegralTransform
+from ._linear_differential_operator import LinearDifferentialOperator
+from ._operators import (Operator, gramian_matrix,
+                         gramian_matrix_optimization,
+                         MatrixOperator)
diff --git a/skfda/misc/operators/_identity.py b/skfda/misc/operators/_identity.py
new file mode 100644
index 000000000..16067002e
--- /dev/null
+++ b/skfda/misc/operators/_identity.py
@@ -0,0 +1,38 @@
+import numpy as np
+
+from ...representation import FDataGrid
+from ...representation.basis import Basis
+from ._operators import Operator, gramian_matrix_optimization
+
+
+class Identity(Operator):
+    """Identity operator.
+
+    Linear operator that returns its input.
+
+    .. math::
+        Ix = x
+
+    Can be applied to both functional and multivariate data.
+
+    """
+
+    def __call__(self, f):
+        return f
+
+
+@gramian_matrix_optimization.register
+def basis_penalty_matrix_optimized(
+        linear_operator: Identity,
+        basis: Basis):
+
+    return basis.gram_matrix()
+
+
+@gramian_matrix_optimization.register
+def fdatagrid_penalty_matrix_optimized(
+        linear_operator: Identity,
+        basis: FDataGrid):
+    from ..metrics import lp_norm
+
+    return np.diag(lp_norm(basis)**2)
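A hedged usage sketch of the registrations above: ``gramian_matrix``, exported from ``skfda.misc.operators``, is assumed here to look up these optimized implementations before falling back to numerical integration.

.. code:: python

    from skfda.misc.operators import Identity, gramian_matrix
    from skfda.representation.basis import Monomial

    basis = Monomial(n_basis=3)

    # For the identity operator this should reduce to the Gram matrix
    print(gramian_matrix(Identity(), basis))
    print(basis.gram_matrix())  # expected to match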
diff --git a/skfda/misc/operators/_integral_transform.py b/skfda/misc/operators/_integral_transform.py
new file mode 100644
index 000000000..aab01d5ad
--- /dev/null
+++ b/skfda/misc/operators/_integral_transform.py
@@ -0,0 +1,33 @@
+import scipy.integrate
+
+from ._operators import Operator
+
+
+class IntegralTransform(Operator):
+    """Integral operator.
+
+    Applies an integral transform with the given kernel to a function.
+
+    Attributes:
+        kernel_function (callable): Kernel function corresponding to
+            the operator.
+
+    """
+
+    def __init__(self, kernel_function):
+        self.kernel_function = kernel_function
+
+    def __call__(self, f):
+
+        def evaluate_transform(points):
+
+            def integral_body(integration_var):
+                return (f(integration_var) *
+                        self.kernel_function(integration_var, points))
+
+            domain_range = f.domain_range[0]
+
+            return scipy.integrate.quad_vec(
+                integral_body, domain_range[0], domain_range[1])[0]
+
+        return evaluate_transform
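What ``IntegralTransform`` returns is itself a function of the evaluation points. A minimal sketch with a constant kernel follows; the kernel choice is hypothetical and not taken from the patch.

.. code:: python

    import numpy as np

    from skfda import FDataGrid
    from skfda.misc.operators import IntegralTransform

    x = np.linspace(0, 1, 100)
    fd = FDataGrid([np.sin(2 * np.pi * x)], x)

    # K(s, t) = 1, so the transform integrates f over its domain
    operator = IntegralTransform(lambda s, t: 1.0)
    transformed = operator(fd)  # a callable: transformed(points)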
diff --git a/skfda/misc/operators/_linear_differential_operator.py b/skfda/misc/operators/_linear_differential_operator.py
new file mode 100644
index 000000000..fa55f4ee5
--- /dev/null
+++ b/skfda/misc/operators/_linear_differential_operator.py
@@ -0,0 +1,584 @@
+import numbers
+
+from numpy import polyder, polyint, polymul, polyval
+import scipy.integrate
+from scipy.interpolate import PPoly
+
+import numpy as np
+
+from ..._utils import _same_domain, _FDataCallable
+from ...representation import FDataGrid
+from ...representation.basis import Constant, Monomial, Fourier, BSpline
+from ._operators import Operator, gramian_matrix_optimization
+
+
+__author__ = "Pablo Pérez Manso"
+__email__ = "92manso@gmail.com"
+
+
+class LinearDifferentialOperator(Operator):
+    """Defines the structure of a linear differential operator function system
+
+    .. math::
+        Lx(t) = b_0(t) x(t) + b_1(t) x'(t) +
+        \\dots + b_{n-1}(t) d^{n-1}(x(t)) + b_n(t) d^n(x(t))
+
+    Can only be applied to functional data, as multivariate data has no
+    derivatives.
+
+    Attributes:
+
+        weights (list): A list of callables.
+
+    Examples:
+
+        Create a linear differential operator that penalizes the second
+        derivative (acceleration)
+
+        >>> from skfda.misc.operators import LinearDifferentialOperator
+        >>> from skfda.representation.basis import (FDataBasis,
+        ...                                         Monomial, Constant)
+        >>>
+        >>> LinearDifferentialOperator(2)
+        LinearDifferentialOperator(
+            weights=[
+            FDataBasis(
+                basis=Constant(domain_range=[array([0, 1])], n_basis=1),
+                coefficients=[[0]],
+                ...),
+            FDataBasis(
+                basis=Constant(domain_range=[array([0, 1])], n_basis=1),
+                coefficients=[[0]],
+                ...),
+            FDataBasis(
+                basis=Constant(domain_range=[array([0, 1])], n_basis=1),
+                coefficients=[[1]],
+                ...)]
+        )
+
+        Create a linear differential operator that penalizes three times
+        the second derivative (acceleration) and twice the first (velocity).
+
+        >>> LinearDifferentialOperator(weights=[0, 2, 3])
+        LinearDifferentialOperator(
+            weights=[
+            FDataBasis(
+                basis=Constant(domain_range=[array([0, 1])], n_basis=1),
+                coefficients=[[0]],
+                ...),
+            FDataBasis(
+                basis=Constant(domain_range=[array([0, 1])], n_basis=1),
+                coefficients=[[2]],
+                ...),
+            FDataBasis(
+                basis=Constant(domain_range=[array([0, 1])], n_basis=1),
+                coefficients=[[3]],
+                ...)]
+        )
+
+        Create a linear differential operator with non-constant weights.
+
+        >>> constant = Constant()
+        >>> monomial = Monomial((0, 1), n_basis=3)
+        >>> fdlist = [FDataBasis(constant, [0]),
+        ...           FDataBasis(constant, [0]),
+        ...           FDataBasis(monomial, [1, 2, 3])]
+        >>> LinearDifferentialOperator(weights=fdlist)
+        LinearDifferentialOperator(
+            weights=[
+            FDataBasis(
+                basis=Constant(domain_range=[array([0, 1])], n_basis=1),
+                coefficients=[[0]],
+                ...),
+            FDataBasis(
+                basis=Constant(domain_range=[array([0, 1])], n_basis=1),
+                coefficients=[[0]],
+                ...),
+            FDataBasis(
+                basis=Monomial(domain_range=[array([0, 1])], n_basis=3),
+                coefficients=[[1 2 3]],
+                ...)]
+        )
+
+    """
+
+    def __init__(
+            self, order_or_weights=None, *, order=None, weights=None,
+            domain_range=None):
+        """Constructor. You have to provide either order or weights.
+        If both are provided, it will raise an error.
+        If a positional argument is supplied it will be considered the
+        order if it is an integral type and the weights otherwise.
+
+        Args:
+            order (int, optional): the order of the operator. It's the highest
+                derivative order of the operator
+
+            weights (list, optional): A FDataBasis objects list of length
+                order + 1 items
+
+            domain_range (tuple or list of tuples, optional): Definition
+                of the interval where the weight functions are
+                defined. If the functional weights are specified
+                and this is not, takes the domain range from them.
+                Otherwise, defaults to (0,1).
+
+        """
+
+        from ...representation.basis import FDataBasis
+
+        num_args = sum(
+            [a is not None for a in [order_or_weights, order, weights]])
+
+        if num_args > 1:
+            raise ValueError("You have to provide the order or the weights, "
+                             "not both")
+
+        real_domain_range = (domain_range if domain_range is not None
+                             else (0, 1))
+
+        if order_or_weights is not None:
+            if isinstance(order_or_weights, numbers.Integral):
+                order = order_or_weights
+            else:
+                weights = order_or_weights
+
+        if order is None and weights is None:
+            self.weights = (FDataBasis(Constant(real_domain_range), 0),)
+
+        elif weights is None:
+            if order < 0:
+                raise ValueError("Order should be a non-negative integer")
+
+            self.weights = [
+                FDataBasis(Constant(real_domain_range),
+                           0 if (i < order) else 1)
+                for i in range(order + 1)]
+
+        else:
+            if len(weights) == 0:
+                raise ValueError("You have to provide one weight at least")
+
+            if all(isinstance(n, numbers.Real) for n in weights):
+                self.weights = list(FDataBasis(Constant(real_domain_range),
+                                               np.array(weights)
+                                               .reshape(-1, 1)))
+
+            elif all(isinstance(n, FDataBasis) for n in weights):
+                if all([_same_domain(weights[0], x)
+                        and x.n_samples == 1 for x in weights]):
+                    self.weights = weights
+
+                    real_domain_range = weights[0].domain_range
+                    if (domain_range is not None
+                            and real_domain_range != domain_range):
+                        raise ValueError("The domain range provided for the "
+                                         "linear operator does not match the "
+                                         "domain range of the weights")
+
+                else:
+                    raise ValueError("The FDataBasis objects in the list do "
+                                     "not have the same domain_range")
+
+            else:
+                raise ValueError("The elements of the list are neither "
+                                 "numbers nor FDataBasis objects")
+
+        self.domain_range = real_domain_range
+
+    def __repr__(self):
+        """Representation of linear differential operator object."""
+
+        bwtliststr = ""
+        for w in self.weights:
+            bwtliststr = bwtliststr + "\n" + repr(w) + ","
+
+        return (f"{self.__class__.__name__}("
+                f"\nweights=[{bwtliststr[:-1]}]"
+                f"\n)").replace('\n', '\n    ')
+
+    def __eq__(self, other):
+        """Equality of linear differential operator objects"""
+        return (self.weights == other.weights)
+
+    def constant_weights(self):
+        """
+        Return the scalar weights of the linear differential operator if all
+        the weights are constant (that is, expressed in a Constant basis).
+        Otherwise, return None.
+
+        This function is mostly useful for bases that want to override
+        the _penalty method in order to use an analytical expression
+        for constant weights.
+
+        """
+        coefs = [w.coefficients[0, 0] if isinstance(w.basis, Constant)
+                 else None
+                 for w in self.weights]
+
+        return np.array(coefs) if coefs.count(None) == 0 else None
+
+    def __call__(self, f):
+        """Return the function that results from applying the operator."""
+
+        function_derivatives = [
+            f.derivative(order=i) for i, _ in enumerate(self.weights)]
+
+        def applied_linear_diff_op(t):
+            return sum(w(t) * function_derivatives[i](t)
+                       for i, w in enumerate(self.weights))
+
+        return _FDataCallable(applied_linear_diff_op,
+                              domain_range=f.domain_range,
+                              n_samples=len(f))
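A sketch of applying the operator to data follows. It is illustration only, and the printed value is approximate because ``FDataGrid`` derivatives are finite-difference estimates.

.. code:: python

    import numpy as np

    from skfda import FDataGrid
    from skfda.misc.operators import LinearDifferentialOperator

    x = np.linspace(0, 1, 200)
    fd = FDataGrid([x ** 3], x)

    operator = LinearDifferentialOperator(2)  # L x = x''
    applied = operator(fd)                    # a callable wrapper

    print(applied(np.array([0.5])))  # close to 6 * 0.5 = 3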
+
+
+#############################################################
+#
+# Optimized implementations of gramian matrix for each basis.
+#
+#############################################################
+
+
+@gramian_matrix_optimization.register
+def constant_penalty_matrix_optimized(
+        linear_operator: LinearDifferentialOperator,
+        basis: Constant):
+
+    coefs = linear_operator.constant_weights()
+    if coefs is None:
+        return NotImplemented
+
+    return np.array([[coefs[0] ** 2 *
+                      (basis.domain_range[0][1] -
+                       basis.domain_range[0][0])]])
+
+
+def _monomial_evaluate_constant_linear_diff_op(basis, weights):
+    """
+    Evaluate constant weights of a linear differential operator
+    over the basis functions.
+    """
+
+    max_derivative = len(weights) - 1
+
+    seq = np.arange(basis.n_basis)
+    coef_mat = np.linspace(seq, seq - max_derivative + 1,
+                           max_derivative, dtype=int)
+
+    # Compute coefficients for each derivative
+    coefs = np.cumprod(coef_mat, axis=0)
+
+    # Add derivative 0 row
+    coefs = np.concatenate((np.ones((1, basis.n_basis)), coefs))
+
+    # Now each row corresponds to a basis function and each column to
+    # a derivative
+    coefs_t = coefs.T
+
+    # Multiply by the weights
+    weighted_coefs = coefs_t * weights
+    assert len(weighted_coefs) == basis.n_basis
+
+    # Now each row has the right weight, but the polynomials are in a
+    # decreasing order and with different exponents
+
+    # Resize the coefs so that there are as many rows as the number of
+    # basis functions
+    # The matrix is now triangular
+    # refcheck is False to prevent exceptions while debugging
+    weighted_coefs = np.copy(weighted_coefs.T)
+    weighted_coefs.resize(basis.n_basis,
+                          basis.n_basis, refcheck=False)
+    weighted_coefs = weighted_coefs.T
+
+    # Shift the coefficients so that they correspond to the right
+    # exponent
+    indexes = np.tril_indices(basis.n_basis)
+    polynomials = np.zeros_like(weighted_coefs)
+    polynomials[indexes[0], indexes[1] -
+                indexes[0] - 1] = weighted_coefs[indexes]
+
+    # At this point, each row of the matrix corresponds to a polynomial
+    # that is the result of applying the linear differential operator
+    # to each element of the basis
+
+    return polynomials
+
+
+@gramian_matrix_optimization.register
+def monomial_penalty_matrix_optimized(
+        linear_operator: LinearDifferentialOperator,
+        basis: Monomial):
+
+    weights = linear_operator.constant_weights()
+    if weights is None:
+        return NotImplemented
+
+    polynomials = _monomial_evaluate_constant_linear_diff_op(basis, weights)
+
+    # Pad the polynomials with 0, so that the product fits
+    # inside. It will need double the degree
+    length_with_padding = polynomials.shape[1] * 2 - 1
+
+    # Multiplication of polynomials is a convolution.
+    # The convolution can be performed in parallel by applying a Fourier
+    # transform, doing a normal multiplication in that
+    # space, and converting back with the inverse Fourier transform
+    fft = np.fft.rfft(polynomials, length_with_padding)
+
+    # We compute only the upper triangle, as the penalty matrix is
+    # symmetric
+    indices = np.triu_indices(basis.n_basis)
+    fft_mul = fft[indices[0]] * fft[indices[1]]
+
+    integrand = np.fft.irfft(fft_mul, length_with_padding)
+
+    integration_domain = basis.domain_range[0]
+
+    # To integrate, divide by the position and increase the exponent
+    # in the evaluation
+    denom = np.arange(integrand.shape[1], 0, -1)
+    integrand /= denom
+
+    # Add a column of zeros at the right to increase the exponent
+    integrand = np.pad(integrand,
+                       pad_width=((0, 0),
+                                  (0, 1)),
+                       mode='constant')
+
+    # Now, apply Barrow's rule
+    # polyval applies Horner's method over the first dimension,
+    # so we need to transpose
+    x_right = np.polyval(integrand.T, integration_domain[1])
+    x_left = np.polyval(integrand.T, integration_domain[0])
+
+    integral = x_right - x_left
+
+    penalty_matrix = np.empty((basis.n_basis, basis.n_basis))
+
+    # Set upper matrix
+    penalty_matrix[indices] = integral
+
+    # Set lower matrix
+    penalty_matrix[(indices[1], indices[0])] = integral
+
+    return penalty_matrix
+
+
+def _fourier_penalty_matrix_optimized_orthonormal(basis, weights):
+    """
+    Return the penalty when the basis is orthonormal.
+    """
+
+    signs = np.array([1, 1, -1, -1])
+    signs_expanded = np.tile(signs, len(weights) // 4 + 1)
+
+    signs_odd = signs_expanded[:len(weights)]
+    signs_even = signs_expanded[1:len(weights) + 1]
+
+    phases = (np.arange(1, (basis.n_basis - 1) // 2 + 1) *
+              2 * np.pi / basis.period)
+
+    # Compute increasing powers
+    coefs_no_sign = np.vander(phases, len(weights), increasing=True)
+
+    coefs_no_sign *= weights
+
+    coefs_odd = signs_odd * coefs_no_sign
+    coefs_even = signs_even * coefs_no_sign
+
+    # After applying the linear differential operator to a sinusoidal
+    # element of the basis e, the result can be expressed as
+    # A e + B e*, where e* is the other basis element in the pair
+    # with the same phase
+
+    odd_sin_coefs = np.sum(coefs_odd[:, ::2], axis=1)
+    odd_cos_coefs = np.sum(coefs_odd[:, 1::2], axis=1)
+
+    even_cos_coefs = np.sum(coefs_even[:, ::2], axis=1)
+    even_sin_coefs = np.sum(coefs_even[:, 1::2], axis=1)
+
+    # The diagonal is the inner product of A e + B e*
+    # with itself. As the basis is orthonormal, the cross products e e*
+    # are 0, and the products e e and e* e* are one.
+
+
+def _fourier_penalty_matrix_optimized_orthonormal(basis, weights):
+    """
+    Return the penalty when the basis is orthonormal.
+    """
+
+    signs = np.array([1, 1, -1, -1])
+    signs_expanded = np.tile(signs, len(weights) // 4 + 1)
+
+    signs_odd = signs_expanded[:len(weights)]
+    signs_even = signs_expanded[1:len(weights) + 1]
+
+    phases = (np.arange(1, (basis.n_basis - 1) // 2 + 1) *
+              2 * np.pi / basis.period)
+
+    # Compute increasing powers
+    coefs_no_sign = np.vander(phases, len(weights), increasing=True)
+
+    coefs_no_sign *= weights
+
+    coefs_odd = signs_odd * coefs_no_sign
+    coefs_even = signs_even * coefs_no_sign
+
+    # After applying the linear differential operator to a sinusoidal
+    # element of the basis e, the result can be expressed as
+    # A e + B e*, where e* is the other basis element in the pair
+    # with the same phase
+
+    odd_sin_coefs = np.sum(coefs_odd[:, ::2], axis=1)
+    odd_cos_coefs = np.sum(coefs_odd[:, 1::2], axis=1)
+
+    even_cos_coefs = np.sum(coefs_even[:, ::2], axis=1)
+    even_sin_coefs = np.sum(coefs_even[:, 1::2], axis=1)
+
+    # The diagonal is the inner product of A e + B e*
+    # with itself. As the basis is orthonormal, the cross products e e*
+    # are 0, and the products e e and e* e* are one.
+    # Thus, the diagonal is A^2 + B^2
+    # All elements outside the main diagonal are 0
+    main_diag_odd = odd_sin_coefs**2 + odd_cos_coefs**2
+    main_diag_even = even_sin_coefs**2 + even_cos_coefs**2
+
+    # The main diagonal should interleave both diagonals
+    main_diag = np.array((main_diag_odd, main_diag_even)).T.ravel()
+
+    penalty_matrix = np.diag(main_diag)
+
+    # Add a row and a column for the constant
+    penalty_matrix = np.pad(penalty_matrix, pad_width=((1, 0), (1, 0)),
+                            mode='constant')
+
+    penalty_matrix[0, 0] = weights[0]**2
+
+    return penalty_matrix
+
+
+@gramian_matrix_optimization.register
+def fourier_penalty_matrix_optimized(
+        linear_operator: LinearDifferentialOperator,
+        basis: Fourier):
+
+    weights = linear_operator.constant_weights()
+    if weights is None:
+        return NotImplemented
+
+    # If the period and domain range are not the same, the basis functions
+    # are not orthogonal
+    if basis.period != (basis.domain_range[0][1] - basis.domain_range[0][0]):
+        return NotImplemented
+
+    return _fourier_penalty_matrix_optimized_orthonormal(basis, weights)
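+
+# For intuition, an illustrative check (not part of the patch) of the
+# two facts used for B-splines below: derivatives of order greater than
+# or equal to the spline order vanish, and the derivative of order
+# equal to the degree is constant between knots.
+#
+#     import numpy as np
+#     from scipy.interpolate import BSpline
+#
+#     # A single cubic B-spline (degree 3) on clamped knots
+#     knots = np.array([0., 0., 0., 0., 1., 1., 1., 1.])
+#     spline = BSpline(knots, np.array([1., 0., 0., 0.]), 3)
+#     x = np.array([0.25, 0.5, 0.75])
+#     d3 = spline.derivative(3)(x)
+#     assert np.allclose(d3, d3[0])  # piecewise constant (here: -6)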
+
+
+@gramian_matrix_optimization.register
+def bspline_penalty_matrix_optimized(
+        linear_operator: LinearDifferentialOperator,
+        basis: BSpline):
+
+    coefs = linear_operator.constant_weights()
+    if coefs is None:
+        return NotImplemented
+
+    nonzero = np.flatnonzero(coefs)
+
+    # All derivatives above the order of the spline are effectively
+    # zero
+    nonzero = nonzero[nonzero < basis.order]
+
+    if len(nonzero) == 0:
+        return np.zeros((basis.n_basis, basis.n_basis))
+
+    # We will only deal with one nonzero coefficient right now
+    if len(nonzero) != 1:
+        return NotImplemented
+
+    derivative_degree = nonzero[0]
+
+    if derivative_degree == basis.order - 1:
+        # The derivatives of the B-splines are constant in the intervals
+        # defined between knots
+        knots = np.array(basis.knots)
+        mid_inter = (knots[1:] + knots[:-1]) / 2
+        basis_deriv = basis.derivative(order=derivative_degree)
+        constants = basis_deriv(mid_inter)[..., 0].T
+        knots_intervals = np.diff(basis.knots)
+        # Integration of the product of constants
+        return constants.T @ np.diag(knots_intervals) @ constants
+
+    # We only deal with the case without zero length intervals
+    # for now
+    if np.any(np.diff(basis.knots) == 0):
+        return NotImplemented
+
+    # Compute exactly using the piecewise polynomial
+    # representation of splines
+
+    # Places m knots at the boundaries
+    knots = basis._evaluation_knots()
+
+    # c is used to select which spline the function
+    # PPoly.from_spline below computes
+    c = np.zeros(len(knots))
+
+    # Initialise an empty list to store the piecewise polynomials
+    ppoly_lst = []
+
+    no_0_intervals = np.where(np.diff(knots) > 0)[0]
+
+    # For each basis function, get its piecewise polynomial representation
+    for i in range(basis.n_basis):
+
+        # Write a 1 in c in the position of the spline
+        # transformed in each iteration
+        c[i] = 1
+
+        # Get the piecewise polynomial representation and keep
+        # only the positions for intervals of nonzero length.
+        # These polynomials are defined relative to the knots,
+        # meaning that column i corresponds to the ith knot.
+        # Let the ith knot be a
+        # Then f(x) = pp(x - a)
+        pp = PPoly.from_spline((knots, c, basis.order - 1))
+        pp_coefs = pp.c[:, no_0_intervals]
+
+        # We have the coefficients for each interval in coordinates
+        # (x - a), so we will need to subtract a when computing the
+        # definite integral
+        ppoly_lst.append(pp_coefs)
+        c[i] = 0
+
+    # Now, for each pair of basis functions, compute the inner product
+    # after applying the linear differential operator
+    penalty_matrix = np.zeros((basis.n_basis, basis.n_basis))
+    for interval in range(len(no_0_intervals)):
+        for i in range(basis.n_basis):
+            poly_i = np.trim_zeros(ppoly_lst[i][:,
+                                                interval], 'f')
+            if len(poly_i) <= derivative_degree:
+                # if the degree of the polynomial is less than or
+                # equal to the order of the derivative, the result
+                # of the integral will be 0
+                continue
+            # indefinite integral
+            derivative = polyder(poly_i, derivative_degree)
+            square = polymul(derivative, derivative)
+            integral = polyint(square)
+
+            # definite integral
+            penalty_matrix[i, i] += np.diff(polyval(
+                integral, basis.knots[interval: interval + 2]
+                - basis.knots[interval]))[0]
+
+            for j in range(i + 1, basis.n_basis):
+                poly_j = np.trim_zeros(ppoly_lst[j][:,
+                                                    interval], 'f')
+                if len(poly_j) <= derivative_degree:
+                    # if the degree of the polynomial is less than
+                    # or equal to the order of the derivative, the
+                    # result of the integral will be 0
+                    continue
+                # indefinite integral
+                integral = polyint(
+                    polymul(polyder(poly_i, derivative_degree),
+                            polyder(poly_j, derivative_degree)))
+                # definite integral
+                penalty_matrix[i, j] += np.diff(polyval(
+                    integral, basis.knots[interval: interval + 2]
+                    - basis.knots[interval])
+                )[0]
+                penalty_matrix[j, i] = penalty_matrix[i, j]
+    return penalty_matrix
+
+
+@gramian_matrix_optimization.register
+def fdatagrid_penalty_matrix_optimized(
+        linear_operator: LinearDifferentialOperator,
+        basis: FDataGrid):
+
+    evaluated_basis = sum(
+        w(basis.sample_points[0]) *
+        basis.derivative(order=i)(basis.sample_points[0])
+        for i, w in enumerate(linear_operator.weights))
+
+    indices = np.triu_indices(basis.n_samples)
+    product = evaluated_basis[indices[0]] * evaluated_basis[indices[1]]
+
+    triang_vec = scipy.integrate.simps(product[..., 0], x=basis.sample_points)
+
+    matrix = np.empty((basis.n_samples, basis.n_samples))
+
+    # Set upper matrix
+    matrix[indices] = triang_vec
+
+    # Set lower matrix
+    matrix[(indices[1], indices[0])] = triang_vec
+
+    return matrix
diff --git a/skfda/misc/operators/_operators.py b/skfda/misc/operators/_operators.py
new file mode 100644
index 000000000..8d1b955d5
--- /dev/null
+++ b/skfda/misc/operators/_operators.py
@@ -0,0 +1,83 @@
+import abc
+
+import multimethod
+
+
+class Operator(abc.ABC):
+    """
+    Abstract class for operators (functions whose arguments are functions).
+
+    """
+
+    @abc.abstractmethod
+    def __call__(self, vector):
+        pass
+
+
+@multimethod.multidispatch
+def gramian_matrix_optimization(linear_operator, basis):
+    r"""
+    Generic function for which specialized implementations can be
+    registered for different combinations of operator and basis, in order
+    to provide a more efficient computation of the gramian matrix.
+    """
+    return NotImplemented
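+
+# An illustrative sketch of the registration pattern (hypothetical
+# names): efficient cases are registered on this multimethod per
+# (operator, basis) combination, and return NotImplemented to fall back
+# to the numerical computation below.
+#
+#     @gramian_matrix_optimization.register
+#     def my_gramian(linear_operator: LinearDifferentialOperator,
+#                    basis: MyCustomBasis):  # hypothetical basis type
+#         ...  # return the gramian matrix, or NotImplemented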
+
+
+def gramian_matrix_numerical(linear_operator, basis):
+    r"""
+    Return the gramian matrix given a basis, computed numerically.
+
+    This method should work for every linear operator.
+
+    """
+    from .. import inner_product_matrix
+
+    evaluated_basis = linear_operator(basis)
+
+    return inner_product_matrix(evaluated_basis)
+
+
+def gramian_matrix(linear_operator, basis):
+    r"""
+    Return the gramian matrix given a basis.
+
+    The gramian operator of a linear operator :math:`\Gamma` is
+
+    .. math::
+        G = \Gamma^* \Gamma
+
+    This method evaluates that gramian operator in a given basis,
+    which is necessary for performing Tikhonov regularization,
+    among other things.
+
+    It tries to use an optimized implementation if one is available,
+    falling back to a numerical computation otherwise.
+
+    """
+
+    # Try to use a more efficient implementation
+    matrix = gramian_matrix_optimization(linear_operator, basis)
+    if matrix is not NotImplemented:
+        return matrix
+
+    return gramian_matrix_numerical(linear_operator, basis)
+
+
+class MatrixOperator(Operator):
+    """Linear operator for finite spaces.
+
+    Between finite dimensional spaces, every linear operator can be
+    expressed as multiplication by a matrix.
+
+    Attributes:
+        matrix (array-like object): The matrix containing the linear
+            transformation.
+
+    """
+
+    def __init__(self, matrix):
+        self.matrix = matrix
+
+    def __call__(self, f):
+        return self.matrix @ f
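+
+# A minimal usage sketch (illustrative only): MatrixOperator simply
+# applies a matrix product, so its action agrees with ``@``.
+#
+#     import numpy as np
+#
+#     A = np.array([[1., 2.],
+#                   [0., 1.]])
+#     op = MatrixOperator(A)
+#     v = np.array([3., 4.])
+#     assert np.allclose(op(v), A @ v)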
diff --git a/skfda/misc/regularization/__init__.py b/skfda/misc/regularization/__init__.py
new file mode 100644
index 000000000..01f89d797
--- /dev/null
+++ b/skfda/misc/regularization/__init__.py
@@ -0,0 +1,3 @@
+from ._regularization import (TikhonovRegularization,
+                              L2Regularization,
+                              compute_penalty_matrix)
diff --git a/skfda/misc/regularization/_regularization.py b/skfda/misc/regularization/_regularization.py
new file mode 100644
index 000000000..42a496ac6
--- /dev/null
+++ b/skfda/misc/regularization/_regularization.py
@@ -0,0 +1,149 @@
+from collections.abc import Iterable
+import itertools
+from skfda.misc.operators import gramian_matrix, Identity
+
+import scipy.linalg
+from sklearn.base import BaseEstimator
+
+import numpy as np
+
+
+class TikhonovRegularization(BaseEstimator):
+    r"""
+    Implements Tikhonov regularization.
+
+    The penalization term in this type of regularization is the square of
+    the :math:`L_2` (Euclidean) norm of a linear operator applied to the
+    function or vector
+
+    .. math::
+        \lambda \| \Gamma x \|_2^2
+
+    where :math:`\Gamma` is the so called Tikhonov operator
+    (a matrix for finite vectors) and :math:`\lambda` is a positive real
+    number.
+
+    This linear operator can be an arbitrary Python callable that
+    corresponds to a linear transformation. However, the
+    :doc:`operators ` module
+    provides several common linear operators.
+
+    Parameters:
+        linear_operator: linear operator used for regularization.
+        regularization_parameter: scaling parameter (:math:`\lambda`) of
+            the penalization.
+
+    Examples:
+
+        Construct a regularization that penalizes the second derivative,
+        which is a measure of the curvature of the function.
+
+        >>> from skfda.misc.regularization import TikhonovRegularization
+        >>> from skfda.misc.operators import LinearDifferentialOperator
+        >>>
+        >>> regularization = TikhonovRegularization(
+        ...                      LinearDifferentialOperator(2))
+
+        Construct a regularization that penalizes the identity operator,
+        that is, completely equivalent to the :math:`L_2` regularization (
+        :class:`L2Regularization`).
+
+        >>> from skfda.misc.regularization import TikhonovRegularization
+        >>> from skfda.misc.operators import Identity
+        >>>
+        >>> regularization = TikhonovRegularization(Identity())
+
+        Construct a regularization that penalizes the difference between
+        the points :math:`f(1)` and :math:`f(0)` of a function :math:`f`.
+
+        >>> from skfda.misc.regularization import TikhonovRegularization
+        >>>
+        >>> regularization = TikhonovRegularization(lambda x: x(1) - x(0))
+
+        Construct a regularization that penalizes the harmonic acceleration
+        operator :math:`Lf = \omega^2 D f + D^3 f`, which, when the
+        regularization parameter is large, forces the function to be
+        :math:`f(t) = c_1 + c_2 \sin \omega t + c_3 \cos \omega t`, where
+        :math:`\omega` is the angular frequency. This is useful for some
+        periodic functions.
+
+        >>> from skfda.misc.regularization import TikhonovRegularization
+        >>> from skfda.misc.operators import LinearDifferentialOperator
+        >>> import numpy as np
+        >>>
+        >>> period = 1
+        >>> w = 2 * np.pi / period
+        >>> regularization = TikhonovRegularization(
+        ...                      LinearDifferentialOperator([0, w**2, 0, 1]))
+
+    """
+
+    def __init__(self, linear_operator,
+                 *, regularization_parameter=1):
+        self.linear_operator = linear_operator
+        self.regularization_parameter = regularization_parameter
+
+    def penalty_matrix(self, basis):
+        r"""
+        Return a penalty matrix for ordinary least squares.
+
+        """
+        return self.regularization_parameter * gramian_matrix(
+            self.linear_operator, basis)
+
+
+class L2Regularization(TikhonovRegularization):
+    r"""
+    Implements :math:`L_2` regularization.
+
+    The penalization term in this type of regularization is the square of
+    the :math:`L_2` (Euclidean) norm of the function or vector
+
+    .. math::
+        \lambda \| x \|_2^2
+
+    where :math:`\lambda` is a positive real number.
+
+    This is equivalent to Tikhonov regularization (
+    :class:`TikhonovRegularization`) using the identity operator (
+    :class:`Identity`).
+
+    Parameters:
+        regularization_parameter: scaling parameter (:math:`\lambda`) of
+            the penalization.
+
+    """
+
+    def __init__(self, *, regularization_parameter=1):
+        super().__init__(
+            linear_operator=Identity(),
+            regularization_parameter=regularization_parameter)
+
+
+def compute_penalty_matrix(basis_iterable, regularization_parameter,
+                           regularization):
+    """
+    Compute the block-diagonal penalty matrix for the given
+    regularizations.
+
+    The elements of ``basis_iterable`` may be of mixed types, one per
+    regularized block.
+ + """ + # If there is no regularization, return 0 and rely on broadcasting + if regularization_parameter == 0 or regularization is None: + return 0 + + # Compute penalty matrix if not provided + if not isinstance(regularization, Iterable): + regularization = (regularization,) + + if not isinstance(regularization_parameter, Iterable): + regularization_parameter = itertools.repeat( + regularization_parameter) + + penalty_blocks = [ + np.zeros((len(b), len(b))) if r is None else + a * r.penalty_matrix(b) + for b, r, a in zip(basis_iterable, regularization, + regularization_parameter)] + penalty_matrix = scipy.linalg.block_diag(*penalty_blocks) + + return penalty_matrix diff --git a/skfda/ml/classification/__init__.py b/skfda/ml/classification/__init__.py index 6f69cb3a8..7a2b9e3bb 100644 --- a/skfda/ml/classification/__init__.py +++ b/skfda/ml/classification/__init__.py @@ -1,4 +1,4 @@ from ..._neighbors import (KNeighborsClassifier, RadiusNeighborsClassifier, - NearestCentroids) + NearestCentroid) diff --git a/skfda/ml/clustering/__init__.py b/skfda/ml/clustering/__init__.py index 96b818792..01e2be6af 100644 --- a/skfda/ml/clustering/__init__.py +++ b/skfda/ml/clustering/__init__.py @@ -1,5 +1,5 @@ -from . import base_kmeans -from .base_kmeans import KMeans, FuzzyKMeans +from . import kmeans from ..._neighbors import NearestNeighbors +from .kmeans import KMeans, FuzzyCMeans diff --git a/skfda/ml/clustering/base_kmeans.py b/skfda/ml/clustering/kmeans.py similarity index 58% rename from skfda/ml/clustering/base_kmeans.py rename to skfda/ml/clustering/kmeans.py index 6ef1efabe..aed86ce2c 100644 --- a/skfda/ml/clustering/base_kmeans.py +++ b/skfda/ml/clustering/kmeans.py @@ -3,15 +3,12 @@ from abc import abstractmethod import warnings +import numpy as np from sklearn.base import BaseEstimator, ClusterMixin, TransformerMixin -from sklearn.exceptions import NotFittedError from sklearn.utils import check_random_state - -import numpy as np +from sklearn.utils.validation import check_is_fitted from ...misc.metrics import pairwise_distance, lp_distance -from ...representation.grid import FDataGrid - __author__ = "Amanda Hernando Bernabé" __email__ = "amanda.hernando@estudiante.uam.es" @@ -38,12 +35,12 @@ def __init__(self, n_clusters, init, metric, n_init, max_iter, tol, must be of the shape (n_clusters, fdatagrid.ncol, fdatagrid.dim_codomain). Defaults to None, and the centers are initialized randomly. - metric (optional): metric that acceps two FDataGrid objects and - returns a matrix with shape (fdatagrid1.n_samples, - fdatagrid2.n_samples). Defaults to *pairwise_distance(lp_distance)*. + metric (optional): functional data metric. Defaults to + *lp_distance*. n_init (int, optional): Number of time the k-means algorithm will - be run with different centroid seeds. The final results will be the - best output of n_init consecutive runs in terms of inertia. + be run with different centroid seeds. The final results will + be the best output of n_init consecutive runs in terms of + inertia. max_iter (int, optional): Maximum number of iterations of the clustering algorithm for a single run. Defaults to 100. tol (float, optional): tolerance used to compare the centroids @@ -63,20 +60,20 @@ def __init__(self, n_clusters, init, metric, n_init, max_iter, tol, self.tol = tol self.random_state = random_state - def _generic_clustering_checks(self, fdatagrid): + def _check_clustering(self, fdata): """Checks the arguments used in the :func:`fit method `. 
 
        Args:
-            fdatagrid (FDataGrid object): Object whose samples
+            fdata (FDataGrid object): Object whose samples
                are classified into different groups.
 
        """
-        if fdatagrid.dim_domain > 1:
+        if fdata.dim_domain > 1:
            raise NotImplementedError(
                "Only support 1 dimension on the domain.")
 
-        if fdatagrid.n_samples < 2:
+        if fdata.n_samples < 2:
            raise ValueError(
                "The number of observations must be greater than 1.")
 
@@ -94,10 +91,10 @@ def _generic_clustering_checks(self, fdatagrid):
                "because the init parameter is set.")
 
        if self.init is not None and self.init.data_matrix.shape != (
-                self.n_clusters, fdatagrid.ncol, fdatagrid.dim_codomain):
+                self.n_clusters, fdata.ncol, fdata.dim_codomain):
            raise ValueError("The init FDataGrid data_matrix should be of "
-                             "shape (n_clusters, n_features, dim_codomain) and "
-                             "gives the initial centers.")
+                             "shape (n_clusters, n_features, dim_codomain) "
+                             "and gives the initial centers.")
 
        if self.max_iter < 1:
            raise ValueError(
@@ -106,7 +103,13 @@ def _generic_clustering_checks(self, fdatagrid):
        if self.tol < 0:
            raise ValueError("The tolerance must be positive.")
 
-        return fdatagrid
+        return fdata
+
+    def _tolerance(self, fdata):
+        variance = fdata.var()
+        mean_variance = np.mean(variance[0].data_matrix)
+
+        return mean_variance * self.tol
 
    def _init_centroids(self, fdatagrid, random_state):
        """Compute the initial centroids
@@ -121,44 +124,146 @@ def _init_centroids(self, fdatagrid, random_state):
            centroid initialization.
 
        Returns:
-            centers (ndarray): initial centers
+            centroids (ndarray): initial centroids
        """
-        comparison = True
-        while comparison:
-            indices = random_state.permutation(fdatagrid.n_samples)[
+
+        if self.init is None:
+            _, idx = np.unique(fdatagrid.data_matrix,
+                               axis=0, return_index=True)
+            unique_data = fdatagrid.data_matrix[np.sort(idx)]
+
+            if len(unique_data) < self.n_clusters:
+                raise ValueError("Not enough unique data points to "
+                                 "initialize the requested number of "
+                                 "clusters")
+
+            indices = random_state.permutation(len(unique_data))[
                :self.n_clusters]
-            centers = fdatagrid.data_matrix[indices]
-            unique_centers = np.unique(centers, axis=0)
-            comparison = len(unique_centers) != self.n_clusters
+            centroids = unique_data[indices]
+
+            return fdatagrid.copy(data_matrix=centroids)
+        else:
+            return self.init.copy()
+
+    def _check_params(self):
+        pass
+
+    @abstractmethod
+    def _create_membership(self, n_samples):
+        pass
+
+    @abstractmethod
+    def _update(self, fdata, membership_matrix, distances_to_centroids,
+                centroids):
+        pass
+
+    def _algorithm(self, fdata, random_state):
+        """ Implementation of the generic clustering iteration, shared by
+        K-Means and Fuzzy C-Means, for FDataGrid objects of any dimension.
+
+        Args:
+            fdata (FDataGrid object): Object whose samples are clustered,
+                classified into different groups.
+            random_state (RandomState object): random number generation for
+                centroid initialization.
+
+        Returns:
+            (tuple): tuple containing:
+
+                membership values (numpy.ndarray):
+                    membership value that each observation has for each
+                    cluster.
+
+                centroids (numpy.ndarray: (n_clusters, ncol, dim_codomain)):
+                    centroids for each cluster.
+
+                distances_to_centroids (numpy.ndarray: (n_samples,
+                    n_clusters)): distances of each sample to each cluster.
+
+                repetitions(int): number of iterations the algorithm was run.
- return centers + """ + repetitions = 0 + centroids_old_matrix = np.zeros( + (self.n_clusters, fdata.ncol, fdata.dim_codomain)) + membership_matrix = self._create_membership(fdata.n_samples) + + centroids = self._init_centroids(fdata, random_state) + centroids_old = centroids.copy(data_matrix=centroids_old_matrix) + + pairwise_metric = pairwise_distance(self.metric) + + tolerance = self._tolerance(fdata) + + while (repetitions == 0 or + (not np.all(self.metric(centroids, centroids_old) < tolerance) + and repetitions < self.max_iter)): + + centroids_old.data_matrix[...] = centroids.data_matrix + + distances_to_centroids = pairwise_metric(fdata1=fdata, + fdata2=centroids) + + self._update( + fdata=fdata, + membership_matrix=membership_matrix, + distances_to_centroids=distances_to_centroids, + centroids=centroids) + + repetitions += 1 + + return (membership_matrix, centroids, + distances_to_centroids, repetitions) @abstractmethod + def _compute_inertia(self, membership, centroids, + distances_to_centroids): + pass + def fit(self, X, y=None, sample_weight=None): - """ Computes clustering. + """ Computes Fuzzy K-Means clustering calculating the attributes + *labels_*, *cluster_centers_*, *inertia_* and *n_iter_*. Args: X (FDataGrid object): Object whose samples are clusered, classified into different groups. - y (Ignored): present here for API consistency by convention. + y (Ignored): present here for API consistency by convention. sample_weight (Ignored): present here for API consistency by convention. """ - pass + fdata = self._check_clustering(X) + random_state = check_random_state(self.random_state) - def _check_is_fitted(self): - """Perform is_fitted validation for estimator. + self._check_params() - Checks if the estimator is fitted by verifying the presence of - of the calculated attributes "labels_" and "cluster_centers_", and - raises a NotFittedError if that is not the case. - """ - msg = ("This %(name)s instance is not fitted yet. Call 'fit' with " - "appropriate arguments before using this method.") + best_inertia = None + best_membership = None + best_centroids = None + best_distances_to_centroids = None + best_n_iter = None + + for _ in range(self.n_init): + (membership, centroids, + distances_to_centroids, n_iter) = ( + self._algorithm(fdata=fdata, + random_state=random_state)) + + inertia = self._compute_inertia(membership, centroids, + distances_to_centroids) + + if best_inertia is None or inertia < best_inertia: + best_inertia = inertia + best_membership = membership + best_centroids = centroids + best_distances_to_centroids = distances_to_centroids + best_n_iter = n_iter + + self.labels_ = best_membership + self.cluster_centers_ = best_centroids + self._distances_to_centers = best_distances_to_centroids + self.inertia_ = best_inertia + self.n_iter_ = best_n_iter - if not hasattr(self, "labels_") or \ - not hasattr(self, "cluster_centers_"): - raise NotFittedError(msg % {'name': type(self).__name__}) + return self def _check_test_data(self, fdatagrid): """Checks that the FDataGrid object and the calculated centroids have @@ -180,27 +285,26 @@ def predict(self, X, sample_weight=None): convention. Returns: - labels_ + Label of each sample. """ - self._check_is_fitted() + check_is_fitted(self) self._check_test_data(X) - return self.labels_ - - def fit_predict(self, X, y=None, sample_weight=None): - """Compute cluster centers and predict cluster index for each sample. - - Args: - X (FDataGrid object): Object whose samples are classified into - different groups. 
- y (Ignored): present here for API consistency by convention. - sample_weight (Ignored): present here for API consistency by - convention. - - Returns: - labels_ - """ - self.fit(X) - return self.labels_ + + membership_matrix = self._create_membership(X.n_samples) + centroids = self.cluster_centers_.copy() + + pairwise_metric = pairwise_distance(self.metric) + + distances_to_centroids = pairwise_metric(fdata1=X, + fdata2=centroids) + + self._update( + fdata=X, + membership_matrix=membership_matrix, + distances_to_centroids=distances_to_centroids, + centroids=centroids) + + return membership_matrix def transform(self, X): """Transform X to a cluster-distance space. @@ -216,7 +320,7 @@ def transform(self, X): distances_to_centers (numpy.ndarray: (n_samples, n_clusters)): distances of each sample to each cluster. """ - self._check_is_fitted() + check_is_fitted(self) self._check_test_data(X) return self._distances_to_centers @@ -252,7 +356,7 @@ def score(self, X, y=None, sample_weight=None): attribute. """ - self._check_is_fitted() + check_is_fitted(self) self._check_test_data(X) return -self.inertia_ @@ -315,11 +419,11 @@ class KMeans(BaseKMeans): classified. Defaults to 2. init (FDataGrid, optional): Contains the initial centers of the different clusters the algorithm starts with. Its data_marix must - be of the shape (n_clusters, fdatagrid.ncol, fdatagrid.dim_codomain). - Defaults to None, and the centers are initialized randomly. - metric (optional): metric that acceps two FDataGrid objects and returns - a matrix with shape (fdatagrid1.n_samples, fdatagrid2.n_samples). - Defaults to *pairwise_distance(lp_distance)*. + be of the shape (n_clusters, fdatagrid.ncol, + fdatagrid.dim_codomain). Defaults to None, and the centers are + initialized randomly. + metric (optional): functional data metric. Defaults to + *lp_distance*. n_init (int, optional): Number of time the k-means algorithm will be run with different centroid seeds. The final results will be the best output of n_init consecutive runs in terms of inertia. @@ -334,8 +438,8 @@ class KMeans(BaseKMeans): See :term:`Glossary `. Attributes: - labels_ (numpy.ndarray: (n_samples, dim_codomain)): 2-dimensional matrix - in which each row contains the cluster that observation belongs to. + labels_ (numpy.ndarray: n_samples): vector in which each entry contains + the cluster each observation belongs to. cluster_centers_ (FDataGrid object): data_matrix of shape (n_clusters, ncol, dim_codomain) and contains the centroids for each cluster. @@ -347,25 +451,34 @@ class KMeans(BaseKMeans): Example: + >>> import skfda >>> data_matrix = [[1, 1, 2, 3, 2.5, 2], ... [0.5, 0.5, 1, 2, 1.5, 1], ... [-1, -1, -0.5, 1, 1, 0.5], ... [-0.5, -0.5, -0.5, -1, -1, -1]] >>> sample_points = [0, 2, 4, 6, 8, 10] - >>> fd = FDataGrid(data_matrix, sample_points) - >>> kmeans = KMeans() - >>> init= np.array([[0, 0, 0, 0, 0, 0], [2, 1, -1, 0.5, 0, -0.5]]) - >>> init_fd = FDataGrid(init, sample_points) - >>> kmeans.fit(fd, init=init_fd) - >>> kmeans - KMeans(max_iter=100, - metric=.pairwise at - 0x7faf3aa061e0>, # doctest:+ELLIPSIS - n_clusters=2, random_state=0, tol=0.0001) - """.replace('+IGNORE_RESULT', '+ELLIPSIS\n<...>') + >>> fd = skfda.FDataGrid(data_matrix, sample_points) + >>> kmeans = skfda.ml.clustering.KMeans(random_state=0) + >>> kmeans.fit(fd) + KMeans(...) + >>> kmeans.cluster_centers_.data_matrix + array([[[ 0.16666667], + [ 0.16666667], + [ 0.83333333], + [ 2. ], + [ 1.66666667], + [ 1.16666667]], + [[-0.5 ], + [-0.5 ], + [-0.5 ], + [-1. ], + [-1. ], + [-1. 
]]]) + + """ def __init__(self, n_clusters=2, init=None, - metric=pairwise_distance(lp_distance), + metric=lp_distance, n_init=1, max_iter=100, tol=1e-4, random_state=0): """Initialization of the KMeans class. @@ -377,10 +490,8 @@ def __init__(self, n_clusters=2, init=None, must be of the shape (n_clusters, fdatagrid.ncol, fdatagrid.dim_codomain). Defaults to None, and the centers are initialized randomly. - metric (optional): metric that acceps two FDataGrid objects and - returns a matrix with shape (fdatagrid1.n_samples, - fdatagrid2.n_samples). - Defaults to *pairwise_distance(lp_distance)*. + metric (optional): functional data metric. Defaults to + *lp_distance*. n_init (int, optional): Number of time the k-means algorithm will be run with different centroid seeds. The final results will be the best output of n_init consecutive runs in terms @@ -400,105 +511,32 @@ def __init__(self, n_clusters=2, init=None, n_init=n_init, max_iter=max_iter, tol=tol, random_state=random_state) - def _kmeans_implementation(self, fdatagrid, random_state): - """ Implementation of the K-Means algorithm for FDataGrid objects - of any dimension. - - Args: - fdatagrid (FDataGrid object): Object whose samples are clusered, - classified into different groups. - random_state (RandomState object): random number generation for - centroid initialization. - - Returns: - (tuple): tuple containing: + def _compute_inertia(self, membership, centroids, + distances_to_centroids): + distances_to_their_center = np.choose(membership, + distances_to_centroids.T) - clustering_values (numpy.ndarray: (n_samples,)): 1-dimensional - array where each row contains the cluster that observation - belongs to. + return np.sum(distances_to_their_center ** 2) - centers (numpy.ndarray: (n_clusters, ncol, dim_codomain)): - Contains the centroids for each cluster. + def _create_membership(self, n_samples): + return np.empty(n_samples, dtype=int) - distances_to_centers (numpy.ndarray: (n_samples, n_clusters)): - distances of each sample to each cluster. - - repetitions(int): number of iterations the algorithm was run. - """ - repetitions = 0 - centers_old = np.zeros( - (self.n_clusters, fdatagrid.ncol, fdatagrid.dim_codomain)) - - if self.init is None: - centers = self._init_centroids(fdatagrid, random_state) - else: - centers = np.copy(self.init.data_matrix) - - while not np.allclose(centers, centers_old, rtol=self.tol, - atol=self.tol) and repetitions < self.max_iter: - centers_old = np.copy(centers) - centers_fd = FDataGrid(centers, fdatagrid.sample_points) - distances_to_centers = self.metric(fdata1=fdatagrid, - fdata2=centers_fd) - clustering_values = np.argmin(distances_to_centers, axis=1) - for i in range(self.n_clusters): - indices, = np.where(clustering_values == i) - if indices.size != 0: - centers[i] = np.average( - fdatagrid.data_matrix[indices, ...], axis=0) - repetitions += 1 + def _update(self, fdata, membership_matrix, distances_to_centroids, + centroids): - return clustering_values, centers, distances_to_centers, repetitions + membership_matrix[:] = np.argmin(distances_to_centroids, axis=1) - def fit(self, X, y=None, sample_weight=None): - """ Computes K-Means clustering calculating the attributes - *labels_*, *cluster_centers_*, *inertia_* and *n_iter_*. + for i in range(self.n_clusters): - Args: - X (FDataGrid object): Object whose samples are clusered, - classified into different groups. - y (Ignored): present here for API consistency by convention. 
- sample_weight (Ignored): present here for API consistency by - convention. - """ - random_state = check_random_state(self.random_state) - fdatagrid = super()._generic_clustering_checks(fdatagrid=X) - - clustering_values = np.empty( - (self.n_init, fdatagrid.n_samples)).astype(int) - centers = np.empty((self.n_init, self.n_clusters, - fdatagrid.ncol, fdatagrid.dim_codomain)) - distances_to_centers = np.empty( - (self.n_init, fdatagrid.n_samples, self.n_clusters)) - distances_to_their_center = np.empty( - (self.n_init, fdatagrid.n_samples)) - n_iter = np.empty((self.n_init)) - - for j in range(self.n_init): - (clustering_values[j, :], centers[j, :, :, :], - distances_to_centers[j, :, :], n_iter[j]) = ( - self._kmeans_implementation(fdatagrid=fdatagrid, - random_state=random_state)) - distances_to_their_center[j, :] = distances_to_centers[ - j, np.arange(fdatagrid.n_samples), - clustering_values[j, :]] - - inertia = np.sum(distances_to_their_center ** 2, axis=1) - index_best_iter = np.argmin(inertia) - - self.labels_ = clustering_values[index_best_iter] - self.cluster_centers_ = FDataGrid(data_matrix=centers[index_best_iter], - sample_points=fdatagrid.sample_points - ) - self._distances_to_centers = distances_to_centers[index_best_iter] - self.inertia_ = inertia[index_best_iter] - self.n_iter_ = n_iter[index_best_iter] + indices, = np.where(membership_matrix == i) - return self + if len(indices) != 0: + centroids.data_matrix[i] = np.average( + fdata.data_matrix[indices, ...], axis=0) -class FuzzyKMeans(BaseKMeans): - r""" Representation and implementation of the Fuzzy K-Means clustering +class FuzzyCMeans(BaseKMeans): + r""" Representation and implementation of the Fuzzy c-Means clustering algorithm for the FDataGrid object. Let :math:`\mathbf{X = \left\{ x_{1}, x_{2}, ..., x_{n}\right\}}` be a @@ -561,11 +599,11 @@ class FuzzyKMeans(BaseKMeans): classified. Defaults to 2. init (FDataGrid, optional): Contains the initial centers of the different clusters the algorithm starts with. Its data_marix must - be of the shape (n_clusters, fdatagrid.ncol, fdatagrid.dim_codomain). - Defaults to None, and the centers are initialized randomly. - metric (optional): metric that acceps two FDataGrid objects and returns - a matrix with shape (fdatagrid1.n_samples, fdatagrid2.n_samples). - Defaults to *pairwise_distance(lp_distance)*. + be of the shape (n_clusters, fdatagrid.ncol, + fdatagrid.dim_codomain). Defaults to None, and the centers are + initialized randomly. + metric (optional): functional data metric. Defaults to + *lp_distance*. n_init (int, optional): Number of time the k-means algorithm will be run with different centroid seeds. The final results will be the best output of n_init consecutive runs in terms of inertia. @@ -580,12 +618,11 @@ class FuzzyKMeans(BaseKMeans): See :term:`Glossary `. fuzzifier (int, optional): Scalar parameter used to specify the degree of fuzziness in the fuzzy algorithm. Defaults to 2. - n_dec (int, optional): designates the number of decimals of the labels - returned in the fuzzy algorithm. Defaults to 3. Attributes: - labels_ (numpy.ndarray: (n_samples, dim_codomain)): 2-dimensional matrix - in which each row contains the cluster that observation belongs to. + labels_ (numpy.ndarray: (n_samples, n_clusters)): 2-dimensional + matrix in which each row contains the cluster that observation + belongs to. cluster_centers_ (FDataGrid object): data_matrix of shape (n_clusters, ncol, dim_codomain) and contains the centroids for each cluster. 
@@ -598,26 +635,31 @@ class FuzzyKMeans(BaseKMeans): Example: + >>> import skfda >>> data_matrix = [[[1, 0.3], [2, 0.4], [3, 0.5], [4, 0.6]], ... [[2, 0.5], [3, 0.6], [4, 0.7], [5, 0.7]], ... [[3, 0.2], [4, 0.3], [5, 0.4], [6, 0.5]]] >>> sample_points = [2, 4, 6, 8] - >>> fd = FDataGrid(data_matrix, sample_points) - >>> fuzzy_kmeans = FuzzyKMeans() - >>> init=np.array([[[3, 0], [5, 0], [2, 0], [4, 0]], - ... [[0, 0], [0, 1], [0, 0], [0, 1]]]) - >>> init_fd = FDataGrid(init, sample_points) - >>> fuzzy_kmeans.fit(fd, init=init_fd) - >>> fuzzy_kmeans - FuzzyKMeans(fuzzifier=2, max_iter=100, - metric=.pairwise at - 0x7faf3aa06488>, # doctest:+ELLIPSIS - n_clusters=2, n_dec=3, random_state=0, tol=0.0001) - """.replace('+IGNORE_RESULT', '+ELLIPSIS\n<...>') + >>> fd = skfda.FDataGrid(data_matrix, sample_points) + >>> fuzzy_kmeans = skfda.ml.clustering.FuzzyCMeans(random_state=0) + >>> fuzzy_kmeans.fit(fd) + FuzzyCMeans(...) + >>> fuzzy_kmeans.cluster_centers_.data_matrix + array([[[ 2.83994301, 0.24786354], + [ 3.83994301, 0.34786354], + [ 4.83994301, 0.44786354], + [ 5.83994301, 0.53191927]], + [[ 1.25134384, 0.35023779], + [ 2.25134384, 0.45023779], + [ 3.25134384, 0.55023779], + [ 4.25134384, 0.6251158 ]]]) + + + """ def __init__(self, n_clusters=2, init=None, - metric=pairwise_distance(lp_distance), n_init=1, max_iter=100, - tol=1e-4, random_state=0, fuzzifier=2, n_dec=3): + metric=lp_distance, n_init=1, max_iter=100, + tol=1e-4, random_state=0, fuzzifier=2): """Initialization of the FuzzyKMeans class. Args: @@ -628,10 +670,8 @@ def __init__(self, n_clusters=2, init=None, must be of the shape (n_clusters, fdatagrid.ncol, fdatagrid.dim_codomain). Defaults to None, and the centers are initialized randomly. - metric (optional): metric that acceps two FDataGrid objects and - returns a matrix with shape (fdatagrid1.n_samples, - fdatagrid2.n_samples). - Defaults to *pairwise_distance(lp_distance)*. + metric (optional): functional data metric. Defaults to + *lp_distance*. n_init (int, optional): Number of time the k-means algorithm will be run with different centroid seeds. The final results will be the best output of n_init consecutive runs in terms of inertia. @@ -646,136 +686,48 @@ def __init__(self, n_clusters=2, init=None, deterministic. Defaults to 0. fuzzifier (int, optional): Scalar parameter used to specify the degree of fuzziness in the fuzzy algorithm. Defaults to 2. - n_dec (int, optional): designates the number of decimals of the - labels returned in the fuzzy algorithm. Defaults to 3. + """ super().__init__(n_clusters=n_clusters, init=init, metric=metric, n_init=n_init, max_iter=max_iter, tol=tol, random_state=random_state) self.fuzzifier = fuzzifier - self.n_dec = n_dec - def _fuzzy_kmeans_implementation(self, fdatagrid, random_state): - """ Implementation of the Fuzzy K-Means algorithm for FDataGrid objects - of any dimension. - - Args: - fdatagrid (FDataGrid object): Object whose samples are clusered, - classified into different groups. - random_state (RandomState object): random number generation for - centroid initialization. - - Returns: - (tuple): tuple containing: - - membership values (numpy.ndarray: (n_samples, n_clusters)): - 2-dimensional matrix where each row contains the membership - value that observation has to each cluster. - - centers (numpy.ndarray: (n_clusters, ncol, dim_codomain)): - Contains the centroids for each cluster. - - distances_to_centers (numpy.ndarray: (n_samples, n_clusters)): - distances of each sample to each cluster. 
- - repetitions(int): number of iterations the algorithm was run. - - """ - repetitions = 0 - centers_old = np.zeros( - (self.n_clusters, fdatagrid.ncol, fdatagrid.dim_codomain)) - U = np.empty((fdatagrid.n_samples, self.n_clusters)) - distances_to_centers = np.empty((fdatagrid.n_samples, self.n_clusters)) - - if self.init is None: - centers = self._init_centroids(fdatagrid, random_state) - else: - centers = np.copy(self.init.data_matrix) - - while not np.allclose(centers, centers_old, rtol=self.tol, - atol=self.tol) and repetitions < self.max_iter: - - centers_old = np.copy(centers) - centers_fd = FDataGrid(centers, fdatagrid.sample_points) - distances_to_centers = self.metric( - fdata1=fdatagrid, - fdata2=centers_fd) - distances_to_centers_raised = (distances_to_centers ** ( - 2 / (self.fuzzifier - 1))) - - for i in range(fdatagrid.n_samples): - comparison = (fdatagrid.data_matrix[i] == centers).all( - axis=tuple(np.arange(fdatagrid.data_matrix.ndim)[1:])) - if comparison.sum() >= 1: - U[i, np.where(comparison == True)] = 1 - U[i, np.where(comparison == False)] = 0 - else: - for j in range(self.n_clusters): - U[i, j] = 1 / np.sum( - distances_to_centers_raised[i, j] / - distances_to_centers_raised[i]) - - U = np.power(U, self.fuzzifier) - for i in range(self.n_clusters): - centers[i] = np.sum((U[:, i] * fdatagrid.data_matrix.T).T, - axis=0) / np.sum(U[:, i]) - repetitions += 1 - - return (np.round(np.power(U, 1 / self.fuzzifier), self.n_dec), centers, - distances_to_centers, repetitions) - - def fit(self, X, y=None, sample_weight=None): - """ Computes Fuzzy K-Means clustering calculating the attributes - *labels_*, *cluster_centers_*, *inertia_* and *n_iter_*. - - Args: - X (FDataGrid object): Object whose samples are clusered, - classified into different groups. - y (Ignored): present here for API consistency by convention. - sample_weight (Ignored): present here for API consistency by - convention. 
- """ - fdatagrid = super()._generic_clustering_checks(fdatagrid=X) - random_state = check_random_state(self.random_state) - - if self.fuzzifier < 2: + def _check_params(self): + if self.fuzzifier <= 1: raise ValueError("The fuzzifier parameter must be greater than 1.") - if self.n_dec < 1: - raise ValueError( - "The number of decimals should be greater than 0 in order to " - "obtain a rational result.") - - membership_values = np.empty( - (self.n_init, fdatagrid.n_samples, self.n_clusters)) - centers = np.empty( - (self.n_init, self.n_clusters, fdatagrid.ncol, - fdatagrid.dim_codomain)) - distances_to_centers = np.empty( - (self.n_init, fdatagrid.n_samples, self.n_clusters)) - distances_to_their_center = np.empty( - (self.n_init, fdatagrid.n_samples)) - n_iter = np.empty((self.n_init)) - - for j in range(self.n_init): - (membership_values[j, :, :], centers[j, :, :, :], - distances_to_centers[j, :, :], n_iter[j]) = ( - self._fuzzy_kmeans_implementation(fdatagrid=fdatagrid, - random_state=random_state)) - distances_to_their_center[j, :] = distances_to_centers[ - j, np.arange(fdatagrid.n_samples), - np.argmax(membership_values[j, :, :], axis=-1)] - - inertia = np.sum(distances_to_their_center ** 2, axis=1) - index_best_iter = np.argmin(inertia) - - self.labels_ = membership_values[index_best_iter] - self.cluster_centers_ = FDataGrid(data_matrix=centers[index_best_iter], - sample_points=fdatagrid.sample_points - ) - self._distances_to_centers = distances_to_centers[index_best_iter] - self.inertia_ = inertia[index_best_iter] - self.n_iter_ = n_iter[index_best_iter] - - return self + def _compute_inertia(self, membership, centroids, + distances_to_centroids): + return np.sum(membership ** self.fuzzifier * distances_to_centroids ** 2) + + def _create_membership(self, n_samples): + return np.empty((n_samples, self.n_clusters)) + + def _update(self, fdata, membership_matrix, distances_to_centroids, + centroids): + # Divisions by zero allowed + with np.errstate(divide='ignore'): + distances_to_centers_raised = (distances_to_centroids ** ( + 2 / (1 - self.fuzzifier))) + + # Divisions infinity by infinity allowed + with np.errstate(invalid='ignore'): + membership_matrix[:, :] = (distances_to_centers_raised + / np.sum( + distances_to_centers_raised, + axis=1, keepdims=True)) + + # inf / inf divisions should be 1 in this context + membership_matrix[np.isnan(membership_matrix)] = 1 + + membership_matrix_raised = np.power( + membership_matrix, self.fuzzifier) + + slice_denominator = ((slice(None),) + (np.newaxis,) * + (fdata.data_matrix.ndim - 1)) + centroids.data_matrix[:] = ( + np.einsum('ij,i...->j...', membership_matrix_raised, + fdata.data_matrix) + / np.sum(membership_matrix_raised, axis=0)[slice_denominator]) diff --git a/skfda/ml/regression/__init__.py b/skfda/ml/regression/__init__.py index c2a67127a..ed1ee3890 100644 --- a/skfda/ml/regression/__init__.py +++ b/skfda/ml/regression/__init__.py @@ -1,4 +1,4 @@ from ..._neighbors import KNeighborsRegressor, RadiusNeighborsRegressor -from .linear_model import LinearScalarRegression +from .linear import LinearRegression diff --git a/skfda/ml/regression/_coefficients.py b/skfda/ml/regression/_coefficients.py new file mode 100644 index 000000000..67f30ab16 --- /dev/null +++ b/skfda/ml/regression/_coefficients.py @@ -0,0 +1,96 @@ +from functools import singledispatch + +import numpy as np + +from ...misc._math import inner_product +from ...representation.basis import Basis, FDataBasis + + +class CoefficientInfo(): + """ + Information about an estimated 
diff --git a/skfda/ml/regression/__init__.py b/skfda/ml/regression/__init__.py
index c2a67127a..ed1ee3890 100644
--- a/skfda/ml/regression/__init__.py
+++ b/skfda/ml/regression/__init__.py
@@ -1,4 +1,4 @@
 from ..._neighbors import KNeighborsRegressor, RadiusNeighborsRegressor
 
-from .linear_model import LinearScalarRegression
+from .linear import LinearRegression
diff --git a/skfda/ml/regression/_coefficients.py b/skfda/ml/regression/_coefficients.py
new file mode 100644
index 000000000..67f30ab16
--- /dev/null
+++ b/skfda/ml/regression/_coefficients.py
@@ -0,0 +1,96 @@
+from functools import singledispatch
+
+import numpy as np
+
+from ...misc._math import inner_product
+from ...representation.basis import Basis, FDataBasis
+
+
+class CoefficientInfo():
+    """
+    Information about an estimated coefficient.
+
+    Parameters:
+        basis: Basis of the coefficient.
+
+    """
+
+    def __init__(self, basis):
+        self.basis = basis
+
+    def regression_matrix(self, X, y):
+        """
+        Return the constant coefficients matrix for regression.
+
+        Parameters:
+            X: covariate data for regression.
+            y: target data for regression.
+
+        """
+        return np.atleast_2d(X)
+
+    def convert_from_constant_coefs(self, coefs):
+        """
+        Return the coefficients object from the constant coefs.
+
+        Parameters:
+            coefs: estimated constant coefficients.
+
+        """
+        return coefs
+
+    def inner_product(self, coefs, X):
+        """
+        Compute the inner product between the coefficient and
+        the covariate.
+
+        """
+        return inner_product(coefs, X)
+
+
+class CoefficientInfoFDataBasis(CoefficientInfo):
+    """
+    Information about an FDataBasis coefficient.
+
+    Parameters:
+        basis: Basis of the coefficient.
+
+    """
+
+    def regression_matrix(self, X, y):
+        # The matrix is the matrix of coefficients multiplied by
+        # the matrix of inner products.
+
+        xcoef = X.coefficients
+        self.inner_basis = X.basis.inner_product_matrix(self.basis)
+        return xcoef @ self.inner_basis
+
+    def convert_from_constant_coefs(self, coefs):
+        return FDataBasis(self.basis, coefs.T)
+
+    def inner_product(self, coefs, X):
+        # Efficient implementation of the inner product using the
+        # inner product matrix previously computed
+        return inner_product(coefs, X,
+                             inner_product_matrix=self.inner_basis.T)
+
+
+@singledispatch
+def coefficient_info_from_covariate(X, y, **kwargs) -> CoefficientInfo:
+    """
+    Make a coefficient info object from a covariate.
+
+    """
+    return CoefficientInfo(basis=np.identity(X.shape[1], dtype=X.dtype))
+
+
+@coefficient_info_from_covariate.register(FDataBasis)
+def coefficient_info_from_covariate_fdatabasis(
+        X: FDataBasis, y, **kwargs) -> CoefficientInfoFDataBasis:
+    basis = kwargs['basis']
+    if basis is None:
+        basis = X.basis
+
+    if not isinstance(basis, Basis):
+        raise TypeError(f"basis must be a Basis object, not {type(basis)}")
+
+    return CoefficientInfoFDataBasis(basis=basis)
diff --git a/skfda/ml/regression/linear.py b/skfda/ml/regression/linear.py
new file mode 100644
index 000000000..30cbe5faf
--- /dev/null
+++ b/skfda/ml/regression/linear.py
@@ -0,0 +1,252 @@
+from collections.abc import Iterable
+import itertools
+import warnings
+
+from sklearn.base import BaseEstimator, RegressorMixin
+from sklearn.utils.validation import check_is_fitted
+
+import numpy as np
+
+from ...misc.regularization import compute_penalty_matrix
+from ...representation import FData
+from ._coefficients import coefficient_info_from_covariate
+
+
+class LinearRegression(BaseEstimator, RegressorMixin):
+    r"""Linear regression with multivariate response.
+
+    This is a regression algorithm equivalent to multivariate linear
+    regression, but accepting also functional data expressed in a basis
+    expansion.
+
+    The model assumed by this method is:
+
+    .. math::
+        y = w_0 + w_1 x_1 + \ldots + w_p x_p + \int w_{p+1}(t) x_{p+1}(t) dt \
+        + \ldots + \int w_r(t) x_r(t) dt
+
+    where the covariates can be either multivariate or functional and the
+    response is multivariate.
+
+    .. warning::
+        For now, only scalar responses are supported.
+
+    Args:
+        coef_basis (iterable): Basis of the coefficient functions of the
+            functional covariates. If multivariate data is supplied, their
+            corresponding entries should be ``None``. If ``None`` is provided
+            for a functional covariate, the basis of that covariate is used.
If this + parameter is ``None`` (the default), it is assumed that ``None`` + is provided for all covariates. + fit_intercept (bool): Whether to calculate the intercept for this + model. If set to False, no intercept will be used in calculations + (i.e. data is expected to be centered). + regularization (int, iterable or :class:`Regularization`): If it is + not a :class:`Regularization` object, linear differential + operator regularization is assumed. If it + is an integer, it indicates the order of the + derivative used in the computing of the penalty matrix. For + instance 2 means that the differential operator is + :math:`f''(x)`. If it is an iterable, it consists on coefficients + representing the differential operator used in the computing of + the penalty matrix. For instance the tuple (1, 0, + numpy.sin) means :math:`1 + sin(x)D^{2}`. If not supplied this + defaults to 2. Only used if penalty_matrix is + ``None``. + + Attributes: + coef_ (iterable): A list containing the weight coefficient for each + covariate. For multivariate data, the covariate is a Numpy array. + For functional data, the covariate is a FDataBasis object. + intercept_ (float): Independent term in the linear model. Set to 0.0 + if `fit_intercept = False`. + + Examples: + + >>> from skfda.ml.regression import LinearRegression + >>> from skfda.representation.basis import (FDataBasis, Monomial, + ... Constant) + + Multivariate linear regression can be used with functions expressed in + a basis. Also, a functional basis for the weights can be specified: + + >>> x_basis = Monomial(n_basis=3) + >>> x_fd = FDataBasis(x_basis, [[0, 0, 1], + ... [0, 1, 0], + ... [0, 1, 1], + ... [1, 0, 1]]) + >>> y = [2, 3, 4, 5] + >>> linear = LinearRegression() + >>> _ = linear.fit(x_fd, y) + >>> linear.coef_[0] + FDataBasis( + basis=Monomial(domain_range=[array([0, 1])], n_basis=3), + coefficients=[[-15. 96. -90.]], + ...) + >>> linear.intercept_ + array([ 1.]) + >>> linear.predict(x_fd) + array([ 2., 3., 4., 5.]) + + Covariates can include also multivariate data: + + >>> x_basis = Monomial(n_basis=2) + >>> x_fd = FDataBasis(x_basis, [[0, 2], + ... [0, 4], + ... [1, 0], + ... [2, 0], + ... [1, 2], + ... [2, 2]]) + >>> x = [[1, 7], [2, 3], [4, 2], [1, 1], [3, 1], [2, 5]] + >>> y = [11, 10, 12, 6, 10, 13] + >>> linear = LinearRegression( + ... coef_basis=[None, Constant()]) + >>> _ = linear.fit([x, x_fd], y) + >>> linear.coef_[0] + array([ 2., 1.]) + >>> linear.coef_[1] + FDataBasis( + basis=Constant(domain_range=[array([0, 1])], n_basis=1), + coefficients=[[ 1.]], + ...) 
+ >>> linear.intercept_ + array([ 1.]) + >>> linear.predict([x, x_fd]) + array([ 11., 10., 12., 6., 10., 13.]) + + """ + + def __init__(self, *, coef_basis=None, fit_intercept=True, + regularization=None): + self.coef_basis = coef_basis + self.fit_intercept = fit_intercept + self.regularization = regularization + + def fit(self, X, y=None, sample_weight=None): + + X, y, sample_weight, coef_info = self._argcheck_X_y( + X, y, sample_weight, self.coef_basis) + + regularization = self.regularization + + if self.fit_intercept: + new_x = np.ones((len(y), 1)) + X = [new_x] + X + coef_info = [coefficient_info_from_covariate(new_x, y)] + coef_info + + if isinstance(regularization, Iterable): + regularization = itertools.chain([None], regularization) + elif regularization is not None: + regularization = (None, regularization) + + inner_products_list = [c.regression_matrix(x, y) + for x, c in zip(X, coef_info)] + + # This is C @ J + inner_products = np.concatenate(inner_products_list, axis=1) + + if sample_weight is not None: + inner_products = inner_products * np.sqrt(sample_weight) + y = y * np.sqrt(sample_weight) + + penalty_matrix = compute_penalty_matrix( + basis_iterable=(c.basis for c in coef_info), + regularization_parameter=1, + regularization=regularization) + + if self.fit_intercept and hasattr(penalty_matrix, "shape"): + # Intercept is not penalized + penalty_matrix[0, 0] = 0 + + gram_inner_x_coef = inner_products.T @ inner_products + penalty_matrix + inner_x_coef_y = inner_products.T @ y + + coef_lengths = np.array([i.shape[1] for i in inner_products_list]) + coef_start = np.cumsum(coef_lengths) + + basiscoefs = np.linalg.solve(gram_inner_x_coef, inner_x_coef_y) + basiscoef_list = np.split(basiscoefs, coef_start) + + # Express the coefficients in functional form + coefs = [c.convert_from_constant_coefs(bcoefs) + for c, bcoefs in zip(coef_info, basiscoef_list)] + + if self.fit_intercept: + self.intercept_ = coefs[0] + coefs = coefs[1:] + else: + self.intercept_ = 0.0 + + self.coef_ = coefs + self._coef_info = coef_info + self._target_ndim = y.ndim + + return self + + def predict(self, X): + from ...misc import inner_product + + check_is_fitted(self) + X = self._argcheck_X(X) + + result = np.sum([coef_info.inner_product(coef, x) + for coef, x, coef_info + in zip(self.coef_, X, self._coef_info)], axis=0) + + result += self.intercept_ + + if self._target_ndim == 1: + result = result.ravel() + + return result + + def _argcheck_X(self, X): + if isinstance(X, FData) or isinstance(X, np.ndarray): + X = [X] + + X = [x if isinstance(x, FData) else np.asarray(x) for x in X] + + if all(not isinstance(i, FData) for i in X): + warnings.warn("All the covariates are scalar.") + + return X + + def _argcheck_X_y(self, X, y, sample_weight=None, coef_basis=None): + """Do some checks to types and shapes""" + + # TODO: Add support for Dataframes + + X = self._argcheck_X(X) + + y = np.asarray(y) + + if (np.issubdtype(y.dtype, np.object_) + and any(isinstance(i, FData) for i in y)): + raise ValueError( + "Some of the response variables are not scalar") + + if coef_basis is None: + coef_basis = [None] * len(X) + + if len(coef_basis) != len(X): + raise ValueError("Number of regression coefficients does " + "not match number of independent variables.") + + if any(len(y) != len(x) for x in X): + raise ValueError("The number of samples on independent and " + "dependent variables should be the same") + + coef_info = [coefficient_info_from_covariate(x, y, basis=b) + for x, b in zip(X, coef_basis)] + + if sample_weight 
is not None: + + if len(sample_weight) != len(y): + raise ValueError("The number of sample weights should be " + "equal to the number of samples.") + + if np.any(np.array(sample_weight) < 0): + raise ValueError( + "The sample weights should be non negative values") + + return X, y, sample_weight, coef_info diff --git a/skfda/ml/regression/linear_model.py b/skfda/ml/regression/linear_model.py deleted file mode 100644 index 49014b114..000000000 --- a/skfda/ml/regression/linear_model.py +++ /dev/null @@ -1,96 +0,0 @@ -from sklearn.base import BaseEstimator, RegressorMixin -from sklearn.utils.validation import check_is_fitted - -import numpy as np -from skfda.representation.basis import FDataBasis, Constant, Basis, FData - - -class LinearScalarRegression(BaseEstimator, RegressorMixin): - - def __init__(self, beta_basis): - self.beta_basis = beta_basis - - def fit(self, X, y=None, sample_weight=None): - - y, X, weights = self._argcheck(y, X, sample_weight) - - nbeta = len(self.beta_basis) - n_samples = X[0].n_samples - - y = np.asarray(y).reshape((n_samples, 1)) - - for j in range(nbeta): - xcoef = X[j].coefficients - inner_basis_x_beta_j = X[j].basis.inner_product(self.beta_basis[j]) - inner_x_beta = (xcoef @ inner_basis_x_beta_j - if j == 0 - else np.concatenate((inner_x_beta, - xcoef @ inner_basis_x_beta_j), - axis=1)) - - if any(w != 1 for w in weights): - inner_x_beta = inner_x_beta * np.sqrt(weights) - y = y * np.sqrt(weights) - - gram_inner_x_beta = inner_x_beta.T @ inner_x_beta - inner_x_beta_y = inner_x_beta.T @ y - - gram_inner_x_beta_inv = np.linalg.inv(gram_inner_x_beta) - betacoefs = gram_inner_x_beta_inv @ inner_x_beta_y - - idx = 0 - for j in range(0, nbeta): - self.beta_basis[j] = FDataBasis( - self.beta_basis[j], - betacoefs[idx:idx + self.beta_basis[j].n_basis].T) - idx = idx + self.beta_basis[j].n_basis - - self.beta_ = self.beta_basis - return self - - def predict(self, X): - check_is_fitted(self, "beta_") - return [sum(self.beta[i].inner_product(X[i][j])[0, 0] for i in - range(len(self.beta))) for j in range(X[0].n_samples)] - - def _argcheck(self, y, x, weights=None): - """Do some checks to types and shapes""" - if all(not isinstance(i, FData) for i in x): - raise ValueError("All the dependent variable are scalar.") - if any(isinstance(i, FData) for i in y): - raise ValueError( - "Some of the independent variables are not scalar") - - ylen = len(y) - xlen = len(x) - blen = len(self.beta_basis) - domain_range = ([i for i in x if isinstance(i, FData)][0] - .domain_range) - - if blen != xlen: - raise ValueError("Number of regression coefficients does" - " not match number of independent variables.") - - for j in range(xlen): - if isinstance(x[j], list): - xjcoefs = np.array(x[j]).reshape((-1, 1)) - x[j] = FDataBasis(Constant(domain_range), xjcoefs) - - if any(ylen != xfd.n_samples for xfd in x): - raise ValueError("The number of samples on independent and " - "dependent variables should be the same") - - if any(not isinstance(b, Basis) for b in self.beta_basis): - raise ValueError("Betas should be a list of Basis.") - - if weights is None: - weights = [1 for _ in range(ylen)] - - if len(weights) != ylen: - raise ValueError("The number of weights should be equal to the " - "independent samples.") - - if np.any(np.array(weights) < 0): - raise ValueError("The weights should be non negative values") - - return y, x, weights diff --git a/skfda/preprocessing/dim_reduction/__init__.py b/skfda/preprocessing/dim_reduction/__init__.py index e69de29bb..641ba946c 100644 --- 
a/skfda/preprocessing/dim_reduction/__init__.py +++ b/skfda/preprocessing/dim_reduction/__init__.py @@ -0,0 +1 @@ +from . import projection diff --git a/skfda/preprocessing/dim_reduction/projection/__init__.py b/skfda/preprocessing/dim_reduction/projection/__init__.py index fd4b4dadc..4b6cf980c 100644 --- a/skfda/preprocessing/dim_reduction/projection/__init__.py +++ b/skfda/preprocessing/dim_reduction/projection/__init__.py @@ -1 +1 @@ -from ._fpca import fpca +from ._fpca import FPCA diff --git a/skfda/preprocessing/dim_reduction/projection/_fpca.py b/skfda/preprocessing/dim_reduction/projection/_fpca.py index f966cce17..95ca70b1f 100644 --- a/skfda/preprocessing/dim_reduction/projection/_fpca.py +++ b/skfda/preprocessing/dim_reduction/projection/_fpca.py @@ -1,33 +1,408 @@ -"""Functional principal component analysis. -""" +"""Functional Principal Component Analysis Module.""" + +import skfda +from skfda.misc.regularization import compute_penalty_matrix +from skfda.representation.basis import FDataBasis +from skfda.representation.grid import FDataGrid + +from scipy.linalg import solve_triangular +from sklearn.base import BaseEstimator, TransformerMixin +from sklearn.decomposition import PCA import numpy as np -from ....exploratory.stats import mean +__author__ = "Yujian Hong" +__email__ = "yujian.hong@estudiante.uam.es" + + +class FPCA(BaseEstimator, TransformerMixin): + """Class that implements functional principal component analysis for both + basis and grid representations of the data. Most parameters are shared + when fitting a FDataBasis or FDataGrid, except weights and components_basis. + + Parameters: + n_components (int): number of principal components to obtain from + functional principal component analysis. Defaults to 3. + centering (bool): if True then calculate the mean of the functional data + object and center the data first. Defaults to True. If True the + passed FDataBasis object is modified. + regularization (Regularization): + Regularization object to be applied. + components_basis (Basis): the basis in which we want the principal + components. We can use a different basis than the basis contained in + the passed FDataBasis object. This parameter is only used when + fitting a FDataBasis. + weights (numpy.array or callable): the weights vector used for + discrete integration. If none then the trapezoidal rule is used for + computing the weights. If a callable object is passed, then the + weight vector will be obtained by evaluating the object at the + sample points of the passed FDataGrid object in the fit method. + This parameter is only used when fitting a FDataGrid. -def fpca(fdatagrid, n=2): - """Compute Functional Principal Components Analysis. + Attributes: + components_ (FData): this contains the principal components in a + basis representation. + explained_variance_ (array_like): The amount of variance explained by + each of the selected components. + explained_variance_ratio_ (array_like): this contains the percentage of + variance explained by each principal component. + mean_ (FData): mean of the train data. - Performs Functional Principal Components Analysis to reduce - dimensionality and obtain the principal modes of variation for a - functional data object. - It uses SVD numpy implementation to compute PCA. + Examples: + Construct an artificial FDataBasis object and run FPCA with this object. + The resulting principal components are not compared because there are + several equivalent possibilities. - Args: - fdatagrid (FDataGrid): functional data object. 
- n (int, optional): Number of principal components. Defaults to 2. + >>> data_matrix = np.array([[1.0, 0.0], [0.0, 2.0]]) + >>> sample_points = [0, 1] + >>> fd = FDataGrid(data_matrix, sample_points) + >>> basis = skfda.representation.basis.Monomial((0,1), n_basis=2) + >>> basis_fd = fd.to_basis(basis) + >>> fpca_basis = FPCA(2) + >>> fpca_basis = fpca_basis.fit(basis_fd) + + In this example we apply discretized functional PCA with some simple + data to illustrate the usage of this class. We initialize the + FPCA object, fit the artificial data and obtain the scores. + The results are not tested because there are several equivalent + possibilities. + + >>> data_matrix = np.array([[1.0, 0.0], [0.0, 2.0]]) + >>> sample_points = [0, 1] + >>> fd = FDataGrid(data_matrix, sample_points) + >>> fpca_grid = FPCA(2) + >>> fpca_grid = fpca_grid.fit(fd) - Returns: - tuple: (scores, principal directions, eigenvalues) """ - fdatagrid = fdatagrid - mean(fdatagrid) # centers the data - # singular value decomposition - u, s, v = np.linalg.svd(fdatagrid.data_matrix) - principal_directions = v.T # obtain the eigenvectors matrix - eigenvalues = (np.diag(s) ** 2) / (fdatagrid.n_samples - 1) - scores = u @ s # functional principal scores - - return scores, principal_directions, eigenvalues + + def __init__(self, + n_components=3, + centering=True, + regularization=None, + weights=None, + components_basis=None + ): + self.n_components = n_components + self.centering = centering + self.regularization = regularization + self.weights = weights + self.components_basis = components_basis + + def _center_if_necessary(self, X, *, learn_mean=True): + + if learn_mean: + self.mean_ = X.mean() + + return X - self.mean_ if self.centering else X + + def _fit_basis(self, X: FDataBasis, y=None): + """Computes the first n_components principal components and saves them. + The eigenvalues associated with these principal components are also + saved. For more details about how it is implemented please view the + referenced book. + + Args: + X (FDataBasis): + the functional data object to be analysed in basis + representation + y (None, not used): + only present for convention of a fit function + + Returns: + self (object) + + References: + .. [RS05-8-4-2] Ramsay, J., Silverman, B. W. (2005). Basis function + expansion of the functions. In *Functional Data Analysis* + (pp. 161-164). Springer. + + """ + + # the maximum number of components is established by the target basis + # if the target basis is available. 
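As a complement to the doctests above, a minimal end-to-end sketch of the new FPCA API (synthetic data invented for illustration; the exact scores depend on sign conventions, so none are asserted):

import numpy as np
from skfda import FDataGrid
from skfda.preprocessing.dim_reduction.projection import FPCA

# Four noise-free sinusoids differing only in phase.
t = np.linspace(0, 1, 20)
data = np.array([np.sin(2 * np.pi * (t - shift))
                 for shift in (0.0, 0.05, 0.1, 0.15)])
fd = FDataGrid(data, t)

fpca = FPCA(n_components=2)
scores = fpca.fit_transform(fd)         # array of shape (4, 2)
print(fpca.explained_variance_ratio_)   # most variance on the first component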
+        n_basis = (self.components_basis.n_basis if self.components_basis
+                   else X.basis.n_basis)
+        n_samples = X.n_samples
+
+        # check that the number of components is smaller than the sample size
+        if self.n_components > X.n_samples:
+            raise AttributeError("The sample size must be bigger than the "
+                                 "number of components")
+
+        # check that we do not exceed limits for n_components as it should
+        # be smaller than the number of attributes of the basis
+        if self.n_components > n_basis:
+            raise AttributeError("The number of components should be "
+                                 "smaller than the number of attributes of "
+                                 "target principal components' basis.")
+
+        # if centering is True then subtract the mean function from each
+        # function in FDataBasis
+        X = self._center_if_necessary(X)
+
+        # setup principal component basis if not given
+        components_basis = self.components_basis
+        if components_basis is not None:
+            # First fix domain range if not already done
+            components_basis.domain_range = X.basis.domain_range
+            g_matrix = components_basis.gram_matrix()
+            # the matrix in charge of changing the computed principal
+            # components to the target basis is essentially the inner
+            # product of both bases.
+            j_matrix = X.basis.inner_product_matrix(components_basis)
+        else:
+            # if no other basis is specified we use the same basis as the
+            # passed FDataBasis object
+            components_basis = X.basis.copy()
+            g_matrix = components_basis.gram_matrix()
+            j_matrix = g_matrix
+
+        self._X_basis = X.basis
+        self._j_matrix = j_matrix
+
+        # Apply regularization / penalty if applicable
+        regularization_matrix = compute_penalty_matrix(
+            basis_iterable=(components_basis,),
+            regularization_parameter=1,
+            regularization=self.regularization)
+
+        # apply regularization
+        g_matrix = (g_matrix + regularization_matrix)
+
+        # obtain the triangular factor using the Cholesky decomposition
+        l_matrix = np.linalg.cholesky(g_matrix)
+
+        # we need L^{-1} for a multiplication, there are two possible ways:
+        # using solve to get the multiplication result directly or just invert
+        # the matrix. We choose solve because it is faster and more stable.
+        # The following matrix is needed: L^{-1}*J^T
+        l_inv_j_t = solve_triangular(l_matrix, np.transpose(j_matrix),
+                                     lower=True)
+
+        # the final matrix, C(L-1Jt)t for svd or (L-1Jt)-1CtC(L-1Jt)t for PCA
+        final_matrix = (X.coefficients @ np.transpose(l_inv_j_t) /
+                        np.sqrt(n_samples))
+
+        # initialize the PCA module provided by scikit-learn
+        pca = PCA(n_components=self.n_components)
+        pca.fit(final_matrix)
+
+        # we choose solve to obtain the component coefficients for the
+        # same reason: it is faster and more efficient
+        component_coefficients = solve_triangular(np.transpose(l_matrix),
+                                                  np.transpose(
+                                                      pca.components_),
+                                                  lower=False)
+
+        component_coefficients = np.transpose(component_coefficients)
+
+        self.explained_variance_ratio_ = pca.explained_variance_ratio_
+        self.explained_variance_ = pca.explained_variance_
+        self.components_ = X.copy(basis=components_basis,
+                                  coefficients=component_coefficients)
+
+        return self
+
+    def _transform_basis(self, X, y=None):
+        """Computes the scores of the first n_components principal components
+        and returns them.
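The Cholesky step in `_fit_basis` above can be sanity-checked in isolation. A toy numpy sketch with random matrices (all names here are hypothetical and not part of the patch): PCA on the transformed data extracts exactly the squared singular values of the final matrix.

import numpy as np
from scipy.linalg import solve_triangular

rng = np.random.default_rng(0)
C = rng.normal(size=(10, 4))        # basis coefficients, one row per sample
A = rng.normal(size=(4, 4))
G = A @ A.T + 4 * np.eye(4)         # symmetric positive definite Gram matrix
J = G                               # same basis for data and components

L = np.linalg.cholesky(G)           # G = L @ L.T
l_inv_j_t = solve_triangular(L, J.T, lower=True)   # L^{-1} J^T without inv()
final = C @ l_inv_j_t.T / np.sqrt(len(C))

# Squared singular values of `final` equal the eigenvalues of final^T final.
s = np.linalg.svd(final, compute_uv=False)
assert np.allclose(s ** 2, np.linalg.eigvalsh(final.T @ final)[::-1])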
+
+        Args:
+            X (FDataBasis):
+                the functional data object to be analysed
+            y (None, not used):
+                only present because of fit function convention
+
+        Returns:
+            (array_like): the scores of the data with reference to the
+            principal components
+        """
+
+        if X.basis != self._X_basis:
+            raise ValueError("The basis used in fit is different from "
+                             "the basis used in transform.")
+
+        # in this case it is the inner product of our data with the components
+        return (X.coefficients @ self._j_matrix
+                @ self.components_.coefficients.T)
+
+    def _fit_grid(self, X: FDataGrid, y=None):
+        r"""Computes the first n_components principal components and saves
+        them.
+
+        The eigenvalues associated with these principal
+        components are also saved. For more details about how it is
+        implemented please view the referenced book, chapter 8.
+
+        In summary, we are performing standard multivariate PCA over
+        :math:`\frac{1}{\sqrt{N}} \mathbf{X} \mathbf{W}^{1/2}` where :math:`N`
+        is the number of samples in the dataset, :math:`\mathbf{X}` is the data
+        matrix and :math:`\mathbf{W}` is the weight matrix (this matrix
+        defines the numerical integration). By default the weight matrix is
+        obtained using the trapezoidal rule.
+
+        Args:
+            X (FDataGrid):
+                the functional data object to be analysed in grid
+                representation
+            y (None, not used):
+                only present for convention of a fit function
+
+        Returns:
+            self (object)
+
+        References:
+            .. [RS05-8-4-1] Ramsay, J., Silverman, B. W. (2005). Discretizing
+                the functions. In *Functional Data Analysis* (p. 161). Springer.
+        """
+
+        # check that the number of components is smaller than the sample size
+        if self.n_components > X.n_samples:
+            raise AttributeError("The sample size must be bigger than the "
+                                 "number of components")
+
+        # check that we do not exceed limits for n_components as it should
+        # be smaller than the number of attributes of the functional data
+        # object
+        if self.n_components > X.data_matrix.shape[1]:
+            raise AttributeError("The number of components should be "
+                                 "smaller than the number of discretization "
+                                 "points of the functional data object.")
+
+        # data matrix initialization
+        fd_data = X.data_matrix.reshape(X.data_matrix.shape[:-1])
+
+        # get the number of samples and the number of points of discretization
+        n_samples, n_points_discretization = fd_data.shape
+
+        # if centering is True then subtract the mean function from each
+        # function in the FDataGrid
+        X = self._center_if_necessary(X)
+
+        # establish weights for each point of discretization
+        if not self.weights:
+            # sample_points is a list with one array in the 1D case
+            # in trapezoidal rule, suppose \deltax_k = x_k - x_{k-1}, the weight
+            # vector is as follows: [\deltax_1/2, \deltax_1/2 + \deltax_2/2,
+            # \deltax_2/2 + \deltax_3/2, ...
, \deltax_n/2]
+            differences = np.diff(X.sample_points[0])
+            differences = np.concatenate(((0,), differences, (0,)))
+            self.weights = (differences[:-1] + differences[1:]) / 2
+        elif callable(self.weights):
+            self.weights = self.weights(X.sample_points[0])
+            # if it is a FDataGrid then we need to reduce the dimension to a
+            # 1-D array
+            if isinstance(self.weights, FDataGrid):
+                self.weights = np.squeeze(self.weights.data_matrix)
+
+        weights_matrix = np.diag(self.weights)
+
+        basis = FDataGrid(
+            data_matrix=np.identity(n_points_discretization),
+            sample_points=X.sample_points
+        )
+
+        regularization_matrix = compute_penalty_matrix(
+            basis_iterable=(basis,),
+            regularization_parameter=1,
+            regularization=self.regularization)
+
+        fd_data = np.transpose(np.linalg.solve(
+            np.transpose(basis.data_matrix[..., 0] + regularization_matrix),
+            np.transpose(fd_data)))
+
+        # see docstring for more information
+        final_matrix = fd_data @ np.sqrt(weights_matrix) / np.sqrt(n_samples)
+
+        pca = PCA(n_components=self.n_components)
+        pca.fit(final_matrix)
+        self.components_ = X.copy(data_matrix=np.transpose(
+            np.linalg.solve(np.sqrt(weights_matrix),
+                            np.transpose(pca.components_))))
+        self.explained_variance_ratio_ = pca.explained_variance_ratio_
+        self.explained_variance_ = pca.explained_variance_
+
+        return self
+
+    def _transform_grid(self, X: FDataGrid, y=None):
+        """Computes the scores of the first n_components principal components
+        and returns them.
+
+        Args:
+            X (FDataGrid):
+                the functional data object to be analysed
+            y (None, not used):
+                only present because of fit function convention
+
+        Returns:
+            (array_like): the scores of the data with reference to the
+            principal components
+        """
+
+        # in this case it is the data matrix multiplied by the principal
+        # components as column vectors
+
+        return X.data_matrix.reshape(
+            X.data_matrix.shape[:-1]) @ np.transpose(
+                self.components_.data_matrix.reshape(
+                    self.components_.data_matrix.shape[:-1]))
+
+    def fit(self, X, y=None):
+        """Computes the first n_components principal components and saves them
+        inside the FPCA object. Both FDataGrid and FDataBasis are accepted.
+
+        Args:
+            X (FDataGrid or FDataBasis):
+                the functional data object to be analysed
+            y (None, not used):
+                only present for convention of a fit function
+
+        Returns:
+            self (object)
+        """
+        if isinstance(X, FDataGrid):
+            return self._fit_grid(X, y)
+        elif isinstance(X, FDataBasis):
+            return self._fit_basis(X, y)
+        else:
+            raise AttributeError("X must be either FDataGrid or FDataBasis")
+
+    def transform(self, X, y=None):
+        """Computes the scores of the first n_components principal components
+        and returns them.
+
+        Args:
+            X (FDataGrid or FDataBasis):
+                the functional data object to be analysed
+            y (None, not used):
+                only present because of fit function convention
+
+        Returns:
+            (array_like): the scores of the data with reference to the
+            principal components
+        """
+        X = self._center_if_necessary(X, learn_mean=False)
+
+        if isinstance(X, FDataGrid):
+            return self._transform_grid(X, y)
+        elif isinstance(X, FDataBasis):
+            return self._transform_basis(X, y)
+        else:
+            raise AttributeError("X must be either FDataGrid or FDataBasis")
+
+    def fit_transform(self, X, y=None, **fit_params):
+        """Computes the first n_components principal components and their
+        scores, and returns them.
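The trapezoidal weight vector built above can be verified against numpy's own trapezoidal integrator; a quick standalone check on an arbitrary non-uniform grid (not part of the patch):

import numpy as np

t = np.array([0.0, 0.1, 0.3, 0.6, 1.0])
d = np.diff(t)
d = np.concatenate(((0,), d, (0,)))
weights = (d[:-1] + d[1:]) / 2      # trapezoidal quadrature weights

y = t ** 2
# Integrating with the weight vector matches np.trapz exactly.
assert np.isclose(weights @ y, np.trapz(y, t))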
+ Args: + X (FDataGrid or FDataBasis): + the functional data object to be analysed + y (None, not used): + only present for convention of a fit function + + Returns: + (array_like): the scores of the data with reference to the + principal components + """ + self.fit(X, y) + return self.transform(X, y) diff --git a/skfda/preprocessing/registration/__init__.py b/skfda/preprocessing/registration/__init__.py index 3ac379682..ce4a52cae 100644 --- a/skfda/preprocessing/registration/__init__.py +++ b/skfda/preprocessing/registration/__init__.py @@ -9,14 +9,10 @@ landmark_registration_warping, landmark_registration) -from ._shift_registration import shift_registration, shift_registration_deltas +from ._shift_registration import ShiftRegistration -from ._registration_utils import (mse_decomposition, - invert_warping, - normalize_warping, - _normalize_scale) +from ._warping import invert_warping, normalize_warping -from ._elastic import (to_srsf, from_srsf, - elastic_registration, - elastic_registration_warping, - elastic_mean, warping_mean) +from .elastic import ElasticRegistration + +from . import validation, elastic diff --git a/skfda/preprocessing/registration/_elastic.py b/skfda/preprocessing/registration/_elastic.py deleted file mode 100644 index 1af90a60f..000000000 --- a/skfda/preprocessing/registration/_elastic.py +++ /dev/null @@ -1,663 +0,0 @@ - - -import scipy.integrate - -import numpy as np -import optimum_reparam - -from . import invert_warping -from ... import FDataGrid -from ._registration_utils import _normalize_scale - - -from...representation.interpolation import SplineInterpolator - - -__author__ = "Pablo Marcos Manchón" -__email__ = "pablo.marcosm@estudiante.uam.es" - -############################################################################### -# Based on the original implementation of J. Derek Tucker in # -# *fdasrsf_python* (https://github.com/jdtuck/fdasrsf_python) # -# and *ElasticFDA.jl* (https://github.com/jdtuck/ElasticFDA.jl). # -############################################################################### - - -def to_srsf(fdatagrid, eval_points=None): - r"""Calculate the square-root slope function (SRSF) transform. - - Let :math:`f_i : [a,b] \rightarrow \mathbb{R}` be an absolutely continuous - function, the SRSF transform is defined as - - .. math:: - SRSF(f_i(t)) = sgn(f_i(t)) \sqrt{|Df_i(t)|} = q_i(t) - - This representation it is used to compute the extended non-parametric - Fisher-Rao distance between functions, wich under the SRSF representation - becomes the usual :math:`\mathbb{L}^2` distance between functions. - See [SK16-4-6-1]_ . - - Args: - fdatagrid (:class:`FDataGrid`): Functions to be transformed. - eval_points: (array_like, optional): Set of points where the - functions are evaluated, by default uses the sample points of the - fdatagrid. - - Returns: - :class:`FDataGrid`: SRSF functions. - - Raises: - ValueError: If functions are multidimensional. - - References: - .. [SK16-4-6-1] Srivastava, Anuj & Klassen, Eric P. (2016). Functional - and shape data analysis. In *Square-Root Slope Function - Representation* (pp. 91-93). Springer. 
- - """ - - if fdatagrid.dim_domain > 1: - raise ValueError("Only support functional objects with unidimensional " - "domain.") - - elif fdatagrid.dim_codomain > 1: - raise ValueError("Only support functional objects with unidimensional " - "codomain.") - - elif eval_points is None: - eval_points = fdatagrid.sample_points[0] - - g = fdatagrid.derivative() - - # Evaluation with the corresponding interpolation - g_data_matrix = g(eval_points, keepdims=False) - - # SRSF(f) = sign(f) * sqrt|Df| - q_data_matrix = np.sign(g_data_matrix) * np.sqrt(np.abs(g_data_matrix)) - - return fdatagrid.copy(data_matrix=q_data_matrix, sample_points=eval_points) - - -def from_srsf(fdatagrid, initial=None, *, eval_points=None): - r"""Given a SRSF calculate the corresponding function in the original space. - - Let :math:`f_i : [a,b]\rightarrow \mathbb{R}` be an absolutely continuous - function, the SRSF transform is defined as - - .. math:: - SRSF(f_i(t)) = sgn(f_i(t)) \sqrt{|Df_i(t)|} = q_i(t) - - This transformation is a mapping up to constant. Given the srsf and the - initial value the original function can be obtained as - - .. math:: - f_i(t) = f(a) + \int_{a}^t q(t)|q(t)|dt - - This representation it is used to compute the extended non-parametric - Fisher-Rao distance between functions, wich under the SRSF representation - becomes the usual :math:`\mathbb{L}^2` distance between functions. - See [SK16-4-6-2]_ . - - Args: - fdatagrid (:class:`FDataGrid`): SRSF to be transformed. - initial (array_like): List of values of initial values of the original - functions. - eval_points: (array_like, optional): Set of points where the - functions are evaluated, by default uses the sample points of the - fdatagrid. - - Returns: - :class:`FDataGrid`: Functions in the original space. - - Raises: - ValueError: If functions are multidimensional. - - References: - .. [SK16-4-6-2] Srivastava, Anuj & Klassen, Eric P. (2016). Functional - and shape data analysis. In *Square-Root Slope Function - Representation* (pp. 91-93). Springer. - - """ - - if fdatagrid.dim_domain > 1: - raise ValueError("Only support functional objects with " - "unidimensional domain.") - - elif fdatagrid.dim_codomain > 1: - raise ValueError("Only support functional objects with unidimensional " - "image.") - - elif eval_points is None: - eval_points = fdatagrid.sample_points[0] - - q_data_matrix = fdatagrid(eval_points, keepdims=True) - - f_data_matrix = q_data_matrix * np.abs(q_data_matrix) - - f_data_matrix = scipy.integrate.cumtrapz(f_data_matrix, - x=eval_points, - axis=1, - initial=0) - - if initial is not None: - initial = np.atleast_1d(initial) - initial = initial.reshape( - fdatagrid.n_samples, 1, fdatagrid.dim_codomain) - initial = np.repeat(initial, len(eval_points), axis=1) - f_data_matrix += initial - - return fdatagrid.copy(data_matrix=f_data_matrix, sample_points=eval_points) - - -def _elastic_alignment_array(template_data, q_data, - eval_points, lam, grid_dim): - r"""Wrapper between the cython interface and python. - - Selects the corresponding routine depending on the dimensions of the - arrays. - - Args: - template_data (numpy.ndarray): Array with the srsf of the template. - q_data (numpy.ndarray): Array with the srsf of the curves - to be aligned. - eval_points (numpy.ndarray): Discretisation points of the functions. - lam (float): Penalisation term. - grid_dim (int): Dimension of the grid used in the alignment algorithm. 
- - Return: - (numpy.ndarray): Array with the same shape than q_data with the srsf of - the functions aligned to the template(s). - """ - - # Select cython function - if template_data.ndim == 1 and q_data.ndim == 1: - reparam = optimum_reparam.coptimum_reparam - - elif template_data.ndim == 1: - reparam = optimum_reparam.coptimum_reparam_n - - else: - reparam = optimum_reparam.coptimum_reparam_n2 - - return reparam(np.ascontiguousarray(template_data.T), - np.ascontiguousarray(eval_points), - np.ascontiguousarray(q_data.T), - lam, grid_dim).T - - -def elastic_registration_warping(fdatagrid, template=None, *, lam=0., - eval_points=None, fdatagrid_srsf=None, - template_srsf=None, grid_dim=7, **kwargs): - r"""Calculate the warping to align a FDatagrid using the SRSF framework. - - Let :math:`f` be a function of the functional data object wich will be - aligned to the template :math:`g`. Calculates the warping wich minimises - the Fisher-Rao distance between :math:`g` and the registered function - :math:`f^*(t)=f(\gamma^*(t))=f \circ \gamma^*`. - - .. math:: - \gamma^* = argmin_{\gamma \in \Gamma} d_{\lambda}(f \circ - \gamma, g) - - Where :math:`d_{\lambda}` denotes the extended amplitude distance with a - penalty term, used to control the amount of warping. - - .. math:: - d_{\lambda}^2(f \circ \gamma, g) = \| SRSF(f \circ \gamma) - \sqrt{\dot{\gamma}} - SRSF(g)\|_{\mathbb{L}^2}^2 + \lambda - \mathcal{R}(\gamma) - - In the implementation it is used as penalty term - - .. math:: - \mathcal{R}(\gamma) = \|\sqrt{\dot{\gamma}}- 1 \|_{\mathbb{L}^2}^2 - - Wich restrict the amount of elasticity employed in the alignment. - - The registered function :math:`f^*(t)` can be calculated using the - composition :math:`f^*(t)=f(\gamma^*(t))`. - - If the template is not specified it is used the Karcher mean of the set of - functions under the Fisher-Rao metric to perform the alignment, wich is - the local minimum of the sum of squares of elastic distances. - See :func:`elastic_mean`. - - In [SK16-4-3]_ are described extensively the algorithms employed and - the SRSF framework. - - Args: - fdatagrid (:class:`FDataGrid`): Functional data object to be aligned. - template (:class:`FDataGrid`, optional): Template to align the curves. - Can contain 1 sample to align all the curves to it or the same - number of samples than the fdatagrid. By default it is used the - elastic mean. - lam (float, optional): Controls the amount of elasticity. - Defaults to 0. - eval_points (array_like, optional): Set of points where the - functions are evaluated, by default uses the sample points of the - fdatagrid. - fdatagrid_srsf (:class:`FDataGrid`, optional): SRSF of the fdatagrid, - may be passed to avoid repeated calculation. - template_srsf (:class:`FDataGrid`, optional): SRSF of the template, - may be passed to avoid repeated calculation. - grid_dim (int, optional): Dimension of the grid used in the alignment - algorithm. Defaults 7. - **kwargs: Named arguments to be passed to :func:`elastic_mean`. - - Returns: - (:class:`FDataGrid`): Warping to align the given fdatagrid to the - template. - - Raises: - ValueError: If functions are multidimensional or the number of samples - are different. - - References: - .. [SK16-4-3] Srivastava, Anuj & Klassen, Eric P. (2016). Functional - and shape data analysis. In *Functional Data and Elastic - Registration* (pp. 73-122). Springer. 
- - """ - - # Check of params - if fdatagrid.dim_domain != 1 or fdatagrid.dim_codomain != 1: - - raise ValueError("Not supported multidimensional functional objects.") - - if template is None: - template = elastic_mean(fdatagrid, lam=lam, eval_points=eval_points, - **kwargs) - - elif ((template.n_samples != 1 and template.n_samples != fdatagrid.n_samples) - or template.dim_domain != 1 or template.dim_codomain != 1): - - raise ValueError("The template should contain one sample to align all" - "the curves to the same function or the same number " - "of samples than the fdatagrid") - - # Construction of srsfs - if fdatagrid_srsf is None: - fdatagrid_srsf = to_srsf(fdatagrid, eval_points=eval_points) - - if template_srsf is None: - template_srsf = to_srsf(template, eval_points=eval_points) - - if eval_points is None: - eval_points = fdatagrid_srsf.sample_points[0] - - # Discretizacion in evaluation points - q_data = fdatagrid_srsf(eval_points, keepdims=False).squeeze() - template_data = template_srsf(eval_points, keepdims=False).squeeze() - - # Values of the warping - gamma = _elastic_alignment_array(template_data, q_data, - _normalize_scale(eval_points), - lam, grid_dim) - - # Normalize warping to original interval - gamma = _normalize_scale(gamma, a=eval_points[0], b=eval_points[-1]) - - # Interpolator - interpolator = SplineInterpolator(interpolation_order=3, monotone=True) - - return FDataGrid(gamma, eval_points, interpolator=interpolator) - - -def elastic_registration(fdatagrid, template=None, *, lam=0., eval_points=None, - fdatagrid_srsf=None, template_srsf=None, grid_dim=7, - **kwargs): - r"""Align a FDatagrid using the SRSF framework. - - Let :math:`f` be a function of the functional data object wich will be - aligned to the template :math:`g`. Calculates the warping wich minimises - the Fisher-Rao distance between :math:`g` and the registered function - :math:`f^*(t)=f(\gamma^*(t))=f \circ \gamma^*`. - - .. math:: - \gamma^* = argmin_{\gamma \in \Gamma} d_{\lambda}(f \circ - \gamma, g) - - Where :math:`d_{\lambda}` denotes the extended Fisher-Rao distance with a - penalty term, used to control the amount of warping. - - .. math:: - d_{\lambda}^2(f \circ \gamma, g) = \| SRSF(f \circ \gamma) - \sqrt{\dot{\gamma}} - SRSF(g)\|_{\mathbb{L}^2}^2 + \lambda - \mathcal{R}(\gamma) - - In the implementation it is used as penalty term - - .. math:: - \mathcal{R}(\gamma) = \|\sqrt{\dot{\gamma}}- 1 \|_{\mathbb{L}^2}^2 - - Wich restrict the amount of elasticity employed in the alignment. - - The registered function :math:`f^*(t)` can be calculated using the - composition :math:`f^*(t)=f(\gamma^*(t))`. - - If the template is not specified it is used the Karcher mean of the set of - functions under the elastic metric to perform the alignment, wich is - the local minimum of the sum of squares of elastic distances. - See :func:`elastic_mean`. - - In [SK16-4-2]_ are described extensively the algorithms employed and - the SRSF framework. - - Args: - fdatagrid (:class:`FDataGrid`): Functional data object to be aligned. - template (:class:`FDataGrid`, optional): Template to align the curves. - Can contain 1 sample to align all the curves to it or the same - number of samples than the fdatagrid. By default it is used the - elastic mean. - lam (float, optional): Controls the amount of elasticity. - Defaults to 0. - eval_points (array_like, optional): Set of points where the - functions are evaluated, by default uses the sample points of the - fdatagrid. 
- fdatagrid_srsf (:class:`FDataGrid`, optional): SRSF of the fdatagrid, - may be passed to avoid repeated calculation. - template_srsf (:class:`FDataGrid`, optional): SRSF of the template, - may be passed to avoid repeated calculation. - grid_dim (int, optional): Dimension of the grid used in the alignment - algorithm. Defaults 7. - **kwargs: Named arguments to be passed to :func:`elastic_mean`. - - Returns: - (:class:`FDataGrid`): FDatagrid with the samples aligned to the - template. - - Raises: - ValueError: If functions are multidimensional or the number of samples - are different. - - References: - .. [SK16-4-2] Srivastava, Anuj & Klassen, Eric P. (2016). Functional - and shape data analysis. In *Functional Data and Elastic - Registration* (pp. 73-122). Springer. - - """ - - # Calculates corresponding set of warpings - warping = elastic_registration_warping(fdatagrid, - template=template, - lam=lam, - eval_points=eval_points, - fdatagrid_srsf=fdatagrid_srsf, - template_srsf=template_srsf, - grid_dim=grid_dim, - **kwargs) - - return fdatagrid.compose(warping, eval_points=eval_points) - - -def warping_mean(warping, *, iter=20, tol=1e-5, step_size=1., eval_points=None, - return_shooting=False): - r"""Compute the karcher mean of a set of warpings. - - Let :math:`\gamma_i i=1...n` be a set of warping functions - :math:`\gamma_i:[a,b] \rightarrow [a,b]` in :math:`\Gamma`, i.e., - monotone increasing and with the restriction :math:`\gamma_i(a)=a \, - \gamma_i(b)=b`. - - The karcher mean :math:`\bar \gamma` is defined as the warping that - minimises locally the sum of Fisher-Rao squared distances. - [SK16-8-3-2]_. - - .. math:: - \bar \gamma = argmin_{\gamma \in \Gamma} \sum_{i=1}^{n} - d_{FR}^2(\gamma, \gamma_i) - - The computation is performed using the structure of Hilbert Sphere obtained - after a transformation of the warpings, see [S11-3-3]_. - - Args: - warping (:class:`FDataGrid`): Set of warpings. - iter (int): Maximun number of interations. Defaults to 20. - tol (float): Convergence criterion, if the norm of the mean of the - shooting vectors, :math:`| \bar v | = S psi(t) mu(t) dt - dot = scipy.integrate.simps(np.multiply(psi, mu, out=dot_aux), - eval_points, axis=0) - - # Theorically is not possible (Cauchy–Schwarz inequallity), but due to - # numerical approximation could be greater than 1 - dot[dot < -1] = -1 - dot[dot > 1] = 1 - theta = np.arccos(dot)[:, np.newaxis] - - # Be carefully with tangent vectors and division by 0 - idx = theta[:, 0] > tol - sine[idx] = theta[idx] / np.sin(theta[idx]) - sine[~idx] = 0. 
- - # compute shooting vector - cos_theta = np.repeat(np.cos(theta), n_points, axis=1) - shooting = np.multiply(sine, (psi - np.multiply(cos_theta.T, mu)).T) - - # Mean of shooting vectors - vmean = shooting.mean(axis=0, keepdims=True) - v_norm = scipy.integrate.simps(np.square(vmean[0]))**(.5) - - # Convergence criterion - if v_norm < tol: - break - - # Update of mu - mu *= np.cos(step_size * v_norm) - vmean += np.sin(step_size * v_norm) / v_norm - mu += vmean.T - - # Recover mean in original gamma space - warping_mean = scipy.integrate.cumtrapz(np.square(mu, out=mu)[:, 0], - x=eval_points, initial=0) - - # Affine traslation - warping_mean = _normalize_scale(warping_mean, - a=original_eval_points[0], - b=original_eval_points[-1]) - - monotone_interpolator = SplineInterpolator(interpolation_order=3, - monotone=True) - - mean = FDataGrid([warping_mean], sample_points=original_eval_points, - interpolator=monotone_interpolator) - - # Shooting vectors are used in models based in the amplitude-phase - # decomposition under this metric. - if return_shooting: - return mean, shooting - - return mean - - -def elastic_mean(fdatagrid, *, lam=0., center=True, iter=20, tol=1e-3, - initial=None, eval_points=None, fdatagrid_srsf=None, - grid_dim=7, **kwargs): - r"""Compute the karcher mean under the elastic metric. - - Calculates the karcher mean of a set of functional samples in the amplitude - space :math:`\mathcal{A}=\mathcal{F}/\Gamma`. - - Let :math:`q_i` the corresponding SRSF of the observation :math:`f_i`. - The space :math:`\mathcal{A}` is defined using the equivalence classes - :math:`[q_i]=\{ q_i \circ \gamma \| \gamma \in \Gamma \}`, where - :math:`\Gamma` denotes the space of warping functions. The karcher mean - in this space is defined as - - .. math:: - [\mu_q] = argmin_{[q] \in \mathcal{A}} \sum_{i=1}^n - d_{\lambda}^2([q],[q_i]) - - Once :math:`[\mu_q]` is obtained it is selected the element of the - equivalence class which makes the mean of the warpings employed be the - identity. - - See [SK16-8-3-1]_ and [S11-3]_. - - Args: - fdatagrid (:class:`FDataGrid`): Set of functions to compute the mean. - lam (float): Penalisation term. Defaults to 0. - center (boolean): If true it is computed the mean of the warpings and - used to select a central mean. Defaults True. - iter (int): Maximun number of iterations. Defaults to 20. - tol (float): Convergence criterion, the algorithm will stop if - :math:´\|mu^{(\nu)} - mu^{(\nu - 1)} \|_2 / \| mu^{(\nu-1)} \|_2 - < tol´. - initial (float): Value of the mean at the starting point. By default - takes the average of the initial points of the samples. - eval_points (array_like): Points of discretization of the fdatagrid. - fdatagrid_srsf (:class:`FDataGrid`): SRSF if the fdatagrid, if it is - passed it is not computed in the algorithm. - grid_dim (int, optional): Dimension of the grid used in the alignment - algorithm. Defaults 7. - ** kwargs : Named options to be pased to :func:`warping_mean`. - - Return: - (:class:`FDataGrid`): FDatagrid with the mean of the functions. - - Raises: - ValueError: If the object is multidimensional or the shape of the srsf - do not match with the fdatagrid. - - References: - .. [SK16-8-3-1] Srivastava, Anuj & Klassen, Eric P. (2016). Functional - and shape data analysis. In *Karcher Mean of Amplitudes* - (pp. 273-274). Springer. - - .. [S11-3] Srivastava, Anuj et. al. Registration of Functional Data - Using Fisher-Rao Metric (2011). In *Karcher Mean and Function - Alignment* (pp. 7-10). arXiv:1103.3817v2. 
- - """ - - if fdatagrid.dim_domain != 1 or fdatagrid.dim_codomain != 1: - raise ValueError("Not supported multidimensional functional objects.") - - if fdatagrid_srsf is not None and (fdatagrid_srsf.dim_domain != 1 or - fdatagrid_srsf.dim_codomain != 1): - raise ValueError("Not supported multidimensional functional objects.") - - elif fdatagrid_srsf is None: - fdatagrid_srsf = to_srsf(fdatagrid, eval_points=eval_points) - - if eval_points is not None: - eval_points = np.asarray(eval_points) - else: - eval_points = fdatagrid.sample_points[0] - - eval_points_normalized = _normalize_scale(eval_points) - y_scale = eval_points[-1] - eval_points[0] - - interpolator = SplineInterpolator(interpolation_order=3, monotone=True) - - # Discretisation points - fdatagrid_normalized = FDataGrid(fdatagrid(eval_points) / y_scale, - sample_points=eval_points_normalized) - - srsf = fdatagrid_srsf(eval_points, keepdims=False) - - # Initialize with function closest to the L2 mean with the L2 distance - centered = (srsf.T - srsf.mean(axis=0, keepdims=True).T).T - - distances = scipy.integrate.simps(np.square(centered, out=centered), - eval_points_normalized, axis=1) - - # Initialization of iteration - mu = srsf[np.argmin(distances)] - mu_aux = np.empty(mu.shape) - mu_1 = np.empty(mu.shape) - - # Main iteration - for _ in range(iter): - - gammas = _elastic_alignment_array( - mu, srsf, eval_points_normalized, lam, grid_dim) - gammas = FDataGrid(gammas, sample_points=eval_points_normalized, - interpolator=interpolator) - - fdatagrid_normalized = fdatagrid_normalized.compose(gammas) - srsf = to_srsf(fdatagrid_normalized).data_matrix[..., 0] - - # Next iteration - mu_1 = srsf.mean(axis=0, out=mu_1) - - # Convergence criterion - mu_norm = np.sqrt(scipy.integrate.simps(np.square(mu, out=mu_aux), - eval_points_normalized)) - - mu_diff = np.sqrt(scipy.integrate.simps(np.square(mu - mu_1, - out=mu_aux), - eval_points_normalized)) - - if mu_diff / mu_norm < tol: - break - - mu = mu_1 - - if initial is None: - initial = fdatagrid.data_matrix[:, 0].mean() - - # Karcher mean orbit in space L2/Gamma - karcher_mean = from_srsf(fdatagrid.copy(data_matrix=[mu], - sample_points=eval_points), - initial=initial) - - if center: - # Gamma mean in Hilbert Sphere - mean_normalized = warping_mean(gammas, return_shooting=False, **kwargs) - - gamma_mean = FDataGrid(_normalize_scale( - mean_normalized.data_matrix[..., 0], - a=eval_points[0], - b=eval_points[-1]), - sample_points=eval_points) - - gamma_inverse = invert_warping(gamma_mean) - - karcher_mean = karcher_mean.compose(gamma_inverse) - - # Return center of the orbit - return karcher_mean diff --git a/skfda/preprocessing/registration/_landmark_registration.py b/skfda/preprocessing/registration/_landmark_registration.py index 2036569fa..5e8a96208 100644 --- a/skfda/preprocessing/registration/_landmark_registration.py +++ b/skfda/preprocessing/registration/_landmark_registration.py @@ -6,7 +6,7 @@ import numpy as np from ... 
import FDataGrid -from ...representation.interpolation import SplineInterpolator +from ...representation.interpolation import SplineInterpolation __author__ = "Pablo Marcos Manchón" __email__ = "pablo.marcosm@estudiante.uam.es" @@ -251,11 +251,11 @@ def landmark_registration_warping(fd, landmarks, *, location=None, sample_points[-1] = fd.domain_range[0][1] sample_points[1:-1] = location - interpolator = SplineInterpolator(interpolation_order=3, monotone=True) + interpolation = SplineInterpolation(interpolation_order=3, monotone=True) warping = FDataGrid(data_matrix=data_matrix, sample_points=sample_points, - interpolator=interpolator, + interpolation=interpolation, extrapolation='bounds') try: diff --git a/skfda/preprocessing/registration/_registration_utils.py b/skfda/preprocessing/registration/_registration_utils.py deleted file mode 100644 index e8735c584..000000000 --- a/skfda/preprocessing/registration/_registration_utils.py +++ /dev/null @@ -1,333 +0,0 @@ -"""Registration of functional data module. - -This module contains routines related to the registration procedure. -""" -import collections - -import scipy.integrate -from scipy.interpolate import PchipInterpolator - -import numpy as np - - -__author__ = "Pablo Marcos Manchón" -__email__ = "pablo.marcosm@estudiante.uam.es" - - -def mse_decomposition(original_fdata, registered_fdata, warping_function=None, - *, eval_points=None): - r"""Compute mean square error measures for amplitude and phase variation. - - Once the registration has taken place, this function computes two mean - squared error measures, one for amplitude variation, and the other for - phase variation. It also computes a squared multiple correlation index - of the amount of variation in the unregistered functions is due to phase. - - Let :math:`x_i(t),y_i(t)` be the unregistered and registered functions - respectively. The total mean square error measure (see [RGS09-8-5]_) is - defined as - - - .. math:: - \text{MSE}_{total}= - \frac{1}{N}\sum_{i=1}^{N}\int[x_i(t)-\overline x(t)]^2dt - - We define the constant :math:`C_R` as - - .. math:: - - C_R = 1 + \frac{\frac{1}{N}\sum_{i}^{N}\int [Dh_i(t)-\overline{Dh}(t)] - [ y_i^2(t)- \overline{y^2}(t) ]dt} - {\frac{1}{N} \sum_{i}^{N} \int y_i^2(t)dt} - - Whose structure is related to the covariation between the deformation - functions :math:`Dh_i(t)` and the squared registered functions - :math:`y_i^2(t)`. When these two sets of functions are independents - :math:`C_R=1`, as in the case of shift registration. - - The measures of amplitude and phase mean square error are - - .. math:: - \text{MSE}_{amp} = C_R \frac{1}{N} - \sum_{i=1}^{N} \int \left [ y_i(t) - \overline{y}(t) \right ]^2 dt - - .. math:: - \text{MSE}_{phase}= - \int \left [C_R \overline{y}^2(t) - \overline{x}^2(t) \right]dt - - It can be shown that - - .. math:: - \text{MSE}_{total} = \text{MSE}_{amp} + \text{MSE}_{phase} - - The squared multiple correlation index of the proportion of the total - variation due to phase is defined as: - - .. math:: - R^2 = \frac{\text{MSE}_{phase}}{\text{MSE}_{total}} - - See [KR08-3]_ for a detailed explanation. - - - Args: - original_fdata (:class:`FData`): Unregistered functions. - regfd (:class:`FData`): Registered functions. - warping_function (:class:`FData`): Warping functions. - eval_points: (array_like, optional): Set of points where the - functions are evaluated to obtain a discrete representation. 
- - - Returns: - :class:`collections.namedtuple`: Tuple with amplitude mean square error - :math:`\text{MSE}_{amp}`, phase mean square error - :math:`\text{MSE}_{phase}`, squared correlation index :math:`R^2` - and constant :math:`C_R`. - - Raises: - ValueError: If the curves do not have the same number of samples. - - References: - .. [KR08-3] Kneip, Alois & Ramsay, James. (2008). Quantifying - amplitude and phase variation. In *Combining Registration and - Fitting for Functional Models* (pp. 14-15). Journal of the American - Statistical Association. - .. [RGS09-8-5] Ramsay J.O., Giles Hooker & Spencer Graves (2009). In - *Functional Data Analysis with R and Matlab* (pp. 125-126). - Springer. - - Examples: - - >>> from skfda.datasets import make_multimodal_landmarks - >>> from skfda.datasets import make_multimodal_samples - >>> from skfda.preprocessing.registration import ( - ... landmark_registration_warping, mse_decomposition) - - - We will create and register data. - - >>> fd = make_multimodal_samples(n_samples=3, random_state=1) - >>> landmarks = make_multimodal_landmarks(n_samples=3, random_state=1) - >>> landmarks = landmarks.squeeze() - >>> warping = landmark_registration_warping(fd, landmarks) - >>> fd_registered = fd.compose(warping) - >>> mse_amp, mse_pha, rsq, cr = mse_decomposition(fd, fd_registered, - ... warping) - - Mean square error produced by the amplitude variation. - - >>> f'{mse_amp:.6f}' - '0.000987' - - In this example we can observe that the main part of the mean square - error is due to the phase variation. - - >>> f'{mse_pha:.6f}' - '0.115769' - - Nearly 99% of the variation is due to phase. - - >>> f'{rsq:.6f}' - '0.991549' - - """ - - if registered_fdata.dim_domain != 1 or registered_fdata.dim_codomain != 1: - raise NotImplementedError - - if original_fdata.n_samples != registered_fdata.n_samples: - raise ValueError(f"the registered and unregistered curves must have " - f"the same number of samples " - f"({registered_fdata.n_samples})!= " - f"({original_fdata.n_samples})") - - if warping_function is not None and (warping_function.n_samples - != original_fdata.n_samples): - raise ValueError(f"the registered curves and the warping functions " - f"must have the same number of samples " - f"({registered_fdata.n_samples})" - f"!=({warping_function.n_samples})") - - # Creates the mesh to discretize the functions - if eval_points is None: - try: - eval_points = registered_fdata.sample_points[0] - - except AttributeError: - nfine = max(registered_fdata.basis.n_basis * 10 + 1, 201) - domain_range = registered_fdata.domain_range[0] - eval_points = np.linspace(*domain_range, nfine) - else: - eval_points = np.asarray(eval_points) - - x_fine = original_fdata.evaluate(eval_points, keepdims=False) - y_fine = registered_fdata.evaluate(eval_points, keepdims=False) - mu_fine = x_fine.mean(axis=0) # Mean unregistered function - eta_fine = y_fine.mean(axis=0) # Mean registered function - mu_fine_sq = np.square(mu_fine) - eta_fine_sq = np.square(eta_fine) - - # Total mean square error of the original funtions - # mse_total = scipy.integrate.simps( - # np.mean(np.square(x_fine - mu_fine), axis=0), - # eval_points) - - cr = 1. 
# Constant related to the covariation between the deformation - # functions and y^2 - - # If the warping functions are not provided, are suppose to be independent - if warping_function is not None: - # Derivates warping functions - dh_fine = warping_function.evaluate(eval_points, derivative=1, - keepdims=False) - dh_fine_mean = dh_fine.mean(axis=0) - dh_fine_center = dh_fine - dh_fine_mean - - y_fine_sq = np.square(y_fine) # y^2 - y_fine_sq_center = np.subtract( - y_fine_sq, eta_fine_sq) # y^2 - E[y^2] - - covariate = np.inner(dh_fine_center.T, y_fine_sq_center.T) - covariate = covariate.mean(axis=0) - cr += np.divide(scipy.integrate.simps(covariate, - eval_points), - scipy.integrate.simps(eta_fine_sq, - eval_points)) - - # mse due to phase variation - mse_pha = scipy.integrate.simps(cr * eta_fine_sq - mu_fine_sq, eval_points) - - # mse due to amplitude variation - # mse_amp = mse_total - mse_pha - y_fine_center = np.subtract(y_fine, eta_fine) - y_fine_center_sq = np.square(y_fine_center, out=y_fine_center) - y_fine_center_sq_mean = y_fine_center_sq.mean(axis=0) - - mse_amp = scipy.integrate.simps(y_fine_center_sq_mean, eval_points) - - # Total mean square error of the original funtions - mse_total = mse_pha + mse_amp - - # squared correlation measure of proportion of phase variation - rsq = mse_pha / (mse_total) - - mse_decomp = collections.namedtuple('mse_decomposition', - 'mse_amp mse_pha rsq cr') - - return mse_decomp(mse_amp, mse_pha, rsq, cr) - - -def invert_warping(fdatagrid, *, eval_points=None): - r"""Compute the inverse of a diffeomorphism. - - Let :math:`\gamma : [a,b] \rightarrow [a,b]` be a function strictly - increasing, calculates the corresponding inverse - :math:`\gamma^{-1} : [a,b] \rightarrow [a,b]` such that - :math:`\gamma^{-1} \circ \gamma = \gamma \circ \gamma^{-1} = \gamma_{id}`. - - Uses a PCHIP interpolator to compute approximately the inverse. - - Args: - fdatagrid (:class:`FDataGrid`): Functions to be inverted. - eval_points: (array_like, optional): Set of points where the - functions are interpolated to obtain the inverse, by default uses - the sample points of the fdatagrid. - - Returns: - :class:`FDataGrid`: Inverse of the original functions. - - Raises: - ValueError: If the functions are not strictly increasing or are - multidimensional. - - Examples: - - >>> import numpy as np - >>> from skfda import FDataGrid - >>> from skfda.preprocessing.registration import invert_warping - - We will construct the warping :math:`\gamma : [0,1] \rightarrow [0,1]` - wich maps t to t^3. - - >>> t = np.linspace(0, 1) - >>> gamma = FDataGrid(t**3, t) - >>> gamma - FDataGrid(...) - - We will compute the inverse. - - >>> inverse = invert_warping(gamma) - >>> inverse - FDataGrid(...) - - The result of the composition should be approximately the identity - function . - - >>> identity = gamma.compose(inverse) - >>> identity([0, 0.25, 0.5, 0.75, 1]).round(3) - array([[ 0. , 0.25, 0.5 , 0.75, 1. 
]])
-
-    """
-
-    if fdatagrid.dim_codomain != 1 or fdatagrid.dim_domain != 1:
-        raise ValueError("Multidimensional object not supported.")
-
-    if eval_points is None:
-        eval_points = fdatagrid.sample_points[0]
-
-    y = fdatagrid(eval_points, keepdims=False)
-
-    data_matrix = np.empty((fdatagrid.n_samples, len(eval_points)))
-
-    for i in range(fdatagrid.n_samples):
-        data_matrix[i] = PchipInterpolator(y[i], eval_points)(eval_points)
-
-    return fdatagrid.copy(data_matrix=data_matrix, sample_points=eval_points)
-
-
-def _normalize_scale(t, a=0, b=1):
-    """Perfoms an afine translation to normalize an interval.
-
-    Args:
-        t (numpy.ndarray): Array of dim 1 or 2 with at least 2 values.
-        a (float): Starting point of the new interval. Defaults 0.
-        b (float): Stopping point of the new interval. Defaults 1.
-
-    Returns:
-        (numpy.ndarray): Array with the transformed interval.
-    """
-
-    t = t.T  # Broadcast to normalize multiple arrays
-    t1 = (t - t[0]).astype(float)  # Translation to [0, t[-1] - t[0]]
-    t1 *= (b - a) / (t[-1] - t[0])  # Scale to [0, b-a]
-    t1 += a  # Translation to [a, b]
-    t1[0] = a  # Fix possible round errors
-    t1[-1] = b
-
-    return t1.T
-
-
-def normalize_warping(warping, domain_range=None):
-    r"""Rescale a warping to normalize their domain.
-
-    Given a set of warpings :math:`\gamma_i:[a,b]\rightarrow [a,b]` it is
-    used an affine traslation to change the domain of the transformation to
-    other domain, :math:`\tilde \gamma_i:[\tilde a,\tilde b] \rightarrow
-    [\tilde a, \tilde b]`.
-
-    Args:
-        warping (:class:`FDatagrid`): Set of warpings to rescale.
-        domain_range (tuple, optional): New domain range of the warping. By
-            default it is used the same domain range.
-    Return:
-        (:class:`FDataGrid`): FDataGrid with the warpings normalized.
-
-    """
-
-    if domain_range is None:
-        domain_range = warping.domain_range[0]
-
-    data_matrix = _normalize_scale(warping.data_matrix[..., 0], *domain_range)
-    sample_points = _normalize_scale(warping.sample_points[0], *domain_range)
-
-    return warping.copy(data_matrix=data_matrix, sample_points=sample_points,
-                        domain_range=domain_range)
diff --git a/skfda/preprocessing/registration/_shift_registration.py b/skfda/preprocessing/registration/_shift_registration.py
index 491188654..237165b8d 100644
--- a/skfda/preprocessing/registration/_shift_registration.py
+++ b/skfda/preprocessing/registration/_shift_registration.py
@@ -1,293 +1,407 @@
-"""Shift Registration of functional data module.
+"""Class to apply Shift Registration to functional data"""

-This module contains methods to perform the registration of
-functional data using shifts, in basis as well in discretized form.
-"""
+# Pablo Marcos Manchón
+# pablo.marcosm@protonmail.com

-import scipy.integrate
+from scipy.integrate import simps
+from sklearn.utils.validation import check_is_fitted

 import numpy as np

-from ..._utils import constants
+from ... import FData, FDataGrid
+from ..._utils import constants, check_is_univariate
+from .base import RegistrationTransformer

-__author__ = "Pablo Marcos Manchón"
-__email__ = "pablo.marcosm@estudiante.uam.es"

+class ShiftRegistration(RegistrationTransformer):
+    r"""Register a functional dataset using shift alignment.

+    Realizes the registration of a set of curves using a shift alignment
+    [RaSi2005-7-2]_. 
Let :math:`\{x_i(t)\}_{i=1}^{N}` be a functional dataset,
+    calculates :math:`\delta_{i}` for each sample such that
+    :math:`x_i(t + \delta_{i})` minimizes the least squares criterion:

-def shift_registration_deltas(fd, *, maxiter=5, tol=1e-2,
-                              restrict_domain=False, extrapolation=None,
-                              step_size=1, initial=None, eval_points=None):
-    r"""Return the lists of shifts used in the shift registration procedure.
+    .. math::
+        \text{REGSSE} = \sum_{i=1}^{N} \int_{\mathcal{T}}
+        [x_i(t + \delta_i) - \hat\mu(t)]^2 ds

-    Realizes a registration of the curves, using shift aligment, as is
-    defined in [RS05-7-2-1]_. Calculates :math:`\delta_{i}` for each sample
-    such that :math:`x_i(t + \delta_{i})` minimizes the least squares
-    criterion:
+    Estimates each shift parameter :math:`\delta_i` iteratively by
+    using a modified Newton-Raphson algorithm, updating the template
+    :math:`\mu` in each iteration as is described in detail in
+    [RaSi2005-7-9-1]_.

-    .. math::
-        \text{REGSSE} = \sum_{i=1}^{N} \int_{\mathcal{T}}
-        [x_i(t + \delta_i) - \hat\mu(t)]^2 ds
-
-    Estimates the shift parameter :math:`\delta_i` iteratively by
-    using a modified Newton-Raphson algorithm, updating the mean
-    in each iteration, as is described in detail in [RS05-7-9-1-1]_.
-
-    Method only implemented for Funtional objects with domain and image
-    dimension equal to 1.
+    Method only implemented for univariate functional data.

     Args:
-        fd (:class:`FData`): Functional data object to be registered.
-        maxiter (int, optional): Maximun number of iterations.
-            Defaults to 5.
+        max_iter (int, optional): Maximum number of iterations.
+            Defaults to 5. Generally 2 or 3 iterations are sufficient to
+            obtain a good alignment.
         tol (float, optional): Tolerance allowable. The process will stop if
             :math:`\max_{i}|\delta_{i}^{(\nu)}-\delta_{i}^{(\nu-1)}|<tol`.
+            Defaults to 1e-2.
+        template (str, FData or callable, optional): Template to align the
+            samples. "mean" to compute the mean iteratively as in the
+            original paper, an FData with the template calculated, or a
+            callable which constructs the template. Defaults to "mean".
+        extrapolation (str or Extrapolation, optional): Controls the
+            extrapolation mode for points outside the domain range during
+            the evaluation of the shifted curves.
+        step_size (int or float, optional): Parameter to adjust the rate of
+            convergence of the Newton-Raphson algorithm. Defaults to 1.
+        restrict_domain (bool, optional): If True restricts the domain to
+            avoid evaluating points outside it, in which case fitting and
+            transformation must be done together with fit_transform.
+            Defaults to False.
+        initial (str or array_like, optional): Array with an initial
+            estimation of shifts, or "zeros" to start the iteration from
+            null shifts. Defaults to "zeros".
+        output_points (array_like, optional): Set of points where the
+            functions are evaluated, by default uses the sample points of
+            an FDataGrid or a fine equispaced mesh for an FDataBasis.
+
+    Attributes:
+        template_ (FData): Template learned during fitting, used in the
+            transformation.
+        deltas_ (numpy.ndarray): Shifts applied during the last
+            transformation.
+        n_iter_ (int): Number of iterations performed during the last
+            transformation.
+
+    Examples:
+
+        >>> from skfda.preprocessing.registration import ShiftRegistration
+        >>> from skfda.datasets import make_sinusoidal_process
+        >>> from skfda.representation.basis import Fourier
-        >>> from skfda.preprocessing.registration import (
-        ...     shift_registration_deltas)
-        >>> fd = make_sinusoidal_process(n_samples=2, error_std=0,
+
+
+        Registration and creation of dataset in discretized form:
+
+        >>> fd = make_sinusoidal_process(n_samples=10, error_std=0,
         ...                              random_state=1)
+        >>> reg = ShiftRegistration(extrapolation="periodic")
+        >>> fd_registered = reg.fit_transform(fd)
+        >>> fd_registered
+        FDataGrid(...)
+
+        Shifts applied during the transformation:

-    Registration of data in discretized form:
+        >>> reg.deltas_.round(3)
+        array([-0.128,  0.187,  0.027,  0.034, -0.106,  0.114, ..., -0.06 ])

-    >>> shift_registration_deltas(fd).round(3)
-    array([-0.022,  0.03 ])
+
+        Registration and creation of a dataset in basis form using the
+        transformation previously fitted:

-    Registration of data in basis form:
+        >>> fd = make_sinusoidal_process(n_samples=2, error_std=0,
+        ...                              random_state=2)
+        >>> fd_basis = fd.to_basis(Fourier())
+        >>> reg.transform(fd_basis)
+        FDataBasis(...)

-    >>> fd = fd.to_basis(Fourier())
-    >>> shift_registration_deltas(fd).round(3)
-    array([-0.022,  0.03 ])

     References:
-        .. [RS05-7-2-1] Ramsay, J., Silverman, B. W. (2005). Shift
+        .. [RaSi2005-7-2] Ramsay, J., Silverman, B. W. (2005). Shift
             registration. In *Functional Data Analysis* (pp. 129-132).
             Springer.
-        .. [RS05-7-9-1-1] Ramsay, J., Silverman, B. W. (2005). Shift
+        .. [RaSi2005-7-9-1] Ramsay, J., Silverman, B. W. (2005). Shift
             registration by the Newton-Raphson algorithm. In *Functional
             Data Analysis* (pp. 142-144). Springer.
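A toy standalone illustration of the Newton-Raphson update described above, for a single sinusoid with a known shift. All names and data here are invented for illustration; scipy's `simps` is used as in the patched module, and the second derivative of REGSSE is approximated by the integral of the squared derivative, with the cross term dropped as noted in the code:

import numpy as np
from scipy.integrate import simps

t = np.linspace(0, 2 * np.pi, 200)
x = lambda s: np.sin(s + 0.3)      # curve shifted relative to the template
mu = np.sin(t)                     # template
delta = 0.0
for _ in range(5):
    dx = np.cos(t + delta + 0.3)   # derivative of the shifted curve
    d1 = simps((x(t + delta) - mu) * dx, t)   # first derivative of REGSSE
    d2 = simps(dx ** 2, t)                    # approximate second derivative
    delta -= d1 / d2                          # Newton-Raphson step
print(round(delta, 2))             # -0.3: x(t + delta) now matches mu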
""" - # Initial estimation of the shifts - - if fd.dim_codomain > 1 or fd.dim_domain > 1: - raise NotImplementedError("Method for unidimensional data.") - - domain_range = fd.domain_range[0] - - if initial is None: - delta = np.zeros(fd.n_samples) - - elif len(initial) != fd.n_samples: - raise ValueError(f"the initial shift ({len(initial)}) must have the " - f"same length than the number of samples " - f"({fd.n_samples})") - else: - delta = np.asarray(initial) - - # Fine equispaced mesh to evaluate the samples - if eval_points is None: - - try: - eval_points = fd.sample_points[0] - nfine = len(eval_points) - except AttributeError: - nfine = max(fd.n_basis * constants.BASIS_MIN_FACTOR + 1, - constants.N_POINTS_COARSE_MESH) - eval_points = np.linspace(*domain_range, nfine) - - else: - nfine = len(eval_points) - eval_points = np.asarray(eval_points) - - # Auxiliar arrays to avoid multiple memory allocations - delta_aux = np.empty(fd.n_samples) - tfine_aux = np.empty(nfine) - - # Computes the derivate of originals curves in the mesh points - D1x = fd.evaluate(eval_points, derivative=1, keepdims=False) - - # Second term of the second derivate estimation of REGSSE. The - # first term has been dropped to improve convergence (see references) - d2_regsse = scipy.integrate.trapz(np.square(D1x), eval_points, - axis=1) - - max_diff = tol + 1 - iter = 0 - - # Auxiliar array if the domain will be restricted - if restrict_domain: - D1x_tmp = D1x - tfine_tmp = eval_points - tfine_aux_tmp = tfine_aux - domain = np.empty(nfine, dtype=np.dtype(bool)) - - ones = np.ones(fd.n_samples) - eval_points_rep = np.outer(ones, eval_points) - - # Newton-Rhapson iteration - while max_diff > tol and iter < maxiter: - - # Updates the limits for non periodic functions ignoring the ends - if restrict_domain: - # Calculates the new limits - a = domain_range[0] - min(np.min(delta), 0) - b = domain_range[1] - max(np.max(delta), 0) - - # New interval is (a,b) - np.logical_and(tfine_tmp >= a, tfine_tmp <= b, out=domain) - eval_points = tfine_tmp[domain] - tfine_aux = tfine_aux_tmp[domain] - D1x = D1x_tmp[:, domain] - # Reescale the second derivate could be other approach - # d2_regsse = - # d2_regsse_original * ( 1 + (a - b) / (domain[1] - domain[0])) - d2_regsse = scipy.integrate.trapz(np.square(D1x), - eval_points, axis=1) - eval_points_rep = np.outer(ones, eval_points) - - # Computes the new values shifted - x = fd.evaluate(eval_points_rep + np.atleast_2d(delta).T, - aligned_evaluation=False, - extrapolation=extrapolation, - keepdims=False) - - x.mean(axis=0, out=tfine_aux) - - # Calculates x - mean - np.subtract(x, tfine_aux, out=x) - - d1_regsse = scipy.integrate.trapz(np.multiply(x, D1x, out=x), - eval_points, axis=1) - # Updates the shifts by the Newton-Rhapson iteration - # delta = delta - step_size * d1_regsse / d2_regsse - np.divide(d1_regsse, d2_regsse, out=delta_aux) - np.multiply(delta_aux, step_size, out=delta_aux) - np.subtract(delta, delta_aux, out=delta) - - # Updates convergence criterions - max_diff = np.abs(delta_aux, out=delta_aux).max() - iter += 1 - - return delta - - -def shift_registration(fd, *, maxiter=5, tol=1e-2, restrict_domain=False, - extrapolation=None, step_size=1, initial=None, - eval_points=None, **kwargs): - r"""Perform shift registration of the curves. - - Realizes a registration of the curves, using shift aligment, as is - defined in [RS05-7-2]_. Calculates :math:`\delta_{i}` for each sample - such that :math:`x_i(t + \delta_{i})` minimizes the least squares - criterion: - - .. 
math::
-        \text{REGSSE} = \sum_{i=1}^{N} \int_{\mathcal{T}}
-        [x_i(t + \delta_i) - \hat\mu(t)]^2 ds
-
-    Estimates the shift parameter :math:`\delta_i` iteratively by
-    using a modified Newton-Raphson algorithm, updating the mean
-    in each iteration, as is described in detail in [RS05-7-9-1]_.
+    def __init__(self, max_iter=5, tol=1e-2, template="mean",
+                 extrapolation=None, step_size=1, restrict_domain=False,
+                 initial="zeros", output_points=None):
+        self.max_iter = max_iter
+        self.tol = tol
+        self.template = template
+        self.restrict_domain = restrict_domain
+        self.extrapolation = extrapolation
+        self.step_size = step_size
+        self.initial = initial
+        self.output_points = output_points
+
+    def _compute_deltas(self, fd, template):
+        r"""Compute the shifts to perform the registration.
+
+        Args:
+            fd (FData): Functional object to be registered.
+            template (str, FData or callable): Template to align the
+                samples. "mean" to compute the mean iteratively as in
+                the original paper, an FData with the template calculated,
+                or a callable which constructs the template.
+
+        Returns:
+            tuple: A tuple with an array of deltas and an FDataGrid with the
+                template.
+
+        """
+        check_is_univariate(fd)
+
+        domain_range = fd.domain_range[0]
+
+        # Initial estimation of the shifts
+        if self.initial == "zeros":
+            delta = np.zeros(fd.n_samples)
+
+        elif len(self.initial) != fd.n_samples:
+            raise ValueError(f"the initial shift ({len(self.initial)}) must "
+                             f"have the same length as the number of samples"
+                             f" ({fd.n_samples})")
+        else:
+            delta = np.asarray(self.initial)
+
+        # Fine equispaced mesh to evaluate the samples
+        if self.output_points is None:
+
+            try:
+                output_points = fd.sample_points[0]
+                nfine = len(output_points)
+            except AttributeError:
+                nfine = max(fd.n_basis * constants.BASIS_MIN_FACTOR + 1,
+                            constants.N_POINTS_COARSE_MESH)
+                output_points = np.linspace(*domain_range, nfine)
+
+        else:
+            nfine = len(self.output_points)
+            output_points = np.asarray(self.output_points)
+
+        # Auxiliary array to avoid multiple memory allocations
+        delta_aux = np.empty(fd.n_samples)
+
+        # Computes the derivative of the original curves at the mesh points
+        fd_deriv = fd.derivative(order=1)
+        D1x = fd_deriv(output_points)[..., 0]
+
+        # Second term of the second derivative estimation of REGSSE. 
The
+        # first term has been dropped to improve convergence (see references)
+        d2_regsse = simps(np.square(D1x), output_points, axis=1)
+
+        max_diff = self.tol + 1
+        self.n_iter_ = 0
+
+        # Case template fixed
+        if isinstance(template, FData):
+            original_template = template
+            tfine_aux = template.evaluate(output_points)[0, ..., 0]
+
+            if self.restrict_domain:
+                template_points_aux = tfine_aux
+
+            template = "fixed"
+        else:
+            tfine_aux = np.empty(nfine)
+
+        # Auxiliary array if the domain is to be restricted
+        if self.restrict_domain:
+            D1x_tmp = D1x
+            tfine_tmp = output_points
+            tfine_aux_tmp = tfine_aux
+            domain = np.empty(nfine, dtype=np.dtype(bool))
+
+        ones = np.ones(fd.n_samples)
+        output_points_rep = np.outer(ones, output_points)
+
+        # Newton-Raphson iteration
+        while max_diff > self.tol and self.n_iter_ < self.max_iter:
+
+            # Updates the limits for non-periodic functions ignoring the ends
+            if self.restrict_domain:
+                # Calculates the new limits
+                a = domain_range[0] - min(np.min(delta), 0)
+                b = domain_range[1] - max(np.max(delta), 0)
+
+                # New interval is (a,b)
+                np.logical_and(tfine_tmp >= a, tfine_tmp <= b, out=domain)
+                output_points = tfine_tmp[domain]
+                tfine_aux = tfine_aux_tmp[domain]
+                D1x = D1x_tmp[:, domain]
+                # Rescaling the second derivative could be another approach
+                # d2_regsse =
+                # d2_regsse_original * ( 1 + (a - b) / (domain[1] - domain[0]))
+                d2_regsse = simps(np.square(D1x), output_points, axis=1)
+
+                # Recompute base points for evaluation
+                output_points_rep = np.outer(ones, output_points)
+
+            # Computes the new shifted values
+            x = fd(output_points_rep + np.atleast_2d(delta).T,
+                   aligned=False,
+                   extrapolation=self.extrapolation)[..., 0]
+
+            if template == "mean":
+                x.mean(axis=0, out=tfine_aux)
+            elif template == "fixed" and self.restrict_domain:
+                tfine_aux = template_points_aux[domain]
+            elif callable(template):  # Callable
+                fd_x = FDataGrid(x, sample_points=output_points)
+                fd_tfine = template(fd_x)
+                tfine_aux = fd_tfine.data_matrix.ravel()
+
+            # Calculates x - mean
+            np.subtract(x, tfine_aux, out=x)
+
+            d1_regsse = simps(np.multiply(x, D1x, out=x),
+                              output_points, axis=1)
+            # Updates the shifts by the Newton-Raphson iteration
+            # delta = delta - step_size * d1_regsse / d2_regsse
+            np.divide(d1_regsse, d2_regsse, out=delta_aux)
+            np.multiply(delta_aux, self.step_size, out=delta_aux)
+            np.subtract(delta, delta_aux, out=delta)
+
+            # Updates the convergence criterion
+            max_diff = np.abs(delta_aux, out=delta_aux).max()
+            self.n_iter_ += 1
+
+        return delta, FDataGrid(tfine_aux, sample_points=output_points)
+
+    def fit_transform(self, X: FData, y=None):
+        """Fit the estimator and transform the dataset.

-    Args:
-        fd (:class:`FData`): Functional data object to be registered.
-        maxiter (int, optional): Maximun number of iterations.
-            Defaults to 5.
-        tol (float, optional): Tolerance allowable. The process will stop if
-            :math:`\max_{i}|\delta_{i}^{(\nu)}-\delta_{i}^{(\nu-1)}|<tol`.
-
-    Examples:
-
-        >>> from skfda.datasets import make_sinusoidal_process
-        >>> from skfda.representation.basis import Fourier
-        >>> from skfda.preprocessing.registration import shift_registration
-        >>> fd = make_sinusoidal_process(n_samples=2, error_std=0,
-        ...                              random_state=1)
+
+        Args:
+            X (FData): Functional dataset to be transformed.
+            y (ignored): not used, present for API consistency by convention.

-    Registration of data in discretized form:
+
+        Returns:
+            FData: Functional data registered.

-    >>> shift_registration(fd)
-    FDataGrid(...)
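A short usage sketch of the fit/transform split implemented in the methods below: the template is learned from one synthetic dataset and new curves are aligned to it (assumes the periodic extrapolation used in the docstring example; data is invented for illustration):

from skfda.datasets import make_sinusoidal_process
from skfda.preprocessing.registration import ShiftRegistration

fd_train = make_sinusoidal_process(n_samples=10, error_std=0, random_state=1)
fd_new = make_sinusoidal_process(n_samples=3, error_std=0, random_state=2)

reg = ShiftRegistration(extrapolation="periodic")
reg.fit(fd_train)                         # learns only the template (mean)
fd_new_registered = reg.transform(fd_new)  # aligns the new curves to it
print(reg.deltas_.round(3))               # shifts applied to the new curves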
+ """ + self.deltas_, self.template_ = self._compute_deltas(X, self.template) - Registration of data in basis form: + return X.shift(self.deltas_, restrict_domain=self.restrict_domain, + extrapolation=self.extrapolation, + eval_points=self.output_points) - >>> fd = fd.to_basis(Fourier()) - >>> shift_registration(fd) - FDataBasis(...) + def fit(self, X: FData, y=None): + """Fit the estimator. - References: - .. [RS05-7-2] Ramsay, J., Silverman, B. W. (2005). Shift - registration. In *Functional Data Analysis* (pp. 129-132). - Springer. - .. [RS05-7-9-1] Ramsay, J., Silverman, B. W. (2005). Shift - registration by the Newton-Raphson algorithm. In *Functional - Data Analysis* (pp. 142-144). Springer. - """ + Args: + X (FData): Functional dataset used to construct the template for + the alignment. + y (ignored): not used, present for API consistency by convention. + + Returns: + RegistrationTransformer: self + + Raises: + AttributeError: If this method is call when restrict_domain=True. + + """ + if self.restrict_domain: + raise AttributeError("fit and predict are not available when " + "restrict_domain=True, fitting and " + "transformation should be done together. Use " + "an extrapolation method with " + "restrict_domain=False or fit_predict") + + # If the template is an FData, fit doesnt learn anything + if isinstance(self.template, FData): + self.template_ = self.template + + else: + _, self.template_ = self._compute_deltas(X, self.template) + + return self + + def transform(self, X: FData, y=None): + """Register the data. + + Transforms the data using the template previously learned during + fitting. + + Args: + X (FData): Functional dataset to be transformed. + y (ignored): not used, present for API consistency by convention. + + Returns: + FData: Functional data registered. + + Raises: + AttributeError: If this method is call when restrict_domain=True. + + """ + + if self.restrict_domain: + raise AttributeError("fit and predict are not available when " + "restrict_domain=True, fitting and " + "transformation should be done together. Use " + "an extrapolation method with " + "restrict_domain=False or fit_predict") - delta = shift_registration_deltas(fd, maxiter=maxiter, tol=tol, - restrict_domain=restrict_domain, - extrapolation=extrapolation, - step_size=step_size, initial=initial, - eval_points=eval_points) + # Check is fitted + check_is_fitted(self, 'template_') + + deltas, template = self._compute_deltas(X, self.template_) + self.template_ = template + self.deltas_ = deltas + + return X.shift(deltas, restrict_domain=self.restrict_domain, + extrapolation=self.extrapolation, + eval_points=self.output_points) + + def inverse_transform(self, X: FData, y=None): + """Applies the inverse transformation. + + Applies the opossite shift used in the last call to `transform`. + + Args: + X (FData): Functional dataset to be transformed. + y (ignored): not used, present for API consistency by convention. + + Returns: + FData: Functional data registered. + + Examples: + + Creates a synthetic functional dataset. + + >>> from skfda.preprocessing.registration import ShiftRegistration + >>> from skfda.datasets import make_sinusoidal_process + >>> fd = make_sinusoidal_process(error_std=0, random_state=1) + >>> fd.extrapolation = 'periodic' + + Dataset registration and centering. + + >>> reg = ShiftRegistration() + >>> fd_registered = reg.fit_transform(fd) + >>> fd_centered = fd_registered - fd_registered.mean() + + Reverse the translation applied during the registration. 
+ + >>> reg.inverse_transform(fd_centered) + FDataGrid(...) - # Computes the values with the final shift to construct the FDataBasis - return fd.shift(delta, restrict_domain=restrict_domain, - extrapolation=extrapolation, - eval_points=eval_points, **kwargs) + """ + if not hasattr(self, "deltas_"): + raise AttributeError("Data must be previously transformed to learn" + " the inverse transformation") + elif len(X) != len(self.deltas_): + raise ValueError("Data must contain the same number of samples " + "than the dataset previously transformed") + + return X.shift(-self.deltas_, restrict_domain=self.restrict_domain, + extrapolation=self.extrapolation, + eval_points=self.output_points) diff --git a/skfda/preprocessing/registration/_warping.py b/skfda/preprocessing/registration/_warping.py new file mode 100644 index 000000000..90c5391ca --- /dev/null +++ b/skfda/preprocessing/registration/_warping.py @@ -0,0 +1,136 @@ +"""Registration of functional data module. + +This module contains routines related to the registration procedure. +""" +import collections + +import scipy.integrate +from scipy.interpolate import PchipInterpolator + +import numpy as np + +from ..._utils import check_is_univariate + + +__author__ = "Pablo Marcos Manchón" +__email__ = "pablo.marcosm@estudiante.uam.es" + + +def invert_warping(fdatagrid, *, output_points=None): + r"""Compute the inverse of a diffeomorphism. + + Let :math:`\gamma : [a,b] \rightarrow [a,b]` be a function strictly + increasing, calculates the corresponding inverse + :math:`\gamma^{-1} : [a,b] \rightarrow [a,b]` such that + :math:`\gamma^{-1} \circ \gamma = \gamma \circ \gamma^{-1} = \gamma_{id}`. + + Uses a PCHIP interpolator to compute approximately the inverse. + + Args: + fdatagrid (:class:`FDataGrid`): Functions to be inverted. + eval_points: (array_like, optional): Set of points where the + functions are interpolated to obtain the inverse, by default uses + the sample points of the fdatagrid. + + Returns: + :class:`FDataGrid`: Inverse of the original functions. + + Raises: + ValueError: If the functions are not strictly increasing or are + multidimensional. + + Examples: + + >>> import numpy as np + >>> from skfda import FDataGrid + >>> from skfda.preprocessing.registration import invert_warping + + We will construct the warping :math:`\gamma : [0,1] \rightarrow [0,1]` + wich maps t to t^3. + + >>> t = np.linspace(0, 1) + >>> gamma = FDataGrid(t**3, t) + >>> gamma + FDataGrid(...) + + We will compute the inverse. + + >>> inverse = invert_warping(gamma) + >>> inverse + FDataGrid(...) + + The result of the composition should be approximately the identity + function . + + >>> identity = gamma.compose(inverse) + >>> identity([0, 0.25, 0.5, 0.75, 1]).round(3) + array([[[ 0. ], + [ 0.25], + [ 0.5 ], + [ 0.75], + [ 1. ]]]) + + """ + + check_is_univariate(fdatagrid) + + if output_points is None: + output_points = fdatagrid.sample_points[0] + + y = fdatagrid(output_points)[..., 0] + + data_matrix = np.empty((fdatagrid.n_samples, len(output_points))) + + for i in range(fdatagrid.n_samples): + data_matrix[i] = PchipInterpolator(y[i], output_points)(output_points) + + return fdatagrid.copy(data_matrix=data_matrix, sample_points=output_points) + + +def _normalize_scale(t, a=0, b=1): + """Perfoms an afine translation to normalize an interval. + + Args: + t (numpy.ndarray): Array of dim 1 or 2 with at least 2 values. + a (float): Starting point of the new interval. Defaults 0. + b (float): Stopping point of the new interval. Defaults 1. 
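The inversion trick used by ``invert_warping`` above reduces to fitting PCHIP
to the graph of the warping with the axes swapped, so the interpolator
approximates :math:`t` as a function of :math:`\gamma(t)`. A minimal sketch
on a toy warping, using only NumPy and SciPy::

    import numpy as np
    from scipy.interpolate import PchipInterpolator

    t = np.linspace(0, 1, 101)
    gamma = t ** 3                              # strictly increasing warping
    gamma_inv = PchipInterpolator(gamma, t)(t)  # approximate inverse on grid

    # gamma^{-1}(s) = s**(1/3), up to interpolation error
    assert np.allclose(gamma_inv, t ** (1 / 3), atol=1e-2)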
+ + Returns: + (numpy.ndarray): Array with the transformed interval. + """ + + t = t.T # Broadcast to normalize multiple arrays + t1 = (t - t[0]).astype(float) # Translation to [0, t[-1] - t[0]] + t1 *= (b - a) / (t[-1] - t[0]) # Scale to [0, b-a] + t1 += a # Translation to [a, b] + t1[0] = a # Fix possible round errors + t1[-1] = b + + return t1.T + + +def normalize_warping(warping, domain_range=None): + r"""Rescale a warping to normalize their domain. + + Given a set of warpings :math:`\gamma_i:[a,b]\rightarrow [a,b]` it is + used an affine traslation to change the domain of the transformation to + other domain, :math:`\tilde \gamma_i:[\tilde a,\tilde b] \rightarrow + [\tilde a, \tilde b]`. + + Args: + warping (:class:`FDatagrid`): Set of warpings to rescale. + domain_range (tuple, optional): New domain range of the warping. By + default it is used the same domain range. + Return: + (:class:`FDataGrid`): FDataGrid with the warpings normalized. + + """ + + if domain_range is None: + domain_range = warping.domain_range[0] + + data_matrix = _normalize_scale(warping.data_matrix[..., 0], *domain_range) + sample_points = _normalize_scale(warping.sample_points[0], *domain_range) + + return warping.copy(data_matrix=data_matrix, sample_points=sample_points, + domain_range=domain_range) diff --git a/skfda/preprocessing/registration/base.py b/skfda/preprocessing/registration/base.py new file mode 100644 index 000000000..a705c52a0 --- /dev/null +++ b/skfda/preprocessing/registration/base.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +"""Registration method. +This module contains the abstract base class for all registration methods. +""" + +from abc import ABC +from sklearn.base import BaseEstimator, TransformerMixin +from ... import FData + +class RegistrationTransformer(ABC, BaseEstimator, TransformerMixin): + """Base class for the registration methods.""" + + def score(self, X: FData, y=None): + r"""Returns the percentage of total variation removed. + + Computes the squared multiple correlation index of the proportion of + the total variation due to phase, defined as: + + .. math:: + R^2 = \frac{\text{MSE}_{phase}}{\text{MSE}_{total}}, + + where :math:`\text{MSE}_{total}` is the mean squared error and + :math:`\text{MSE}_{phase}` is the mean squared error due to the phase + explained by the registration procedure. See + :class:`~.validation.AmplitudePhaseDecomposition` for a detailed + explanation. + + Args: + X (FData): Functional data to be registered + y (Ignored): Ignored, only for API conventions. + + Returns: + float. + + See also: + :class:`~.validation.AmplitudePhaseDecomposition` + :class:`~.validation.LeastSquares` + :class:`~.validation.SobolevLeastSquares` + :class:`~.validation.PairwiseCorrelation` + + """ + from .validation import AmplitudePhaseDecomposition + + return AmplitudePhaseDecomposition()(self, X, y) diff --git a/skfda/preprocessing/registration/elastic.py b/skfda/preprocessing/registration/elastic.py new file mode 100644 index 000000000..a073c2438 --- /dev/null +++ b/skfda/preprocessing/registration/elastic.py @@ -0,0 +1,774 @@ + +import optimum_reparam + +import scipy.integrate +from sklearn.base import BaseEstimator, TransformerMixin +from sklearn.utils.validation import check_is_fitted + +import numpy as np + +from . import invert_warping +from ... 
import FDataGrid +from ..._utils import check_is_univariate +from ...representation.interpolation import SplineInterpolation +from ._warping import _normalize_scale +from .base import RegistrationTransformer + + +__author__ = "Pablo Marcos Manchón" +__email__ = "pablo.marcosm@estudiante.uam.es" + +############################################################################### +# Based on the original implementation of J. Derek Tucker in # +# *fdasrsf_python* (https://github.com/jdtuck/fdasrsf_python) # +# and *ElasticFDA.jl* (https://github.com/jdtuck/ElasticFDA.jl). # +############################################################################### + + +class SRSF(BaseEstimator, TransformerMixin): + r"""Square-Root Slope Function (SRSF) transform. + + Let :math:`f : [a,b] \rightarrow \mathbb{R}` be an absolutely continuous + function, the SRSF transform is defined as + + .. math:: + SRSF(f(t)) = sgn(f(t)) \sqrt{|\dot f(t)|} = q(t) + + This representation it is used to compute the extended non-parametric + Fisher-Rao distance between functions, wich under the SRSF representation + becomes the usual :math:`\mathbb{L}^2` distance between functions. + See [SK16-4-6]_ . + + The inverse SRSF transform is defined as + + .. math:: + f(t) = f(a) + \int_{a}^t q(t)|q(t)|dt . + + This transformation is a mapping up to constant. Given the SRSF and the + initial value :math:`f(a)` the original function can be obtained, for this + reason it is necessary to store the value :math:`f(a)` during the fit, + which is dropped due to derivation. If it is applied the inverse + transformation without fit the estimator it is assumed that :math:`f(a)=0`. + + Attributes: + eval_points (array_like, optional): Set of points where the + functions are evaluated, by default uses the sample points of the + fdatagrid. + initial_value (float, optional): Initial value to apply in the + inverse transformation. If `None` there are stored the initial + values of the functions during the transformation to apply + during the inverse transformation. Defaults None. + + Note: + Due to the use of derivatives it is recommended that the samples are + sufficiently smooth, or have passed a smoothing preprocessing before, + in order to achieve good results. + + References: + .. [SK16-4-6] Srivastava, Anuj & Klassen, Eric P. (2016). Functional + and shape data analysis. In *Square-Root Slope Function + Representation* (pp. 91-93). Springer. + + Examples: + + Create a toy dataset and apply the transformation and its inverse. + + >>> from skfda.datasets import make_sinusoidal_process + >>> from skfda.preprocessing.registration.elastic import SRSF + >>> fd = make_sinusoidal_process(error_std=0, random_state=0) + >>> srsf = SRSF() + >>> srsf + SRSF(...) + + Fits the estimator (to apply the inverse transform) and apply the SRSF + + >>> q = srsf.fit_transform(fd) + + Apply the inverse transform. + + >>> fd_pull_back = srsf.inverse_transform(q) + + The original and the pull back `fd` are almost equal + + >>> zero = fd - fd_pull_back + >>> zero.data_matrix.flatten().round(3) + array([ 0., 0., 0., ..., -0., -0., -0.]) + + """ + + def __init__(self, output_points=None, initial_value=None): + """Initializes the transformer. + + Args: + eval_points: (array_like, optional): Set of points where the + functions are evaluated, by default uses the sample points of + the :class:`FDataGrid ` transformed. + initial_value (float, optional): Initial value to apply in the + inverse transformation. 
If `None` there are stored the initial + values of the functions during the transformation to apply + during the inverse transformation. Defaults None. + + """ + self.output_points = output_points + self.initial_value = initial_value + + def fit(self, X=None, y=None): + """This transformer do not need to be fitted. + + Args: + X (Ignored): Present for API conventions. + y (Ignored): Present for API conventions. + + Returns: + (Estimator): self + + """ + return self + + def transform(self, X: FDataGrid, y=None): + r"""Computes the square-root slope function (SRSF) transform. + + Let :math:`f : [a,b] \rightarrow \mathbb{R}` be an absolutely continuous + function, the SRSF transform is defined as [SK16-4-6-1]_: + + .. math:: + + SRSF(f(t)) = sgn(f(t)) \sqrt{\dot f(t)|} = q(t) + + Args: + X (:class:`FDataGrid`): Functions to be transformed. + y (Ignored): Present for API conventions. + + Returns: + :class:`FDataGrid`: SRSF functions. + + Raises: + ValueError: If functions are not univariate. + + References: + .. [SK16-4-6-1] Srivastava, Anuj & Klassen, Eric P. (2016). + Functional and shape data analysis. In *Square-Root Slope + Function Representation* (pp. 91-93). Springer. + + """ + check_is_univariate(X) + + if self.output_points is None: + output_points = X.sample_points[0] + else: + output_points = self.output_points + + g = X.derivative() + + # Evaluation with the corresponding interpolation + data_matrix = g(output_points)[..., 0] + + # SRSF(f) = sign(f) * sqrt|Df| (avoiding multiple allocation) + sign_g = np.sign(data_matrix) + data_matrix = np.abs(data_matrix, out=data_matrix) + data_matrix = np.sqrt(data_matrix, out=data_matrix) + data_matrix *= sign_g + + # Store the values of the transformation + if self.initial_value is None: + a = X.domain_range[0][0] + self.initial_value_ = X(a).reshape(X.n_samples, 1, X.dim_codomain) + + return X.copy(data_matrix=data_matrix, sample_points=output_points) + + def inverse_transform(self, X: FDataGrid, y=None): + r"""Computes the inverse SRSF transform. + + Given the srsf and the initial value the original function can be + obtained as [SK16-4-6-2]_ : + + .. math:: + f(t) = f(a) + \int_{a}^t q(t)|q(t)|dt + + where :math:`q(t)=SRSF(f(t))`. + + If it is applied this inverse transformation without fitting the + estimator it is assumed that :math:`f(a)=0`. + + Args: + X (:class:`FDataGrid`): SRSF to be transformed. + y (Ignored): Present for API conventions. + + Returns: + :class:`FDataGrid`: Functions in the original space. + + Raises: + ValueError: If functions are multidimensional. + + References: + .. [SK16-4-6-2] Srivastava, Anuj & Klassen, Eric P. (2016). + Functional and shape data analysis. In *Square-Root Slope + Function Representation* (pp. 91-93). Springer. + + """ + check_is_univariate(X) + + if self.initial_value is None and not hasattr(self, 'initial_value_'): + raise AttributeError("When initial_value=None is expected a " + "previous transformation of the data to " + "store the initial values to apply in the " + "inverse transformation. 
Also it is possible " + "to fix these values setting the attribute" + "initial value without a previous " + "transformation.") + + if self.output_points is None: + output_points = X.sample_points[0] + else: + output_points = self.output_points + + data_matrix = X(output_points) + + data_matrix *= np.abs(data_matrix) + + f_data_matrix = scipy.integrate.cumtrapz(data_matrix, x=output_points, + axis=1, initial=0) + + # If the transformer was fitted, sum the initial value + if self.initial_value is None: + f_data_matrix += self.initial_value_ + else: + f_data_matrix += self.initial_value + + return X.copy(data_matrix=f_data_matrix, sample_points=output_points) + + +def _elastic_alignment_array(template_data, q_data, + eval_points, penalty, grid_dim): + r"""Wrapper between the cython interface and python. + + Selects the corresponding routine depending on the dimensions of the + arrays. + + Args: + template_data (numpy.ndarray): Array with the srsf of the template. + q_data (numpy.ndarray): Array with the srsf of the curves + to be aligned. + eval_points (numpy.ndarray): Discretisation points of the functions. + penalty (float): Penalisation term. + grid_dim (int): Dimension of the grid used in the alignment algorithm. + + Return: + (numpy.ndarray): Array with the same shape than q_data with the srsf of + the functions aligned to the template(s). + """ + + # Select cython function + if template_data.ndim == 1 and q_data.ndim == 1: + reparam = optimum_reparam.coptimum_reparam + + elif template_data.ndim == 1: + reparam = optimum_reparam.coptimum_reparam_n + + else: + reparam = optimum_reparam.coptimum_reparam_n2 + + return reparam(np.ascontiguousarray(template_data.T), + np.ascontiguousarray(eval_points), + np.ascontiguousarray(q_data.T), + penalty, grid_dim).T + + +class ElasticRegistration(RegistrationTransformer): + r"""Align a FDatagrid using the SRSF framework. + + Let :math:`f` be a function of the functional data object wich will be + aligned to the template :math:`g`. Calculates the warping wich minimises + the Fisher-Rao distance between :math:`g` and the registered function + :math:`f^*(t)=f(\gamma^*(t))=f \circ \gamma^*`. + + .. math:: + \gamma^* = argmin_{\gamma \in \Gamma} d_{\lambda}(f \circ + \gamma, g) + + Where :math:`d_{\lambda}` denotes the extended Fisher-Rao distance with a + penalty term, used to control the amount of warping. + + .. math:: + d_{\lambda}^2(f \circ \gamma, g) = \| SRSF(f \circ \gamma) + \sqrt{\dot{\gamma}} - SRSF(g)\|_{\mathbb{L}^2}^2 + \lambda + \mathcal{R}(\gamma) + + In the implementation it is used as penalty term + + .. math:: + \mathcal{R}(\gamma) = \|\sqrt{\dot{\gamma}}- 1 \|_{\mathbb{L}^2}^2 + + Wich restrict the amount of elasticity employed in the alignment. + + The registered function :math:`f^*(t)` can be calculated using the + composition :math:`f^*(t)=f(\gamma^*(t))`. + + If the template is not specified it is used the Karcher mean of the set of + functions under the elastic metric to perform the alignment, also known as + `elastic mean`, wich is the local minimum of the sum of squares of elastic + distances. See :func:`~elastic_mean`. + + In [SK16-4-2]_ are described extensively the algorithms employed and + the SRSF framework. + + Args: + template (str, :class:`FDataGrid` or callable, optional): Template to + align the curves. Can contain 1 sample to align all the curves to + it or the same number of samples than the fdatagrid. By default + `elastic mean`, in which case :func:`elastic_mean` is called. 
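For reference, the criterion above can be evaluated by hand for a fixed
candidate warping. Note that
:math:`SRSF(f \circ \gamma) = (SRSF(f) \circ \gamma)\sqrt{\dot\gamma}`, so the
SRSF of the composed function can be computed directly. A minimal sketch on
toy curves (``srsf`` and ``penalty`` here are local stand-ins, not the
library's objects)::

    import numpy as np
    from scipy.integrate import simps

    t = np.linspace(0, 1, 201)

    def srsf(x):
        dx = np.gradient(x, t)
        return np.sign(dx) * np.sqrt(np.abs(dx))

    f = np.sin(2 * np.pi * t ** 1.2)  # misaligned curve
    g = np.sin(2 * np.pi * t)         # template
    gamma = t ** (1 / 1.2)            # candidate warping: f(gamma(t)) == g(t)

    q_aligned = srsf(np.interp(gamma, t, f))  # SRSF(f o gamma)
    dgamma = np.gradient(gamma, t)

    penalty = 0.1
    cost = (simps((q_aligned - srsf(g)) ** 2, t)
            + penalty * simps((np.sqrt(dgamma) - 1) ** 2, t))
    # The distance term is ~0 here, so the cost is essentially the penalty.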
+ penalty_term (float, optional): Controls the amount of elasticity. + Defaults to 0. + output_points (array_like, optional): Set of points where the + functions are evaluated, by default uses the sample points of the + fdatagrid which will be transformed. + grid_dim (int, optional): Dimension of the grid used in the DP + alignment algorithm. Defaults 7. + + Attributes: + template_ (:class:`FDataGrid`): Template learned during fitting, + used for alignment in :meth:`transform`. + warping_ (:class:`FDataGrid`): Warping applied during the last + transformation. + + References: + .. [SK16-4-2] Srivastava, Anuj & Klassen, Eric P. (2016). Functional + and shape data analysis. In *Functional Data and Elastic + Registration* (pp. 73-122). Springer. + + Examples: + + Elastic registration of with train/test sets. + + >>> from skfda.preprocessing.registration import \ + ... ElasticRegistration + >>> from skfda.datasets import make_multimodal_samples + >>> X_train = make_multimodal_samples(n_samples=15, random_state=0) + >>> X_test = make_multimodal_samples(n_samples=3, random_state=1) + + Fit the transformer, which learns the elastic mean of the train + set as template. + + >>> elastic_registration = ElasticRegistration() + >>> elastic_registration.fit(X_train) + ElasticRegistration(...) + + Registration of the test set. + + >>> elastic_registration.transform(X_test) + FDataGrid(...) + + """ + + def __init__(self, template="elastic mean", penalty=0., output_points=None, + grid_dim=7): + """Initializes the registration transformer""" + + self.template = template + self.penalty = penalty + self.output_points = output_points + self.grid_dim = grid_dim + + def fit(self, X: FDataGrid=None, y=None): + """Fit the transformer. + + Learns the template used during the transformation. + + Args: + X (FDataGrid, optionl): Functional samples used as training + samples. If the template provided it is an FDataGrid this + samples are it is not need to construct the template from the + samples and this argument is ignored. + y (Ignored): Present for API conventions. + + Returns: + RegistrationTransformer: self. + + """ + if isinstance(self.template, FDataGrid): + self.template_ = self.template # Template already constructed + elif X is None: + raise ValueError("Must be provided a dataset X to construct the " + "template.") + elif self.template == "elastic mean": + self.template_ = elastic_mean(X) + else: + self.template_ = self.template(X) + + # Constructs the SRSF of the template + srsf = SRSF(output_points=self.output_points, initial_value=0) + self._template_srsf = srsf.fit_transform(self.template_) + + return self + + def transform(self, X: FDataGrid, y=None): + """Apply elastic registration to the data. + + Args: + X (:class:`FDataGrid`): Functional data to be registered. + y (ignored): Present for API conventions. + + Returns: + :class:`FDataGrid`: Registered samples. 
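Both ``fit`` and ``transform`` rely on the SRSF round trip described earlier.
A hand-rolled check of the identity :math:`q(t)|q(t)| = \dot f(t)`, using
plain NumPy and SciPy instead of the ``SRSF`` class::

    import numpy as np
    from scipy.integrate import cumtrapz

    t = np.linspace(0, 1, 201)
    f = np.sin(2 * np.pi * t)

    df = np.gradient(f, t)
    q = np.sign(df) * np.sqrt(np.abs(df))  # SRSF transform

    # Inverse transform: integrate q|q| and add the initial value f(a)
    f_back = f[0] + cumtrapz(q * np.abs(q), t, initial=0)
    assert np.allclose(f, f_back, atol=1e-3)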
+ + """ + check_is_fitted(self, '_template_srsf') + check_is_univariate(X) + + if (len(self._template_srsf) != 1 and + len(X) != len(self._template_srsf)): + + raise ValueError("The template should contain one sample to align " + "all the curves to the same function or the " + "same number of samples than X.") + + srsf = SRSF(output_points=self.output_points, initial_value=0) + fdatagrid_srsf = srsf.fit_transform(X) + + # Points of discretization + if self.output_points is None: + output_points = fdatagrid_srsf.sample_points[0] + else: + output_points = self.output_points + + # Discretizacion in evaluation points + q_data = fdatagrid_srsf(output_points)[..., 0] + template_data = self._template_srsf(output_points)[..., 0] + + if q_data.shape[0] == 1: + q_data = q_data[0] + + if template_data.shape[0] == 1: + template_data = template_data[0] + + # Values of the warping + gamma = _elastic_alignment_array(template_data, q_data, + _normalize_scale(output_points), + self.penalty, self.grid_dim) + + # Normalize warping to original interval + gamma = _normalize_scale( + gamma, a=output_points[0], b=output_points[-1]) + + # Interpolation + interpolation = SplineInterpolation( + interpolation_order=3, monotone=True) + + self.warping_ = FDataGrid(gamma, output_points, + interpolation=interpolation) + + return X.compose(self.warping_, eval_points=output_points) + + def inverse_transform(self, X: FDataGrid, y=None): + r"""Reverse the registration procedure previosly applied. + + Let :math:`gamma(t)` the warping applied to construct a registered + functional datum :math:`f^*(t)=f(\gamma(t))`. + + Given a functional datum :math:`f^*(t) it is computed + :math:`\gamma^{-1}(t)` to reverse the registration procedure + :math:`f(t)=f^*(\gamma^{-1}(t))`. + + Args: + X (:class:`FDataGrid`): Functional data to apply the reverse + transform. + y (Ignored): Present for API conventions. + + Returns: + :class:`FDataGrid`: Functional data compose by the inverse warping. + + Raises: + ValueError: If the warpings :math:`\gamma` were not build via + :meth:`transform` or if the number of samples of `X` is different + than the number of samples of the dataset previosly transformed. + + Examples: + + Center the datasets taking into account the misalignment. + + >>> from skfda.preprocessing.registration import \ + ... ElasticRegistration + >>> from skfda.datasets import make_multimodal_samples + >>> X = make_multimodal_samples(random_state=0) + + Registration of the dataset. + + >>> elastic_registration = ElasticRegistration() + >>> X = elastic_registration.fit_transform(X) + + Substract the elastic mean build as template during the + registration and reverse the transformation. + + >>> X = X - elastic_registration.template_ + >>> X_center = elastic_registration.inverse_transform(X) + >>> X_center + FDataGrid(...) + + + See also: + :func:`invert_warping` + + """ + if not hasattr(self, 'warping_'): + raise ValueError("Data must be previosly transformed to apply the " + "inverse transform") + elif len(X) != len(self.warping_): + raise ValueError("Data must contain the same number of samples " + "than the dataset previously transformed") + + inverse_warping = invert_warping(self.warping_) + + return X.compose(inverse_warping, eval_points=self.output_points) + + +def warping_mean(warping, *, max_iter=100, tol=1e-6, step_size=.3): + r"""Compute the karcher mean of a set of warpings. 
+
+    Let :math:`\gamma_i,\ i=1...n` be a set of warping functions
+    :math:`\gamma_i:[a,b] \rightarrow [a,b]` in :math:`\Gamma`, i.e.,
+    monotone increasing and with the restriction :math:`\gamma_i(a)=a,\
+    \gamma_i(b)=b`.
+
+    The Karcher mean :math:`\bar \gamma` is defined as the warping that
+    locally minimises the sum of squared Fisher-Rao distances
+    [SK16-8-3-2]_.
+
+    .. math::
+        \bar \gamma = argmin_{\gamma \in \Gamma} \sum_{i=1}^{n}
+         d_{FR}^2(\gamma, \gamma_i)
+
+    The computation is performed using the Hilbert-sphere structure obtained
+    after a transformation of the warpings; see [S11-3-3]_.
+
+    Args:
+        warping (:class:`~skfda.FDataGrid`): Set of warpings.
+        max_iter (int): Maximum number of iterations. Defaults to 100.
+        tol (float): Convergence criterion, the algorithm stops if the norm
+            of the mean of the shooting vectors, :math:`\| \bar v \|`, is
+            lower than the tolerance. Defaults to 1e-6.
+
+            if theta > 1e-10:
+                vmean += theta / np.sin(theta) * (psi_i - np.cos(theta) * mu)
+
+        # Mean of shooting vectors
+        vmean /= warping.n_samples
+        v_norm = np.sqrt(scipy.integrate.simps(np.square(vmean)))
+
+        # Convergence criterion
+        if v_norm < tol:
+            break
+
+        # Calculate exponential map of mu
+        a = np.cos(step_size * v_norm)
+        b = np.sin(step_size * v_norm) / v_norm
+        mu = a * mu + b * vmean
+
+    # Recover mean in original gamma space
+    warping_mean = scipy.integrate.cumtrapz(np.square(mu, out=mu)[0],
+                                            x=eval_points, initial=0)
+
+    # Affine translation to original scale
+    warping_mean = _normalize_scale(warping_mean,
+                                    a=original_eval_points[0],
+                                    b=original_eval_points[-1])
+
+    monotone_interpolation = SplineInterpolation(interpolation_order=3,
+                                                 monotone=True)
+
+    mean = FDataGrid([warping_mean], sample_points=original_eval_points,
+                     interpolation=monotone_interpolation)
+
+    return mean
+
+
+def elastic_mean(fdatagrid, *, penalty=0., center=True, max_iter=20, tol=1e-3,
+                 initial=None, grid_dim=7, **kwargs):
+    r"""Compute the Karcher mean under the elastic metric.
+
+    Calculates the Karcher mean of a set of functional samples in the
+    amplitude space :math:`\mathcal{A}=\mathcal{F}/\Gamma`.
+
+    Let :math:`q_i` be the corresponding SRSF of the observation :math:`f_i`.
+    The space :math:`\mathcal{A}` is defined using the equivalence classes
+    :math:`[q_i]=\{ q_i \circ \gamma \mid \gamma \in \Gamma \}`, where
+    :math:`\Gamma` denotes the space of warping functions. The Karcher mean
+    in this space is defined as
+
+    .. math::
+        [\mu_q] = argmin_{[q] \in \mathcal{A}} \sum_{i=1}^n
+        d_{\lambda}^2([q],[q_i])
+
+    Once :math:`[\mu_q]` is obtained, the element of the equivalence class
+    which makes the mean of the warpings employed the identity is selected.
+
+    See [SK16-8-3-1]_ and [S11-3]_.
+
+    Args:
+        fdatagrid (:class:`~skfda.FDataGrid`): Set of functions to compute the
+            mean.
+        penalty (float): Penalisation term. Defaults to 0.
+        center (boolean): If true, the mean of the warpings is computed and
+            used to select a central mean. Defaults to True.
+        max_iter (int): Maximum number of iterations. Defaults to 20.
+        tol (float): Convergence criterion, the algorithm will stop if
+            :math:`\|\mu_{(\nu)} - \mu_{(\nu - 1)}\|_2 /
+            \| \mu_{(\nu-1)} \|_2 < tol`.
+        initial (float): Value of the mean at the starting point. By default
+            takes the average of the initial points of the samples.
+        grid_dim (int, optional): Dimension of the grid used in the alignment
+            algorithm. Defaults to 7.
+        **kwargs: Named options to be passed to :func:`warping_mean`.
+
+    Return:
+        :class:`~skfda.FDataGrid`: FDataGrid with the mean of the functions.
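A minimal sketch of the sphere geometry referenced above, on toy warpings:
each warping is represented as :math:`\psi = \sqrt{\dot\gamma}`, a point on
the unit Hilbert sphere, and the mean estimate is moved along the exponential
map. The guard for :math:`\theta \approx 0` present in the real code is
omitted, since both toy warpings differ from the identity::

    import numpy as np
    from scipy.integrate import simps

    t = np.linspace(0, 1, 101)
    gammas = np.stack([t ** 1.5, t ** 0.7])        # two toy warpings of [0, 1]
    psi = np.sqrt(np.gradient(gammas, t, axis=1))  # sphere representation

    mu = np.ones_like(t)  # psi of the identity warping

    # Shooting vectors (log map of each psi_i at mu), averaged
    theta = np.arccos(np.clip(simps(mu * psi, t, axis=1), -1, 1))
    v = ((psi - np.cos(theta)[:, None] * mu)
         * (theta / np.sin(theta))[:, None]).mean(axis=0)

    # One exponential-map step toward the mean (step_size = 0.3)
    v_norm = np.sqrt(simps(v ** 2, t))
    mu = np.cos(0.3 * v_norm) * mu + np.sin(0.3 * v_norm) / v_norm * v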
+ + Raises: + ValueError: If the object is multidimensional or the shape of the srsf + do not match with the fdatagrid. + + References: + .. [SK16-8-3-1] Srivastava, Anuj & Klassen, Eric P. (2016). Functional + and shape data analysis. In *Karcher Mean of Amplitudes* + (pp. 273-274). Springer. + + .. [S11-3] Srivastava, Anuj et. al. Registration of Functional Data + Using Fisher-Rao Metric (2011). In *Karcher Mean and Function + Alignment* (pp. 7-10). arXiv:1103.3817v2. + + """ + check_is_univariate(fdatagrid) + + srsf_transformer = SRSF(initial_value=0) + fdatagrid_srsf = srsf_transformer.fit_transform(fdatagrid) + eval_points = fdatagrid.sample_points[0] + + eval_points_normalized = _normalize_scale(eval_points) + y_scale = eval_points[-1] - eval_points[0] + + interpolation = SplineInterpolation(interpolation_order=3, monotone=True) + + # Discretisation points + fdatagrid_normalized = FDataGrid(fdatagrid(eval_points) / y_scale, + sample_points=eval_points_normalized) + + srsf = fdatagrid_srsf(eval_points)[..., 0] + + # Initialize with function closest to the L2 mean with the L2 distance + centered = (srsf.T - srsf.mean(axis=0, keepdims=True).T).T + + distances = scipy.integrate.simps(np.square(centered, out=centered), + eval_points_normalized, axis=1) + + # Initialization of iteration + mu = srsf[np.argmin(distances)] + mu_aux = np.empty(mu.shape) + mu_1 = np.empty(mu.shape) + + # Main iteration + for _ in range(max_iter): + + gammas = _elastic_alignment_array( + mu, srsf, eval_points_normalized, penalty, grid_dim) + gammas = FDataGrid(gammas, sample_points=eval_points_normalized, + interpolation=interpolation) + + fdatagrid_normalized = fdatagrid_normalized.compose(gammas) + srsf = srsf_transformer.transform( + fdatagrid_normalized).data_matrix[..., 0] + + # Next iteration + mu_1 = srsf.mean(axis=0, out=mu_1) + + # Convergence criterion + mu_norm = np.sqrt(scipy.integrate.simps(np.square(mu, out=mu_aux), + eval_points_normalized)) + + mu_diff = np.sqrt(scipy.integrate.simps(np.square(mu - mu_1, + out=mu_aux), + eval_points_normalized)) + + if mu_diff / mu_norm < tol: + break + + mu = mu_1 + + if initial is None: + initial = fdatagrid.data_matrix[:, 0].mean() + + srsf_transformer.set_params(initial_value=initial) + + # Karcher mean orbit in space L2/Gamma + karcher_mean = srsf_transformer.inverse_transform( + fdatagrid.copy(data_matrix=[mu], sample_points=eval_points)) + + if center: + # Gamma mean in Hilbert Sphere + mean_normalized = warping_mean(gammas, **kwargs) + + gamma_mean = FDataGrid(_normalize_scale( + mean_normalized.data_matrix[..., 0], + a=eval_points[0], + b=eval_points[-1]), + sample_points=eval_points) + + gamma_inverse = invert_warping(gamma_mean) + + karcher_mean = karcher_mean.compose(gamma_inverse) + + # Return center of the orbit + return karcher_mean diff --git a/skfda/preprocessing/registration/validation.py b/skfda/preprocessing/registration/validation.py new file mode 100644 index 000000000..38870cdaa --- /dev/null +++ b/skfda/preprocessing/registration/validation.py @@ -0,0 +1,667 @@ +"""Methods and classes for validation of the registration procedures""" + +from typing import NamedTuple + +import numpy as np + +from ..._utils import check_is_univariate, _to_grid + + +class RegistrationScorer(): + r"""Cross validation scoring for registration procedures. + + It calculates the score of a registration procedure, used to perform + model validation or parameter selection. 
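A sketch of the intended use: because a scorer is a callable
``(estimator, X, y) -> float``, it can drive scikit-learn model selection
directly. This assumes the functional dataset splits cleanly under
scikit-learn's cross-validation indexing::

    from sklearn.model_selection import GridSearchCV

    from skfda.datasets import make_sinusoidal_process
    from skfda.preprocessing.registration import ShiftRegistration
    from skfda.preprocessing.registration.validation import LeastSquares

    X = make_sinusoidal_process(n_samples=30, error_std=0, random_state=0)

    grid = GridSearchCV(ShiftRegistration(),
                        param_grid={"step_size": [0.5, 1.0]},
                        scoring=LeastSquares(), cv=3)
    grid.fit(X)  # selects the step size with the best cross-validated score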
+ + Attributes: + eval_points (array_like, optional): Set of points where the + functions are evaluated to obtain a discrete representation and + perform the calculation. + + Args: + estimator (Estimator): Registration method estimator. The estimator + should be fitted. + X (:class:`FData `): Functional data to be registered. + y (:class:`FData `, optional): Functional data target. + If provided should be the same as `X` in general. + + Returns: + float: Cross validation score. + + Note: + The scorer passes the warpings generated in the registration procedure + to the `score_function` when necessary. + + See also: + :class:`~AmplitudePhaseDecomposition` + :class:`~LeastSquares` + :class:`~SobolevLeastSquares` + :class:`~PairwiseCorrelation` + + """ + + def __init__(self, eval_points=None): + """Initialize the transformer""" + self.eval_points = eval_points + + def __call__(self, estimator, X, y=None): + """Compute the score of the transformation. + + Args: + estimator (Estimator): Registration method estimator. The estimator + should be fitted. + X (:class:`FData `): Functional data to be registered. + y (:class:`FData `, optional): Functional data target. + If provided should be the same as `X` in general. + + Returns: + float: Cross validation score. + """ + if y is None: + y = X + + # Register the data + X_reg = estimator.transform(X) + + return self.score_function(y, X_reg) + + +class AmplitudePhaseDecompositionStats(NamedTuple): + r"""Named tuple to store the values of the amplitude-phase decomposition. + + Values of the amplitude phase decomposition computed in + :func:`mse_r_squared`, returned when `return_stats` is `True`. + + Args: + r_square (float): Squared correlation index :math:`R^2`. + mse_amp (float): Mean square error of amplitude + :math:`\text{MSE}_{amp}`. + mse_pha (float): Mean square error of phase :math:`\text{MSE}_{pha}`. + c_r (float): Constant :math:`C_R`. + + """ + r_squared: float + mse_amp: float + mse_pha: float + c_r: float + + +class AmplitudePhaseDecomposition(RegistrationScorer): + r"""Compute mean square error measures for amplitude and phase variation. + + Once the registration has taken place, this function computes two mean + squared error measures, one for amplitude variation, and the other for + phase variation and returns a squared multiple correlation index + of the amount of variation in the unregistered functions is due to phase. + + Let :math:`x_i(t),y_i(t)` be the unregistered and registered functions + respectively. The total mean square error measure (see [RGS09-8-5]_) is + defined as + + + .. math:: + \text{MSE}_{total}= + \frac{1}{N}\sum_{i=1}^{N}\int[x_i(t)-\overline x(t)]^2dt + + The measures of amplitude and phase mean square error are + + .. math:: + \text{MSE}_{amp} = C_R \frac{1}{N} + \sum_{i=1}^{N} \int \left [ y_i(t) - \overline{y}(t) \right ]^2 dt + + .. math:: + \text{MSE}_{phase}= + \int \left [C_R \overline{y}^2(t) - \overline{x}^2(t) \right]dt + + where the constant :math:`C_R` is defined as + + .. math:: + + C_R = 1 + \frac{\frac{1}{N}\sum_{i}^{N}\int [Dh_i(t)-\overline{Dh}(t)] + [ y_i^2(t)- \overline{y^2}(t) ]dt} + {\frac{1}{N} \sum_{i}^{N} \int y_i^2(t)dt} + + whose structure is related to the covariation between the deformation + functions :math:`Dh_i(t)` and the squared registered functions + :math:`y_i^2(t)`. When these two sets of functions are independents + :math:`C_R=1`, as in the case of shift registration. + + The total mean square error is decomposed in the two sources of + variability. + + .. 
math:: + \text{MSE}_{total} = \text{MSE}_{amp} + \text{MSE}_{phase} + + The squared multiple correlation index of the proportion of the total + variation due to phase is defined as: + + .. math:: + R^2 = \frac{\text{MSE}_{phase}}{\text{MSE}_{total}} + + See [KR08-3]_ for a detailed explanation. + + Attributes: + return_stats (boolean, optional): If `true` returns a named tuple + with four values: :math:`R^2`, :math:`MSE_{amp}`, :math:`MSE_{pha}` + and :math:`C_R`. Otherwise the squared correlation index + :math:`R^2` is returned. Default `False`. + + eval_points (array_like, optional): Set of points where the + functions are evaluated to obtain a discrete representation and + perform the calculation. + + + Args: + estimator (RegistrationTransformer): Registration transformer. + X (:class:`FData`): Unregistered functions. + y (:class:`FData`, optional): Target data, generally the same as X. By + default 'None', which uses `X` as target. + + + Returns: + (float or :class:`NamedTuple `): squared correlation + index :math:`R^2` if `return_stats` is `False`. Otherwise a named + tuple containing: + + * `r_squared`: Squared correlation index :math:`R^2`. + * `mse_amp`: Mean square error of amplitude + :math:`\text{MSE}_{amp}`. + * `mse_pha`: Mean square error of phase :math:`\text{MSE}_{pha}`. + * `c_r`: Constant :math:`C_R`. + + + Raises: + ValueError: If the functional data is not univariate. + + References: + .. [KR08-3] Kneip, Alois & Ramsay, James. (2008). Quantifying + amplitude and phase variation. In *Combining Registration and + Fitting for Functional Models* (pp. 14-15). Journal of the American + Statistical Association. + .. [RGS09-8-5] Ramsay J.O., Giles Hooker & Spencer Graves (2009). In + *Functional Data Analysis with R and Matlab* (pp. 125-126). + Springer. + + Examples: + + Calculate the score of the shift registration of a sinusoidal process + synthetically generated. + + >>> from skfda.preprocessing.registration.validation import \ + ... AmplitudePhaseDecomposition + >>> from skfda.preprocessing.registration import ShiftRegistration + >>> from skfda.datasets import make_sinusoidal_process + >>> X = make_sinusoidal_process(error_std=0, random_state=0) + + Fit the registration procedure. + + >>> shift_registration = ShiftRegistration() + >>> shift_registration.fit(X) + ShiftRegistration(...) + + Compute the :math:`R^2` correlation index + + >>> scorer = AmplitudePhaseDecomposition() + >>> score = scorer(shift_registration, X) + >>> round(score, 3) + 0.972 + + Also it is possible to get all the values of the decomposition. + + >>> scorer = AmplitudePhaseDecomposition(return_stats=True) + >>> stats = scorer(shift_registration, X) + >>> round(stats.r_squared, 3) + 0.972 + >>> round(stats.mse_amp, 3) + 0.007 + >>> round(stats.mse_pha, 3) + 0.227 + >>> round(stats.c_r, 3) + 1.0 + + + See also: + :class:`~LeastSquares` + :class:`~SobolevLeastSquares` + :class:`~PairwiseCorrelation` + + """ + + def __init__(self, return_stats=False, eval_points=None): + """Initialize the transformer""" + super().__init__(eval_points) + self.return_stats = return_stats + + def __call__(self, estimator, X, y=None): + """Compute the score of the transformation. + + Args: + estimator (Estimator): Registration method estimator. The estimator + should be fitted. + X (:class:`FData `): Functional data to be registered. + y (:class:`FData `, optional): Functional data target. + If provided should be the same as `X` in general. + + Returns: + float: Cross validation score. 
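A minimal sketch of the decomposition on toy curves, in the :math:`C_R = 1`
case that applies to shift registration::

    import numpy as np
    from scipy.integrate import simps

    t = np.linspace(0, 1, 101)
    shifts = np.array([[-0.05], [0.0], [0.05]])
    x = np.sin(2 * np.pi * (t + shifts))        # unregistered curves
    y = np.tile(np.sin(2 * np.pi * t), (3, 1))  # perfectly registered curves

    mse_amp = simps(np.mean((y - y.mean(axis=0)) ** 2, axis=0), t)
    mse_pha = simps(y.mean(axis=0) ** 2 - x.mean(axis=0) ** 2, t)
    r_squared = mse_pha / (mse_amp + mse_pha)   # 1.0: all variation is phase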
+ """ + if y is None: + y = X + + # Register the data + X_reg = estimator.transform(X) + + # Pass the warpings if are generated in the transformer + if hasattr(estimator, 'warping_'): + return self.score_function(y, X_reg, warping=estimator.warping_) + else: + return self.score_function(y, X_reg) + + def score_function(self, X, y, *, warping=None): + """Compute the score of the transformation performed. + + Args: + X (FData): Original functional data. + y (FData): Functional data registered. + + Returns: + float: Score of the transformation. + + """ + from scipy.integrate import simps + + check_is_univariate(X) + check_is_univariate(y) + + if len(y) != len(X): + raise ValueError(f"the registered and unregistered curves must have " + f"the same number of samples ({len(y)})!=({len(X)})") + + if warping is not None and len(warping) != len(X): + raise ValueError(f"The registered curves and the warping functions " + f"must have the same number of samples " + f"({len(X)})!=({len(warping)})") + + # Creates the mesh to discretize the functions + if self.eval_points is None: + try: + eval_points = y.sample_points[0] + + except AttributeError: + nfine = max(y.basis.n_basis * 10 + 1, 201) + eval_points = np.linspace(*y.domain_range[0], nfine) + else: + eval_points = np.asarray(self.eval_points) + + x_fine = X.evaluate(eval_points)[..., 0] + y_fine = y.evaluate(eval_points)[..., 0] + mu_fine = x_fine.mean(axis=0) # Mean unregistered function + eta_fine = y_fine.mean(axis=0) # Mean registered function + mu_fine_sq = np.square(mu_fine) + eta_fine_sq = np.square(eta_fine) + + # Total mean square error of the original funtions + # mse_total = scipy.integrate.simps( + # np.mean(np.square(x_fine - mu_fine), axis=0), + # eval_points) + + cr = 1. # Constant related to the covariation between the deformation + # functions and y^2 + + # If the warping functions are not provided, are suppose independent + if warping is not None: + # Derivates warping functions + warping_deriv = warping.derivative() + dh_fine = warping_deriv(eval_points)[..., 0] + dh_fine_mean = dh_fine.mean(axis=0) + dh_fine_center = dh_fine - dh_fine_mean + + y_fine_sq = np.square(y_fine) # y^2 + y_fine_sq_center = np.subtract(y_fine_sq, eta_fine_sq) # y^2-E[y2] + + covariate = np.inner(dh_fine_center.T, y_fine_sq_center.T) + covariate = covariate.mean(axis=0) + cr += np.divide(simps(covariate, eval_points), + simps(eta_fine_sq, eval_points)) + + # mse due to phase variation + mse_pha = simps(cr * eta_fine_sq - mu_fine_sq, eval_points) + + # mse due to amplitude variation + # mse_amp = mse_total - mse_pha + y_fine_center = np.subtract(y_fine, eta_fine) + y_fine_center_sq = np.square(y_fine_center, out=y_fine_center) + y_fine_center_sq_mean = y_fine_center_sq.mean(axis=0) + + mse_amp = simps(y_fine_center_sq_mean, eval_points) + + # Total mean square error of the original funtions + mse_total = mse_pha + mse_amp + + # squared correlation measure of proportion of phase variation + rsq = mse_pha / (mse_total) + + if self.return_stats is True: + stats = AmplitudePhaseDecompositionStats(rsq, mse_amp, mse_pha, cr) + return stats + + return rsq + + +class LeastSquares(AmplitudePhaseDecomposition): + r"""Cross-validated measure of the registration procedure. + + Computes a cross-validated measure of the level of synchronization + [James07]_: + + .. 
math:: + ls=1 - \frac{1}{N} \sum_{i=1}^{N} \frac{\int\left(\tilde{f}_{i}(t)- + \frac{1}{N-1} \sum_{j \neq i} \tilde{f}_{j}(t)\right)^{2} dt}{\int + \left(f_{i}(t)-\frac{1}{N-1} \sum_{j \neq i} f_{j}(t)\right)^{2} dt} + + where :math:`f_i` and :math:`\tilde f_i` are the original and the + registered data respectively. + + The :math:`ls` measures the total cross-sectional variance of the aligned + functions, relative to the original value. + A value of :math:`1` would indicate an identical shape for all registered + curves, while zero corresponds to no improvement in the synchronization. It + can be negative because the model can be arbitrarily worse. + + Attributes: + eval_points (array_like, optional): Set of points where the + functions are evaluated to obtain a discrete representation and + perform the calculation. + + Args: + estimator (RegistrationTransformer): Registration transformer. + X (:class:`FData `): Original functional data. + y (:class:`FData `): Registered functional data. + + + Note: + The original least square measure used in [S11-5-2-1]_ is defined as + :math:`1 - ls`, but has been modified according to the scikit-learn + scorers, where higher values correspond to better cross-validated + measures. + + + References: + .. [James07] G. James. Curve alignments by moments. Annals of Applied + Statistics, 1(2):480–501, 2007. + .. [S11-5-2-1] Srivastava, Anuj et. al. Registration of Functional Data + Using Fisher-Rao Metric (2011). In *Comparisons with other Methods* + (p. 18). arXiv:1103.3817v2. + + Examples: + + Calculate the score of the shift registration of a sinusoidal process + synthetically generated. + + >>> from skfda.preprocessing.registration.validation import \ + ... LeastSquares + >>> from skfda.preprocessing.registration import ShiftRegistration + >>> from skfda.datasets import make_sinusoidal_process + >>> X = make_sinusoidal_process(error_std=0, random_state=0) + + Fit the registration procedure. + + >>> shift_registration = ShiftRegistration() + >>> shift_registration.fit(X) + ShiftRegistration(...) + + Compute the least squares score. + >>> scorer = LeastSquares() + >>> score = scorer(shift_registration, X) + >>> round(score, 3) + 0.796 + + + See also: + :class:`~AmplitudePhaseDecomposition` + :class:`~SobolevLeastSquares` + :class:`~PairwiseCorrelation` + + """ + + def score_function(self, X, y): + """Compute the score of the transformation performed. + + Args: + X (FData): Original functional data. + y (FData): Functional data registered. + + Returns: + float: Score of the transformation. + + """ + from ...misc.metrics import pairwise_distance, lp_distance + + check_is_univariate(X) + check_is_univariate(y) + + X, y = _to_grid(X, y, eval_points=self.eval_points) + + # Instead of compute f_i - 1/(N-1) sum(j!=i)f_j for each i = 1 ... N + # It is used (1 + 1/(N-1))f_i - 1/(N-1) sum(j=1 ... N) f_j = + # (1 + 1/(N-1))f_i - N/(N-1) mean(f) = + # C1 * f_1 - C2 mean(f) for each i= 1 ... N + N = len(X) + C1 = 1 + 1 / (N - 1) + C2 = N / (N - 1) + + X = C1 * X + y = C1 * y + mean_X = C2 * X.mean() + mean_y = C2 * y.mean() + + # Compute distance to mean + distance = pairwise_distance(lp_distance) + ls_x = distance(X, mean_X).flatten() + ls_y = distance(y, mean_y).flatten() + + # Quotient of distance + quotient = ls_y / ls_x + + return 1 - 1. / N * quotient.sum() + + +class SobolevLeastSquares(RegistrationScorer): + r"""Cross-validated measure of the registration procedure. + + Computes a cross-validated measure of the level of synchronization + [S11-5-2-3]_: + + .. 
math:: + sls=1 - \frac{\sum_{i=1}^{N} \int\left(\dot{\tilde{f}}_{i}(t)- + \frac{1}{N} \sum_{j=1}^{N} \dot{\tilde{f}}_{j}\right)^{2} dt} + {\sum_{i=1}^{N} \int\left(\dot{f}_{i}(t)-\frac{1}{N} \sum_{j=1}^{N} + \dot{f}_{j}\right)^{2} dt} + + where :math:`\dot f_i` and :math:`\dot \tilde f_i` are the derivatives of + the original and the registered data respectively. + + This criterion measures the total cross-sectional variance of the + derivatives of the aligned functions, relative to the original value. + A value of :math:`1` would indicate an identical shape for all registered + curves, while zero corresponds to no improvement in the registration. It + can be negative because the model can be arbitrarily worse. + + Attributes: + eval_points (array_like, optional): Set of points where the + functions are evaluated to obtain a discrete representation and + perform the calculation. + + Args: + estimator (RegistrationTransformer): Registration transformer. + X (:class:`FData `): Original functional data. + y (:class:`FData `): Registered functional data. + + Note: + The original sobolev least square measure used in [S11-5-2-3]_ is + defined as :math:`1 - sls`, but has been modified according to the + scikit-learn scorers, where higher values correspond to better + cross-validated measures. + + + References: + .. [S11-5-2-3] Srivastava, Anuj et. al. Registration of Functional Data + Using Fisher-Rao Metric (2011). In *Comparisons with other Methods* + (p. 18). arXiv:1103.3817v2. + + Examples: + + Calculate the score of the shift registration of a sinusoidal process + synthetically generated. + + >>> from skfda.preprocessing.registration.validation import \ + ... SobolevLeastSquares + >>> from skfda.preprocessing.registration import ShiftRegistration + >>> from skfda.datasets import make_sinusoidal_process + >>> X = make_sinusoidal_process(error_std=0, random_state=0) + + Fit the registration procedure. + + >>> shift_registration = ShiftRegistration() + >>> shift_registration.fit(X) + ShiftRegistration(...) + + Compute the sobolev least squares score. + >>> scorer = SobolevLeastSquares() + >>> score = scorer(shift_registration, X) + >>> round(score, 3) + 0.761 + + See also: + :class:`~AmplitudePhaseDecomposition` + :class:`~LeastSquares` + :class:`~PairwiseCorrelation` + + """ + + def score_function(self, X, y): + """Compute the score of the transformation performed. + + Args: + X (FData): Original functional data. + y (FData): Functional data registered. + + Returns: + float: Score of the transformation. + + """ + from ...misc.metrics import pairwise_distance, lp_distance + + check_is_univariate(X) + check_is_univariate(y) + + # Compute derivative + X = X.derivative() + y = y.derivative() + + # Discretize if needed + X, y = _to_grid(X, y, eval_points=self.eval_points) + + # L2 distance to mean + distance = pairwise_distance(lp_distance) + + sls_x = distance(X, X.mean()) + sls_y = distance(y, y.mean()) + + return 1 - sls_y.sum() / sls_x.sum() + + +class PairwiseCorrelation(RegistrationScorer): + r"""Cross-validated measure of pairwise correlation between functions. + + Computes a cross-validated pairwise correlation between functions + to compare registration methods [S11-5-2-2]_ : + + .. 
math:: + pc=\frac{\sum_{i \neq j} \operatorname{cc}\left(\tilde{f}_{i}(t), + \tilde{f}_{j}(t)\right)}{\sum_{i \neq j} + \operatorname{cc}\left(f_{i}(t), f_{j}(t)\right)} + + where :math:`f_i` and :math:`\tilde f_i` are the original and registered + data respectively and :math:`cc(f, g)` is the pairwise Pearson’s + correlation between functions. + + The larger the value of :math:`pc`, the better the alignment between + functions in general. + + Attributes: + eval_points (array_like, optional): Set of points where the + functions are evaluated to obtain a discrete representation and + perform the calculation. + + Args: + estimator (RegistrationTransformer): Registration transformer. + X (:class:`FData `): Original functional data. + y (:class:`FData `): Registered functional data. + + Note: + Pearson’s correlation between functions is calculated assuming + the samples are equiespaciated. + + References: + .. [S11-5-2-2] Srivastava, Anuj et. al. Registration of Functional Data + Using Fisher-Rao Metric (2011). In *Comparisons with other Methods* + (p. 18). arXiv:1103.3817v2. + + Examples: + + Calculate the score of the shift registration of a sinusoidal process + synthetically generated. + + >>> from skfda.preprocessing.registration.validation import \ + ... PairwiseCorrelation + >>> from skfda.preprocessing.registration import ShiftRegistration + >>> from skfda.datasets import make_sinusoidal_process + >>> X = make_sinusoidal_process(error_std=0, random_state=0) + + Fit the registration procedure. + + >>> shift_registration = ShiftRegistration() + >>> shift_registration.fit(X) + ShiftRegistration(...) + + Compute the pairwise correlation score. + >>> scorer = PairwiseCorrelation() + >>> score = scorer(shift_registration, X) + >>> round(score, 3) + 1.816 + + See also: + :class:`~AmplitudePhaseDecomposition` + :class:`~LeastSquares` + :class:`~SobolevLeastSquares` + + """ + + def score_function(self, X, y): + """Compute the score of the transformation performed. + + Args: + X (FData): Original functional data. + y (FData): Functional data registered. + + Returns: + float: Score of the transformation. + + """ + check_is_univariate(X) + check_is_univariate(y) + + # Discretize functional data if needed + X, y = _to_grid(X, y, eval_points=self.eval_points) + + # Compute correlation matrices with zeros in diagonal + # corrcoefs computes the correlation between vector, without weights + # due to the sample points + X_corr = np.corrcoef(X.data_matrix[..., 0]) + np.fill_diagonal(X_corr, 0.) + + y_corr = np.corrcoef(y.data_matrix[..., 0]) + np.fill_diagonal(y_corr, 0.) + + return y_corr.sum() / X_corr.sum() diff --git a/skfda/preprocessing/smoothing/_basis.py b/skfda/preprocessing/smoothing/_basis.py index f86623de7..8258dbbb9 100644 --- a/skfda/preprocessing/smoothing/_basis.py +++ b/skfda/preprocessing/smoothing/_basis.py @@ -4,7 +4,6 @@ This module contains the class for the basis smoothing. """ -import collections from enum import Enum from typing import Union, Iterable @@ -14,7 +13,8 @@ from ... import FDataBasis from ... 
import FDataGrid -from ._linear import _LinearSmoother, _check_r_to_r +from ..._utils import _cartesian_product +from ._linear import _LinearSmoother class _Cholesky(): @@ -23,8 +23,13 @@ class _Cholesky(): def __call__(self, *, basis_values, weight_matrix, data_matrix, penalty_matrix, **_): - right_matrix = basis_values.T @ weight_matrix @ data_matrix - left_matrix = basis_values.T @ weight_matrix @ basis_values + common_matrix = basis_values.T + + if weight_matrix is not None: + common_matrix @= weight_matrix + + right_matrix = common_matrix @ data_matrix + left_matrix = common_matrix @ basis_values # Adds the roughness penalty to the equation if penalty_matrix is not None: @@ -44,7 +49,7 @@ class _QR(): """Solve the linear equation using qr factorization""" def __call__(self, *, basis_values, weight_matrix, data_matrix, - penalty_matrix, ndegenerated, **_): + penalty_matrix, **_): if weight_matrix is not None: # Decompose W in U'U and calculate UW and Uy @@ -52,17 +57,13 @@ def __call__(self, *, basis_values, weight_matrix, data_matrix, basis_values = upper @ basis_values data_matrix = upper @ data_matrix - if penalty_matrix is not None: + if not np.all(penalty_matrix == 0): w, v = np.linalg.eigh(penalty_matrix) - # Reduction of the penalty matrix taking away 0 or almost - # zeros eigenvalues - if ndegenerated: - index = ndegenerated - 1 - else: - index = None - w = w[:index:-1] - v = v[:, :index:-1] + w = w[::-1] + v = v[:, ::-1] + + w = np.maximum(w, 0) penalty_matrix = v @ np.diag(np.sqrt(w)) # Augment the basis matrix with the square root of the @@ -71,9 +72,9 @@ def __call__(self, *, basis_values, weight_matrix, data_matrix, basis_values, penalty_matrix.T], axis=0) - # Augment data matrix by n - ndegenerated zeros + # Augment data matrix by n zeros data_matrix = np.pad(data_matrix, - ((0, len(v) - ndegenerated), + ((0, len(v)), (0, 0)), mode='constant') @@ -113,7 +114,7 @@ def __call__(self, *, estimator, **_): def transform(self, estimator, X, y=None): if estimator.return_basis: - coefficients = (X.data_matrix[..., 0] + coefficients = (X.data_matrix.reshape((X.n_samples, -1)) @ estimator._cached_coef_matrix.T) fdatabasis = FDataBasis( @@ -133,7 +134,7 @@ class BasisSmoother(_LinearSmoother): to the closest function that can be generated by the basis.a. The fit is made so as to reduce the penalized sum of squared errors - [RS05-5-2-5]_: + [RS05-5-2-6]_: .. math:: @@ -163,29 +164,20 @@ class BasisSmoother(_LinearSmoother): method for the resolution of a LS problem. If this method throughs a rounding error warning you may want to use the QR factorisation that is more numerically stable despite being more expensive to compute. - [RS05-5-2-7]_ + [RS05-5-2-8]_ Args: basis: (Basis): Basis used. weights (array_like, optional): Matrix to weight the observations. Defaults to the identity matrix. smoothing_parameter (int or float, optional): Smoothing - parameter. Trying with several factors in a logarythm scale is - suggested. If 0 no smoothing is performed. Defaults to 0. - penalty (int, iterable or :class:`LinearDifferentialOperator`): If it - is an integer, it indicates the order of the - derivative used in the computing of the penalty matrix. For - instance 2 means that the differential operator is - :math:`f''(x)`. If it is an iterable, it consists on coefficients - representing the differential operator used in the computing of - the penalty matrix. For instance the tuple (1, 0, - numpy.sin) means :math:`1 + sin(x)D^{2}`. 
It is possible to - supply directly the LinearDifferentialOperator object. - If not supplied this defaults to 2. Only used if penalty_matrix is - ``None``. - penalty_matrix (array_like, optional): Penalty matrix. If - supplied the differential operator is not used and instead - the matrix supplied by this argument is used. + parameter. Trying several factors on a logarithmic scale is + suggested. If 0, no smoothing is performed. Defaults to 1. + regularization (int, iterable or :class:`Regularization`): + Regularization object. This allows penalizing overly + complicated models, which results in additional smoothing. By + default it is ``None``, meaning that no additional smoothing + takes place. method (str): Algorithm used for calculating the coefficients using the least squares method. The values admitted are 'cholesky', 'qr' and 'matrix' for Cholesky and QR factorisation methods, and matrix @@ -205,9 +197,9 @@ class BasisSmoother(_LinearSmoother): >>> import numpy as np >>> import skfda >>> t = np.linspace(0, 1, 5) - >>> x = np.sin(2 * np.pi * t) + np.cos(2 * np.pi * t) + >>> x = np.sin(2 * np.pi * t) + np.cos(2 * np.pi * t) + 2 >>> x - array([ 1., 1., -1., -1., 1.]) + array([ 3., 3., 1., 1., 3.]) >>> fd = skfda.FDataGrid(data_matrix=x, sample_points=t) >>> basis = skfda.representation.basis.Fourier((0, 1), n_basis=3) @@ -215,11 +207,11 @@ ... basis, method='cholesky') >>> fd_smooth = smoother.fit_transform(fd) >>> fd_smooth.data_matrix.round(2) - array([[[ 1.], + array([[[ 3.], + [ 3.], + [ 1.], [ 1.], - [-1.], - [-1.], - [ 1.]]]) + [ 3.]]]) However, the parameter ``return_basis`` can be used to return the data in basis form, by default, without extra smoothing: @@ -230,19 +222,19 @@ ... basis, method='cholesky', return_basis=True) >>> fd_basis = smoother.fit_transform(fd) >>> fd_basis.coefficients.round(2) - array([[ 0. , 0.71, 0.71]]) + array([[ 2. , 0.71, 0.71]]) >>> smoother = skfda.preprocessing.smoothing.BasisSmoother( ... basis, method='qr', return_basis=True) >>> fd_basis = smoother.fit_transform(fd) >>> fd_basis.coefficients.round(2) - array([[-0. , 0.71, 0.71]]) + array([[ 2. , 0.71, 0.71]]) >>> smoother = skfda.preprocessing.smoothing.BasisSmoother( ... basis, method='matrix', return_basis=True) >>> fd_basis = smoother.fit_transform(fd) >>> fd_basis.coefficients.round(2) - array([[ 0. , 0.71, 0.71]]) + array([[ 2. , 0.71, 0.71]]) >>> smoother.hat_matrix().round(2) array([[ 0.43, 0.14, -0.14, 0.14, 0.43], [ 0.14, 0.71, 0.29, -0.29, 0.14], @@ -250,52 +242,51 @@ [ 0.14, -0.29, 0.29, 0.71, 0.14], [ 0.43, 0.14, -0.14, 0.14, 0.43]]) - If the smoothing parameter is set to something else than zero, we can - penalize approximations that are not smooth enough using a linear - differential operator: + We can penalize approximations that are not smooth enough using some + kind of regularization: - >>> from skfda.misc import LinearDifferentialOperator + >>> from skfda.misc.regularization import TikhonovRegularization + >>> from skfda.misc.operators import LinearDifferentialOperator + >>> >>> fd = skfda.FDataGrid(data_matrix=x, sample_points=t) >>> basis = skfda.representation.basis.Fourier((0, 1), n_basis=3) >>> smoother = skfda.preprocessing.smoothing.BasisSmoother( ... basis, method='cholesky', - ... smoothing_parameter=1, - ... penalty=LinearDifferentialOperator(weights=[3, 5]), + ... regularization=TikhonovRegularization( + ...
LinearDifferentialOperator([0.1, 0.2])), ... return_basis=True) >>> fd_basis = smoother.fit_transform(fd) >>> fd_basis.coefficients.round(2) - array([[ 0.18, 0.07, 0.09]]) + array([[ 2.04, 0.51, 0.55]]) - >>> from skfda.misc import LinearDifferentialOperator >>> fd = skfda.FDataGrid(data_matrix=x, sample_points=t) >>> basis = skfda.representation.basis.Fourier((0, 1), n_basis=3) >>> smoother = skfda.preprocessing.smoothing.BasisSmoother( ... basis, method='qr', - ... smoothing_parameter=1, - ... penalty=LinearDifferentialOperator(weights=[3, 5]), + ... regularization=TikhonovRegularization( + ... LinearDifferentialOperator([0.1, 0.2])), ... return_basis=True) >>> fd_basis = smoother.fit_transform(fd) >>> fd_basis.coefficients.round(2) - array([[ 0.18, 0.07, 0.09]]) + array([[ 2.04, 0.51, 0.55]]) - >>> from skfda.misc import LinearDifferentialOperator >>> fd = skfda.FDataGrid(data_matrix=x, sample_points=t) >>> basis = skfda.representation.basis.Fourier((0, 1), n_basis=3) >>> smoother = skfda.preprocessing.smoothing.BasisSmoother( ... basis, method='matrix', - ... smoothing_parameter=1, - ... penalty=LinearDifferentialOperator(weights=[3, 5]), + ... regularization=TikhonovRegularization( + ... LinearDifferentialOperator([0.1, 0.2])), ... return_basis=True) >>> fd_basis = smoother.fit_transform(fd) >>> fd_basis.coefficients.round(2) - array([[ 0.18, 0.07, 0.09]]) + array([[ 2.04, 0.51, 0.55]]) References: - .. [RS05-5-2-5] Ramsay, J., Silverman, B. W. (2005). How spline + .. [RS05-5-2-6] Ramsay, J., Silverman, B. W. (2005). How spline smooths are computed. In *Functional Data Analysis* (pp. 86-87). Springer. - .. [RS05-5-2-7] Ramsay, J., Silverman, B. W. (2005). HSpline + .. [RS05-5-2-8] Ramsay, J., Silverman, B. W. (2005). Spline smoothing as an augmented least squares problem. In *Functional Data Analysis* (pp. 86-87). Springer. @@ -311,19 +302,17 @@ class SolverMethod(Enum): def __init__(self, basis, *, - smoothing_parameter: float = 0, + smoothing_parameter: float = 1., weights=None, - penalty: Union[int, Iterable[float], - 'LinearDifferentialOperator'] = None, - penalty_matrix=None, + regularization: Union[int, Iterable[float], + 'LinearDifferentialOperator'] = None, output_points=None, method='cholesky', return_basis=False): self.basis = basis self.smoothing_parameter = smoothing_parameter self.weights = weights - self.penalty = penalty - self.penalty_matrix = penalty_matrix + self.regularization = regularization self.output_points = output_points self.method = method self.return_basis = return_basis @@ -331,70 +320,45 @@ def __init__(self, def _method_function(self): """ Return the method function""" method_function = self.method - if not callable(method_function): + if not isinstance(method_function, self.SolverMethod): method_function = self.SolverMethod[ - method_function.lower()].value - - return method_function - - def _penalty(self): - from ...misc import LinearDifferentialOperator - - """Get the penalty differential operator.""" - if self.penalty is None: - penalty = LinearDifferentialOperator(order=2) - elif isinstance(self.penalty, int): - penalty = LinearDifferentialOperator(order=self.penalty) - elif isinstance(self.penalty, collections.abc.Iterable): - penalty = LinearDifferentialOperator(weights=self.penalty) - else: - penalty = self.penalty - - return penalty - - def _penalty_matrix(self): - """Get the final penalty matrix. + method_function.lower()] - The smoothing parameter is already multiplied by it.
- - """ - - if self.penalty_matrix is not None: - penalty_matrix = self.penalty_matrix - else: - penalty = self._penalty() - - if self.smoothing_parameter > 0: - penalty_matrix = self.basis.penalty(penalty.order, - penalty.weights) - else: - penalty_matrix = None - - if penalty_matrix is not None: - penalty_matrix *= self.smoothing_parameter - - return penalty_matrix + return method_function.value def _coef_matrix(self, input_points): """Get the matrix that gives the coefficients""" - basis_values_input = self.basis.evaluate(input_points).T + from ...misc.regularization import compute_penalty_matrix + + basis_values_input = self.basis.evaluate( + _cartesian_product(input_points)).reshape( + (self.basis.n_basis, -1)).T # If no weight matrix is given all the weights are one - weight_matrix = (self.weights if self.weights is not None - else np.identity(basis_values_input.shape[0])) + if self.weights is not None: + ols_matrix = (basis_values_input.T @ self.weights + @ basis_values_input) + else: + ols_matrix = basis_values_input.T @ basis_values_input - inv = basis_values_input.T @ weight_matrix @ basis_values_input + penalty_matrix = compute_penalty_matrix( + basis_iterable=(self.basis,), + regularization_parameter=self.smoothing_parameter, + regularization=self.regularization) - penalty_matrix = self._penalty_matrix() - if penalty_matrix is not None: - inv += penalty_matrix + ols_matrix += penalty_matrix - inv = np.linalg.inv(inv) + right_side = basis_values_input.T + if self.weights is not None: + right_side @= self.weights - return inv @ basis_values_input.T @ weight_matrix + return np.linalg.solve( + ols_matrix, right_side) def _hat_matrix(self, input_points, output_points): - basis_values_output = self.basis.evaluate(output_points).T + basis_values_output = self.basis.evaluate(_cartesian_product( + output_points)).reshape( + (self.basis.n_basis, -1)).T return basis_values_output @ self._coef_matrix(input_points) @@ -409,9 +373,8 @@ def fit(self, X: FDataGrid, y=None): self (object) """ - _check_r_to_r(X) - self.input_points_ = X.sample_points[0] + self.input_points_ = X.sample_points self.output_points_ = (self.output_points if self.output_points is not None else self.input_points_) @@ -434,29 +397,32 @@ def fit_transform(self, X: FDataGrid, y=None): self (object) """ + from ...misc.regularization import compute_penalty_matrix - _check_r_to_r(X) - - self.input_points_ = X.sample_points[0] + self.input_points_ = X.sample_points self.output_points_ = (self.output_points if self.output_points is not None else self.input_points_) - penalty_matrix = self._penalty_matrix() + penalty_matrix = compute_penalty_matrix( + basis_iterable=(self.basis,), + regularization_parameter=self.smoothing_parameter, + regularization=self.regularization) # n is the samples # m is the observations # k is the number of elements of the basis # Each sample in a column (m x n) - data_matrix = X.data_matrix[..., 0].T + data_matrix = X.data_matrix.reshape((X.n_samples, -1)).T # Each basis in a column - basis_values = self.basis.evaluate(self.input_points_).T + basis_values = self.basis.evaluate( + _cartesian_product(self.input_points_)).reshape( + (self.basis.n_basis, -1)).T # If no weight matrix is given all the weights are one - weight_matrix = (self.weights if self.weights is not None - else np.identity(basis_values.shape[0])) + weight_matrix = self.weights # We need to solve the equation # (phi' W phi + lambda * R) C = phi' W Y @@ -470,9 +436,6 @@ def fit_transform(self, X: FDataGrid, y=None): 
if(data_matrix.shape[0] > self.basis.n_basis or self.smoothing_parameter > 0): - # TODO: The penalty could be None (if the matrix is passed) - ndegenerated = self.basis._ndegenerated(self._penalty().order) - method = self._method_function() # If the method provides the complete transformation use it @@ -485,8 +448,7 @@ basis_values=basis_values, weight_matrix=weight_matrix, data_matrix=data_matrix, - penalty_matrix=penalty_matrix, - ndegenerated=ndegenerated) + penalty_matrix=penalty_matrix) elif data_matrix.shape[0] == self.basis.n_basis: # If the number of basis equals the number of points and no @@ -505,7 +467,7 @@ if self.return_basis: return fdatabasis else: - return fdatabasis.to_grid(eval_points=self.output_points_) + return fdatabasis.to_grid(sample_points=self.output_points_) return self @@ -521,7 +483,8 @@ """ - assert all(self.input_points_ == X.sample_points[0]) + assert all([all(i == s) + for i, s in zip(self.input_points_, X.sample_points)]) method = self._method_function() diff --git a/skfda/representation/__init__.py b/skfda/representation/__init__.py index da6e4fa05..d6a18fc9b 100644 --- a/skfda/representation/__init__.py +++ b/skfda/representation/__init__.py @@ -1,8 +1,8 @@ -from ._functional_data import FData -from .basis import FDataBasis -from .grid import FDataGrid - from . import basis from . import extrapolation -from . import interpolation from . import grid +from . import interpolation +from ._evaluation_trasformer import EvaluationTransformer +from ._functional_data import FData +from .basis import FDataBasis +from .grid import FDataGrid diff --git a/skfda/representation/_evaluation_trasformer.py b/skfda/representation/_evaluation_trasformer.py new file mode 100644 index 000000000..927304a30 --- /dev/null +++ b/skfda/representation/_evaluation_trasformer.py @@ -0,0 +1,113 @@ +from sklearn.base import BaseEstimator, TransformerMixin +from sklearn.utils.validation import check_is_fitted +from ._functional_data import FData +from .grid import FDataGrid + + +class EvaluationTransformer(BaseEstimator, TransformerMixin): + r""" + Transformer returning the evaluations of FData objects as a matrix. + + Args: + eval_points (array_like): List of points where the functions are + evaluated. If `None`, the functions must be `FDataGrid` objects + and all points will be returned. + extrapolation (str or Extrapolation, optional): Controls the + extrapolation mode for elements outside the domain range. By + default, the mode defined when the object was created is + used. + grid (bool, optional): Whether to evaluate the results on a grid + spanned by the input arrays, or at points specified by the + input arrays. If true, the eval_points should be a list of size + dim_domain with the corresponding times for each axis. The + return matrix has shape n_samples x len(t1) x len(t2) x ... x + len(t_dim_domain) x dim_codomain. If the domain dimension is 1 + the parameter has no effect. Defaults to False. + + Attributes: + shape_ (tuple): original shape of coefficients per sample. + + Examples: + + >>> from skfda.representation import (FDataGrid, FDataBasis, + ... EvaluationTransformer) + >>> from skfda.representation.basis import Monomial + + Functional data object with 2 samples + representing a function :math:`f : \mathbb{R}\longmapsto\mathbb{R}`.
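Because ``EvaluationTransformer`` flattens each functional sample into one row of an ordinary matrix, its output can feed any multivariate scikit-learn estimator; the doctests that follow show the same flattening on concrete grids. A minimal sketch of that use (an illustrative aside, not part of this changeset; the synthetic dataset and pipeline are assumptions):

.. code:: python

    from sklearn.decomposition import PCA
    from sklearn.pipeline import make_pipeline

    from skfda.datasets import make_sinusoidal_process
    from skfda.representation import EvaluationTransformer

    # 15 sinusoidal curves discretized on a common grid
    X = make_sinusoidal_process(n_samples=15, random_state=0)

    # Each curve becomes one row vector, so PCA can consume it directly
    pipeline = make_pipeline(EvaluationTransformer(), PCA(n_components=2))
    scores = pipeline.fit_transform(X)
    print(scores.shape)  # (15, 2)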
+ + >>> data_matrix = [[1, 2], [2, 3]] + >>> sample_points = [2, 4] + >>> fd = FDataGrid(data_matrix, sample_points) + >>> + >>> transformer = EvaluationTransformer() + >>> transformer.fit_transform(fd) + array([[1, 2], + [2, 3]]) + + Functional data object with 2 samples + representing a function :math:`f : \mathbb{R}\longmapsto\mathbb{R}^2`. + + >>> data_matrix = [[[1, 0.3], [2, 0.4]], [[2, 0.5], [3, 0.6]]] + >>> sample_points = [2, 4] + >>> fd = FDataGrid(data_matrix, sample_points) + >>> + >>> transformer = EvaluationTransformer() + >>> transformer.fit_transform(fd) + array([[ 1. , 0.3, 2. , 0.4], + [ 2. , 0.5, 3. , 0.6]]) + + Representation of a functional data object with 2 samples + representing a function :math:`f : \mathbb{R}^2\longmapsto\mathbb{R}`. + + >>> data_matrix = [[[1, 0.3], [2, 0.4]], [[2, 0.5], [3, 0.6]]] + >>> sample_points = [[2, 4], [3, 6]] + >>> fd = FDataGrid(data_matrix, sample_points) + >>> + >>> transformer = EvaluationTransformer() + >>> transformer.fit_transform(fd) + array([[ 1. , 0.3, 2. , 0.4], + [ 2. , 0.5, 3. , 0.6]]) + + Evaluation of a functional data object at several points. + + >>> basis = Monomial(n_basis=4) + >>> coefficients = [[0.5, 1, 2, .5], [1.5, 1, 4, .5]] + >>> fd = FDataBasis(basis, coefficients) + >>> + >>> transformer = EvaluationTransformer([0, 0.2, 0.5, 0.7, 1]) + >>> transformer.fit_transform(fd) + array([[ 0.5 , 0.784 , 1.5625, 2.3515, 4. ], + [ 1.5 , 1.864 , 3.0625, 4.3315, 7. ]]) + + """ + + def __init__(self, eval_points=None, *, + extrapolation=None, grid=False): + self.eval_points = eval_points + self.extrapolation = extrapolation + self.grid = grid + + def fit(self, X: FData, y=None): + + if self.eval_points is None and not isinstance(X, FDataGrid): + raise ValueError("If no eval_points are passed, the functions " + "should be FDataGrid objects.") + + self._is_fitted = True + + return self + + def transform(self, X, y=None): + + check_is_fitted(self, '_is_fitted') + + if self.eval_points is None: + evaluation = X.data_matrix.copy() + else: + evaluation = X(self.eval_points, + extrapolation=self.extrapolation, grid=self.grid) + + evaluation = evaluation.reshape((X.n_samples, -1)) + + return evaluation diff --git a/skfda/representation/_functional_data.py b/skfda/representation/_functional_data.py index e3d0af9da..b62acdb92 100644 --- a/skfda/representation/_functional_data.py +++ b/skfda/representation/_functional_data.py @@ -5,11 +5,13 @@ """ from abc import ABC, abstractmethod +import warnings import pandas.api.extensions + import numpy as np -from .._utils import _coordinate_list, _list_of_arrays +from .._utils import (_evaluate_grid, _reshape_eval_points) from .extrapolation import _parse_extrapolation @@ -21,26 +23,84 @@ class FData(ABC, pandas.api.extensions.ExtensionArray): dim_domain (int): Dimension of the domain. dim_codomain (int): Dimension of the image. extrapolation (Extrapolation): Default extrapolation mode. - dataset_label (str): name of the dataset. - axes_labels (list): list containing the labels of the different - axis. The first element is the x label, the second the y label - and so on. - keepdims (bool): Default value of argument keepdims in - :func:`evaluate`. + dataset_name (str): name of the dataset. + argument_names (tuple): tuple containing the names of the different + arguments. + coordinate_names (tuple): tuple containing the names of the different + coordinate functions. 
""" - def __init__(self, extrapolation, dataset_label, axes_labels, keepdims): + def __init__(self, *, extrapolation, + dataset_name=None, + dataset_label=None, + axes_labels=None, + argument_names=None, + coordinate_names=None): self.extrapolation = extrapolation - self.dataset_label = dataset_label + self.dataset_name = dataset_name + + if dataset_label is not None: + self.dataset_label = dataset_label + + self.argument_names = argument_names + self.coordinate_names = coordinate_names self.axes_labels = axes_labels - self.keepdims = keepdims + + @property + def dataset_label(self): + warnings.warn("Parameter dataset_label is deprecated. Use the " + "parameter dataset_name instead.", + DeprecationWarning) + return self.dataset_name + + @dataset_label.setter + def dataset_label(self, name): + warnings.warn("Parameter dataset_label is deprecated. Use the " + "parameter dataset_name instead.", + DeprecationWarning) + self.dataset_name = name + + @property + def argument_names(self): + return self._argument_names + + @argument_names.setter + def argument_names(self, names): + if names is None: + names = (None,) * self.dim_domain + else: + names = tuple(names) + if len(names) != self.dim_domain: + raise ValueError("There must be a name for each of the " + "dimensions of the domain.") + + self._argument_names = names + + @property + def coordinate_names(self): + return self._coordinate_names + + @coordinate_names.setter + def coordinate_names(self, names): + if names is None: + names = (None,) * self.dim_codomain + else: + names = tuple(names) + if len(names) != self.dim_codomain: + raise ValueError("There must be a name for each of the " + "dimensions of the codomain.") + + self._coordinate_names = names @property def axes_labels(self): - """Return the list of axes labels""" - return self._axes_labels + warnings.warn("Parameter axes_labels is deprecated. Use the " + "parameters argument_names and " + "coordinate_names instead.", DeprecationWarning) + + return self.argument_names + self.coordinate_names @axes_labels.setter def axes_labels(self, labels): @@ -48,6 +108,10 @@ def axes_labels(self, labels): if labels is not None: + warnings.warn("Parameter axes_labels is deprecated. Use the " + "parameters argument_names and " + "coordinate_names instead.", DeprecationWarning) + labels = np.asarray(labels) if len(labels) > (self.dim_domain + self.dim_codomain): raise ValueError("There must be a label for each of the " @@ -56,7 +120,8 @@ def axes_labels(self, labels): diff = (self.dim_domain + self.dim_codomain) - len(labels) labels = np.concatenate((labels, diff * [None])) - self._axes_labels = labels + self.argument_names = labels[:self.dim_domain] + self.coordinate_names = labels[self.dim_domain:] @property @abstractmethod @@ -118,19 +183,6 @@ def extrapolation(self, value): else: self._extrapolation = _parse_extrapolation(value) - self._extrapolator_evaluator = None - - @property - def extrapolator_evaluator(self): - """Return the evaluator constructed by the extrapolator.""" - if self.extrapolation is None: - return None - - elif self._extrapolator_evaluator is None: - self._extrapolator_evaluator = self._extrapolation.evaluator(self) - - return self._extrapolator_evaluator - @property @abstractmethod def domain_range(self): @@ -141,50 +193,6 @@ def domain_range(self): """ pass - def _reshape_eval_points(self, eval_points, evaluation_aligned): - """Convert and reshape the eval_points to ndarray with the - corresponding shape. 
- - Args: - eval_points (array_like): Evaluation points to be reshaped. - evaluation_aligned (bool): Boolean flag. True if all the samples - will be evaluated at the same evaluation_points. - - Returns: - (np.ndarray): Numpy array with the eval_points, if - evaluation_aligned is True with shape `number of evaluation points` - x `dim_domain`. If the points are not aligned the shape of the - points will be `n_samples` x `number of evaluation points` - x `dim_domain`. - - """ - - # Case evaluation of a scalar value, i.e., f(0) - if np.isscalar(eval_points): - eval_points = [eval_points] - - # Creates a copy of the eval points, and convert to np.array - eval_points = np.array(eval_points, dtype=float) - - if evaluation_aligned: # Samples evaluated at same eval points - - eval_points = eval_points.reshape((eval_points.shape[0], - self.dim_domain)) - - else: # Different eval_points for each sample - - if eval_points.ndim < 2 or eval_points.shape[0] != self.n_samples: - - raise ValueError(f"eval_points should be a list " - f"of length {self.n_samples} with the " - f"evaluation points for each sample.") - - eval_points = eval_points.reshape((eval_points.shape[0], - eval_points.shape[1], - self.dim_domain)) - - return eval_points - def _extrapolation_index(self, eval_points): """Checks the points that need to be extrapolated. @@ -210,111 +218,6 @@ def _extrapolation_index(self, eval_points): return index - def _evaluate_grid(self, axes, *, derivative=0, extrapolation=None, - aligned_evaluation=True, keepdims=None): - """Evaluate the functional object in the cartesian grid. - - This method is called internally by :meth:`evaluate` when the argument - `grid` is True. - - Evaluates the functional object in the grid generated by the cartesian - product of the axes. The length of the list of axes should be equal - than the domain dimension of the object. - - If the list of axes has lengths :math:`n_1, n_2, ..., n_m`, where - :math:`m` is equal than the dimension of the domain, the result of the - evaluation in the grid will be a matrix with :math:`m+1` dimensions and - shape :math:`n_{samples} x n_1 x n_2 x ... x n_m`. - - If `aligned_evaluation` is false each sample is evaluated in a - different grid, and the list of axes should contain a list of axes for - each sample. - - If the domain dimension is 1, the result of the behaviour of the - evaluation will be the same than :meth:`evaluate` without the grid - option, but with worst performance. - - Args: - axes (array_like): List of axes to generated the grid where the - object will be evaluated. - derivative (int, optional): Order of the derivative. Defaults to 0. - extrapolation (str or Extrapolation, optional): Controls the - extrapolation mode for elements outside the domain range. By - default it is used the mode defined during the instance of the - object. - aligned_evaluation (bool, optional): If False evaluates each sample - in a different grid. - keepdims (bool, optional): If the image dimension is equal to 1 and - keepdims is True the return matrix has shape - n_samples x eval_points x 1 else n_samples x eval_points. - By default is used the value given during the instance of the - object. - - Returns: - (numpy.darray): Numpy array with dim_domain + 1 dimensions with - the result of the evaluation. - - Raises: - ValueError: If there are a different number of axes than the domain - dimension. 
- - """ - axes = _list_of_arrays(axes) - - if aligned_evaluation: - - lengths = [len(ax) for ax in axes] - - if len(axes) != self.dim_domain: - raise ValueError(f"Length of axes should be " - f"{self.dim_domain}") - - eval_points = _coordinate_list(axes) - - res = self.evaluate(eval_points, derivative=derivative, - extrapolation=extrapolation, keepdims=True) - - elif self.dim_domain == 1: - - eval_points = [ax.squeeze(0) for ax in axes] - - return self.evaluate(eval_points, - derivative=derivative, - extrapolation=extrapolation, - keepdims=keepdims, - aligned_evaluation=False) - else: - - if len(axes) != self.n_samples: - raise ValueError("Should be provided a list of axis per " - "sample") - elif len(axes[0]) != self.dim_domain: - raise ValueError(f"Incorrect length of axes. " - f"({self.dim_domain}) != {len(axes[0])}") - - lengths = [len(ax) for ax in axes[0]] - eval_points = np.empty((self.n_samples, - np.prod(lengths), - self.dim_domain)) - - for i in range(self.n_samples): - eval_points[i] = _coordinate_list(axes[i]) - - res = self.evaluate(eval_points, derivative=derivative, - extrapolation=extrapolation, - keepdims=True, aligned_evaluation=False) - - shape = [self.n_samples] + lengths - - if keepdims is None: - keepdims = self.keepdims - - if self.dim_codomain != 1 or keepdims: - shape += [self.dim_codomain] - - # Roll the list of result in a list - return res.reshape(shape) - def _join_evaluation(self, index_matrix, index_ext, index_ev, res_extrapolation, res_evaluation): """Join the points evaluated. @@ -352,43 +255,20 @@ def _join_evaluation(self, index_matrix, index_ext, index_ev, return res @abstractmethod - def _evaluate(self, eval_points, *, derivative=0): + def _evaluate(self, eval_points, *, aligned=True): """Internal evaluation method, defines the evaluation of the FData. - Evaluates the samples of an FData object at the same eval_points. - - This method is called internally by :meth:`evaluate` when the argument - `aligned_evaluation` is True. - - Args: - eval_points (numpy.ndarray): Numpy array with shape - `(len(eval_points), dim_domain)` with the evaluation points. - Each entry represents the coordinate of a point. - derivative (int, optional): Order of the derivative. Defaults to 0. - - Returns: - (numpy.darray): Numpy 3d array with shape `(n_samples, - len(eval_points), dim_codomain)` with the result of the - evaluation. The entry (i,j,k) will contain the value k-th image - dimension of the i-th sample, at the j-th evaluation point. - - """ - pass - - @abstractmethod - def _evaluate_composed(self, eval_points, *, derivative=0): - """Internal evaluation method, defines the evaluation of a FData. - - Evaluates the samples of an FData object at different eval_points. + Evaluates the samples of an FData object at several points. - This method is called internally by :meth:`evaluate` when the argument - `aligned_evaluation` is False. + Subclasses must override this method to implement evaluation. Args: - eval_points (numpy.ndarray): Numpy array with shape - `(n_samples, len(eval_points), dim_domain)` with the - evaluation points for each sample. - derivative (int, optional): Order of the derivative. Defaults to 0. + eval_points (array_like): List of points where the functions are + evaluated. If `aligned` is `True`, then a list of + lists of points must be passed, with one list per sample. + aligned (bool, optional): Whether the input points are + the same for each sample, or an array of points per sample is + passed. 
Returns: (numpy.ndarray): Numpy 3d array with shape `(n_samples, len(eval_points), dim_codomain)` with the result of the @@ -400,16 +280,16 @@ def _evaluate_composed(self, eval_points, *, derivative=0): pass def evaluate(self, eval_points, *, derivative=0, extrapolation=None, - grid=False, aligned_evaluation=True, keepdims=None): + grid=False, aligned=True): """Evaluate the object or its derivatives at a list of values or a grid. Args: eval_points (array_like): List of points where the functions are - evaluated. If a matrix of shape nsample x eval_points is given - each sample is evaluated at the values in the corresponding row - in eval_points. - derivative (int, optional): Order of the derivative. Defaults to 0. + evaluated. If ``grid`` is ``True``, a list of axes, one per + domain dimension, must be passed instead. If ``aligned`` is + ``False``, then a list of lists (of points or axes, as + explained) must be passed, with one list per sample. extrapolation (str or Extrapolation, optional): Controls the extrapolation mode for elements outside the domain range. By default it is used the mode defined during the instance of the @@ -421,35 +301,44 @@ def evaluate(self, eval_points, *, derivative=0, extrapolation=None, return matrix has shape n_samples x len(t1) x len(t2) x ... x len(t_dim_domain) x dim_codomain. If the domain dimension is 1 the parameter has no effect. Defaults to False. - keepdims (bool, optional): If the image dimension is equal to 1 and - keepdims is True the return matrix has shape - n_samples x eval_points x 1 else n_samples x eval_points. - By default is used the value given during the instance of the - object. + aligned (bool, optional): Whether the input points are + the same for each sample, or an array of points per sample is + passed. Returns: (np.ndarray): Matrix whose rows are the values of each function at the values specified in eval_points. """ + if derivative != 0: + warnings.warn("Parameter derivative is deprecated.
Use the " + "derivative function instead.", DeprecationWarning) + return self.derivative(order=derivative)( + eval_points, + extrapolation=extrapolation, + grid=grid, + aligned=aligned) + + if grid: # Evaluation of a grid performed in auxiliar function + return _evaluate_grid(eval_points, + evaluate_method=self.evaluate, + n_samples=self.n_samples, + dim_domain=self.dim_domain, + dim_codomain=self.dim_codomain, + extrapolation=extrapolation, + aligned=aligned) + if extrapolation is None: extrapolation = self.extrapolation - extrapolator_evaluator = self.extrapolator_evaluator else: # Gets the function to perform extrapolation or None extrapolation = _parse_extrapolation(extrapolation) - extrapolator_evaluator = None - - if grid: # Evaluation of a grid performed in auxiliar function - return self._evaluate_grid(eval_points, - derivative=derivative, - extrapolation=extrapolation, - aligned_evaluation=aligned_evaluation, - keepdims=keepdims) # Convert to array and check dimensions of eval points - eval_points = self._reshape_eval_points(eval_points, - aligned_evaluation) + eval_points = _reshape_eval_points(eval_points, + aligned=aligned, + n_samples=self.n_samples, + dim_domain=self.dim_domain) # Check if extrapolation should be applied if extrapolation is not None: @@ -461,19 +350,12 @@ def evaluate(self, eval_points, *, derivative=0, extrapolation=None, if not extrapolate: # Direct evaluation - if aligned_evaluation: - res = self._evaluate(eval_points, derivative=derivative) - else: - res = self._evaluate_composed(eval_points, - derivative=derivative) + res = self._evaluate( + eval_points, aligned=aligned) else: - # Evaluation using extrapolation - if extrapolator_evaluator is None: - extrapolator_evaluator = extrapolation.evaluator(self) - # Partition of eval points - if aligned_evaluation: + if aligned: index_ext = index_matrix index_ev = ~index_matrix @@ -481,13 +363,6 @@ def evaluate(self, eval_points, *, derivative=0, extrapolation=None, eval_points_extrapolation = eval_points[index_ext] eval_points_evaluation = eval_points[index_ev] - # Direct evaluation - res_evaluation = self._evaluate(eval_points_evaluation, - derivative=derivative) - res_extrapolation = extrapolator_evaluator.evaluate( - eval_points_extrapolation, - derivative=derivative) - else: index_ext = np.logical_or.reduce(index_matrix, axis=0) eval_points_extrapolation = eval_points[:, index_ext] @@ -495,31 +370,23 @@ def evaluate(self, eval_points, *, derivative=0, extrapolation=None, index_ev = np.logical_or.reduce(~index_matrix, axis=0) eval_points_evaluation = eval_points[:, index_ev] - # Direct evaluation - res_evaluation = self._evaluate_composed( - eval_points_evaluation, - derivative=derivative - ) + # Direct evaluation + res_evaluation = self._evaluate( + eval_points_evaluation, + aligned=aligned) - res_extrapolation = extrapolator_evaluator.evaluate_composed( - eval_points_extrapolation, - derivative=derivative) + res_extrapolation = extrapolation.evaluate( + self, + eval_points_extrapolation, + aligned=aligned) res = self._join_evaluation(index_matrix, index_ext, index_ev, res_extrapolation, res_evaluation) - # If not provided gets default value of keepdims - if keepdims is None: - keepdims = self.keepdims - - # Delete last axis if not keepdims and - if self.dim_codomain == 1 and not keepdims: - res = res.reshape(res.shape[:-1]) - return res def __call__(self, eval_points, *, derivative=0, extrapolation=None, - grid=False, aligned_evaluation=True, keepdims=None): + grid=False, aligned=True): """Evaluate the 
object or its derivatives at a list of values or a grid. This method is a wrapper of :meth:`evaluate`. @@ -540,11 +407,6 @@ def __call__(self, eval_points, *, derivative=0, extrapolation=None, return matrix has shape n_samples x len(t1) x len(t2) x ... x len(t_dim_domain) x dim_codomain. If the domain dimension is 1 the parameter has no effect. Defaults to False. - keepdims (bool, optional): If the image dimension is equal to 1 and - keepdims is True the return matrix has shape - n_samples x eval_points x 1 else n_samples x eval_points. - By default is used the value given during the instance of the - object. Returns: (np.ndarray): Matrix whose rows are the values of each function at the values specified in eval_points. """ return self.evaluate(eval_points, derivative=derivative, extrapolation=extrapolation, grid=grid, - aligned_evaluation=aligned_evaluation, - keepdims=keepdims) + aligned=aligned) @abstractmethod def derivative(self, order=1): @@ -597,58 +458,6 @@ def shift(self, shifts, *, restrict_domain=False, extrapolation=None, """ pass - def _get_labels_coordinates(self, key): - """Return the labels of a function when it is indexed by its components. - - Args: - key (int, tuple, slice): Key used to index the coordinates. - - Returns: - (list): labels of the object fd.coordinates[key. - - """ - if self.axes_labels is None: - labels = None - else: - - labels = self.axes_labels[:self.dim_domain].tolist() - image_label = np.atleast_1d( - self.axes_labels[self.dim_domain:][key]) - labels.extend(image_label.tolist()) - - return labels - - def _join_labels_coordinates(self, *others): - """Return the labels of the concatenation as new coordinates of multiple - functional objects. - - Args: - others (:obj:`FData`) Objects to be concatenated. - - Returns: - (list): labels of the object - self.concatenate(*others, as_coordinates=True). - - """ - # Labels should be None or a list of length self.dim_domain + - # self.dim_codomain. - - if self.axes_labels is None: - labels = (self.dim_domain + self.dim_codomain) * [None] - else: - labels = self.axes_labels.tolist() - - for other in others: - if other.axes_labels is None: - labels.extend(other.dim_codomain * [None]) - else: - labels.extend(list(other.axes_labels[self.dim_domain:])) - - if all(label is None for label in labels): - labels = None - - return labels - def plot(self, *args, **kwargs): """Plot the FDataGrid object. @@ -685,15 +494,15 @@ def plot(self, *args, **kwargs): interval; in the case of surfaces a list with 2 tuples with the ranges for each dimension. Default uses the domain range of the functional object. - sample_labels (list of int): contains integers from [0 to number of + group (list of int): contains integers from [0 to number of labels) indicating to which group each sample belongs to. Then, the samples with the same label are plotted in the same color. If None, the default value, each sample is plotted in the color assigned by matplotlib.pyplot.rcParams['axes.prop_cycle']. - label_colors (list of colors): colors in which groups are + group_colors (list of colors): colors in which groups are represented, there must be one for each group. If None, each group is shown with distinct colors in the "Greys" colormap. - label_names (list of str): name of each of the groups which appear + group_names (list of str): name of each of the groups which appear in a legend, there must be one for each one. Defaults to None and the legend is not shown.
**kwargs: if dim_domain is 1, keyword arguments to be passed to @@ -737,12 +546,12 @@ def mean(self, weights=None): pass @abstractmethod - def to_grid(self, eval_points=None): + def to_grid(self, sample_points=None): """Return the discrete representation of the object. Args: - eval_points (array_like, optional): Set of points where the - functions are evaluated. + sample_points (array_like, optional): Points per axis + where the function is going to be evaluated. Returns: FDataGrid: Discrete representation of the functional data @@ -811,6 +620,14 @@ def __getitem__(self, key): pass + def __eq__(self, other): + return ( + self.extrapolation == other.extrapolation + and self.dataset_name == other.dataset_name + and self.argument_names == other.argument_names + and self.coordinate_names == other.coordinate_names + ) + @abstractmethod def __add__(self, other): """Addition for FData object.""" @@ -885,10 +702,6 @@ def to_numpy(self): return array - def __array__(self, dtype=None): - """Automatic conversion to numpy array""" - return self.to_numpy() - ##################################################################### # Pandas ExtensionArray methods ##################################################################### @@ -981,7 +794,7 @@ def take(self, indices, allow_fill=False, fill_value=None, axis=0): # If the ExtensionArray is backed by an ndarray, then # just pass that here instead of coercing to object. - data = self.astype(object) + data = self.to_numpy() if allow_fill and fill_value is None: fill_value = self.dtype.na_value # fill value should always be translated from the scalar @@ -1009,3 +822,34 @@ def _concat_same_type( first, *others = to_concat return first.concatenate(*others) + + +def concatenate(objects, as_coordinates=False): + """ + Join samples from an iterable of similar FData objects. + + Joins samples of FData objects if they have the same + dimensions and sampling points. + + Args: + objects (list of :obj:`FData`): Objects to be concatenated. + as_coordinates (boolean, optional): If False, concatenates as + new samples; otherwise, concatenates the other functions as + new components of the image. Defaults to False. + + Returns: + :obj:`FData`: FData object with the samples from the + original objects. + + Raises: + ValueError: In case the provided list of FData objects is + empty. + + Todo: + For the moment, only unidimensional objects are supported in basis + representation. + """ + objects = iter(objects) + first = next(objects, None) + + if not first: + raise ValueError("At least one FData object must be provided " + "to concatenate.") + + return first.concatenate(*objects, as_coordinates=as_coordinates) diff --git a/skfda/representation/basis.py b/skfda/representation/basis.py deleted file mode 100644 index 9709ae469..000000000 --- a/skfda/representation/basis.py +++ /dev/null @@ -1,2454 +0,0 @@ -"""Module for functional data manipulation in a basis system. - -Defines functional data object in a basis function system representation and -the corresponding basis classes. - -""" -from abc import ABC, abstractmethod -import copy - -from numpy import polyder, polyint, polymul, polyval -import pandas.api.extensions -import scipy.integrate -from scipy.interpolate import BSpline as SciBSpline -from scipy.interpolate import PPoly -import scipy.interpolate -import scipy.linalg -from scipy.special import binom - -import numpy as np - -from . import FData -from .
import grid -from .._utils import _list_of_arrays, constants - - -__author__ = "Miguel Carbajo Berrocal" -__email__ = "miguel.carbajo@estudiante.uam.es" - -# aux functions - - -def _polypow(p, n=2): - if n > 2: - return polymul(p, _polypow(p, n - 1)) - if n == 2: - return polymul(p, p) - elif n == 1: - return p - elif n == 0: - return [1] - else: - raise ValueError("n must be greater than 0.") - - -def _check_domain(domain_range): - for domain in domain_range: - if len(domain) != 2 or domain[0] >= domain[1]: - raise ValueError(f"The interval {domain} is not well-defined.") - - -def _same_domain(one_domain_range, other_domain_range): - return np.array_equal(one_domain_range, other_domain_range) - - -class Basis(ABC): - """Defines the structure of a basis function system. - - Attributes: - domain_range (tuple): a tuple of length 2 containing the initial and - end values of the interval over which the basis can be evaluated. - n_basis (int): number of functions in the basis. - - """ - - def __init__(self, domain_range=None, n_basis=1): - """Basis constructor. - - Args: - domain_range (tuple or list of tuples, optional): Definition of the - interval where the basis defines a space. Defaults to (0,1). - n_basis: Number of functions that form the basis. Defaults to 1. - """ - - if domain_range is not None: - # TODO: Allow multiple dimensions - domain_range = _list_of_arrays(domain_range) - - # Some checks - _check_domain(domain_range) - - if n_basis < 1: - raise ValueError("The number of basis has to be strictly " - "possitive.") - - self._domain_range = domain_range - self.n_basis = n_basis - self._drop_index_lst = [] - - super().__init__() - - @property - def domain_range(self): - if self._domain_range is None: - return [np.array([0, 1])] - else: - return self._domain_range - - @domain_range.setter - def domain_range(self, value): - self._domain_range = value - - @abstractmethod - def _compute_matrix(self, eval_points, derivative=0): - """Compute the basis or its derivatives given a list of values. - - Args: - eval_points (array_like): List of points where the basis is - evaluated. - derivative (int, optional): Order of the derivative. Defaults to 0. - - Returns: - (:obj:`numpy.darray`): Matrix whose rows are the values of the each - basis function or its derivatives at the values specified in - eval_points. - - """ - pass - - @abstractmethod - def _ndegenerated(self, penalty_degree): - """Return number of 0 or nearly 0 eigenvalues of the penalty matrix. - - Args: - penalty_degree (int): Degree of the derivative used in the - calculation of the penalty matrix. - - Returns: - int: number of close to 0 eigenvalues. - - """ - pass - - @abstractmethod - def _derivative(self, coefs, order=1): - pass - - def evaluate(self, eval_points, derivative=0): - """Evaluate Basis objects and its derivatives. - - Evaluates the basis function system or its derivatives at a list of - given values. - - Args: - eval_points (array_like): List of points where the basis is - evaluated. - derivative (int, optional): Order of the derivative. Defaults to 0. - - Returns: - (numpy.darray): Matrix whose rows are the values of the each - basis function or its derivatives at the values specified in - eval_points. 
- - """ - eval_points = np.asarray(eval_points) - if np.any(np.isnan(eval_points)): - raise ValueError("The list of points where the function is " - "evaluated can not contain nan values.") - - return self._compute_matrix(eval_points, derivative) - - def plot(self, chart=None, *, derivative=0, **kwargs): - """Plot the basis object or its derivatives. - - Args: - chart (figure object, axe or list of axes, optional): figure over - with the graphs are plotted or axis over where the graphs are - plotted. - derivative (int or tuple, optional): Order of derivative to be - plotted. Defaults 0. - **kwargs: keyword arguments to be passed to the - fdata.plot function. - - Returns: - fig (figure): figure object in which the graphs are plotted. - - """ - self.to_basis().plot(chart=chart, derivative=derivative, **kwargs) - - def _evaluate_single_basis_coefficients(self, coefficients, basis_index, x, - cache): - """Evaluate a differential operator over one of the basis. - - Computes the result of evaluating a the result of applying a - differential operator over one of the basis functions. It also admits a - "cache" dictionary to store the results for the other basis not - returned because they are evaluated by the function and may be needed - later. - - Args: - coefficients (list): List of coefficients representing a - differential operator. An iterable indicating - coefficients of derivatives (which can be functions). For - instance the tuple (1, 0, numpy.sin) means :math:`1 - + sin(x)D^{2}`. - basis_index (int): index in self.basis of the basis that is - evaluated. - x (number): Point of evaluation. - cache (dict): Dictionary with the values of previous evaluation - for all the basis function and where the results of the - evalaution are stored. This is done because later evaluation - of the same differential operator and same x may be needed - for other of the basis functions. - - """ - if x not in cache: - res = np.zeros(self.n_basis) - for i, k in enumerate(coefficients): - if callable(k): - res += k(x) * self._compute_matrix([x], i)[:, 0] - else: - res += k * self._compute_matrix([x], i)[:, 0] - cache[x] = res - return cache[x][basis_index] - - def _numerical_penalty(self, coefficients): - """Return a penalty matrix using a numerical approach. - - See :func:`~basis.Basis.penalty`. - - Args: - coefficients (list): List of coefficients representing a - differential operator. An iterable indicating - coefficients of derivatives (which can be functions). For - instance the tuple (1, 0, numpy.sin) means :math:`1 - + sin(x)D^{2}`. - """ - - # Range of first dimension - domain_range = self.domain_range[0] - penalty_matrix = np.zeros((self.n_basis, self.n_basis)) - cache = {} - for i in range(self.n_basis): - penalty_matrix[i, i] = scipy.integrate.quad( - lambda x: (self._evaluate_single_basis_coefficients( - coefficients, i, x, cache) ** 2), - domain_range[0], domain_range[1] - )[0] - for j in range(i + 1, self.n_basis): - penalty_matrix[i, j] = scipy.integrate.quad( - (lambda x: (self._evaluate_single_basis_coefficients( - coefficients, i, x, cache) * - self._evaluate_single_basis_coefficients( - coefficients, j, x, cache))), - domain_range[0], domain_range[1] - )[0] - penalty_matrix[j, i] = penalty_matrix[i, j] - return penalty_matrix - - @abstractmethod - def penalty(self, derivative_degree=None, coefficients=None): - r"""Return a penalty matrix given a differential operator. - - The differential operator can be either a derivative of a certain - degree or a more complex operator. 
- - The penalty matrix is defined as [RS05-5-6-2]_: - - .. math:: - R_{ij} = \int L\phi_i(s) L\phi_j(s) ds - - where :math:`\phi_i(s)` for :math:`i=1, 2, ..., n` are the basis - functions and :math:`L` is a differential operator. - - Args: - derivative_degree (int): Integer indicating the order of the - derivative or . For instance 2 means that the differential - operator is :math:`f''(x)`. - coefficients (list): List of coefficients representing a - differential operator. An iterable indicating - coefficients of derivatives (which can be functions). For - instance the tuple (1, 0, numpy.sin) means :math:`1 - + sin(x)D^{2}`. Only used if derivative degree is None. - - Returns: - numpy.array: Penalty matrix. - - References: - .. [RS05-5-6-2] Ramsay, J., Silverman, B. W. (2005). Specifying the - roughness penalty. In *Functional Data Analysis* (pp. 106-107). - Springer. - - """ - pass - - @abstractmethod - def basis_of_product(self, other): - pass - - @abstractmethod - def rbasis_of_product(self, other): - pass - - @staticmethod - def default_basis_of_product(one, other): - """Default multiplication for a pair of basis""" - if not _same_domain(one.domain_range, other.domain_range): - raise ValueError("Ranges are not equal.") - - norder = min(8, one.n_basis + other.n_basis) - n_basis = max(one.n_basis + other.n_basis, norder + 1) - return BSpline(one.domain_range, n_basis, norder) - - def rescale(self, domain_range=None): - r"""Return a copy of the basis with a new domain range, with the - corresponding values rescaled to the new bounds. - - Args: - domain_range (tuple, optional): Definition of the interval - where the basis defines a space. Defaults uses the same as - the original basis. - """ - - if domain_range is None: - domain_range = self.domain_range - - return type(self)(domain_range, self.n_basis) - - def same_domain(self, other): - r"""Returns if two basis are defined on the same domain range. - - Args: - other (Basis): Basis to check the domain range definition - """ - return _same_domain(self.domain_range, other.domain_range) - - def copy(self): - """Basis copy""" - return copy.deepcopy(self) - - def to_basis(self): - return FDataBasis(self.copy(), np.identity(self.n_basis)) - - def _list_to_R(self, knots): - retstring = "c(" - for i in range(0, len(knots)): - retstring = retstring + str(knots[i]) + ", " - return retstring[0:len(retstring) - 2] + ")" - - def _to_R(self): - raise NotImplementedError - - def _inner_matrix(self, other=None): - r"""Return the Inner Product Matrix of a pair of basis. - - The Inner Product Matrix is defined as - - .. math:: - IP_{ij} = \langle\phi_i, \theta_j\rangle - - where :math:`\phi_i` is the ith element of the basi and - :math:`\theta_j` is the jth element of the second basis. - This matrix helps on the calculation of the inner product - between objects on two basis and for the change of basis. - - Args: - other (:class:`Basis`): Basis to compute the inner product - matrix. If not basis is given, it computes the matrix with - itself returning the Gram Matrix - - Returns: - numpy.array: Inner Product Matrix of two basis - - """ - if other is None or self == other: - return self.gram_matrix() - - first = self.to_basis() - second = other.to_basis() - - inner = np.zeros((self.n_basis, other.n_basis)) - - for i in range(self.n_basis): - for j in range(other.n_basis): - inner[i, j] = first[i].inner_product(second[j], None, None) - - return inner - - def gram_matrix(self): - r"""Return the Gram Matrix of a basis - - The Gram Matrix is defined as - - .. 
math:: - G_{ij} = \langle\phi_i, \phi_j\rangle - - where :math:`\phi_i` is the ith element of the basis. This is a - symmetric matrix and positive-semidefinite. - - Returns: - numpy.array: Gram Matrix of the basis. - - """ - fbasis = self.to_basis() - - gram = np.zeros((self.n_basis, self.n_basis)) - - for i in range(fbasis.n_basis): - for j in range(i, fbasis.n_basis): - gram[i, j] = fbasis[i].inner_product(fbasis[j], None, None) - gram[j, i] = gram[i, j] - - return gram - - def inner_product(self, other): - return np.transpose(other.inner_product(self.to_basis())) - - def _add_same_basis(self, coefs1, coefs2): - return self.copy(), coefs1 + coefs2 - - def _add_constant(self, coefs, constant): - coefs = coefs.copy() - constant = np.array(constant) - coefs[:, 0] = coefs[:, 0] + constant - - return self.copy(), coefs - - def _sub_same_basis(self, coefs1, coefs2): - return self.copy(), coefs1 - coefs2 - - def _sub_constant(self, coefs, other): - coefs = coefs.copy() - other = np.array(other) - coefs[:, 0] = coefs[:, 0] - other - - return self.copy(), coefs - - def _mul_constant(self, coefs, other): - coefs = coefs.copy() - other = np.atleast_2d(other).reshape(-1, 1) - coefs = coefs * other - - return self.copy(), coefs - - def __repr__(self): - """Representation of a Basis object.""" - return (f"{self.__class__.__name__}(domain_range={self.domain_range}, " - f"n_basis={self.n_basis})") - - def __eq__(self, other): - """Equality of Basis""" - return (type(self) == type(other) - and _same_domain(self.domain_range, other.domain_range) - and self.n_basis == other.n_basis) - - -class Constant(Basis): - """Constant basis. - - Basis for constant functions - - Attributes: - domain_range (tuple): a tuple of length 2 containing the initial and - end values of the interval over which the basis can be evaluated. - - Examples: - Defines a contant base over the interval :math:`[0, 5]` consisting - on the constant function 1 on :math:`[0, 5]`. - - >>> bs_cons = Constant((0,5)) - - """ - - def __init__(self, domain_range=None): - """Constant basis constructor. - - Args: - domain_range (tuple): Tuple defining the domain over which the - function is defined. - - """ - super().__init__(domain_range, 1) - - def _ndegenerated(self, penalty_degree): - """Return number of 0 or nearly 0 eigenvalues of the penalty matrix. - - Args: - penalty_degree (int): Degree of the derivative used in the - calculation of the penalty matrix. - - Returns: - int: number of close to 0 eigenvalues. - - """ - return penalty_degree - - def _derivative(self, coefs, order=1): - return (self.copy(), coefs.copy() if order == 0 - else self.copy(), np.zeros(coefs.shape)) - - def _compute_matrix(self, eval_points, derivative=0): - """Compute the basis or its derivatives given a list of values. - - For each of the basis computes its value for each of the points in - the list passed as argument to the method. - - Args: - eval_points (array_like): List of points where the basis is - evaluated. - derivative (int, optional): Order of the derivative. Defaults to 0. - - Returns: - (:obj:`numpy.darray`): Matrix whose rows are the values of the each - basis function or its derivatives at the values specified in - eval_points. - - """ - return np.ones((1, len(eval_points))) if derivative == 0\ - else np.zeros((1, len(eval_points))) - - def penalty(self, derivative_degree=None, coefficients=None): - r"""Return a penalty matrix given a differential operator. 
- - The differential operator can be either a derivative of a certain - degree or a more complex operator. - - The penalty matrix is defined as [RS05-5-6-2-1]_: - - .. math:: - R_{ij} = \int L\phi_i(s) L\phi_j(s) ds - - where :math:`\phi_i(s)` for :math:`i=1, 2, ..., n` are the basis - functions and :math:`L` is a differential operator. - - Args: - derivative_degree (int): Integer indicating the order of the - derivative or . For instance 2 means that the differential - operator is :math:`f''(x)`. - coefficients (list): List of coefficients representing a - differential operator. An iterable indicating - coefficients of derivatives (which can be functions). For - instance the tuple (1, 0, numpy.sin) means :math:`1 - + sin(x)D^{2}`. Only used if derivative degree is None. - - - Returns: - numpy.array: Penalty matrix. - - Examples: - >>> Constant((0,5)).penalty(0) - array([[5]]) - >>> Constant().penalty(1) - array([[ 0.]]) - - References: - .. [RS05-5-6-2-1] Ramsay, J., Silverman, B. W. (2005). Specifying - the roughness penalty. In *Functional Data Analysis* - (pp. 106-107). Springer. - - """ - if derivative_degree is None: - return self._numerical_penalty(coefficients) - - return (np.full((1, 1), - (self.domain_range[0][1] - self.domain_range[0][0])) - if derivative_degree == 0 else np.zeros((1, 1))) - - def basis_of_product(self, other): - """Multiplication of a Constant Basis with other Basis""" - if not _same_domain(self.domain_range, other.domain_range): - raise ValueError("Ranges are not equal.") - - return other.copy() - - def rbasis_of_product(self, other): - """Multiplication of a Constant Basis with other Basis""" - return other.copy() - - def _to_R(self): - drange = self.domain_range[0] - return "create.constant.basis(rangeval = c(" + str(drange[0]) + "," +\ - str(drange[1]) + "))" - - -class Monomial(Basis): - """Monomial basis. - - Basis formed by powers of the argument :math:`t`: - - .. math:: - 1, t, t^2, t^3... - - Attributes: - domain_range (tuple): a tuple of length 2 containing the initial and - end values of the interval over which the basis can be evaluated. - n_basis (int): number of functions in the basis. - - Examples: - Defines a monomial base over the interval :math:`[0, 5]` consisting - on the first 3 powers of :math:`t`: :math:`1, t, t^2`. - - >>> bs_mon = Monomial((0,5), n_basis=3) - - And evaluates all the functions in the basis in a list of descrete - values. - - >>> bs_mon.evaluate([0, 1, 2]) - array([[ 1., 1., 1.], - [ 0., 1., 2.], - [ 0., 1., 4.]]) - - And also evaluates its derivatives - - >>> bs_mon.evaluate([0, 1, 2], derivative=1) - array([[ 0., 0., 0.], - [ 1., 1., 1.], - [ 0., 2., 4.]]) - >>> bs_mon.evaluate([0, 1, 2], derivative=2) - array([[ 0., 0., 0.], - [ 0., 0., 0.], - [ 2., 2., 2.]]) - - """ - - def _ndegenerated(self, penalty_degree): - """Return number of 0 or nearly 0 eigenvalues of the penalty matrix. - - Args: - penalty_degree (int): Degree of the derivative used in the - calculation of the penalty matrix. - - Returns: - int: number of close to 0 eigenvalues. - - """ - return penalty_degree - - def _compute_matrix(self, eval_points, derivative=0): - """Compute the basis or its derivatives given a list of values. - - For each of the basis computes its value for each of the points in - the list passed as argument to the method. - - Args: - eval_points (array_like): List of points where the basis is - evaluated. - derivative (int, optional): Order of the derivative. Defaults to 0. 
- - Returns: - (:obj:`numpy.darray`): Matrix whose rows are the values of the each - basis function or its derivatives at the values specified in - eval_points. - - """ - # Initialise empty matrix - mat = np.zeros((self.n_basis, len(eval_points))) - - # For each basis computes its value for each evaluation - if derivative == 0: - for i in range(self.n_basis): - mat[i] = eval_points ** i - else: - for i in range(self.n_basis): - if derivative <= i: - factor = i - for j in range(2, derivative + 1): - factor *= (i - j + 1) - mat[i] = factor * eval_points ** (i - derivative) - - return mat - - def _derivative(self, coefs, order=1): - return (Monomial(self.domain_range, self.n_basis - order), - np.array([np.polyder(x[::-1], order)[::-1] - for x in coefs])) - - def penalty(self, derivative_degree=None, coefficients=None): - r"""Return a penalty matrix given a differential operator. - - The differential operator can be either a derivative of a certain - degree or a more complex operator. - - The penalty matrix is defined as [RS05-5-6-2-2]_: - - .. math:: - R_{ij} = \int L\phi_i(s) L\phi_j(s) ds - - where :math:`\phi_i(s)` for :math:`i=1, 2, ..., n` are the basis - functions and :math:`L` is a differential operator. - - Args: - derivative_degree (int): Integer indicating the order of the - derivative or . For instance 2 means that the differential - operator is :math:`f''(x)`. - coefficients (list): List of coefficients representing a - differential operator. An iterable indicating - coefficients of derivatives (which can be functions). For - instance the tuple (1, 0, numpy.sin) means :math:`1 - + sin(x)D^{2}`. Only used if derivative degree is None. - - - Returns: - numpy.array: Penalty matrix. - - Examples: - >>> Monomial(n_basis=4).penalty(2) - array([[ 0., 0., 0., 0.], - [ 0., 0., 0., 0.], - [ 0., 0., 4., 6.], - [ 0., 0., 6., 12.]]) - - References: - .. [RS05-5-6-2-1] Ramsay, J., Silverman, B. W. (2005). Specifying - the roughness penalty. In *Functional Data Analysis* - (pp. 106-107). Springer. 
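As a quick hand check of the closed form above (a SciPy-only sketch, not part of the patch): the entry at row 2, column 3 of ``Monomial(n_basis=4).penalty(2)`` is the integral of the product of the second derivatives of :math:`t^2` and :math:`t^3`, matching the 6 in the example matrix.

>>> from scipy.integrate import quad
>>> # (d2/dt2 t**2) * (d2/dt2 t**3) == 2 * (6 * t) == 12 * t on [0, 1]
>>> round(quad(lambda t: 12 * t, 0, 1)[0], 6)
6.0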
- - """ - - if derivative_degree is None: - return self._numerical_penalty(coefficients) - - integration_domain = self.domain_range[0] - - # initialize penalty matrix as all zeros - penalty_matrix = np.zeros((self.n_basis, self.n_basis)) - # iterate over the cartesion product of the basis system with itself - for ibasis in range(self.n_basis): - # notice that the index ibasis it is also the exponent of the - # monomial - # ifac is the factor resulting of deriving the monomial as many - # times as indicates de differential operator - if derivative_degree > 0: - ifac = ibasis - for k in range(2, derivative_degree + 1): - ifac *= ibasis - k + 1 - else: - ifac = 1 - - for jbasis in range(self.n_basis): - # notice that the index jbasis it is also the exponent of the - # monomial - # jfac is the factor resulting of deriving the monomial as - # many times as indicates de differential operator - if derivative_degree > 0: - jfac = jbasis - for k in range(2, derivative_degree + 1): - jfac *= jbasis - k + 1 - else: - jfac = 1 - - # if any of the two monomial has lower degree than the order of - # the derivative indicated by the differential operator that - # factor equals 0, so no calculation are needed - if (ibasis >= derivative_degree - and jbasis >= derivative_degree): - # Calculates exactly the result of the integral - # Exponent after applying the differential operator and - # integrating - ipow = ibasis + jbasis - 2 * derivative_degree + 1 - # coefficient after integrating - penalty_matrix[ibasis, jbasis] = ( - ((integration_domain[1] ** ipow) - - (integration_domain[0] ** ipow)) * - ifac * jfac / ipow) - penalty_matrix[jbasis, ibasis] = penalty_matrix[ibasis, - jbasis] - - return penalty_matrix - - def basis_of_product(self, other): - """Multiplication of a Monomial Basis with other Basis""" - if not _same_domain(self.domain_range, other.domain_range): - raise ValueError("Ranges are not equal.") - - if isinstance(other, Monomial): - return Monomial(self.domain_range, self.n_basis + other.n_basis) - - return other.rbasis_of_product(self) - - def rbasis_of_product(self, other): - """Multiplication of a Monomial Basis with other Basis""" - return Basis.default_basis_of_product(self, other) - - def _to_R(self): - drange = self.domain_range[0] - return "create.monomial.basis(rangeval = c(" + str(drange[0]) + "," +\ - str(drange[1]) + "), n_basis = " + str(self.n_basis) + ")" - - -class BSpline(Basis): - r"""BSpline basis. - - BSpline basis elements are defined recursively as: - - .. math:: - B_{i, 1}(x) = 1 \quad \text{if } t_i \le x < t_{i+1}, - \quad 0 \text{ otherwise} - - .. math:: - B_{i, k}(x) = \frac{x - t_i}{t_{i+k} - t_i} B_{i, k-1}(x) - + \frac{t_{i+k+1} - x}{t_{i+k+1} - t_{i+1}} B_{i+1, k-1}(x) - - Where k indicates the order of the spline. - - Implementation details: In order to allow a discontinuous behaviour at - the boundaries of the domain it is necessary to placing m knots at the - boundaries [RS05]_. This is automatically done so that the user only has to - specify a single knot at the boundaries. - - Attributes: - domain_range (tuple): A tuple of length 2 containing the initial and - end values of the interval over which the basis can be evaluated. - n_basis (int): Number of functions in the basis. - order (int): Order of the splines. One greather than their degree. - knots (list): List of knots of the spline functions. - - Examples: - Constructs specifying number of basis and order. 
- - >>> bss = BSpline(n_basis=8, order=4) - - If no order is specified defaults to 4 because cubic splines are - the most used. So the previous example is the same as: - - >>> bss = BSpline(n_basis=8) - - It is also possible to create a BSpline basis specifying the knots. - - >>> bss = BSpline(knots=[0, 0.2, 0.4, 0.6, 0.8, 1]) - - Once we create a basis we can evaluate each of its functions at a - set of points. - - >>> bss = BSpline(n_basis=3, order=3) - >>> bss.evaluate([0, 0.5, 1]) - array([[ 1. , 0.25, 0. ], - [ 0. , 0.5 , 0. ], - [ 0. , 0.25, 1. ]]) - - And evaluates first derivative - - >>> bss.evaluate([0, 0.5, 1], derivative=1) - array([[-2., -1., 0.], - [ 2., 0., -2.], - [ 0., 1., 2.]]) - - References: - .. [RS05] Ramsay, J., Silverman, B. W. (2005). *Functional Data - Analysis*. Springer. 50-51. - - """ - - def __init__(self, domain_range=None, n_basis=None, order=4, knots=None): - """Bspline basis constructor. - - Args: - domain_range (tuple, optional): Definition of the interval where - the basis defines a space. Defaults to (0,1) if knots are not - specified. If knots are specified defaults to the first and - last element of the knots. - n_basis (int, optional): Number of splines that form the basis. - order (int, optional): Order of the splines. One greater that - their degree. Defaults to 4 which mean cubic splines. - knots (array_like): List of knots of the splines. If domain_range - is specified the first and last elements of the knots have to - match with it. - - """ - - if domain_range is not None: - domain_range = _list_of_arrays(domain_range) - - if len(domain_range) != 1: - raise ValueError("Domain range should be unidimensional.") - - domain_range = domain_range[0] - - # Knots default to equally space points in the domain_range - if knots is None: - if n_basis is None: - raise ValueError("Must provide either a list of knots or the" - "number of basis.") - else: - knots = list(knots) - knots.sort() - if domain_range is None: - domain_range = (knots[0], knots[-1]) - else: - if domain_range[0] != knots[0] or domain_range[1] != knots[-1]: - raise ValueError("The ends of the knots must be the same " - "as the domain_range.") - - # n_basis default to number of knots + order of the splines - 2 - if n_basis is None: - n_basis = len(knots) + order - 2 - - if (n_basis - order + 2) < 2: - raise ValueError(f"The number of basis ({n_basis}) minus the order " - f"of the bspline ({order}) should be greater " - f"than 3.") - - self.order = order - self.knots = None if knots is None else list(knots) - super().__init__(domain_range, n_basis) - - # Checks - if self.n_basis != self.order + len(self.knots) - 2: - raise ValueError(f"The number of basis ({self.n_basis}) has to " - f"equal the order ({self.order}) plus the " - f"number of knots ({len(self.knots)}) minus 2.") - - @property - def knots(self): - if self._knots is None: - return list(np.linspace(*self.domain_range[0], - self.n_basis - self.order + 2)) - else: - return self._knots - - @knots.setter - def knots(self, value): - self._knots = value - - def _ndegenerated(self, penalty_degree): - """Return number of 0 or nearly to 0 eigenvalues of the penalty matrix. - - Args: - penalty_degree (int): Degree of the derivative used in the - calculation of the penalty matrix. - - Returns: - int: number of close to 0 eigenvalues. - - """ - return penalty_degree - - def _compute_matrix(self, eval_points, derivative=0): - """Compute the basis or its derivatives given a list of values. 
- - It uses the scipy implementation of BSplines to compute the values - for each element of the basis. - - Args: - eval_points (array_like): List of points where the basis system is - evaluated. - derivative (int, optional): Order of the derivative. Defaults to 0. - - Returns: - (:obj:`numpy.darray`): Matrix whose rows are the values of the each - basis function or its derivatives at the values specified in - eval_points. - - Implementation details: In order to allow a discontinuous behaviour at - the boundaries of the domain it is necessary to placing m knots at the - boundaries [RS05]_. This is automatically done so that the user only - has to specify a single knot at the boundaries. - - References: - .. [RS05] Ramsay, J., Silverman, B. W. (2005). *Functional Data - Analysis*. Springer. 50-51. - - """ - # Places m knots at the boundaries - knots = np.array([self.knots[0]] * (self.order - 1) + self.knots + - [self.knots[-1]] * (self.order - 1)) - # c is used the select which spline the function splev below computes - c = np.zeros(len(knots)) - - # Initialise empty matrix - mat = np.empty((self.n_basis, len(eval_points))) - - # For each basis computes its value for each evaluation point - for i in range(self.n_basis): - # write a 1 in c in the position of the spline calculated in each - # iteration - c[i] = 1 - # compute the spline - mat[i] = scipy.interpolate.splev(eval_points, (knots, c, - self.order - 1), - der=derivative) - c[i] = 0 - - return mat - - def _derivative(self, coefs, order=1): - deriv_splines = [self._to_scipy_BSpline(coefs[i]).derivative(order) - for i in range(coefs.shape[0])] - - deriv_coefs = [BSpline._from_scipy_BSpline(spline)[1] - for spline in deriv_splines] - - deriv_basis = BSpline._from_scipy_BSpline(deriv_splines[0])[0] - - return deriv_basis, np.array(deriv_coefs)[:, 0:deriv_basis.n_basis] - - def penalty(self, derivative_degree=None, coefficients=None): - r"""Return a penalty matrix given a differential operator. - - The differential operator can be either a derivative of a certain - degree or a more complex operator. - - The penalty matrix is defined as [RS05-5-6-2-3]_: - - .. math:: - R_{ij} = \int L\phi_i(s) L\phi_j(s) ds - - where :math:`\phi_i(s)` for :math:`i=1, 2, ..., n` are the basis - functions and :math:`L` is a differential operator. - - Args: - derivative_degree (int): Integer indicating the order of the - derivative or . For instance 2 means that the differential - operator is :math:`f''(x)`. - coefficients (list): List of coefficients representing a - differential operator. An iterable indicating - coefficients of derivatives (which can be functions). For - instance the tuple (1, 0, numpy.sin) means :math:`1 - + sin(x)D^{2}`. Only used if derivative degree is None. - - Returns: - numpy.array: Penalty matrix. - - References: - .. [RS05-5-6-2-1] Ramsay, J., Silverman, B. W. (2005). Specifying - the roughness penalty. In *Functional Data Analysis* - (pp. 106-107). Springer. 
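A hand check of one diagonal entry of this penalty, using SciPy's spline utilities directly (a sketch under the knot conventions described above, not part of the patch): the first cubic basis function on the clamped knot vector below is :math:`(1 - 2t)^3` on :math:`[0, 0.5]`, and its squared second derivative integrates to 96.

>>> import numpy as np
>>> from scipy.integrate import quad
>>> from scipy.interpolate import BSpline as SciBSpline
>>> knots = [0, 0, 0, 0, 0.5, 1, 1, 1, 1]  # order 4, boundary knots repeated
>>> c = np.zeros(len(knots))
>>> c[0] = 1  # select the first basis function
>>> d2 = SciBSpline(knots, c, 3).derivative(2)  # equals 24 * (1 - 2t) here
>>> round(quad(lambda t: float(d2(t)) ** 2, 0, 0.5)[0], 6)
96.0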
- - """ - if derivative_degree is not None: - if derivative_degree >= self.order: - raise ValueError(f"Penalty matrix cannot be evaluated for " - f"derivative of order {derivative_degree} for" - f" B-splines of order {self.order}") - if derivative_degree == self.order - 1: - # The derivative of the bsplines are constant in the intervals - # defined between knots - knots = np.array(self.knots) - mid_inter = (knots[1:] + knots[:-1]) / 2 - constants = self.evaluate(mid_inter, - derivative=derivative_degree).T - knots_intervals = np.diff(self.knots) - # Integration of product of constants - return constants.T @ np.diag(knots_intervals) @ constants - - if np.all(np.diff(self.knots) != 0): - # Compute exactly using the piecewise polynomial - # representation of splines - - # Places m knots at the boundaries - knots = np.array( - [self.knots[0]] * (self.order - 1) + self.knots - + [self.knots[-1]] * (self.order - 1)) - # c is used the select which spline the function - # PPoly.from_spline below computes - c = np.zeros(len(knots)) - - # Initialise empty list to store the piecewise polynomials - ppoly_lst = [] - - no_0_intervals = np.where(np.diff(knots) > 0)[0] - - # For each basis gets its piecewise polynomial representation - for i in range(self.n_basis): - # write a 1 in c in the position of the spline - # transformed in each iteration - c[i] = 1 - # gets the piecewise polynomial representation and gets - # only the positions for no zero length intervals - # This polynomial are defined relatively to the knots - # meaning that the column i corresponds to the ith knot. - # Let the ith not be a - # Then f(x) = pp(x - a) - pp = (PPoly.from_spline( - (knots, c, self.order - 1)).c[:, no_0_intervals]) # We need the actual coefficients of f, not pp. So we - # just recursively calculate the new coefficients - coeffs = pp.copy() - for j in range(self.order - 1): - coeffs[j + 1:] += ( - (binom(self.order - j - 1, - range(1, self.order - j)) * - np.vstack([(-a) ** - np.array(range(1, self.order - j)) - for a in self.knots[:-1]])).T * - pp[j]) - ppoly_lst.append(coeffs) - c[i] = 0 - - # Now for each pair of basis computes the inner product after - # applying the linear differential operator - penalty_matrix = np.zeros((self.n_basis, self.n_basis)) - for interval in range(len(no_0_intervals)): - for i in range(self.n_basis): - poly_i = np.trim_zeros(ppoly_lst[i][:, - interval], 'f') - if len(poly_i) <= derivative_degree: - # if the order of the polynomial is lesser or - # equal to the derivative the result of the - # integral will be 0 - continue - # indefinite integral - integral = polyint(_polypow(polyder( - poly_i, derivative_degree), 2)) - # definite integral - penalty_matrix[i, i] += np.diff(polyval( - integral, self.knots[interval: interval + 2]))[0] - - for j in range(i + 1, self.n_basis): - poly_j = np.trim_zeros(ppoly_lst[j][:, - interval], 'f') - if len(poly_j) <= derivative_degree: - # if the order of the polynomial is lesser - # or equal to the derivative the result of - # the integral will be 0 - continue - # indefinite integral - integral = polyint( - polymul(polyder(poly_i, derivative_degree), - polyder(poly_j, derivative_degree))) - # definite integral - penalty_matrix[i, j] += np.diff(polyval( - integral, self.knots[interval: interval + 2]) - )[0] - penalty_matrix[j, i] = penalty_matrix[i, j] - return penalty_matrix - else: - # if the order of the derivative is greater or equal to the order - # of the bspline minus 1 - if len(coefficients) >= self.order: - raise ValueError(f"Penalty matrix 
cannot be evaluated for " - f"derivative of order {len(coefficients) - 1}" - f" for B-splines of order {self.order}") - - # compute using the inner product - return self._numerical_penalty(coefficients) - - def rescale(self, domain_range=None): - r"""Return a copy of the basis with a new domain range, with the - corresponding values rescaled to the new bounds. - The knots of the BSpline will be rescaled in the new interval. - - Args: - domain_range (tuple, optional): Definition of the interval - where the basis defines a space. Defaults uses the same as - the original basis. - """ - - knots = np.array(self.knots, dtype=np.dtype('float')) - - if domain_range is not None: # Rescales the knots - knots -= knots[0] - knots *= ((domain_range[1] - domain_range[0] - ) / (self.knots[-1] - self.knots[0])) - knots += domain_range[0] - - # Fix possible round error - knots[0] = domain_range[0] - knots[-1] = domain_range[1] - - else: - # TODO: Allow multiple dimensions - domain_range = self.domain_range[0] - - return BSpline(domain_range, self.n_basis, self.order, knots) - - def __repr__(self): - """Representation of a BSpline basis.""" - return (f"{self.__class__.__name__}(domain_range={self.domain_range}, " - f"n_basis={self.n_basis}, order={self.order}, " - f"knots={self.knots})") - - def __eq__(self, other): - """Equality of Basis""" - return (super().__eq__(other) - and self.order == other.order - and self.knots == other.knots) - - def basis_of_product(self, other): - """Multiplication of two Bspline Basis""" - if not _same_domain(self.domain_range, other.domain_range): - raise ValueError("Ranges are not equal.") - - if isinstance(other, Constant): - return other.rbasis_of_product(self) - - if isinstance(other, BSpline): - uniqueknots = np.union1d(self.inknots, other.inknots) - - multunique = np.zeros(len(uniqueknots), dtype=np.int32) - for i in range(len(uniqueknots)): - mult1 = np.count_nonzero(self.inknots == uniqueknots[i]) - mult2 = np.count_nonzero(other.inknots == uniqueknots[i]) - multunique[i] = max(mult1, mult2) - - m2 = 0 - allknots = np.zeros(np.sum(multunique)) - for i in range(len(uniqueknots)): - m1 = m2 - m2 = m2 + multunique[i] - allknots[m1:m2] = uniqueknots[i] - - norder1 = self.n_basis - len(self.inknots) - norder2 = other.n_basis - len(other.inknots) - norder = min(norder1 + norder2 - 1, 20) - - allbreaks = ([self.domain_range[0][0]] + - np.ndarray.tolist(allknots) + - [self.domain_range[0][1]]) - n_basis = len(allbreaks) + norder - 2 - return BSpline(self.domain_range, n_basis, norder, allbreaks) - else: - norder = min(self.n_basis - len(self.inknots) + 2, 8) - n_basis = max(self.n_basis + other.n_basis, norder + 1) - return BSpline(self.domain_range, n_basis, norder) - - def rbasis_of_product(self, other): - """Multiplication of a Bspline Basis with other basis""" - - norder = min(self.n_basis - len(self.inknots) + 2, 8) - n_basis = max(self.n_basis + other.n_basis, norder + 1) - return BSpline(self.domain_range, n_basis, norder) - - def _to_R(self): - drange = self.domain_range[0] - return ("create.bspline.basis(rangeval = c(" + str(drange[0]) + "," + - str(drange[1]) + "), n_basis = " + str(self.n_basis) + - ", norder = " + str(self.order) + ", breaks = " + - self._list_to_R(self.knots) + ")") - - def _to_scipy_BSpline(self, coefs): - - knots = np.concatenate(( - np.repeat(self.knots[0], self.order - 1), - self.knots, - np.repeat(self.knots[-1], self.order - 1))) - - return SciBSpline(knots, coefs, self.order - 1) - - @staticmethod - def _from_scipy_BSpline(bspline): - 
order = bspline.k - knots = bspline.t[order: -order] - coefs = bspline.c - domain_range = [knots[0], knots[-1]] - - return BSpline(domain_range, order=order + 1, knots=knots), coefs - - @property - def inknots(self): - """Return number of basis.""" - return self.knots[1:len(self.knots) - 1] - - -class Fourier(Basis): - r"""Fourier basis. - - Defines a functional basis for representing functions on a fourier - series expansion of period :math:`T`. The number of basis is always odd. - If instantiated with an even number of basis, they will be incremented - automatically by one. - - .. math:: - \phi_0(t) = \frac{1}{\sqrt{2}} - - .. math:: - \phi_{2n -1}(t) = sin\left(\frac{2 \pi n}{T} t\right) - - .. math:: - \phi_{2n}(t) = cos\left(\frac{2 \pi n}{T} t\right) - - Actually this basis functions are not orthogonal but not orthonormal. To - achieve this they are divided by its norm: :math:`\sqrt{\frac{T}{2}}`. - - Attributes: - domain_range (tuple): A tuple of length 2 containing the initial and - end values of the interval over which the basis can be evaluated. - n_basis (int): Number of functions in the basis. - period (int or float): Period (:math:`T`). - - Examples: - Constructs specifying number of basis, definition interval and period. - - >>> fb = Fourier((0, np.pi), n_basis=3, period=1) - >>> fb.evaluate([0, np.pi / 4, np.pi / 2, np.pi]).round(2) - array([[ 1. , 1. , 1. , 1. ], - [ 0. , -1.38, -0.61, 1.1 ], - [ 1.41, 0.31, -1.28, 0.89]]) - - And evaluate second derivative - - >>> fb.evaluate([0, np.pi / 4, np.pi / 2, np.pi], - ... derivative = 2).round(2) - array([[ 0. , 0. , 0. , 0. ], - [ -0. , 54.46, 24.02, -43.37], - [-55.83, -12.32, 50.4 , -35.16]]) - - - - """ - - def __init__(self, domain_range=None, n_basis=3, period=None): - """Construct a Fourier object. - - It forces the object to have an odd number of basis. If n_basis is - even, it is incremented by one. - - Args: - domain_range (tuple): Tuple defining the domain over which the - function is defined. - n_basis (int): Number of basis functions. - period (int or float): Period of the trigonometric functions that - define the basis. - - """ - - if domain_range is not None: - domain_range = _list_of_arrays(domain_range) - - if len(domain_range) != 1: - raise ValueError("Domain range should be unidimensional.") - - domain_range = domain_range[0] - - self.period = period - # If number of basis is even, add 1 - n_basis += 1 - n_basis % 2 - super().__init__(domain_range, n_basis) - - @property - def period(self): - if self._period is None: - return self.domain_range[0][1] - self.domain_range[0][0] - else: - return self._period - - @period.setter - def period(self, value): - self._period = value - - def _compute_matrix(self, eval_points, derivative=0): - """Compute the basis or its derivatives given a list of values. - - Args: - eval_points (array_like): List of points where the basis is - evaluated. - derivative (int, optional): Order of the derivative. Defaults to 0. - - Returns: - (:obj:`numpy.darray`): Matrix whose rows are the values of the each - basis function or its derivatives at the values specified in - eval_points. 
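The normalisation mentioned above can be checked directly (a package-independent sketch): after dividing by :math:`\sqrt{T / 2}`, each sine and cosine has unit norm over one period.

>>> import numpy as np
>>> from scipy.integrate import quad
>>> T = 1.0
>>> phi = lambda t: np.sin(2 * np.pi * t / T) / np.sqrt(T / 2)
>>> round(quad(lambda t: phi(t) ** 2, 0, T)[0], 6)
1.0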
- - """ - if derivative < 0: - raise ValueError("derivative only takes non-negative values.") - - omega = 2 * np.pi / self.period - omega_t = omega * eval_points - n_basis = self.n_basis if self.n_basis % 2 != 0 else self.n_basis + 1 - - # Initialise empty matrix - mat = np.empty((self.n_basis, len(eval_points))) - if derivative == 0: - # First base function is a constant - # The division by numpy.sqrt(2) is so that it has the same norm as - # the sine and cosine: sqrt(period / 2) - mat[0] = np.ones(len(eval_points)) / np.sqrt(2) - if n_basis > 1: - # 2*pi*n*x / period - args = np.outer(range(1, n_basis // 2 + 1), omega_t) - index = range(1, n_basis - 1, 2) - # odd indexes are sine functions - mat[index] = np.sin(args) - index = range(2, n_basis, 2) - # even indexes are cosine functions - mat[index] = np.cos(args) - # evaluates the derivatives - else: - # First base function is a constant, so its derivative is 0. - mat[0] = np.zeros(len(eval_points)) - if n_basis > 1: - # (2*pi*n / period) ^ n_derivative - factor = np.outer( - (-1) ** (derivative // 2) * - (np.array(range(1, n_basis // 2 + 1)) * omega) ** - derivative, - np.ones(len(eval_points))) - # 2*pi*n*x / period - args = np.outer(range(1, n_basis // 2 + 1), omega_t) - # even indexes - index_e = range(2, n_basis, 2) - # odd indexes - index_o = range(1, n_basis - 1, 2) - if derivative % 2 == 0: - mat[index_o] = factor * np.sin(args) - mat[index_e] = factor * np.cos(args) - else: - mat[index_o] = factor * np.cos(args) - mat[index_e] = -factor * np.sin(args) - - # normalise - mat = mat / np.sqrt(self.period / 2) - return mat - - def _ndegenerated(self, penalty_degree): - """Return number of 0 or nearly 0 eigenvalues of the penalty matrix. - - Args: - penalty_degree (int): Degree of the derivative used in the - calculation of the penalty matrix. - - Returns: - int: number of close to 0 eigenvalues. - - """ - return 0 if penalty_degree == 0 else 1 - - def _derivative(self, coefs, order=1): - - omega = 2 * np.pi / self.period - deriv_factor = (np.arange(1, (self.n_basis + 1) / 2) * omega) ** order - - deriv_coefs = np.zeros(coefs.shape) - - cos_sign, sin_sign = ((-1) ** int((order + 1) / 2), - (-1) ** int(order / 2)) - - if order % 2 == 0: - deriv_coefs[:, 1::2] = sin_sign * coefs[:, 1::2] * deriv_factor - deriv_coefs[:, 2::2] = cos_sign * coefs[:, 2::2] * deriv_factor - else: - deriv_coefs[:, 2::2] = sin_sign * coefs[:, 1::2] * deriv_factor - deriv_coefs[:, 1::2] = cos_sign * coefs[:, 2::2] * deriv_factor - - # normalise - return self.copy(), deriv_coefs - - def penalty(self, derivative_degree=None, coefficients=None): - r"""Return a penalty matrix given a differential operator. - - The differential operator can be either a derivative of a certain - degree or a more complex operator. - - The penalty matrix is defined as [RS05-5-6-2-4]_: - - .. math:: - R_{ij} = \int L\phi_i(s) L\phi_j(s) ds - - where :math:`\phi_i(s)` for :math:`i=1, 2, ..., n` are the basis - functions and :math:`L` is a differential operator. - - Args: - derivative_degree (int): Integer indicating the order of the - derivative or . For instance 2 means that the differential - operator is :math:`f''(x)`. - coefficients (list): List of coefficients representing a - differential operator. An iterable indicating - coefficients of derivatives (which can be functions). For - instance the tuple (1, 0, numpy.sin) means :math:`1 - + sin(x)D^{2}`. Only used if derivative degree is None. - - Returns: - numpy.array: Penalty matrix. - - References: - .. 
[RS05-5-6-2-4] Ramsay, J., Silverman, B. W. (2005). Specifying
-            the roughness penalty. In *Functional Data Analysis*
-            (pp. 106-107). Springer.
-
-        """
-        if isinstance(derivative_degree, int):
-            omega = 2 * np.pi / self.period
-            # The derivatives of the basis functions are also orthogonal,
-            # so only the diagonal is different from 0.
-            penalty_matrix = np.zeros(self.n_basis)
-            if derivative_degree == 0:
-                penalty_matrix[0] = 1
-            else:
-                # The derivative of a constant is 0 and the first basis
-                # function is a constant.
-                penalty_matrix[0] = 0
-            index_even = np.array(range(2, self.n_basis, 2))
-            exponents = index_even / 2
-            # Factor resulting from deriving the basis function as many
-            # times as indicated by derivative_degree.
-            factor = (exponents * omega) ** (2 * derivative_degree)
-            # The basis functions have unit norm, so the integral reduces
-            # to this factor.
-            penalty_matrix[index_even - 1] = factor
-            penalty_matrix[index_even] = factor
-            return np.diag(penalty_matrix)
-        else:
-            # Implement using the inner product.
-            return self._numerical_penalty(coefficients)
-
-    def basis_of_product(self, other):
-        """Multiplication of two Fourier bases."""
-        if not _same_domain(self.domain_range, other.domain_range):
-            raise ValueError("Ranges are not equal.")
-
-        if isinstance(other, Fourier) and self.period == other.period:
-            return Fourier(self.domain_range, self.n_basis + other.n_basis - 1,
-                           self.period)
-        else:
-            return other.rbasis_of_product(self)
-
-    def rbasis_of_product(self, other):
-        """Multiplication of a Fourier basis with another basis."""
-        return Basis.default_basis_of_product(other, self)
-
-    def rescale(self, domain_range=None, *, rescale_period=False):
-        r"""Return a copy of the basis with a new domain range, with the
-        corresponding values rescaled to the new bounds.
-
-        Args:
-            domain_range (tuple, optional): Definition of the interval
-                where the basis defines a space. By default uses the same
-                as the original basis.
-            rescale_period (bool, optional): If True the period will be
-                rescaled using the ratio between the lengths of the new
-                and old intervals. Defaults to False.
-        """
-
-        rescale_basis = super().rescale(domain_range)
-
-        if rescale_period is False:
-            rescale_basis.period = self.period
-        else:
-            domain_rescaled = rescale_basis.domain_range[0]
-            domain = self.domain_range[0]
-
-            rescale_basis.period = (self.period *
-                                    (domain_rescaled[1] - domain_rescaled[0]) /
-                                    (domain[1] - domain[0]))
-
-        return rescale_basis
-
-    def _to_R(self):
-        drange = self.domain_range[0]
-        return ("create.fourier.basis(rangeval = c(" + str(drange[0]) + "," +
-                str(drange[1]) + "), n_basis = " + str(self.n_basis) +
-                ", period = " + str(self.period) + ")")
-
-    def __repr__(self):
-        """Representation of a Fourier basis."""
-        return (f"{self.__class__.__name__}(domain_range={self.domain_range}, "
-                f"n_basis={self.n_basis}, period={self.period})")
-
-    def __eq__(self, other):
-        """Equality of Basis."""
-        return super().__eq__(other) and self.period == other.period
-
-
-class FDataBasis(FData):
-    r"""Basis representation of functional data.
-
-    Class representation for functional data in the form of a set of basis
-    functions multiplied by a set of coefficients.
-
-    .. math::
-        f(x) = \sum_{k=1}^{K} c_k \phi_k(x)
-
-    where :math:`K` is the number of basis functions, :math:`c = (c_1, c_2,
-    ..., c_K)` the vector of coefficients and :math:`\phi = (\phi_1, \phi_2,
-    ..., \phi_K)` the basis function system.
-
-    Attributes:
-        basis (:obj:`Basis`): Basis function system.
- coefficients (numpy.darray): List or matrix of coefficients. Has to - have the same length or number of columns as the number of basis - function in the basis. If a matrix, each row contains the - coefficients that multiplied by the basis functions produce each - functional datum. - - Examples: - >>> basis = Monomial(n_basis=4) - >>> coefficients = [1, 1, 3, .5] - >>> FDataBasis(basis, coefficients) - FDataBasis( - basis=Monomial(domain_range=[array([0, 1])], n_basis=4), - coefficients=[[ 1. 1. 3. 0.5]], - ...) - - """ - class _CoordinateIterator: - """Internal class to iterate through the image coordinates. - - Dummy object. Should be change to support multidimensional objects. - - """ - - def __init__(self, fdatabasis): - """Create an iterator through the image coordinates.""" - self._fdatabasis = fdatabasis - - def __iter__(self): - """Return an iterator through the image coordinates.""" - yield self._fdatabasis.copy() - - def __getitem__(self, key): - """Get a specific coordinate.""" - - if key != 0: - return NotImplemented - - return self._fdatabasis.copy() - - def __len__(self): - """Return the number of coordinates.""" - return self._fdatabasis.dim_codomain - - def __init__(self, basis, coefficients, *, dataset_label=None, - axes_labels=None, extrapolation=None, keepdims=False): - """Construct a FDataBasis object. - - Args: - basis (:obj:`Basis`): Basis function system. - coefficients (array_like): List or matrix of coefficients. Has to - have the same length or number of columns as the number of - basis function in the basis. - """ - coefficients = np.atleast_2d(coefficients) - if coefficients.shape[1] != basis.n_basis: - raise ValueError("The length or number of columns of coefficients " - "has to be the same equal to the number of " - "elements of the basis.") - self.basis = basis - self.coefficients = coefficients - - super().__init__(extrapolation, dataset_label, axes_labels, keepdims) - - @classmethod - def from_data(cls, data_matrix, sample_points, basis, - method='cholesky', keepdims=False): - r"""Transform raw data to a smooth functional form. - - Takes functional data in a discrete form and makes an approximates it - to the closest function that can be generated by the basis. This - function does not attempt to smooth the original data. If smoothing - is desired, it is better to use :class:`BasisSmoother`. - - The fit is made so as to reduce the sum of squared errors - [RS05-5-2-5]_: - - .. math:: - - SSE(c) = (y - \Phi c)' (y - \Phi c) - - where :math:`y` is the vector or matrix of observations, :math:`\Phi` - the matrix whose columns are the basis functions evaluated at the - sampling points and :math:`c` the coefficient vector or matrix to be - estimated. - - By deriving the first formula we obtain the closed formed of the - estimated coefficients matrix: - - .. math:: - - \hat{c} = \left( \Phi' \Phi \right)^{-1} \Phi' y - - The solution of this matrix equation is done using the cholesky - method for the resolution of a LS problem. If this method throughs a - rounding error warning you may want to use the QR factorisation that - is more numerically stable despite being more expensive to compute. - [RS05-5-2-7]_ - - Args: - data_matrix (array_like): List or matrix containing the - observations. If a matrix each row represents a single - functional datum and the columns the different observations. - sample_points (array_like): Values of the domain where the previous - data were taken. - basis: (Basis): Basis used. 
- method (str): Algorithm used for calculating the coefficients using - the least squares method. The values admitted are 'cholesky' - and 'qr' for Cholesky and QR factorisation methods - respectively. - - Returns: - FDataBasis: Represention of the data in a functional form as - product of coefficients by basis functions. - - Examples: - >>> import numpy as np - >>> t = np.linspace(0, 1, 5) - >>> x = np.sin(2 * np.pi * t) + np.cos(2 * np.pi * t) - >>> x - array([ 1., 1., -1., -1., 1.]) - - >>> basis = Fourier((0, 1), n_basis=3) - >>> fd = FDataBasis.from_data(x, t, basis) - >>> fd.coefficients.round(2) - array([[ 0. , 0.71, 0.71]]) - - References: - .. [RS05-5-2-5] Ramsay, J., Silverman, B. W. (2005). How spline - smooths are computed. In *Functional Data Analysis* - (pp. 86-87). Springer. - - .. [RS05-5-2-7] Ramsay, J., Silverman, B. W. (2005). HSpline - smoothing as an augmented least squares problem. In *Functional - Data Analysis* (pp. 86-87). Springer. - - """ - from ..preprocessing.smoothing import BasisSmoother - from .grid import FDataGrid - - # n is the samples - # m is the observations - # k is the number of elements of the basis - - # Each sample in a column (m x n) - data_matrix = np.atleast_2d(data_matrix) - - fd = FDataGrid(data_matrix=data_matrix, sample_points=sample_points) - - smoother = BasisSmoother( - basis=basis, - method=method, - return_basis=True) - - return smoother.fit_transform(fd) - - @property - def n_samples(self): - """Return number of samples.""" - return self.coefficients.shape[0] - - @property - def dim_domain(self): - """Return number of dimensions of the domain.""" - - # Only domain dimension equal to 1 is supported - return 1 - - @property - def dim_codomain(self): - """Return number of dimensions of the image.""" - - # Only image dimension equal to 1 is supported - return 1 - - @property - def coordinates(self): - r"""Return a component of the FDataBasis. - - If the functional object contains samples - :math:`f: \mathbb{R}^n \rightarrow \mathbb{R}^d`, this object allows - a component of the vector :math:`f = (f_1, ..., f_d)`. - - - Todo: - By the moment, only unidimensional objects are supported in basis - form. - - """ - - return FDataBasis._CoordinateIterator(self) - - @property - def n_basis(self): - """Return number of basis.""" - return self.basis.n_basis - - @property - def domain_range(self): - """Definition range.""" - return self.basis.domain_range - - def _evaluate(self, eval_points, *, derivative=0): - """"Evaluate the object or its derivatives at a list of values. - - Args: - eval_points (array_like): List of points where the functions are - evaluated. If a matrix of shape `n_samples` x eval_points is - given each sample is evaluated at the values in the - corresponding row. - derivative (int, optional): Order of the derivative. Defaults to 0. - - - Returns: - (numpy.darray): Matrix whose rows are the values of the each - function at the values specified in eval_points. - - """ - #  Only suported 1D objects - eval_points = eval_points[:, 0] - - # each row contains the values of one element of the basis - basis_values = self.basis.evaluate(eval_points, derivative) - - res = np.tensordot(self.coefficients, basis_values, axes=(1, 0)) - - return res.reshape((self.n_samples, len(eval_points), 1)) - - def _evaluate_composed(self, eval_points, *, derivative=0): - r"""Evaluate the object or its derivatives at a list of values with a - different time for each sample. 
- - Returns a numpy array with the component (i,j) equal to :math:`f_i(t_j - + \delta_i)`. - - This method has to evaluate the basis values once per sample - instead of reuse the same evaluation for all the samples - as :func:`evaluate`. - - Args: - eval_points (numpy.ndarray): Matrix of size `n_samples`x n_points - derivative (int, optional): Order of the derivative. Defaults to 0. - extrapolation (str or Extrapolation, optional): Controls the - extrapolation mode for elements outside the domain range. - By default uses the method defined in fd. See extrapolation to - more information. - Returns: - (numpy.darray): Matrix whose rows are the values of the each - function at the values specified in eval_points with the - corresponding shift. - """ - - eval_points = eval_points[..., 0] - - res_matrix = np.empty((self.n_samples, eval_points.shape[1])) - - _matrix = np.empty((eval_points.shape[1], self.n_basis)) - - for i in range(self.n_samples): - basis_values = self.basis.evaluate(eval_points[i], derivative).T - - np.multiply(basis_values, self.coefficients[i], out=_matrix) - np.sum(_matrix, axis=1, out=res_matrix[i]) - - return res_matrix.reshape((self.n_samples, eval_points.shape[1], 1)) - - def shift(self, shifts, *, restrict_domain=False, extrapolation=None, - eval_points=None, **kwargs): - r"""Perform a shift of the curves. - - Args: - shifts (array_like or numeric): List with the the shift - corresponding for each sample or numeric with the shift to - apply to all samples. - restrict_domain (bool, optional): If True restricts the domain to - avoid evaluate points outside the domain using extrapolation. - Defaults uses extrapolation. - extrapolation (str or Extrapolation, optional): Controls the - extrapolation mode for elements outside the domain range. - By default uses the method defined in fd. See extrapolation to - more information. - eval_points (array_like, optional): Set of points where - the functions are evaluated to obtain the discrete - representation of the object to operate. If an empty list is - passed it calls numpy.linspace with bounds equal to the ones - defined in fd.domain_range and the number of points the maximum - between 201 and 10 times the number of basis plus 1. - **kwargs: Keyword arguments to be passed to :meth:`from_data`. - - Returns: - :obj:`FDataBasis` with the shifted data. 
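A usage sketch of the scalar-shift path (hypothetical coefficients; it assumes the ``FDataBasis`` and ``Fourier`` names exported by this package):

import numpy as np
from skfda.representation.basis import FDataBasis, Fourier

# One sample, sin(2*pi*t) on [0, 1]; the basis functions are normalised,
# hence the 1 / sqrt(2) coefficient on the first sine.
fd = FDataBasis(Fourier((0, 1), n_basis=3), [[0, 1 / np.sqrt(2), 0]])

# Shift by half a period: sin(2*pi*(t + 0.5)) == -sin(2*pi*t). The result
# is re-fitted with from_data on a fine grid, as described above.
shifted = fd.shift(0.5)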
- """ - - if self.dim_codomain > 1 or self.dim_domain > 1: - raise ValueError - - domain_range = self.domain_range[0] - - if eval_points is None: # Grid to discretize the function - nfine = max(self.n_basis * 10 + 1, constants.N_POINTS_COARSE_MESH) - eval_points = np.linspace(*domain_range, nfine) - else: - eval_points = np.asarray(eval_points) - - if np.isscalar(shifts): # Special case, all curves with same shift - - _basis = self.basis.rescale((domain_range[0] + shifts, - domain_range[1] + shifts)) - - return FDataBasis.from_data(self.evaluate(eval_points, - keepdims=False), - eval_points + shifts, - _basis, **kwargs) - - elif len(shifts) != self.n_samples: - raise ValueError(f"shifts vector ({len(shifts)}) must have the " - f"same length than the number of samples " - f"({self.n_samples})") - - if restrict_domain: - a = domain_range[0] - min(np.min(shifts), 0) - b = domain_range[1] - max(np.max(shifts), 0) - domain = (a, b) - eval_points = eval_points[ - np.logical_and(eval_points >= a, - eval_points <= b)] - else: - domain = domain_range - - points_shifted = np.outer(np.ones(self.n_samples), - eval_points) - - points_shifted += np.atleast_2d(shifts).T - - # Matrix of shifted values - _data_matrix = self.evaluate(points_shifted, - aligned_evaluation=False, - extrapolation=extrapolation, - keepdims=False) - - _basis = self.basis.rescale(domain) - - return FDataBasis.from_data(_data_matrix, eval_points, - _basis, **kwargs) - - def derivative(self, order=1): - r"""Differentiate a FDataBasis object. - - - Args: - order (int, optional): Order of the derivative. Defaults to one. - """ - - if order < 0: - raise ValueError("order only takes non-negative integer values.") - - if order == 0: - return self.copy() - - basis, coefficients = self.basis._derivative(self.coefficients, order) - - return FDataBasis(basis, coefficients) - - def mean(self, weights=None): - """Compute the mean of all the samples in a FDataBasis object. - - Returns: - :obj:`FDataBasis`: A FDataBais object with just one sample - representing the mean of all the samples in the original - FDataBasis object. - - Examples: - >>> basis = Monomial(n_basis=4) - >>> coefficients = [[0.5, 1, 2, .5], [1.5, 1, 4, .5]] - >>> FDataBasis(basis, coefficients).mean() - FDataBasis( - basis=Monomial(domain_range=[array([0, 1])], n_basis=4), - coefficients=[[ 1. 1. 3. 0.5]], - ...) - - """ - - if weights is not None: - return self.copy(coefficients=np.average(self.coefficients, - weights=weights, - axis=0 - )[np.newaxis, ...] - ) - - return self.copy(coefficients=np.mean(self.coefficients, axis=0)) - - def gmean(self, eval_points=None): - """Compute the geometric mean of the functional data object. - - A numerical approach its used. The object its transformed into its - discrete representation and then the geometric mean is computed and - then the object is taken back to the basis representation. - - Args: - eval_points (array_like, optional): Set of points where the - functions are evaluated to obtain the discrete - representation of the object. If none are passed it calls - numpy.linspace with bounds equal to the ones defined in - self.domain_range and the number of points the maximum - between 501 and 10 times the number of basis. - - Returns: - FDataBasis: Geometric mean of the original object. - - """ - return self.to_grid(eval_points).gmean().to_basis(self.basis) - - def var(self, eval_points=None): - """Compute the variance of the functional data object. - - A numerical approach its used. 
The object its transformed into its - discrete representation and then the variance is computed and - then the object is taken back to the basis representation. - - Args: - eval_points (array_like, optional): Set of points where the - functions are evaluated to obtain the discrete - representation of the object. If none are passed it calls - numpy.linspace with bounds equal to the ones defined in - self.domain_range and the number of points the maximum - between 501 and 10 times the number of basis. - - Returns: - FDataBasis: Variance of the original object. - - """ - return self.to_grid(eval_points).var().to_basis(self.basis) - - def cov(self, eval_points=None): - """Compute the covariance of the functional data object. - - A numerical approach its used. The object its transformed into its - discrete representation and then the covariance matrix is computed. - - Args: - eval_points (array_like, optional): Set of points where the - functions are evaluated to obtain the discrete - representation of the object. If none are passed it calls - numpy.linspace with bounds equal to the ones defined in - self.domain_range and the number of points the maximum - between 501 and 10 times the number of basis. - - Returns: - numpy.darray: Matrix of covariances. - - """ - return self.to_grid(eval_points).cov() - - def to_grid(self, eval_points=None): - """Return the discrete representation of the object. - - Args: - eval_points (array_like, optional): Set of points where the - functions are evaluated. If none are passed it calls - numpy.linspace with bounds equal to the ones defined in - self.domain_range and the number of points the maximum - between 501 and 10 times the number of basis. - - Returns: - FDataGrid: Discrete representation of the functional data - object. - - Examples: - >>> fd = FDataBasis(coefficients=[[1, 1, 1], [1, 0, 1]], - ... basis=Monomial((0,5), n_basis=3)) - >>> fd.to_grid([0, 1, 2]) - FDataGrid( - array([[[ 1.], - [ 3.], - [ 7.]], - - [[ 1.], - [ 2.], - [ 5.]]]), - sample_points=[array([0, 1, 2])], - domain_range=array([[0, 5]]), - ...) - - """ - - if self.dim_codomain > 1 or self.dim_domain > 1: - raise NotImplementedError - - if eval_points is None: - npoints = max(constants.N_POINTS_FINE_MESH, - constants.BASIS_MIN_FACTOR * self.n_basis) - eval_points = np.linspace(*self.domain_range[0], npoints) - - return grid.FDataGrid(self.evaluate(eval_points, keepdims=False), - sample_points=eval_points, - domain_range=self.domain_range, - keepdims=self.keepdims) - - def to_basis(self, basis, eval_points=None, **kwargs): - """Return the basis representation of the object. - - Args: - basis(Basis): basis object in which the functional data are - going to be represented. - **kwargs: keyword arguments to be passed to - FDataBasis.from_data(). - - Returns: - FDataBasis: Basis representation of the funtional data - object. 
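A change-of-basis sketch via the grid representation (assuming the ``BSpline`` and ``Monomial`` names exported by this package; illustration only):

>>> from skfda.representation.basis import BSpline, FDataBasis, Monomial
>>> fd = FDataBasis(Monomial((0, 5), n_basis=3), [[1, 1, 1]])
>>> fd.to_basis(BSpline((0, 5), n_basis=5)).n_basis
5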
- """ - - return self.to_grid(eval_points=eval_points).to_basis(basis, **kwargs) - - def to_list(self): - """Splits FDataBasis samples into a list""" - return [self[i] for i in range(self.n_samples)] - - def copy(self, *, basis=None, coefficients=None, dataset_label=None, - axes_labels=None, extrapolation=None, keepdims=None): - """FDataBasis copy""" - - if basis is None: - basis = copy.deepcopy(self.basis) - - if coefficients is None: - coefficients = self.coefficients - - if dataset_label is None: - dataset_label = copy.deepcopy(dataset_label) - - if axes_labels is None: - axes_labels = copy.deepcopy(axes_labels) - - if extrapolation is None: - extrapolation = self.extrapolation - - if keepdims is None: - keepdims = self.keepdims - - return FDataBasis(basis, coefficients, dataset_label=dataset_label, - axes_labels=axes_labels, extrapolation=extrapolation, - keepdims=keepdims) - - def times(self, other): - """"Provides a numerical approximation of the multiplication between - an FDataObject to other object - - Args: - other (int, list, FDataBasis): Object to multiply with the - FDataBasis object. - - * int: Multiplies all samples with the value - * list: multiply each values with the samples respectively. - Length should match with FDataBasis samples - * FDataBasis: if there is one sample it multiplies this with - all the samples in the object. If not, it multiplies each - sample respectively. Samples should match - - Returns: - (FDataBasis): FDataBasis object containing the multiplication - - """ - if isinstance(other, FDataBasis): - - if not _same_domain(self.domain_range, other.domain_range): - raise ValueError("The functions domains are different.") - - basisobj = self.basis.basis_of_product(other.basis) - neval = max(constants.BASIS_MIN_FACTOR * - max(self.n_basis, other.n_basis) + 1, - constants.N_POINTS_COARSE_MESH) - (left, right) = self.domain_range[0] - evalarg = np.linspace(left, right, neval) - - first = self.copy(coefficients=(np.repeat(self.coefficients, - other.n_samples, axis=0) - if (self.n_samples == 1 and - other.n_samples > 1) - else self.coefficients.copy())) - second = other.copy(coefficients=(np.repeat(other.coefficients, - self.n_samples, axis=0) - if (other.n_samples == 1 and - self.n_samples > 1) - else other.coefficients.copy())) - - fdarray = first.evaluate(evalarg) * second.evaluate(evalarg) - - return FDataBasis.from_data(fdarray, evalarg, basisobj) - - if isinstance(other, int): - other = [other for _ in range(self.n_samples)] - - coefs = np.transpose(np.atleast_2d(other)) - return self.copy(coefficients=self.coefficients * coefs) - - def inner_product(self, other, lfd_self=None, lfd_other=None, - weights=None): - r"""Return an inner product matrix given a FDataBasis object. - - The inner product of two functions is defined as - - .. math:: - = \int_a^b x(t)y(t) dt - - When we talk abaout FDataBasis objects, they have many samples, so we - talk about inner product matrix instead. So, for two FDataBasis objects - we define the inner product matrix as - - .. math:: - a_{ij} = = \int_a^b x_i(s) y_j(s) ds - - where :math:`f_i(s), g_j(s)` are the :math:`i^{th} j^{th}` sample of - each object. The return matrix has a shape of :math:`IxJ` where I and - J are the number of samples of each object respectively. 
- - Args: - other (FDataBasis, Basis): FDataBasis object containing the second - object to make the inner product - - lfd_self (Lfd): LinearDifferentialOperator object for the first - function evaluation - - lfd_other (Lfd): LinearDifferentialOperator object for the second - function evaluation - - weights(FDataBasis): a FDataBasis object with only one sample that - defines the weight to calculate the inner product - - Returns: - numpy.array: Inner Product matrix. - - """ - from ..misc import LinearDifferentialOperator - - if not _same_domain(self.domain_range, other.domain_range): - raise ValueError("Both Objects should have the same domain_range") - if isinstance(other, Basis): - other = other.to_basis() - - # TODO this will be used when lfd evaluation is ready - lfd_self = (LinearDifferentialOperator(0) if lfd_self is None - else lfd_self) - lfd_other = (LinearDifferentialOperator(0) if (lfd_other is None) - else lfd_other) - - if weights is not None: - other = other.times(weights) - - if self.n_samples * other.n_samples > self.n_basis * other.n_basis: - return (self.coefficients @ - self.basis._inner_matrix(other.basis) @ - other.coefficients.T) - else: - return self._inner_product_integrate(other, lfd_self, lfd_other) - - def _inner_product_integrate(self, other, lfd_self, lfd_other): - - matrix = np.empty((self.n_samples, other.n_samples)) - (left, right) = self.domain_range[0] - - for i in range(self.n_samples): - for j in range(other.n_samples): - fd = self[i].times(other[j]) - matrix[i, j] = scipy.integrate.quad( - lambda x: fd.evaluate([x])[0], left, right)[0] - - return matrix - - def _to_R(self): - """Gives the code to build the object on fda package on R""" - return ("fd(coef = " + self._array_to_R(self.coefficients, True) + - ", basisobj = " + self.basis._to_R() + ")") - - def _array_to_R(self, coefficients, transpose=False): - if len(coefficients.shape) == 1: - coefficients = coefficients.reshape((1, coefficients.shape[0])) - - if len(coefficients.shape) > 2: - return NotImplementedError - - if transpose is True: - coefficients = np.transpose(coefficients) - - (rows, cols) = coefficients.shape - retstring = "matrix(c(" - for j in range(cols): - for i in range(rows): - retstring = retstring + str(coefficients[i, j]) + ", " - - return (retstring[0:len(retstring) - 2] + "), nrow = " + str(rows) + - ", ncol = " + str(cols) + ")") - - def __repr__(self): - """Representation of FDataBasis object.""" - if self.axes_labels is None: - axes_labels = None - else: - axes_labels = self.axes_labels.tolist() - - return (f"{self.__class__.__name__}(" - f"\nbasis={self.basis}," - f"\ncoefficients={self.coefficients}," - f"\ndataset_label={self.dataset_label}," - f"\naxes_labels={axes_labels}," - f"\nextrapolation={self.extrapolation}," - f"\nkeepdims={self.keepdims})").replace('\n', '\n ') - - def __str__(self): - """Return str(self).""" - - return (f"{self.__class__.__name__}(" - f"\n_basis={self.basis}," - f"\ncoefficients={self.coefficients})").replace('\n', '\n ') - - def __eq__(self, other): - """Equality of FDataBasis""" - # TODO check all other params - return (self.basis == other.basis and - np.all(self.coefficients == other.coefficients)) - - def concatenate(self, *others, as_coordinates=False): - """Join samples from a similar FDataBasis object. - - Joins samples from another FDataBasis object if they have the same - basis. - - Args: - others (:class:`FDataBasis`): Objects to be concatenated. 
- as_coordinates (boolean, optional): If False concatenates as - new samples, else, concatenates the other functions as - new components of the image. Defaults to False. - - Returns: - :class:`FDataBasis`: FDataBasis object with the samples from the - original objects. - - Todo: - By the moment, only unidimensional objects are supported in basis - representation. - """ - - # TODO: Change to support multivariate functions - # in basis representation - if as_coordinates: - return NotImplemented - - for other in others: - if other.basis != self.basis: - raise ValueError("The objects should have the same basis.") - - data = [self.coefficients] + [other.coefficients for other in others] - - return self.copy(coefficients=np.concatenate(data, axis=0)) - - def compose(self, fd, *, eval_points=None, **kwargs): - """Composition of functions. - - Performs the composition of functions. The basis is discretized to - compute the composition. - - Args: - fd (:class:`FData`): FData object to make the composition. Should - have the same number of samples and image dimension equal to 1. - eval_points (array_like): Points to perform the evaluation. - kwargs: Named arguments to be passed to :func:`from_data`. - """ - - grid = self.to_grid().compose(fd, eval_points=eval_points) - - if fd.dim_domain == 1: - basis = self.basis.rescale(fd.domain_range[0]) - composition = grid.to_basis(basis, **kwargs) - else: - #  Cant be convertered to basis due to the dimensions - composition = grid - - return composition - - def __getitem__(self, key): - """Return self[key].""" - - if isinstance(key, int): - return self.copy(coefficients=self.coefficients[key:key + 1]) - else: - return self.copy(coefficients=self.coefficients[key]) - - def __add__(self, other): - """Addition for FDataBasis object.""" - if isinstance(other, FDataBasis): - if self.basis != other.basis: - raise NotImplementedError - else: - basis, coefs = self.basis._add_same_basis(self.coefficients, - other.coefficients) - else: - try: - basis, coefs = self.basis._add_constant(self.coefficients, - other) - except TypeError: - return NotImplemented - - return self.copy(basis=basis, coefficients=coefs) - - def __radd__(self, other): - """Addition for FDataBasis object.""" - - return self.__add__(other) - - def __sub__(self, other): - """Subtraction for FDataBasis object.""" - if isinstance(other, FDataBasis): - if self.basis != other.basis: - raise NotImplementedError - else: - basis, coefs = self.basis._sub_same_basis(self.coefficients, - other.coefficients) - else: - try: - basis, coefs = self.basis._sub_constant(self.coefficients, - other) - except TypeError: - return NotImplemented - - return self.copy(basis=basis, coefficients=coefs) - - def __rsub__(self, other): - """Right subtraction for FDataBasis object.""" - return (self * -1).__add__(other) - - def __mul__(self, other): - """Multiplication for FDataBasis object.""" - if isinstance(other, FDataBasis): - raise NotImplementedError - - try: - basis, coefs = self.basis._mul_constant(self.coefficients, other) - except TypeError: - return NotImplemented - - return self.copy(basis=basis, coefficients=coefs) - - def __rmul__(self, other): - """Multiplication for FDataBasis object.""" - return self.__mul__(other) - - def __truediv__(self, other): - """Division for FDataBasis object.""" - - other = np.array(other) - - try: - other = 1 / other - except TypeError: - return NotImplemented - - return self * other - - def __rtruediv__(self, other): - """Right division for FDataBasis object.""" - - raise 
NotImplementedError - - ##################################################################### - # Pandas ExtensionArray methods - ##################################################################### - @property - def dtype(self): - """The dtype for this extension array, FDataGridDType""" - return FDataBasisDType - - @property - def nbytes(self) -> int: - """ - The number of bytes needed to store this object in memory. - """ - return self.coefficients.nbytes() - - -class FDataBasisDType(pandas.api.extensions.ExtensionDtype): - """ - DType corresponding to FDataBasis in Pandas - """ - name = 'functional data (basis)' - kind = 'O' - type = FDataBasis - na_value = None - - @classmethod - def construct_from_string(cls, string): - if string == cls.name: - return cls() - else: - raise TypeError("Cannot construct a '{}' from " - "'{}'".format(cls, string)) - - @classmethod - def construct_array_type(cls): - return FDataBasis diff --git a/skfda/representation/basis/__init__.py b/skfda/representation/basis/__init__.py new file mode 100644 index 000000000..7b2fa39e7 --- /dev/null +++ b/skfda/representation/basis/__init__.py @@ -0,0 +1,9 @@ +from ._basis import Basis +from ._bspline import BSpline +from ._coefficients_transformer import CoefficientsTransformer +from ._constant import Constant +from ._fdatabasis import FDataBasis, FDataBasisDType +from ._fourier import Fourier +from ._monomial import Monomial +from ._tensor_basis import Tensor +from ._vector_basis import VectorValued diff --git a/skfda/representation/basis/_basis.py b/skfda/representation/basis/_basis.py new file mode 100644 index 000000000..4bc3e3ed1 --- /dev/null +++ b/skfda/representation/basis/_basis.py @@ -0,0 +1,358 @@ +"""Module for functional data manipulation in a basis system. + +Defines functional data object in a basis function system representation and +the corresponding basis classes. + +""" +from abc import ABC, abstractmethod +import copy +import warnings + +import numpy as np + +from ..._utils import (_list_of_arrays, _same_domain, + _reshape_eval_points, _evaluate_grid) + + +__author__ = "Miguel Carbajo Berrocal" +__email__ = "miguel.carbajo@estudiante.uam.es" + +# aux functions + + +def _check_domain(domain_range): + for domain in domain_range: + if len(domain) != 2 or domain[0] >= domain[1]: + raise ValueError(f"The interval {domain} is not well-defined.") + + +class Basis(ABC): + """Defines the structure of a basis function system. + + Attributes: + domain_range (tuple): a tuple of length 2 containing the initial and + end values of the interval over which the basis can be evaluated. + n_basis (int): number of functions in the basis. + + """ + + def __init__(self, domain_range=None, n_basis=1): + """Basis constructor. + + Args: + domain_range (tuple or list of tuples, optional): Definition of the + interval where the basis defines a space. Defaults to (0,1). + n_basis: Number of functions that form the basis. Defaults to 1. 
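Since ``Basis`` is now abstract, a subclass only has to provide the abstract hooks defined in this file. A minimal sketch (illustration only, not part of the patch):

import numpy as np
from skfda.representation.basis import Basis

class Indicator(Basis):
    """Toy basis containing only the constant-one function."""

    def _evaluate(self, eval_points):
        # One row per basis function, one column per evaluation point.
        return np.ones((1, len(eval_points)))

    def basis_of_product(self, other):
        raise NotImplementedError

    def rbasis_of_product(self, other):
        raise NotImplementedError

values = Indicator().evaluate([0.0, 0.5, 1.0])  # shape (1, 3, 1)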
+ """ + + if domain_range is not None: + # TODO: Allow multiple dimensions + domain_range = _list_of_arrays(domain_range) + + # Some checks + _check_domain(domain_range) + + if n_basis < 1: + raise ValueError("The number of basis has to be strictly " + "possitive.") + + self._domain_range = domain_range + self.n_basis = n_basis + + super().__init__() + + @property + def dim_domain(self): + return 1 + + @property + def dim_codomain(self): + return 1 + + @property + def domain_range(self): + if self._domain_range is None: + return [np.array([0, 1])] + else: + return self._domain_range + + @domain_range.setter + def domain_range(self, value): + self._domain_range = value + + @abstractmethod + def _evaluate(self, eval_points): + """Subclasses must override this to provide basis evaluation.""" + pass + + def evaluate(self, eval_points, *, derivative=0): + """Evaluate Basis objects and its derivatives. + + Evaluates the basis function system or its derivatives at a list of + given values. + + Args: + eval_points (array_like): List of points where the basis is + evaluated. + + Returns: + (numpy.darray): Matrix whose rows are the values of the each + basis function or its derivatives at the values specified in + eval_points. + + """ + if derivative < 0: + raise ValueError("derivative only takes non-negative values.") + elif derivative != 0: + warnings.warn("Parameter derivative is deprecated. Use the " + "derivative function instead.", DeprecationWarning) + return self.derivative(order=derivative)(eval_points) + + eval_points = _reshape_eval_points(eval_points, + aligned=True, + n_samples=self.n_basis, + dim_domain=self.dim_domain) + + return self._evaluate(eval_points).reshape( + (self.n_basis, len(eval_points), self.dim_codomain)) + + def __call__(self, *args, **kwargs): + return self.evaluate(*args, **kwargs) + + def __len__(self): + return self.n_basis + + def derivative(self, *, order=1): + """Construct a FDataBasis object containing the derivative. + + Args: + order (int, optional): Order of the derivative. Defaults to 1. + + Returns: + (FDataBasis): Derivative object. + + """ + + return self.to_basis().derivative(order=order) + + def _derivative_basis_and_coefs(self, coefs, order=1): + """ + Subclasses can override this to provide derivative construction. + + A basis can provide derivative evaluation at given points + without providing a basis representation for its derivatives, + although is recommended to provide both if possible. + + """ + raise NotImplementedError(f"{type(self)} basis does not support " + "the construction of a basis of the " + "derivatives.") + + def plot(self, chart=None, **kwargs): + """Plot the basis object or its derivatives. + + Args: + chart (figure object, axe or list of axes, optional): figure over + with the graphs are plotted or axis over where the graphs are + plotted. + **kwargs: keyword arguments to be passed to the + fdata.plot function. + + Returns: + fig (figure): figure object in which the graphs are plotted. + + """ + self.to_basis().plot(chart=chart, **kwargs) + + def _coordinate_nonfull(self, fdatabasis, key): + """ + Returns a fdatagrid for the coordinate functions indexed by key. + + Subclasses can override this to provide coordinate indexing. + + The key parameter has been already validated and is an integer or + slice in the range [0, self.dim_codomain. 
+ + """ + raise NotImplementedError("Coordinate indexing not implemented") + + def _coordinate(self, fdatabasis, key): + """Returns a fdatagrid for the coordinate functions indexed by key.""" + + # Raises error if not in range and normalize key + r_key = range(self.dim_codomain)[key] + + if isinstance(r_key, range) and len(r_key) == 0: + raise IndexError("Empty number of coordinates selected") + + # Full fdatabasis case + if (self.dim_codomain == 1 and r_key == 0) or ( + isinstance(r_key, range) and len(r_key) == self.dim_codomain): + + return fdatabasis.copy() + + else: + + return self._coordinate_nonfull(fdatabasis=fdatabasis, key=r_key) + + @abstractmethod + def basis_of_product(self, other): + pass + + @abstractmethod + def rbasis_of_product(self, other): + pass + + @staticmethod + def default_basis_of_product(one, other): + """Default multiplication for a pair of basis""" + from ._bspline import BSpline + + if not _same_domain(one, other): + raise ValueError("Ranges are not equal.") + + norder = min(8, one.n_basis + other.n_basis) + n_basis = max(one.n_basis + other.n_basis, norder + 1) + return BSpline(one.domain_range, n_basis, norder) + + def rescale(self, domain_range=None): + r"""Return a copy of the basis with a new domain range, with the + corresponding values rescaled to the new bounds. + + Args: + domain_range (tuple, optional): Definition of the interval + where the basis defines a space. Defaults uses the same as + the original basis. + """ + + if domain_range is None: + domain_range = self.domain_range + + return type(self)(domain_range, self.n_basis) + + def copy(self): + """Basis copy""" + return copy.deepcopy(self) + + def to_basis(self): + from . import FDataBasis + return FDataBasis(self.copy(), np.identity(self.n_basis)) + + def _list_to_R(self, knots): + retstring = "c(" + for i in range(0, len(knots)): + retstring = retstring + str(knots[i]) + ", " + return retstring[0:len(retstring) - 2] + ")" + + def _to_R(self): + raise NotImplementedError + + def inner_product_matrix(self, other=None): + r"""Return the Inner Product Matrix of a pair of basis. + + The Inner Product Matrix is defined as + + .. math:: + IP_{ij} = \langle\phi_i, \theta_j\rangle + + where :math:`\phi_i` is the ith element of the basi and + :math:`\theta_j` is the jth element of the second basis. + This matrix helps on the calculation of the inner product + between objects on two basis and for the change of basis. + + Args: + other (:class:`Basis`): Basis to compute the inner product + matrix. If not basis is given, it computes the matrix with + itself returning the Gram Matrix + + Returns: + numpy.array: Inner Product Matrix of two basis + + """ + from ...misc import inner_product_matrix + + if other is None or self == other: + return self.gram_matrix() + + return inner_product_matrix(self, other) + + def _gram_matrix_numerical(self): + """ + Compute the Gram matrix numerically. + + """ + from ...misc import inner_product_matrix + + return inner_product_matrix(self, force_numerical=True) + + def _gram_matrix(self): + """ + Compute the Gram matrix. + + Subclasses may override this method for improving computation + of the Gram matrix. + + """ + return self._gram_matrix_numerical() + + def gram_matrix(self): + r"""Return the Gram Matrix of a basis + + The Gram Matrix is defined as + + .. math:: + G_{ij} = \langle\phi_i, \phi_j\rangle + + where :math:`\phi_i` is the ith element of the basis. This is a + symmetric matrix and positive-semidefinite. + + Returns: + numpy.array: Gram Matrix of the basis. 
+ + """ + + gram = getattr(self, "_gram_matrix_cached", None) + + if gram is None: + gram = self._gram_matrix() + self._gram_matrix_cached = gram + + return gram + + def _add_same_basis(self, coefs1, coefs2): + return self.copy(), coefs1 + coefs2 + + def _add_constant(self, coefs, constant): + coefs = coefs.copy() + constant = np.array(constant) + coefs[:, 0] = coefs[:, 0] + constant + + return self.copy(), coefs + + def _sub_same_basis(self, coefs1, coefs2): + return self.copy(), coefs1 - coefs2 + + def _sub_constant(self, coefs, other): + coefs = coefs.copy() + other = np.array(other) + coefs[:, 0] = coefs[:, 0] - other + + return self.copy(), coefs + + def _mul_constant(self, coefs, other): + coefs = coefs.copy() + other = np.atleast_2d(other).reshape(-1, 1) + coefs = coefs * other + + return self.copy(), coefs + + def __repr__(self): + """Representation of a Basis object.""" + return (f"{self.__class__.__name__}(domain_range={self.domain_range}, " + f"n_basis={self.n_basis})") + + def __eq__(self, other): + """Equality of Basis""" + return (type(self) == type(other) + and _same_domain(self, other) + and self.n_basis == other.n_basis) diff --git a/skfda/representation/basis/_bspline.py b/skfda/representation/basis/_bspline.py new file mode 100644 index 000000000..a85ce8da9 --- /dev/null +++ b/skfda/representation/basis/_bspline.py @@ -0,0 +1,405 @@ +from numpy import polyder, polyint, polymul, polyval +from scipy.interpolate import BSpline as SciBSpline +from scipy.interpolate import PPoly +import scipy.interpolate + +import numpy as np + +from ..._utils import _list_of_arrays +from ..._utils import _same_domain +from ._basis import Basis + + +class BSpline(Basis): + r"""BSpline basis. + + BSpline basis elements are defined recursively as: + + .. math:: + B_{i, 1}(x) = 1 \quad \text{if } t_i \le x < t_{i+1}, + \quad 0 \text{ otherwise} + + .. math:: + B_{i, k}(x) = \frac{x - t_i}{t_{i+k} - t_i} B_{i, k-1}(x) + + \frac{t_{i+k+1} - x}{t_{i+k+1} - t_{i+1}} B_{i+1, k-1}(x) + + Where k indicates the order of the spline. + + Implementation details: In order to allow a discontinuous behaviour at + the boundaries of the domain it is necessary to placing m knots at the + boundaries [RS05]_. This is automatically done so that the user only has to + specify a single knot at the boundaries. + + Attributes: + domain_range (tuple): A tuple of length 2 containing the initial and + end values of the interval over which the basis can be evaluated. + n_basis (int): Number of functions in the basis. + order (int): Order of the splines. One greather than their degree. + knots (list): List of knots of the spline functions. + + Examples: + Constructs specifying number of basis and order. + + >>> bss = BSpline(n_basis=8, order=4) + + If no order is specified defaults to 4 because cubic splines are + the most used. So the previous example is the same as: + + >>> bss = BSpline(n_basis=8) + + It is also possible to create a BSpline basis specifying the knots. + + >>> bss = BSpline(knots=[0, 0.2, 0.4, 0.6, 0.8, 1]) + + Once we create a basis we can evaluate each of its functions at a + set of points. + + >>> bss = BSpline(n_basis=3, order=3) + >>> bss([0, 0.5, 1]) + array([[[ 1. ], + [ 0.25], + [ 0. ]], + [[ 0. ], + [ 0.5 ], + [ 0. ]], + [[ 0. ], + [ 0.25], + [ 1. ]]]) + + And evaluates first derivative + + >>> deriv = bss.derivative() + >>> deriv([0, 0.5, 1]) + array([[[-2.], + [-1.], + [ 0.]], + [[ 2.], + [ 0.], + [-2.]], + [[ 0.], + [ 1.], + [ 2.]]]) + + References: + .. [RS05] Ramsay, J., Silverman, B. W. 
(2005). *Functional Data + Analysis*. Springer. 50-51. + + """ + + def __init__(self, domain_range=None, n_basis=None, order=4, knots=None): + """Bspline basis constructor. + + Args: + domain_range (tuple, optional): Definition of the interval where + the basis defines a space. Defaults to (0,1) if knots are not + specified. If knots are specified defaults to the first and + last element of the knots. + n_basis (int, optional): Number of splines that form the basis. + order (int, optional): Order of the splines. One greater that + their degree. Defaults to 4 which mean cubic splines. + knots (array_like): List of knots of the splines. If domain_range + is specified the first and last elements of the knots have to + match with it. + + """ + + if domain_range is not None: + domain_range = _list_of_arrays(domain_range) + + if len(domain_range) != 1: + raise ValueError("Domain range should be unidimensional.") + + domain_range = domain_range[0] + + # Knots default to equally space points in the domain_range + if knots is None: + if n_basis is None: + raise ValueError("Must provide either a list of knots or the" + "number of basis.") + else: + knots = list(knots) + knots.sort() + if domain_range is None: + domain_range = (knots[0], knots[-1]) + else: + if domain_range[0] != knots[0] or domain_range[1] != knots[-1]: + raise ValueError("The ends of the knots must be the same " + "as the domain_range.") + + # n_basis default to number of knots + order of the splines - 2 + if n_basis is None: + n_basis = len(knots) + order - 2 + + if (n_basis - order + 2) < 2: + raise ValueError(f"The number of basis ({n_basis}) minus the " + f"order of the bspline ({order}) should be " + f"greater than 3.") + + self.order = order + self.knots = None if knots is None else list(knots) + super().__init__(domain_range, n_basis) + + # Checks + if self.n_basis != self.order + len(self.knots) - 2: + raise ValueError(f"The number of basis ({self.n_basis}) has to " + f"equal the order ({self.order}) plus the " + f"number of knots ({len(self.knots)}) minus 2.") + + @property + def knots(self): + if self._knots is None: + return list(np.linspace(*self.domain_range[0], + self.n_basis - self.order + 2)) + else: + return self._knots + + @knots.setter + def knots(self, value): + self._knots = value + + def _evaluation_knots(self): + """ + Get the knots adding m knots to the boundary in order to allow a + discontinuous behaviour at the boundaries of the domain [RS05]_. + + References: + .. [RS05] Ramsay, J., Silverman, B. W. (2005). *Functional Data + Analysis*. Springer. 50-51. 
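+
+        Example (a sketch):
+
+            >>> bss = BSpline(knots=[0, 0.5, 1], order=3)
+            >>> bss._evaluation_knots()
+            array([ 0. ,  0. ,  0. ,  0.5,  1. ,  1. ,  1. ])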
+ """ + return np.array([self.knots[0]] * (self.order - 1) + self.knots + + [self.knots[-1]] * (self.order - 1)) + + def _evaluate(self, eval_points): + + # Input is scalar + eval_points = eval_points[..., 0] + + # Places m knots at the boundaries + knots = self._evaluation_knots() + + # c is used the select which spline the function splev below computes + c = np.zeros(len(knots)) + + # Initialise empty matrix + mat = np.empty((self.n_basis, len(eval_points))) + + # For each basis computes its value for each evaluation point + for i in range(self.n_basis): + # write a 1 in c in the position of the spline calculated in each + # iteration + c[i] = 1 + # compute the spline + mat[i] = scipy.interpolate.splev(eval_points, + (knots, c, self.order - 1)) + c[i] = 0 + + return mat + + def _derivative_basis_and_coefs(self, coefs, order=1): + if order >= self.order: + return ( + BSpline(n_basis=1, domain_range=self.domain_range, order=1), + np.zeros((len(coefs), 1))) + + deriv_splines = [self._to_scipy_BSpline(coefs[i]).derivative(order) + for i in range(coefs.shape[0])] + + deriv_coefs = [BSpline._from_scipy_BSpline(spline)[1] + for spline in deriv_splines] + + deriv_basis = BSpline._from_scipy_BSpline(deriv_splines[0])[0] + + return deriv_basis, np.array(deriv_coefs)[:, 0:deriv_basis.n_basis] + + def rescale(self, domain_range=None): + r"""Return a copy of the basis with a new domain range, with the + corresponding values rescaled to the new bounds. + The knots of the BSpline will be rescaled in the new interval. + + Args: + domain_range (tuple, optional): Definition of the interval + where the basis defines a space. Defaults uses the same as + the original basis. + """ + + knots = np.array(self.knots, dtype=np.dtype('float')) + + if domain_range is not None: # Rescales the knots + knots -= knots[0] + knots *= ((domain_range[1] - domain_range[0] + ) / (self.knots[-1] - self.knots[0])) + knots += domain_range[0] + + # Fix possible round error + knots[0] = domain_range[0] + knots[-1] = domain_range[1] + + else: + # TODO: Allow multiple dimensions + domain_range = self.domain_range[0] + + return BSpline(domain_range, self.n_basis, self.order, knots) + + def __repr__(self): + """Representation of a BSpline basis.""" + return (f"{self.__class__.__name__}(domain_range={self.domain_range}, " + f"n_basis={self.n_basis}, order={self.order}, " + f"knots={self.knots})") + + def __eq__(self, other): + """Equality of Basis""" + return (super().__eq__(other) + and self.order == other.order + and self.knots == other.knots) + + def _gram_matrix(self): + # Places m knots at the boundaries + knots = self._evaluation_knots() + + # c is used the select which spline the function + # PPoly.from_spline below computes + c = np.zeros(len(knots)) + + # Initialise empty list to store the piecewise polynomials + ppoly_lst = [] + + no_0_intervals = np.where(np.diff(knots) > 0)[0] + + # For each basis gets its piecewise polynomial representation + for i in range(self.n_basis): + + # Write a 1 in c in the position of the spline + # transformed in each iteration + c[i] = 1 + + # Gets the piecewise polynomial representation and gets + # only the positions for no zero length intervals + # This polynomial are defined relatively to the knots + # meaning that the column i corresponds to the ith knot. 
+ # Let the ith knot be a + # Then f(x) = pp(x - a) + pp = PPoly.from_spline((knots, c, self.order - 1)) + pp_coefs = pp.c[:, no_0_intervals] + + # We have the coefficients for each interval in coordinates + # (x - a), so we will need to subtract a when computing the + # definite integral + ppoly_lst.append(pp_coefs) + c[i] = 0 + + # Now for each pair of basis computes the inner product after + # applying the linear differential operator + matrix = np.zeros((self.n_basis, self.n_basis)) + + for interval in range(len(no_0_intervals)): + for i in range(self.n_basis): + poly_i = np.trim_zeros(ppoly_lst[i][:, + interval], 'f') + # Indefinite integral + square = polymul(poly_i, poly_i) + integral = polyint(square) + + # Definite integral + matrix[i, i] += np.diff(polyval( + integral, self.knots[interval: interval + 2] + - self.knots[interval]))[0] + + # The Gram matrix is banded, so not all intervals are used + for j in range(i + 1, min(i + self.order, self.n_basis)): + poly_j = np.trim_zeros(ppoly_lst[j][:, interval], 'f') + + # Indefinite integral + integral = polyint(polymul(poly_i, poly_j)) + + # Definite integral + matrix[i, j] += np.diff(polyval( + integral, self.knots[interval: interval + 2] + - self.knots[interval]) + )[0] + + # The matrix is symmetric + matrix[j, i] = matrix[i, j] + + return matrix + + def basis_of_product(self, other): + from ._constant import Constant + + """Multiplication of two Bspline Basis""" + if not _same_domain(self, other): + raise ValueError("Ranges are not equal.") + + if isinstance(other, Constant): + return other.rbasis_of_product(self) + + if isinstance(other, BSpline): + uniqueknots = np.union1d(self.inknots, other.inknots) + + multunique = np.zeros(len(uniqueknots), dtype=np.int32) + for i in range(len(uniqueknots)): + mult1 = np.count_nonzero(self.inknots == uniqueknots[i]) + mult2 = np.count_nonzero(other.inknots == uniqueknots[i]) + multunique[i] = max(mult1, mult2) + + m2 = 0 + allknots = np.zeros(np.sum(multunique)) + for i in range(len(uniqueknots)): + m1 = m2 + m2 = m2 + multunique[i] + allknots[m1:m2] = uniqueknots[i] + + norder1 = self.n_basis - len(self.inknots) + norder2 = other.n_basis - len(other.inknots) + norder = min(norder1 + norder2 - 1, 20) + + allbreaks = ([self.domain_range[0][0]] + + np.ndarray.tolist(allknots) + + [self.domain_range[0][1]]) + n_basis = len(allbreaks) + norder - 2 + return BSpline(self.domain_range, n_basis, norder, allbreaks) + else: + norder = min(self.n_basis - len(self.inknots) + 2, 8) + n_basis = max(self.n_basis + other.n_basis, norder + 1) + return BSpline(self.domain_range, n_basis, norder) + + def rbasis_of_product(self, other): + """Multiplication of a Bspline Basis with other basis""" + + norder = min(self.n_basis - len(self.inknots) + 2, 8) + n_basis = max(self.n_basis + other.n_basis, norder + 1) + return BSpline(self.domain_range, n_basis, norder) + + def _to_R(self): + drange = self.domain_range[0] + return ("create.bspline.basis(rangeval = c(" + str(drange[0]) + "," + + str(drange[1]) + "), nbasis = " + str(self.n_basis) + + ", norder = " + str(self.order) + ", breaks = " + + self._list_to_R(self.knots) + ")") + + def _to_scipy_BSpline(self, coefs): + + knots = np.concatenate(( + np.repeat(self.knots[0], self.order - 1), + self.knots, + np.repeat(self.knots[-1], self.order - 1))) + + return SciBSpline(knots, coefs, self.order - 1) + + @staticmethod + def _from_scipy_BSpline(bspline): + order = bspline.k + knots = bspline.t + + # Remove additional knots at the borders + if order != 0: + knots = 
knots[order: -order] + + coefs = bspline.c + domain_range = [knots[0], knots[-1]] + + return BSpline(domain_range, order=order + 1, knots=knots), coefs + + @property + def inknots(self): + """Return number of basis.""" + return self.knots[1:len(self.knots) - 1] diff --git a/skfda/representation/basis/_coefficients_transformer.py b/skfda/representation/basis/_coefficients_transformer.py new file mode 100644 index 000000000..073c2eb63 --- /dev/null +++ b/skfda/representation/basis/_coefficients_transformer.py @@ -0,0 +1,44 @@ +from sklearn.base import BaseEstimator, TransformerMixin +from sklearn.utils.validation import check_is_fitted + +from ._fdatabasis import FDataBasis + + +class CoefficientsTransformer(BaseEstimator, TransformerMixin): + """ + Transformer returning the coefficients of FDataBasis objects as a matrix. + + Attributes: + shape_ (tuple): original shape of coefficients per sample. + + Examples: + >>> from skfda.representation.basis import (FDataBasis, Monomial, + ... CoefficientsTransformer) + >>> + >>> basis = Monomial(n_basis=4) + >>> coefficients = [[0.5, 1, 2, .5], [1.5, 1, 4, .5]] + >>> fd = FDataBasis(basis, coefficients) + >>> + >>> transformer = CoefficientsTransformer() + >>> transformer.fit_transform(fd) + array([[ 0.5, 1. , 2. , 0.5], + [ 1.5, 1. , 4. , 0.5]]) + + """ + + def fit(self, X: FDataBasis, y=None): + + self.shape_ = X.coefficients.shape[1:] + + return self + + def transform(self, X, y=None): + + check_is_fitted(self) + + assert X.coefficients.shape[1:] == self.shape_ + + coefficients = X.coefficients.copy() + coefficients = coefficients.reshape((X.n_samples, -1)) + + return coefficients diff --git a/skfda/representation/basis/_constant.py b/skfda/representation/basis/_constant.py new file mode 100644 index 000000000..fe139b826 --- /dev/null +++ b/skfda/representation/basis/_constant.py @@ -0,0 +1,58 @@ +import numpy as np +from ..._utils import _same_domain +from ._basis import Basis + + +class Constant(Basis): + """Constant basis. + + Basis for constant functions + + Attributes: + domain_range (tuple): a tuple of length 2 containing the initial and + end values of the interval over which the basis can be evaluated. + + Examples: + Defines a contant base over the interval :math:`[0, 5]` consisting + on the constant function 1 on :math:`[0, 5]`. + + >>> bs_cons = Constant((0,5)) + + """ + + def __init__(self, domain_range=None): + """Constant basis constructor. + + Args: + domain_range (tuple): Tuple defining the domain over which the + function is defined. 
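+
+        Example (a sketch; the single basis function is constantly 1):
+
+            >>> b = Constant((0, 5))
+            >>> b([0, 2.5, 5])
+            array([[[ 1.],
+                    [ 1.],
+                    [ 1.]]])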
+ + """ + super().__init__(domain_range, 1) + + def _evaluate(self, eval_points): + return np.ones((1, len(eval_points))) + + def _derivative_basis_and_coefs(self, coefs, order=1): + return ((self.copy(), coefs.copy()) if order == 0 + else (self.copy(), np.zeros(coefs.shape))) + + def _gram_matrix(self): + return np.array([[self.domain_range[0][1] - + self.domain_range[0][0]]]) + + def basis_of_product(self, other): + """Multiplication of a Constant Basis with other Basis""" + if not _same_domain(self, other): + raise ValueError("Ranges are not equal.") + + return other.copy() + + def rbasis_of_product(self, other): + """Multiplication of a Constant Basis with other Basis""" + return other.copy() + + def _to_R(self): + drange = self.domain_range[0] + return "create.constant.basis(rangeval = c(" + str(drange[0]) + "," +\ + str(drange[1]) + "))" diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py new file mode 100644 index 000000000..8a719a7db --- /dev/null +++ b/skfda/representation/basis/_fdatabasis.py @@ -0,0 +1,837 @@ +from builtins import isinstance +import copy + +import pandas.api.extensions + +import numpy as np + +from .. import grid +from ..._utils import constants +from .._functional_data import FData + + +def _same_domain(one_domain_range, other_domain_range): + return np.array_equal(one_domain_range, other_domain_range) + + +class FDataBasis(FData): + r"""Basis representation of functional data. + + Class representation for functional data in the form of a set of basis + functions multplied by a set of coefficients. + + .. math:: + f(x) = \sum_{k=1}{K}c_k\phi_k + + Where n is the number of basis functions, :math:`c = (c_1, c_2, ..., + c_K)` the vector of coefficients and :math:`\phi = (\phi_1, \phi_2, + ..., \phi_K)` the basis function system. + + Attributes: + basis (:obj:`Basis`): Basis function system. + coefficients (numpy.darray): List or matrix of coefficients. Has to + have the same length or number of columns as the number of basis + function in the basis. If a matrix, each row contains the + coefficients that multiplied by the basis functions produce each + functional datum. + domain_range (numpy.ndarray): 2 dimension matrix where each row + contains the bounds of the interval in which the functional data + is considered to exist for each one of the axies. + dataset_name (str): name of the dataset. + argument_names (tuple): tuple containing the names of the different + arguments. + coordinate_names (tuple): tuple containing the names of the different + coordinate functions. + extrapolation (str or Extrapolation): defines the default type of + extrapolation. By default None, which does not apply any type of + extrapolation. See `Extrapolation` for detailled information of the + types of extrapolation. + + Examples: + >>> from skfda.representation.basis import FDataBasis, Monomial + >>> + >>> basis = Monomial(n_basis=4) + >>> coefficients = [1, 1, 3, .5] + >>> FDataBasis(basis, coefficients) + FDataBasis( + basis=Monomial(domain_range=[array([0, 1])], n_basis=4), + coefficients=[[ 1. 1. 3. 0.5]], + ...) + + """ + class _CoordinateIterator: + """Internal class to iterate through the image coordinates. + + Dummy object. Should be change to support multidimensional objects. 
+ + """ + + def __init__(self, fdatabasis): + """Create an iterator through the image coordinates.""" + self._fdatabasis = fdatabasis + + def __iter__(self): + """Return an iterator through the image coordinates.""" + + for i in range(len(self)): + yield self[i] + + def __getitem__(self, key): + """Get a specific coordinate.""" + + return self._fdatabasis.basis._coordinate(self._fdatabasis, key) + + def __len__(self): + """Return the number of coordinates.""" + return self._fdatabasis.dim_codomain + + def __init__(self, basis, coefficients, *, dataset_label=None, + dataset_name=None, + axes_labels=None, argument_names=None, + coordinate_names=None, extrapolation=None): + """Construct a FDataBasis object. + + Args: + basis (:obj:`Basis`): Basis function system. + coefficients (array_like): List or matrix of coefficients. Has to + have the same length or number of columns as the number of + basis function in the basis. + """ + coefficients = np.atleast_2d(coefficients) + if coefficients.shape[1] != basis.n_basis: + raise ValueError("The length or number of columns of coefficients " + "has to be the same equal to the number of " + "elements of the basis.") + self.basis = basis + self.coefficients = coefficients + + super().__init__(extrapolation=extrapolation, + dataset_label=dataset_label, + dataset_name=dataset_name, + axes_labels=axes_labels, + argument_names=argument_names, + coordinate_names=coordinate_names) + + @classmethod + def from_data(cls, data_matrix, sample_points, basis, + method='cholesky'): + r"""Transform raw data to a smooth functional form. + + Takes functional data in a discrete form and makes an approximates it + to the closest function that can be generated by the basis. This + function does not attempt to smooth the original data. If smoothing + is desired, it is better to use :class:`BasisSmoother`. + + The fit is made so as to reduce the sum of squared errors + [RS05-5-2-5]_: + + .. math:: + + SSE(c) = (y - \Phi c)' (y - \Phi c) + + where :math:`y` is the vector or matrix of observations, :math:`\Phi` + the matrix whose columns are the basis functions evaluated at the + sampling points and :math:`c` the coefficient vector or matrix to be + estimated. + + By deriving the first formula we obtain the closed formed of the + estimated coefficients matrix: + + .. math:: + + \hat{c} = \left( \Phi' \Phi \right)^{-1} \Phi' y + + The solution of this matrix equation is done using the cholesky + method for the resolution of a LS problem. If this method throughs a + rounding error warning you may want to use the QR factorisation that + is more numerically stable despite being more expensive to compute. + [RS05-5-2-7]_ + + Args: + data_matrix (array_like): List or matrix containing the + observations. If a matrix each row represents a single + functional datum and the columns the different observations. + sample_points (array_like): Values of the domain where the previous + data were taken. + basis: (Basis): Basis used. + method (str): Algorithm used for calculating the coefficients using + the least squares method. The values admitted are 'cholesky' + and 'qr' for Cholesky and QR factorisation methods + respectively. + + Returns: + FDataBasis: Represention of the data in a functional form as + product of coefficients by basis functions. 
+ + Examples: + >>> import numpy as np + >>> t = np.linspace(0, 1, 5) + >>> x = np.sin(2 * np.pi * t) + np.cos(2 * np.pi * t) + 2 + >>> x + array([ 3., 3., 1., 1., 3.]) + + >>> from skfda.representation.basis import FDataBasis, Fourier + >>> basis = Fourier((0, 1), n_basis=3) + >>> fd = FDataBasis.from_data(x, t, basis) + >>> fd.coefficients.round(2) + array([[ 2. , 0.71, 0.71]]) + + References: + .. [RS05-5-2-5] Ramsay, J., Silverman, B. W. (2005). How spline + smooths are computed. In *Functional Data Analysis* + (pp. 86-87). Springer. + + .. [RS05-5-2-7] Ramsay, J., Silverman, B. W. (2005). HSpline + smoothing as an augmented least squares problem. In *Functional + Data Analysis* (pp. 86-87). Springer. + + """ + from ...preprocessing.smoothing import BasisSmoother + from ..grid import FDataGrid + + # n is the samples + # m is the observations + # k is the number of elements of the basis + + # Each sample in a column (m x n) + data_matrix = np.atleast_2d(data_matrix) + + fd = FDataGrid(data_matrix=data_matrix, sample_points=sample_points) + + smoother = BasisSmoother( + basis=basis, + method=method, + return_basis=True) + + return smoother.fit_transform(fd) + + @property + def n_samples(self): + """Return number of samples.""" + return self.coefficients.shape[0] + + @property + def dim_domain(self): + """Return number of dimensions of the domain.""" + + return self.basis.dim_domain + + @property + def dim_codomain(self): + """Return number of dimensions of the image.""" + + return self.basis.dim_codomain + + @property + def coordinates(self): + r"""Return a component of the FDataBasis. + + If the functional object contains samples + :math:`f: \mathbb{R}^n \rightarrow \mathbb{R}^d`, this object allows + a component of the vector :math:`f = (f_1, ..., f_d)`. + + + Todo: + By the moment, only unidimensional objects are supported in basis + form. + + """ + + return FDataBasis._CoordinateIterator(self) + + @property + def n_basis(self): + """Return number of basis.""" + return self.basis.n_basis + + @property + def domain_range(self): + + return self.basis.domain_range + + def _evaluate(self, eval_points, *, aligned=True): + + if aligned: + + # Each row contains the values of one element of the basis + basis_values = self.basis.evaluate(eval_points) + + res = np.tensordot(self.coefficients, basis_values, axes=(1, 0)) + + return res.reshape( + (self.n_samples, len(eval_points), self.dim_codomain)) + + else: + + res_matrix = np.empty( + (self.n_samples, eval_points.shape[1], self.dim_codomain)) + + for i in range(self.n_samples): + basis_values = self.basis.evaluate(eval_points[i]) + + values = self.coefficients[i] * basis_values.T + np.sum(values.T, axis=0, out=res_matrix[i]) + + return res_matrix + + def shift(self, shifts, *, restrict_domain=False, extrapolation=None, + eval_points=None, **kwargs): + r"""Perform a shift of the curves. + + Args: + shifts (array_like or numeric): List with the the shift + corresponding for each sample or numeric with the shift to + apply to all samples. + restrict_domain (bool, optional): If True restricts the domain to + avoid evaluate points outside the domain using extrapolation. + Defaults uses extrapolation. + extrapolation (str or Extrapolation, optional): Controls the + extrapolation mode for elements outside the domain range. + By default uses the method defined in fd. See extrapolation to + more information. 
+ eval_points (array_like, optional): Set of points where + the functions are evaluated to obtain the discrete + representation of the object to operate. If an empty list is + passed it calls numpy.linspace with bounds equal to the ones + defined in fd.domain_range and the number of points the maximum + between 201 and 10 times the number of basis plus 1. + **kwargs: Keyword arguments to be passed to :meth:`from_data`. + + Returns: + :obj:`FDataBasis` with the shifted data. + """ + + if self.dim_codomain > 1 or self.dim_domain > 1: + raise ValueError + + domain_range = self.domain_range[0] + + if eval_points is None: # Grid to discretize the function + nfine = max(self.n_basis * 10 + 1, constants.N_POINTS_COARSE_MESH) + eval_points = np.linspace(*domain_range, nfine) + else: + eval_points = np.asarray(eval_points) + + if np.isscalar(shifts): # Special case, all curves with same shift + + _basis = self.basis.rescale((domain_range[0] + shifts, + domain_range[1] + shifts)) + + return FDataBasis.from_data(self.evaluate(eval_points), + eval_points + shifts, + _basis, **kwargs) + + elif len(shifts) != self.n_samples: + raise ValueError(f"shifts vector ({len(shifts)}) must have the " + f"same length than the number of samples " + f"({self.n_samples})") + + if restrict_domain: + a = domain_range[0] - min(np.min(shifts), 0) + b = domain_range[1] - max(np.max(shifts), 0) + domain = (a, b) + eval_points = eval_points[ + np.logical_and(eval_points >= a, + eval_points <= b)] + else: + domain = domain_range + + points_shifted = np.outer(np.ones(self.n_samples), + eval_points) + + points_shifted += np.atleast_2d(shifts).T + + # Matrix of shifted values + _data_matrix = self(points_shifted, + aligned=False, + extrapolation=extrapolation)[..., 0] + + _basis = self.basis.rescale(domain) + + return FDataBasis.from_data(_data_matrix, eval_points, + _basis, **kwargs) + + def derivative(self, *, order=1): + r"""Differentiate a FDataBasis object. + + + Args: + order (int, optional): Order of the derivative. Defaults to one. + """ + + if order < 0: + raise ValueError("order only takes non-negative integer values.") + + if order == 0: + return self.copy() + + basis, coefficients = self.basis._derivative_basis_and_coefs( + self.coefficients, order) + + return FDataBasis(basis, coefficients) + + def mean(self, weights=None): + """Compute the mean of all the samples in a FDataBasis object. + + Returns: + :obj:`FDataBasis`: A FDataBais object with just one sample + representing the mean of all the samples in the original + FDataBasis object. + + Examples: + + >>> from skfda.representation.basis import FDataBasis, Monomial + >>> basis = Monomial(n_basis=4) + >>> coefficients = [[0.5, 1, 2, .5], [1.5, 1, 4, .5]] + >>> FDataBasis(basis, coefficients).mean() + FDataBasis( + basis=Monomial(domain_range=[array([0, 1])], n_basis=4), + coefficients=[[ 1. 1. 3. 0.5]], + ...) + + """ + + if weights is not None: + return self.copy(coefficients=np.average(self.coefficients, + weights=weights, + axis=0 + )[np.newaxis, ...] + ) + + return self.copy(coefficients=np.mean(self.coefficients, axis=0)) + + def gmean(self, eval_points=None): + """Compute the geometric mean of the functional data object. + + A numerical approach its used. The object its transformed into its + discrete representation and then the geometric mean is computed and + then the object is taken back to the basis representation. 
+ + Args: + eval_points (array_like, optional): Set of points where the + functions are evaluated to obtain the discrete + representation of the object. If none are passed it calls + numpy.linspace with bounds equal to the ones defined in + self.domain_range and the number of points the maximum + between 501 and 10 times the number of basis. + + Returns: + FDataBasis: Geometric mean of the original object. + + """ + return self.to_grid(eval_points).gmean().to_basis(self.basis) + + def var(self, eval_points=None): + """Compute the variance of the functional data object. + + A numerical approach its used. The object its transformed into its + discrete representation and then the variance is computed and + then the object is taken back to the basis representation. + + Args: + eval_points (array_like, optional): Set of points where the + functions are evaluated to obtain the discrete + representation of the object. If none are passed it calls + numpy.linspace with bounds equal to the ones defined in + self.domain_range and the number of points the maximum + between 501 and 10 times the number of basis. + + Returns: + FDataBasis: Variance of the original object. + + """ + return self.to_grid(eval_points).var().to_basis(self.basis) + + def cov(self, eval_points=None): + """Compute the covariance of the functional data object. + + A numerical approach its used. The object its transformed into its + discrete representation and then the covariance matrix is computed. + + Args: + eval_points (array_like, optional): Set of points where the + functions are evaluated to obtain the discrete + representation of the object. If none are passed it calls + numpy.linspace with bounds equal to the ones defined in + self.domain_range and the number of points the maximum + between 501 and 10 times the number of basis. + + Returns: + numpy.darray: Matrix of covariances. + + """ + return self.to_grid(eval_points).cov() + + def to_grid(self, sample_points=None): + """Return the discrete representation of the object. + + Args: + sample_points (array_like, optional): Points per axis where the + functions are evaluated. If none are passed it calls + numpy.linspace with bounds equal to the ones defined in + self.domain_range and the number of points the maximum + between 501 and 10 times the number of basis. + + Returns: + FDataGrid: Discrete representation of the functional data + object. + + Examples: + + >>> from skfda.representation.basis import FDataBasis, Monomial + >>> fd = FDataBasis(coefficients=[[1, 1, 1], [1, 0, 1]], + ... basis=Monomial((0,5), n_basis=3)) + >>> fd.to_grid([0, 1, 2]) + FDataGrid( + array([[[1], + [3], + [7]], + [[1], + [2], + [5]]]), + sample_points=[array([0, 1, 2])], + domain_range=array([[0, 5]]), + ...) + + """ + + if sample_points is None: + npoints = max(constants.N_POINTS_FINE_MESH, + constants.BASIS_MIN_FACTOR * self.n_basis) + sample_points = [np.linspace(*r, npoints) + for r in self.domain_range] + + return grid.FDataGrid(self.evaluate(sample_points, grid=True), + sample_points=sample_points, + domain_range=self.domain_range) + + def to_basis(self, basis, eval_points=None, **kwargs): + """Return the basis representation of the object. + + Args: + basis(Basis): basis object in which the functional data are + going to be represented. + **kwargs: keyword arguments to be passed to + FDataBasis.from_data(). + + Returns: + FDataBasis: Basis representation of the funtional data + object. 
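+
+        Examples:
+            A sketch of a change of basis through the grid representation
+            (a round trip for a polynomial that both bases can represent
+            exactly):
+
+            >>> import numpy as np
+            >>> from skfda.representation.basis import FDataBasis, Monomial
+            >>> fd = FDataBasis(Monomial(n_basis=2), [[1, 1]])  # f(t) = 1 + t
+            >>> fd2 = fd.to_basis(Monomial(n_basis=3))
+            >>> np.allclose(fd2.coefficients, [[1, 1, 0]])
+            True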
+ """ + + if basis == self.basis: + return self.copy() + + return self.to_grid(eval_points=eval_points).to_basis(basis, **kwargs) + + def copy(self, *, basis=None, coefficients=None, + dataset_name=None, + argument_names=None, + coordinate_names=None, + extrapolation=None): + """FDataBasis copy""" + + if basis is None: + basis = copy.deepcopy(self.basis) + + if coefficients is None: + coefficients = self.coefficients + + if dataset_name is None: + dataset_name = self.dataset_name + + if argument_names is None: + argument_names = self.argument_names + + if coordinate_names is None: + coordinate_names = self.coordinate_names + + if extrapolation is None: + extrapolation = self.extrapolation + + return FDataBasis(basis, coefficients, + dataset_name=dataset_name, + argument_names=argument_names, + coordinate_names=coordinate_names, + extrapolation=extrapolation) + + def times(self, other): + """"Provides a numerical approximation of the multiplication between + an FDataObject to other object + + Args: + other (int, list, FDataBasis): Object to multiply with the + FDataBasis object. + + * int: Multiplies all samples with the value + * list: multiply each values with the samples respectively. + Length should match with FDataBasis samples + * FDataBasis: if there is one sample it multiplies this with + all the samples in the object. If not, it multiplies each + sample respectively. Samples should match + + Returns: + (FDataBasis): FDataBasis object containing the multiplication + + """ + if isinstance(other, FDataBasis): + + if not _same_domain(self.domain_range, other.domain_range): + raise ValueError("The functions domains are different.") + + basisobj = self.basis.basis_of_product(other.basis) + neval = max(constants.BASIS_MIN_FACTOR * + max(self.n_basis, other.n_basis) + 1, + constants.N_POINTS_COARSE_MESH) + (left, right) = self.domain_range[0] + evalarg = np.linspace(left, right, neval) + + first = self.copy(coefficients=(np.repeat(self.coefficients, + other.n_samples, axis=0) + if (self.n_samples == 1 and + other.n_samples > 1) + else self.coefficients.copy())) + second = other.copy(coefficients=(np.repeat(other.coefficients, + self.n_samples, axis=0) + if (other.n_samples == 1 and + self.n_samples > 1) + else other.coefficients.copy())) + + fdarray = first.evaluate(evalarg) * second.evaluate(evalarg) + + return FDataBasis.from_data(fdarray, evalarg, basisobj) + + if isinstance(other, int): + other = [other for _ in range(self.n_samples)] + + coefs = np.transpose(np.atleast_2d(other)) + return self.copy(coefficients=self.coefficients * coefs) + + def _to_R(self): + """Gives the code to build the object on fda package on R""" + return ("fd(coef = " + self._array_to_R(self.coefficients, True) + + ", basisobj = " + self.basis._to_R() + ")") + + def _array_to_R(self, coefficients, transpose=False): + if len(coefficients.shape) == 1: + coefficients = coefficients.reshape((1, coefficients.shape[0])) + + if len(coefficients.shape) > 2: + return NotImplementedError + + if transpose is True: + coefficients = np.transpose(coefficients) + + (rows, cols) = coefficients.shape + retstring = "matrix(c(" + for j in range(cols): + for i in range(rows): + retstring = retstring + str(coefficients[i, j]) + ", " + + return (retstring[0:len(retstring) - 2] + "), nrow = " + str(rows) + + ", ncol = " + str(cols) + ")") + + def __repr__(self): + """Representation of FDataBasis object.""" + + return (f"{self.__class__.__name__}(" + f"\nbasis={self.basis}," + f"\ncoefficients={self.coefficients}," + 
f"\ndataset_name={self.dataset_name}," + f"\nargument_names={repr(self.argument_names)}," + f"\ncoordinate_names={repr(self.coordinate_names)}," + f"\nextrapolation={self.extrapolation})").replace( + '\n', '\n ') + + def __str__(self): + """Return str(self).""" + + return (f"{self.__class__.__name__}(" + f"\n_basis={self.basis}," + f"\ncoefficients={self.coefficients})").replace('\n', '\n ') + + def __eq__(self, other): + """Equality of FDataBasis""" + # TODO check all other params + return (super().__eq__(other) + and self.basis == other.basis + and np.all(self.coefficients == other.coefficients)) + + def concatenate(self, *others, as_coordinates=False): + """Join samples from a similar FDataBasis object. + + Joins samples from another FDataBasis object if they have the same + basis. + + Args: + others (:class:`FDataBasis`): Objects to be concatenated. + as_coordinates (boolean, optional): If False concatenates as + new samples, else, concatenates the other functions as + new components of the image. Defaults to False. + + Returns: + :class:`FDataBasis`: FDataBasis object with the samples from the + original objects. + + Todo: + By the moment, only unidimensional objects are supported in basis + representation. + """ + + # TODO: Change to support multivariate functions + # in basis representation + if as_coordinates: + return NotImplemented + + for other in others: + if other.basis != self.basis: + raise ValueError("The objects should have the same basis.") + + data = [self.coefficients] + [other.coefficients for other in others] + + return self.copy(coefficients=np.concatenate(data, axis=0)) + + def compose(self, fd, *, eval_points=None, **kwargs): + """Composition of functions. + + Performs the composition of functions. The basis is discretized to + compute the composition. + + Args: + fd (:class:`FData`): FData object to make the composition. Should + have the same number of samples and image dimension equal to 1. + eval_points (array_like): Points to perform the evaluation. + kwargs: Named arguments to be passed to :func:`from_data`. 
+ """ + + grid = self.to_grid().compose(fd, eval_points=eval_points) + + if fd.dim_domain == 1: + basis = self.basis.rescale(fd.domain_range[0]) + composition = grid.to_basis(basis, **kwargs) + else: + #  Cant be convertered to basis due to the dimensions + composition = grid + + return composition + + def __getitem__(self, key): + """Return self[key].""" + + if isinstance(key, int): + return self.copy(coefficients=self.coefficients[key:key + 1]) + else: + return self.copy(coefficients=self.coefficients[key]) + + def __add__(self, other): + """Addition for FDataBasis object.""" + if isinstance(other, FDataBasis): + if self.basis != other.basis: + return NotImplemented + else: + basis, coefs = self.basis._add_same_basis(self.coefficients, + other.coefficients) + else: + try: + basis, coefs = self.basis._add_constant(self.coefficients, + other) + except TypeError: + return NotImplemented + + return self.copy(basis=basis, coefficients=coefs) + + def __radd__(self, other): + """Addition for FDataBasis object.""" + + return self.__add__(other) + + def __sub__(self, other): + """Subtraction for FDataBasis object.""" + if isinstance(other, FDataBasis): + if self.basis != other.basis: + return NotImplemented + else: + basis, coefs = self.basis._sub_same_basis(self.coefficients, + other.coefficients) + else: + try: + basis, coefs = self.basis._sub_constant(self.coefficients, + other) + except TypeError: + return NotImplemented + + return self.copy(basis=basis, coefficients=coefs) + + def __rsub__(self, other): + """Right subtraction for FDataBasis object.""" + return (self * -1).__add__(other) + + def __mul__(self, other): + """Multiplication for FDataBasis object.""" + if isinstance(other, FDataBasis): + return NotImplemented + + try: + basis, coefs = self.basis._mul_constant(self.coefficients, other) + except TypeError: + return NotImplemented + + return self.copy(basis=basis, coefficients=coefs) + + def __rmul__(self, other): + """Multiplication for FDataBasis object.""" + return self.__mul__(other) + + def __truediv__(self, other): + """Division for FDataBasis object.""" + + other = np.array(other) + + try: + other = 1 / other + except TypeError: + return NotImplemented + + return self * other + + def __rtruediv__(self, other): + """Right division for FDataBasis object.""" + + return NotImplemented + + ##################################################################### + # Pandas ExtensionArray methods + ##################################################################### + @property + def dtype(self): + """The dtype for this extension array, FDataGridDType""" + return FDataBasisDType + + @property + def nbytes(self) -> int: + """ + The number of bytes needed to store this object in memory. 
+ """ + return self.coefficients.nbytes() + + +class FDataBasisDType(pandas.api.extensions.ExtensionDtype): + """ + DType corresponding to FDataBasis in Pandas + """ + name = 'functional data (basis)' + kind = 'O' + type = FDataBasis + na_value = None + + @classmethod + def construct_from_string(cls, string): + if string == cls.name: + return cls() + else: + raise TypeError("Cannot construct a '{}' from " + "'{}'".format(cls, string)) + + @classmethod + def construct_array_type(cls): + return FDataBasis diff --git a/skfda/representation/basis/_fourier.py b/skfda/representation/basis/_fourier.py new file mode 100644 index 000000000..4da88672a --- /dev/null +++ b/skfda/representation/basis/_fourier.py @@ -0,0 +1,225 @@ +import numpy as np + +from ..._utils import _list_of_arrays +from ..._utils import _same_domain +from ._basis import Basis + + +class Fourier(Basis): + r"""Fourier basis. + + Defines a functional basis for representing functions on a fourier + series expansion of period :math:`T`. The number of basis is always odd. + If instantiated with an even number of basis, they will be incremented + automatically by one. + + .. math:: + \phi_0(t) = \frac{1}{\sqrt{2}} + + .. math:: + \phi_{2n -1}(t) = sin\left(\frac{2 \pi n}{T} t\right) + + .. math:: + \phi_{2n}(t) = cos\left(\frac{2 \pi n}{T} t\right) + + Actually this basis functions are not orthogonal but not orthonormal. To + achieve this they are divided by its norm: :math:`\sqrt{\frac{T}{2}}`. + + Attributes: + domain_range (tuple): A tuple of length 2 containing the initial and + end values of the interval over which the basis can be evaluated. + n_basis (int): Number of functions in the basis. + period (int or float): Period (:math:`T`). + + Examples: + Constructs specifying number of basis, definition interval and period. + + >>> fb = Fourier((0, np.pi), n_basis=3, period=1) + >>> fb([0, np.pi / 4, np.pi / 2, np.pi]).round(2) + array([[[ 1. ], + [ 1. ], + [ 1. ], + [ 1. ]], + [[ 0. ], + [-1.38], + [-0.61], + [ 1.1 ]], + [[ 1.41], + [ 0.31], + [-1.28], + [ 0.89]]]) + + And evaluate second derivative + + >>> deriv2 = fb.derivative(order=2) + >>> deriv2([0, np.pi / 4, np.pi / 2, np.pi]).round(2) + array([[[ 0. ], + [ 0. ], + [ 0. ], + [ 0. ]], + [[ 0. ], + [ 54.46], + [ 24.02], + [-43.37]], + [[-55.83], + [-12.32], + [ 50.4 ], + [-35.16]]]) + + """ + + def __init__(self, domain_range=None, n_basis=3, period=None): + """Construct a Fourier object. + + It forces the object to have an odd number of basis. If n_basis is + even, it is incremented by one. + + Args: + domain_range (tuple): Tuple defining the domain over which the + function is defined. + n_basis (int): Number of basis functions. + period (int or float): Period of the trigonometric functions that + define the basis. 
+ + """ + + if domain_range is not None: + domain_range = _list_of_arrays(domain_range) + + if len(domain_range) != 1: + raise ValueError("Domain range should be unidimensional.") + + domain_range = domain_range[0] + + self.period = period + # If number of basis is even, add 1 + n_basis += 1 - n_basis % 2 + super().__init__(domain_range, n_basis) + + @property + def period(self): + if self._period is None: + return self.domain_range[0][1] - self.domain_range[0][0] + else: + return self._period + + @period.setter + def period(self, value): + self._period = value + + def _evaluate(self, eval_points): + + # Input is scalar + eval_points = eval_points[..., 0] + + functions = [np.sin, np.cos] + omega = 2 * np.pi / self.period + + normalization_denominator = np.sqrt(self.period / 2) + + seq = 1 + np.arange((self.n_basis - 1) // 2) + seq_pairs = np.array([seq, seq]).T + phase_coefs = omega * seq_pairs + + # Multiply the phase coefficients elementwise + res = np.einsum('ij,k->ijk', phase_coefs, eval_points) + + # Apply odd and even functions + for i in [0, 1]: + functions[i](res[:, i, :], out=res[:, i, :]) + + res = res.reshape(-1, len(eval_points)) + res /= normalization_denominator + + constant_basis = np.full( + shape=(1, len(eval_points)), + fill_value=1 / (np.sqrt(2) * normalization_denominator)) + + res = np.concatenate((constant_basis, res)) + + return res + + def _derivative_basis_and_coefs(self, coefs, order=1): + + omega = 2 * np.pi / self.period + deriv_factor = (np.arange(1, (self.n_basis + 1) / 2) * omega) ** order + + deriv_coefs = np.zeros(coefs.shape) + + cos_sign, sin_sign = ((-1) ** int((order + 1) / 2), + (-1) ** int(order / 2)) + + if order % 2 == 0: + deriv_coefs[:, 1::2] = sin_sign * coefs[:, 1::2] * deriv_factor + deriv_coefs[:, 2::2] = cos_sign * coefs[:, 2::2] * deriv_factor + else: + deriv_coefs[:, 2::2] = sin_sign * coefs[:, 1::2] * deriv_factor + deriv_coefs[:, 1::2] = cos_sign * coefs[:, 2::2] * deriv_factor + + # normalise + return self.copy(), deriv_coefs + + def _gram_matrix(self): + + # Orthogonal in this case + if self.period == (self.domain_range[0][1] - self.domain_range[0][0]): + return np.identity(self.n_basis) + else: + return super()._gram_matrix() + + def basis_of_product(self, other): + """Multiplication of two Fourier Basis""" + if not _same_domain(self, other): + raise ValueError("Ranges are not equal.") + + if isinstance(other, Fourier) and self.period == other.period: + return Fourier(self.domain_range, self.n_basis + other.n_basis - 1, + self.period) + else: + return other.rbasis_of_product(self) + + def rbasis_of_product(self, other): + """Multiplication of a Fourier Basis with other Basis""" + return Basis.default_basis_of_product(other, self) + + def rescale(self, domain_range=None, *, rescale_period=False): + r"""Return a copy of the basis with a new domain range, with the + corresponding values rescaled to the new bounds. + + Args: + domain_range (tuple, optional): Definition of the interval + where the basis defines a space. Defaults uses the same as + the original basis. + rescale_period (bool, optional): If true the period will be + rescaled using the ratio between the lengths of the new + and old interval. Defaults to False. 
+ """ + + rescale_basis = super().rescale(domain_range) + + if rescale_period is False: + rescale_basis.period = self.period + else: + domain_rescaled = rescale_basis.domain_range[0] + domain = self.domain_range[0] + + rescale_basis.period = (self.period * + (domain_rescaled[1] - domain_rescaled[0]) / + (domain[1] - domain[0])) + + return rescale_basis + + def _to_R(self): + drange = self.domain_range[0] + return ("create.fourier.basis(rangeval = c(" + str(drange[0]) + "," + + str(drange[1]) + "), nbasis = " + str(self.n_basis) + + ", period = " + str(self.period) + ")") + + def __repr__(self): + """Representation of a Fourier basis.""" + return (f"{self.__class__.__name__}(domain_range={self.domain_range}, " + f"n_basis={self.n_basis}, period={self.period})") + + def __eq__(self, other): + """Equality of Basis""" + return super().__eq__(other) and self.period == other.period diff --git a/skfda/representation/basis/_monomial.py b/skfda/representation/basis/_monomial.py new file mode 100644 index 000000000..ce1442cdd --- /dev/null +++ b/skfda/representation/basis/_monomial.py @@ -0,0 +1,127 @@ +import scipy.linalg + +import numpy as np + +from ..._utils import _same_domain +from ._basis import Basis + + +class Monomial(Basis): + """Monomial basis. + + Basis formed by powers of the argument :math:`t`: + + .. math:: + 1, t, t^2, t^3... + + Attributes: + domain_range (tuple): a tuple of length 2 containing the initial and + end values of the interval over which the basis can be evaluated. + n_basis (int): number of functions in the basis. + + Examples: + Defines a monomial base over the interval :math:`[0, 5]` consisting + on the first 3 powers of :math:`t`: :math:`1, t, t^2`. + + >>> bs_mon = Monomial((0,5), n_basis=3) + + And evaluates all the functions in the basis in a list of descrete + values. 
+ + >>> bs_mon([0., 1., 2.]) + array([[[ 1.], + [ 1.], + [ 1.]], + [[ 0.], + [ 1.], + [ 2.]], + [[ 0.], + [ 1.], + [ 4.]]]) + + And also evaluates its derivatives + + >>> deriv = bs_mon.derivative() + >>> deriv([0, 1, 2]) + array([[[ 0.], + [ 0.], + [ 0.]], + [[ 1.], + [ 1.], + [ 1.]], + [[ 0.], + [ 2.], + [ 4.]]]) + >>> deriv2 = bs_mon.derivative(order=2) + >>> deriv2([0, 1, 2]) + array([[[ 0.], + [ 0.], + [ 0.]], + [[ 0.], + [ 0.], + [ 0.]], + [[ 2.], + [ 2.], + [ 2.]]]) + """ + + def _evaluate(self, eval_points): + + # Input is scalar + eval_points = eval_points[..., 0] + + exps = np.arange(self.n_basis) + raised = np.power.outer(eval_points, exps) + + return raised.T + + def _derivative_basis_and_coefs(self, coefs, order=1): + if order >= self.n_basis: + return (Monomial(self.domain_range, 1), + np.zeros((len(coefs), 1))) + else: + return (Monomial(self.domain_range, self.n_basis - order), + np.array([np.polyder(x[::-1], order)[::-1] + for x in coefs])) + + def _gram_matrix(self): + integral_coefs = np.polyint(np.ones(2 * self.n_basis - 1)) + + # We obtain the powers of both extremes in the domain range + power_domain_limits = np.vander( + self.domain_range[0], 2 * self.n_basis) + + # Subtract the powers (Barrow's rule) + power_domain_limits_diff = ( + power_domain_limits[1] - power_domain_limits[0]) + + # Multiply the constants that appear in the integration + evaluated_points = integral_coefs * power_domain_limits_diff + + # Order the powers, lower to higher, discarding the constant + # (it does not appear in the integral) + ordered_evaluated_points = evaluated_points[-2::-1] + + # Build the matrix + return scipy.linalg.hankel( + ordered_evaluated_points[:self.n_basis], + ordered_evaluated_points[self.n_basis - 1:]) + + def basis_of_product(self, other): + """Multiplication of a Monomial Basis with other Basis""" + if not _same_domain(self, other): + raise ValueError("Ranges are not equal.") + + if isinstance(other, Monomial): + return Monomial(self.domain_range, self.n_basis + other.n_basis) + + return other.rbasis_of_product(self) + + def rbasis_of_product(self, other): + """Multiplication of a Monomial Basis with other Basis""" + return Basis.default_basis_of_product(self, other) + + def _to_R(self): + drange = self.domain_range[0] + return "create.monomial.basis(rangeval = c(" + str(drange[0]) + "," +\ + str(drange[1]) + "), nbasis = " + str(self.n_basis) + ")" diff --git a/skfda/representation/basis/_tensor_basis.py b/skfda/representation/basis/_tensor_basis.py new file mode 100644 index 000000000..b1d96aa35 --- /dev/null +++ b/skfda/representation/basis/_tensor_basis.py @@ -0,0 +1,112 @@ +import itertools + +import numpy as np + +from ..._utils import _same_domain +from ._basis import Basis + + +class Tensor(Basis): + r"""Tensor basis. + + Basis for multivariate functions constructed as a tensor product of + :math:`\mathbb{R} \to \mathbb{R}` bases. + + + Attributes: + domain_range (tuple): a tuple of length ``dim_domain`` containing + the range of input values for each dimension. + n_basis (int): number of functions in the basis. + + Examples: + + Defines a tensor basis over the interval :math:`[0, 5] \times [0, 3]` + consisting on the functions + + .. math:: + + 1, v, u, uv, u^2, u^2v + + >>> from skfda.representation.basis import Tensor, Monomial + >>> + >>> basis_x = Monomial((0,5), n_basis=3) + >>> basis_y = Monomial((0,3), n_basis=2) + >>> + >>> basis = Tensor([basis_x, basis_y]) + + + And evaluates all the functions in the basis in a list of descrete + values. 
+
+        >>> basis([(0., 2.), (3., 0), (2., 3.)])
+        array([[[  1.],
+                [  1.],
+                [  1.]],
+               [[  2.],
+                [  0.],
+                [  3.]],
+               [[  0.],
+                [  3.],
+                [  2.]],
+               [[  0.],
+                [  0.],
+                [  6.]],
+               [[  0.],
+                [  9.],
+                [  4.]],
+               [[  0.],
+                [  0.],
+                [ 12.]]])
+
+    """
+
+    def __init__(self, basis_list):
+
+        if not all(b.dim_domain == 1 and b.dim_codomain == 1
+                   for b in basis_list):
+            raise ValueError("The basis functions must be "
+                             "univariate and scalar valued")
+
+        self.basis_list = basis_list
+
+        super().__init__(
+            domain_range=[b.domain_range[0] for b in basis_list],
+            n_basis=np.prod([b.n_basis for b in basis_list]))
+
+    @property
+    def dim_domain(self):
+        return len(self.basis_list)
+
+    def _evaluate(self, eval_points):
+
+        matrix = np.zeros((self.n_basis, len(eval_points), self.dim_codomain))
+
+        basis_evaluations = [b._evaluate(eval_points[:, i:i + 1])
+                             for i, b in enumerate(self.basis_list)]
+
+        for i, ev in enumerate(itertools.product(*basis_evaluations)):
+
+            matrix[i, :, 0] = np.prod(ev, axis=0)
+
+        return matrix
+
+    def _derivative_basis_and_coefs(self, coefs, order=1):
+
+        pass
+
+    def _gram_matrix(self):
+
+        gram_matrices = [b.gram_matrix().ravel() for b in self.basis_list]
+
+        gram = gram_matrices[0]
+
+        for g in gram_matrices[1:]:
+            gram = np.outer(gram, g).ravel()
+
+        return gram.reshape((self.n_basis, self.n_basis))
+
+    def basis_of_product(self, other):
+        pass
+
+    def rbasis_of_product(self, other):
+        pass
diff --git a/skfda/representation/basis/_vector_basis.py b/skfda/representation/basis/_vector_basis.py
new file mode 100644
index 000000000..c59c046c2
--- /dev/null
+++ b/skfda/representation/basis/_vector_basis.py
@@ -0,0 +1,151 @@
+import scipy.linalg
+
+import numpy as np
+
+from ..._utils import _same_domain
+from ._basis import Basis
+
+
+class VectorValued(Basis):
+    r"""Vector-valued basis.
+
+    Basis for vector-valued functions constructed from scalar-valued bases.
+
+    For each dimension in the codomain, it uses a scalar-valued basis,
+    multiplying each basis function by the corresponding unit vector.
+
+    Attributes:
+        domain_range (tuple): a tuple of length ``dim_domain`` containing
+            the range of input values for each dimension.
+        n_basis (int): number of functions in the basis.
+
+    Examples:
+        Defines a vector-valued basis over the interval :math:`[0, 5]`
+        consisting of the functions
+
+        .. math::
+
+            1 \vec{i}, t \vec{i}, t^2 \vec{i}, 1 \vec{j}, t \vec{j}
+
+        >>> from skfda.representation.basis import VectorValued, Monomial
+        >>>
+        >>> basis_x = Monomial((0,5), n_basis=3)
+        >>> basis_y = Monomial((0,5), n_basis=2)
+        >>>
+        >>> basis = VectorValued([basis_x, basis_y])
+
+
+        And evaluates all the functions in the basis at a list of discrete
+        values.
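+
+        The basis functions are grouped by coordinate: first every
+        function of ``basis_x`` (with a zero second coordinate), then
+        every function of ``basis_y``.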
+
+        >>> basis([0., 1., 2.])
+        array([[[ 1.,  0.],
+                [ 1.,  0.],
+                [ 1.,  0.]],
+               [[ 0.,  0.],
+                [ 1.,  0.],
+                [ 2.,  0.]],
+               [[ 0.,  0.],
+                [ 1.,  0.],
+                [ 4.,  0.]],
+               [[ 0.,  1.],
+                [ 0.,  1.],
+                [ 0.,  1.]],
+               [[ 0.,  0.],
+                [ 0.,  1.],
+                [ 0.,  2.]]])
+
+    """
+
+    def __init__(self, basis_list):
+
+        if not all(b.dim_codomain == 1 for b in basis_list):
+            raise ValueError("The basis functions must be "
+                             "scalar valued")
+
+        if any(b.dim_domain != basis_list[0].dim_domain or
+               not _same_domain(b, basis_list[0])
+               for b in basis_list):
+            raise ValueError("The bases must all have the same domain "
+                             "dimension and range")
+
+        self.basis_list = basis_list
+
+        super().__init__(
+            domain_range=basis_list[0].domain_range,
+            n_basis=sum(b.n_basis for b in basis_list))
+
+    @property
+    def dim_domain(self):
+        return self.basis_list[0].dim_domain
+
+    @property
+    def dim_codomain(self):
+        return len(self.basis_list)
+
+    def _evaluate(self, eval_points):
+        matrix = np.zeros((self.n_basis, len(eval_points), self.dim_codomain))
+
+        n_basis_evaluated = 0
+
+        basis_evaluations = [b._evaluate(eval_points) for b in self.basis_list]
+
+        for i, ev in enumerate(basis_evaluations):
+
+            matrix[n_basis_evaluated:n_basis_evaluated + len(ev), :, i] = ev
+            n_basis_evaluated += len(ev)
+
+        return matrix
+
+    def _derivative_basis_and_coefs(self, coefs, order=1):
+
+        n_basis_list = [b.n_basis for b in self.basis_list]
+        indexes = np.cumsum(n_basis_list)
+
+        coefs_per_basis = np.hsplit(coefs, indexes[:-1])
+
+        basis_and_coefs = [b._derivative_basis_and_coefs(
+            c, order=order) for b, c in zip(self.basis_list, coefs_per_basis)]
+
+        new_basis_list, new_coefs_list = zip(*basis_and_coefs)
+
+        new_basis = VectorValued(new_basis_list)
+        new_coefs = np.hstack(new_coefs_list)
+
+        return new_basis, new_coefs
+
+    def _gram_matrix(self):
+
+        gram_matrices = [b.gram_matrix() for b in self.basis_list]
+
+        return scipy.linalg.block_diag(*gram_matrices)
+
+    def _coordinate_nonfull(self, fdatabasis, key):
+
+        r_key = key
+        if isinstance(r_key, int):
+            r_key = range(r_key, r_key + 1)
+        s_key = slice(r_key.start, r_key.stop, r_key.step)
+
+        coef_indexes = np.concatenate([
+            np.ones(b.n_basis, dtype=np.bool_) if i in r_key
+            else np.zeros(b.n_basis, dtype=np.bool_)
+            for i, b in enumerate(self.basis_list)])
+
+        new_basis_list = self.basis_list[key]
+
+        basis = (new_basis_list if isinstance(new_basis_list, Basis)
+                 else VectorValued(new_basis_list))
+
+        coefs = fdatabasis.coefficients[:, coef_indexes]
+
+        coordinate_names = np.array(fdatabasis.coordinate_names)[s_key]
+
+        return fdatabasis.copy(basis=basis, coefficients=coefs,
+                               coordinate_names=coordinate_names)
+
+    def basis_of_product(self, other):
+        pass
+
+    def rbasis_of_product(self, other):
+        pass
diff --git a/skfda/representation/evaluator.py b/skfda/representation/evaluator.py
index 896a17147..7cdd3a41e 100644
--- a/skfda/representation/evaluator.py
+++ b/skfda/representation/evaluator.py
@@ -4,44 +4,12 @@
 from abc import ABC, abstractmethod
 
 
-class EvaluatorConstructor(ABC):
-    """Constructor of an evaluator.
-
-    A constructor builds an Evaluator from a :class:`FData`, which is
-    used to the evaluation in the functional data object.
-
-    The evaluator constructor should have a method :func:`evaluator` which
-    receives an fdata object and returns an :class:`Evaluator`.
-
-    """
-
-    @abstractmethod
-    def evaluator(self, fdata):
-        """Construct an evaluator.
-
-        Builds the evaluator from an functional data object.
-
-        Args:
-            fdata (:class:`FData`): Functional object where the evaluator will
-                be used.
-
-        Returns:
-            (:class:`Evaluator`): Evaluator of the fdata.
-
-        """
-        pass
-
-    def __eq__(self, other):
-        """Equality operator between evaluators constructors"""
-        return type(self) == type(other)
-
-
 class Evaluator(ABC):
     """Structure of an evaluator.
 
     An evaluator defines how to evaluate points of a functional object, it
     can be used as extrapolator to evaluate points outside the domain range or
-    as interpolator in a :class:`FDataGrid`. The corresponding examples of
+    as the interpolation of a :class:`FDataGrid`. The corresponding examples of
     Interpolation and Extrapolation shows the basic usage of this class.
 
     The evaluator is called internally by :func:`evaluate`.
@@ -52,19 +20,18 @@ class Evaluator(ABC):
     """
 
     @abstractmethod
-    def evaluate(self, eval_points, *, derivative=0):
+    def evaluate(self, fdata, eval_points, *, aligned=True):
         """Evaluation method.
 
-        Evaluates the samples at the same evaluation points. The evaluation
-        call will receive a 2-d array with the evaluation points.
-        This method is called internally by :meth:`evaluate` when the
-        argument ``aligned_evaluation`` is True.
+        Evaluates the samples at the evaluation points. The evaluation
+        call will receive a 2-d array with the evaluation points, or
+        a 3-d array with the evaluation points per sample if ``aligned``
+        is ``False``.
 
         Args:
             eval_points (numpy.ndarray): Numpy array with shape
                 ``(number_eval_points, dim_domain)`` with the
                 evaluation points.
-            derivative (int, optional): Order of the derivative. Defaults to 0.
 
         Returns:
             (numpy.darray): Numpy 3d array with shape
@@ -76,98 +43,25 @@ def evaluate(self, eval_points, *, derivative=0):
         """
         pass
 
-    @abstractmethod
-    def evaluate_composed(self, eval_points, *, derivative=0):
-        """Evaluation method.
-
-        Evaluates the samples at different evaluation points. The evaluation
-        call will receive a 3-d array with the evaluation points for each
-        sample. This method is called internally by :func:`evaluate` when
-        the argument ``aligned_evaluation`` is False.
-
-        Args:
-            eval_points (numpy.ndarray): Numpy array with shape
-                ``(n_samples, number_eval_points, dim_domain)`` with the
-                evaluation points for each sample.
-            derivative (int, optional): Order of the derivative. Defaults to 0.
-
-        Returns:
-            (numpy.darray): Numpy 3d array with shape
-            ``(n_samples, number_eval_points, dim_codomain)`` with the
-            result of the evaluation. The entry ``(i,j,k)`` will contain
-            the value k-th image dimension of the i-th sample, at the
-            j-th evaluation point.
+    def __repr__(self):
+        return f"{type(self)}()"
 
-        """
-        pass
+    def __eq__(self, other):
+        """Equality operator between evaluators."""
+        return type(self) == type(other)
 
 
 class GenericEvaluator(Evaluator):
     """Generic Evaluator.
 
-    Generic evaluator that recibes two functions to construct the evaluator.
-    The functions will recieve an :class:`FData` as first argument, a numpy
-    array with the eval_points and a named argument derivative.
+    Generic evaluator that receives a function used to construct the
+    evaluator. The function will receive an :class:`FData` as first
+    argument, a numpy array with the eval_points and the ``aligned``
+    parameter.
 
     """
 
-    def __init__(self, fdata, evaluate_func, evaluate_composed_func=None):
-        self.fdata = fdata
-        self.evaluate_func = evaluate_func
-
-        if evaluate_composed_func is None:
-            self.evaluate_composed_func = evaluate_func
-        else:
-            self.evaluate_composed_func = evaluate_composed_func
-
-    def evaluate(self, eval_points, *, derivative=0):
-        """Evaluation method.
-
-        Evaluates the samples at the same evaluation points.
The evaluation - call will receive a 2-d array with the evaluation points. - - This method is called internally by :meth:`evaluate` when the argument - `aligned_evaluation` is True. - - Args: - eval_points (numpy.ndarray): Numpy array with shape - `(len(eval_points), dim_domain)` with the evaluation points. - Each entry represents the coordinate of a point. - derivative (int, optional): Order of the derivative. Defaults to 0. + def __init__(self, evaluate_function): + self.evaluate_function = evaluate_function - Returns: - (numpy.darray): Numpy 3-d array with shape `(n_samples, - len(eval_points), dim_codomain)` with the result of the - evaluation. The entry (i,j,k) will contain the value k-th - image dimension of the i-th sample, at the j-th evaluation - point. - - """ - return self.evaluate_func(self.fdata, eval_points, - derivative=derivative) - - def evaluate_composed(self, eval_points, *, derivative=0): - """Evaluation method. - - Evaluates the samples at different evaluation points. The evaluation - call will receive a 3-d array with the evaluation points for each - sample. - - This method is called internally by :meth:`evaluate` when the argument - `aligned_evaluation` is False. - - Args: - eval_points (numpy.ndarray): Numpy array with shape - `(n_samples, number_eval_points, dim_domain)` with the - evaluation points for each sample. - derivative (int, optional): Order of the derivative. Defaults to 0. - - Returns: - (numpy.darray): Numpy 3d array with shape `(n_samples, - number_eval_points, dim_codomain)` with the result of the - evaluation. The entry (i,j,k) will contain the value k-th image - dimension of the i-th sample, at the j-th evaluation point. - - """ - return self.evaluate_composed_func(self.fdata, eval_points, - derivative=derivative) + def evaluate(self, fdata, eval_points, *, aligned=True): + return self.evaluate_function(fdata, eval_points, aligned=aligned) diff --git a/skfda/representation/extrapolation.py b/skfda/representation/extrapolation.py index 60baddaca..80aaec35a 100644 --- a/skfda/representation/extrapolation.py +++ b/skfda/representation/extrapolation.py @@ -6,10 +6,10 @@ import numpy as np -from .evaluator import EvaluatorConstructor, Evaluator, GenericEvaluator +from .evaluator import Evaluator -class PeriodicExtrapolation(EvaluatorConstructor): +class PeriodicExtrapolation(Evaluator): """Extends the domain range periodically. Examples: @@ -23,59 +23,40 @@ class PeriodicExtrapolation(EvaluatorConstructor): >>> fd.extrapolation = PeriodicExtrapolation() >>> fd([-.5, 0, 1.5]).round(3) - array([[-0.724, 0.976, -0.724], - [-1.086, 0.759, -1.086]]) + array([[[-0.724], + [ 0.976], + [-0.724]], + [[-1.086], + [ 0.759], + [-1.086]]]) This extrapolator is equivalent to the string `"periodic"` >>> fd.extrapolation = 'periodic' >>> fd([-.5, 0, 1.5]).round(3) - array([[-0.724, 0.976, -0.724], - [-1.086, 0.759, -1.086]]) + array([[[-0.724], + [ 0.976], + [-0.724]], + [[-1.086], + [ 0.759], + [-1.086]]]) """ - def evaluator(self, fdata): - """Returns the evaluator used by :class:`FData`. + def evaluate(self, fdata, eval_points, *, aligned=True): - Returns: - (:class:`Evaluator`): Evaluator of the periodic extrapolation. 
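+        # Wrap each coordinate into its domain: for a domain [a, b],
+        # a point t is mapped to a + ((t - a) mod (b - a)). With the
+        # domain [0, 1] of the example above, both t = -0.5 and t = 1.5
+        # wrap to 0.5, which is why their values coincide.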
+ domain_range = np.asarray(fdata.domain_range) - """ - return GenericEvaluator(fdata, _periodic_evaluation) + # Extends the domain periodically in each dimension + eval_points -= domain_range[:, 0] + eval_points %= domain_range[:, 1] - domain_range[:, 0] + eval_points += domain_range[:, 0] + res = fdata(eval_points, aligned=aligned) -def _periodic_evaluation(fdata, eval_points, *, derivative=0): - """Evaluate points outside the domain range. - - Args: - fdata (:class:´FData´): Object where the evaluation is taken place. - eval_points (:class: numpy.ndarray): Numpy array with the evalation - points outside the domain range. The shape of the array may be - `n_eval_points` x `dim_codomain` or `n_samples` x `n_eval_points` - x `dim_codomain`. - derivate (numeric, optional): Order of derivative to be evaluated. - - Returns: - (numpy.ndarray): numpy array with the evaluation of the points in - a matrix with shape `n_samples` x `n_eval_points`x `dim_codomain`. - """ - - domain_range = np.asarray(fdata.domain_range) - - # Extends the domain periodically in each dimension - eval_points -= domain_range[:, 0] - eval_points %= domain_range[:, 1] - domain_range[:, 0] - eval_points += domain_range[:, 0] - - if eval_points.ndim == 3: - res = fdata._evaluate_composed(eval_points, derivative=derivative) - else: - res = fdata._evaluate(eval_points, derivative=derivative) - - return res + return res -class BoundaryExtrapolation(EvaluatorConstructor): +class BoundaryExtrapolation(Evaluator): """Extends the domain range using the boundary values. Examples: @@ -89,61 +70,40 @@ class BoundaryExtrapolation(EvaluatorConstructor): >>> fd.extrapolation = BoundaryExtrapolation() >>> fd([-.5, 0, 1.5]).round(3) - array([[ 0.976, 0.976, 0.797], - [ 0.759, 0.759, 1.125]]) + array([[[ 0.976], + [ 0.976], + [ 0.797]], + [[ 0.759], + [ 0.759], + [ 1.125]]]) This extrapolator is equivalent to the string `"bounds"`. >>> fd.extrapolation = 'bounds' >>> fd([-.5, 0, 1.5]).round(3) - array([[ 0.976, 0.976, 0.797], - [ 0.759, 0.759, 1.125]]) - """ - - def evaluator(self, fdata): - """Returns the evaluator used by :class:`FData`. - - Returns: - (:class:`Evaluator`): Evaluator of the periodic boundary. - - """ - return GenericEvaluator(fdata, _boundary_evaluation) - - -def _boundary_evaluation(fdata, eval_points, *, derivative=0): - """Evaluate points outside the domain range. - - Args: - fdata (:class:´FData´): Object where the evaluation is taken place. - eval_points (:class: numpy.ndarray): Numpy array with the evalation - points outside the domain range. The shape of the array may be - `n_eval_points` x `dim_codomain` or `n_samples` x `n_eval_points` - x `dim_codomain`. - derivate (numeric, optional): Order of derivative to be evaluated. - - Returns: - (numpy.ndarray): numpy array with the evaluation of the points in - a matrix with shape `n_samples` x `n_eval_points`x `dim_codomain`. 
+ array([[[ 0.976], + [ 0.976], + [ 0.797]], + [[ 0.759], + [ 0.759], + [ 1.125]]]) """ - domain_range = fdata.domain_range + def evaluate(self, fdata, eval_points, *, aligned=True): - for i in range(fdata.dim_domain): - a, b = domain_range[i] - eval_points[eval_points[..., i] < a, i] = a - eval_points[eval_points[..., i] > b, i] = b + domain_range = fdata.domain_range - if eval_points.ndim == 3: + for i in range(fdata.dim_domain): + a, b = domain_range[i] + eval_points[eval_points[..., i] < a, i] = a + eval_points[eval_points[..., i] > b, i] = b - res = fdata._evaluate_composed(eval_points, derivative=derivative) - else: - - res = fdata._evaluate(eval_points, derivative=derivative) + res = fdata(eval_points, aligned=aligned) - return res + return res -class ExceptionExtrapolation(EvaluatorConstructor): +class ExceptionExtrapolation(Evaluator): """Raise and exception. Examples: @@ -173,38 +133,15 @@ class ExceptionExtrapolation(EvaluatorConstructor): """ - def evaluator(self, fdata): - """Returns the evaluator used by :class:`FData`. - - Returns: - (:class:`Evaluator`): Evaluator of the periodic extrapolation. - - """ - return GenericEvaluator(fdata, _exception_evaluation) - + def evaluate(self, fdata, eval_points, *, aligned=True): -def _exception_evaluation(fdata, eval_points, *, derivative=0): - """Evaluate points outside the domain range. + n_points = eval_points.shape[-2] - Args: - fdata (:class:´FData´): Object where the evaluation is taken place. - eval_points (:class: numpy.ndarray): Numpy array with the evalation - points outside the domain range. The shape of the array may be - `n_eval_points` x `dim_codomain` or `n_samples` x `n_eval_points` - x `dim_codomain`. - derivate (numeric, optional): Order of derivative to be evaluated. - - Raises: - ValueError: when the extrapolation method is called. - """ - - n_points = eval_points.shape[-2] + raise ValueError(f"Attempt to evaluate {n_points} points outside the " + f"domain range.") - raise ValueError(f"Attempt to evaluate {n_points} points outside the " - f"domain range.") - -class FillExtrapolation(EvaluatorConstructor): +class FillExtrapolation(Evaluator): """Values outside the domain range will be filled with a fixed value. Examples: @@ -217,99 +154,50 @@ class FillExtrapolation(EvaluatorConstructor): >>> fd.extrapolation = FillExtrapolation(0) >>> fd([-.5, 0, 1.5]).round(3) - array([[ 0. , 0.976, 0. ], - [ 0. , 0.759, 0. ]]) + array([[[ 0. ], + [ 0.976], + [ 0. ]], + [[ 0. ], + [ 0.759], + [ 0. ]]]) The previous extrapolator is equivalent to the string `"zeros"`. In the same way FillExtrapolation(np.nan) is equivalent to `"nan"`. >>> fd.extrapolation = "nan" >>> fd([-.5, 0, 1.5]).round(3) - array([[ nan, 0.976, nan], - [ nan, 0.759, nan]]) + array([[[ nan], + [ 0.976], + [ nan]], + [[ nan], + [ 0.759], + [ nan]]]) """ def __init__(self, fill_value): - """Returns the evaluator used by :class:`FData`. + self.fill_value = fill_value - Returns: - (:class:`Evaluator`): Evaluator of the periodic extrapolation. 
+    def _fill(self, fdata, eval_points):
+        shape = (fdata.n_samples, eval_points.shape[-2],
+                 fdata.dim_codomain)
+        return np.full(shape, self.fill_value)
 
-        """
-        self._fill_value = fill_value
+    def evaluate(self, fdata, eval_points, *, aligned=True):
 
-        super().__init__()
+        return self._fill(fdata, eval_points)
 
-    @property
-    def fill_value(self):
-        """Returns the fill value of the extrapolation"""
-        return self._fill_value
+    def __repr__(self):
+        """repr method of FillExtrapolation"""
+        return (f"{type(self).__name__}("
+                f"fill_value={self.fill_value})")
 
     def __eq__(self, other):
-        """Equality operator bethween evaluator constructors"""
+        """Equality operator between FillExtrapolation instances."""
         return (super().__eq__(other) and
-                (self.fill_value == other.fill_value
-                 or self.fill_value is other.fill_value))
-
-    def evaluator(self, fdata):
-
-        return FillExtrapolationEvaluator(fdata, self.fill_value)
-
-
-class FillExtrapolationEvaluator(Evaluator):
-
-    def __init__(self, fdata, fill_value):
-        self.fill_value = fill_value
-        self.fdata = fdata
-
-    def _fill(self, eval_points):
-        shape = (self.fdata.n_samples, eval_points.shape[-2],
-                 self.fdata.dim_codomain)
-        return np.full(shape, self.fill_value)
-
-    def evaluate(self, eval_points, *, derivative=0):
-        """
-        Evaluate points outside the domain range.
-
-        Args:
-            fdata (:class:´FData´): Object where the evaluation is taken place.
-            eval_points (:class: numpy.ndarray): Numpy array with the evalation
-                points outside the domain range. The shape of the array may be
-                `n_eval_points` x `dim_codomain` or `n_samples` x `n_eval_points`
-                x `dim_codomain`.
-            derivate (numeric, optional): Order of derivative to be evaluated.
-
-        Returns:
-            (numpy.ndarray): numpy array with the evaluation of the points in
-            a matrix with shape `n_samples` x `n_eval_points`x `dim_codomain`.
-
-        """
-        return self._fill(eval_points)
-
-    def evaluate_composed(self, eval_points, *, derivative=0):
-        """Evaluation method.
-
-        Evaluates the samples at different evaluation points. The evaluation
-        call will receive a 3-d array with the evaluation points for
-        each sample.
-
-        This method is called internally by :meth:`evaluate` when the argument
-        `aligned_evaluation` is False.
-
-        Args:
-            eval_points (numpy.ndarray): Numpy array with shape
-                `(n_samples, number_eval_points, dim_domain)` with the
-                evaluation points for each sample.
-            derivative (int, optional): Order of the derivative. Defaults to 0.
-
-        Returns:
-            (numpy.darray): Numpy 3d array with shape `(n_samples,
-            number_eval_points, dim_codomain)` with the result of the
-            evaluation. The entry (i,j,k) will contain the value k-th image
-            dimension of the i-th sample, at the j-th evaluation point.
-
-        """
-        return self._fill(eval_points)
+                self.fill_value == other.fill_value
+                # NaNs compare unequal. Should we distinguish between
+                # different NaN types and payloads?
+                or np.isnan(self.fill_value) and np.isnan(other.fill_value))
 
 
 def _parse_extrapolation(extrapolation):
diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py
index 7adfdbb69..7d67d965e 100644
--- a/skfda/representation/grid.py
+++ b/skfda/representation/grid.py
@@ -8,16 +8,17 @@
 import copy
 import numbers
 
+import findiff
 import pandas.api.extensions
 import scipy.stats.mstats
 
 import numpy as np
 
-from . import FData
 from .
import basis as fdbasis from .._utils import _list_of_arrays, constants -from .interpolation import SplineInterpolator +from ._functional_data import FData +from .interpolation import SplineInterpolation __author__ = "Miguel Carbajo Berrocal" @@ -39,32 +40,36 @@ class FDataGrid(FData): domain_range (numpy.ndarray): 2 dimension matrix where each row contains the bounds of the interval in which the functional data is considered to exist for each one of the axies. - dataset_label (str): name of the dataset. - axes_labels (list): list containing the labels of the different - axis. + dataset_name (str): name of the dataset. + argument_names (tuple): tuple containing the names of the different + arguments. + coordinate_names (tuple): tuple containing the names of the different + coordinate functions. extrapolation (str or Extrapolation): defines the default type of extrapolation. By default None, which does not apply any type of extrapolation. See `Extrapolation` for detailled information of the types of extrapolation. - interpolator (GridInterpolator): Defines the type of interpolation + interpolation (GridInterpolation): Defines the type of interpolation applied in `evaluate`. - keepdims (bool): Examples: Representation of a functional data object with 2 samples - representing a function :math:`f : \mathbb{R}\longmapsto\mathbb{R}`. + representing a function :math:`f : \mathbb{R}\longmapsto\mathbb{R}`, + with 3 discretization points. - >>> data_matrix = [[1, 2], [2, 3]] - >>> sample_points = [2, 4] + >>> data_matrix = [[1, 2, 3], [4, 5, 6]] + >>> sample_points = [2, 4, 5] >>> FDataGrid(data_matrix, sample_points) FDataGrid( array([[[1], - [2]], + [2], + [3]], - [[2], - [3]]]), - sample_points=[array([2, 4])], - domain_range=array([[2, 4]]), + [[4], + [5], + [6]]]), + sample_points=[array([2, 4, 5])], + domain_range=array([[2, 5]]), ...) The number of columns of data_matrix have to be the length of @@ -112,20 +117,30 @@ def __iter__(self): def __getitem__(self, key): """Get a specific coordinate.""" - axes_labels = self._fdatagrid._get_labels_coordinates(key) + + s_key = key + if isinstance(s_key, int): + s_key = slice(s_key, s_key + 1) + + coordinate_names = np.array( + self._fdatagrid.coordinate_names)[s_key] return self._fdatagrid.copy( data_matrix=self._fdatagrid.data_matrix[..., key], - axes_labels=axes_labels) + coordinate_names=coordinate_names) def __len__(self): """Return the number of coordinates.""" return self._fdatagrid.dim_codomain def __init__(self, data_matrix, sample_points=None, - domain_range=None, dataset_label=None, + domain_range=None, + dataset_label=None, + dataset_name=None, + argument_names=None, + coordinate_names=None, axes_labels=None, extrapolation=None, - interpolator=None, keepdims=False): + interpolation=None): """Construct a FDataGrid object. Args: @@ -196,11 +211,14 @@ def __init__(self, data_matrix, sample_points=None, if self.data_matrix.ndim == 1 + self.dim_domain: self.data_matrix = self.data_matrix[..., np.newaxis] - self.interpolator = interpolator + self.interpolation = interpolation - super().__init__(extrapolation, dataset_label, axes_labels, keepdims) - - return + super().__init__(extrapolation=extrapolation, + dataset_label=dataset_label, + dataset_name=dataset_name, + axes_labels=axes_labels, + argument_names=argument_names, + coordinate_names=coordinate_names) def round(self, decimals=0): """Evenly round to the given number of decimals. 
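
A small illustration of the renamed metadata parameters (a sketch, not part of the diff; it assumes the ``coordinates`` accessor is the one whose ``__getitem__`` is patched above):

    from skfda import FDataGrid

    fd = FDataGrid([[1, 2, 3], [4, 5, 6]], sample_points=[2, 4, 5],
                   dataset_name="example",
                   argument_names=("t",),
                   coordinate_names=("x",))
    # Selecting a coordinate keeps the matching coordinate_names entry.
    fd.coordinates[0]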
@@ -344,86 +362,29 @@ def domain_range(self):
         return self._domain_range
 
     @property
-    def interpolator(self):
+    def interpolation(self):
         """Defines the type of interpolation applied in `evaluate`."""
-        return self._interpolator
-
-    @interpolator.setter
-    def interpolator(self, new_interpolator):
-        """Sets the interpolator of the FDataGrid."""
-        if new_interpolator is None:
-            new_interpolator = SplineInterpolator()
+        return self._interpolation
 
-        self._interpolator = new_interpolator
-        self._interpolator_evaluator = None
+    @interpolation.setter
+    def interpolation(self, new_interpolation):
+        """Sets the interpolation of the FDataGrid."""
+        if new_interpolation is None:
+            new_interpolation = SplineInterpolation()
 
-    @property
-    def _evaluator(self):
-        """Return the evaluator constructed by the interpolator."""
-
-        if self._interpolator_evaluator is None:
-            self._interpolator_evaluator = self._interpolator.evaluator(self)
-
-        return self._interpolator_evaluator
-
-    def _evaluate(self, eval_points, *, derivative=0):
-        """"Evaluate the object or its derivatives at a list of values.
-
-        Args:
-            eval_points (array_like): List of points where the functions are
-                evaluated. If a matrix of shape nsample x eval_points is given
-                each sample is evaluated at the values in the corresponding row
-                in eval_points.
-            derivative (int, optional): Order of the derivative. Defaults to 0.
+        self._interpolation = new_interpolation
 
-        Returns:
-            (numpy.darray): Matrix whose rows are the values of the each
-            function at the values specified in eval_points.
-
-        """
-
-        return self._evaluator.evaluate(eval_points, derivative=derivative)
-
-    def _evaluate_composed(self, eval_points, *, derivative=0):
-        """"Evaluate the object or its derivatives at a list of values.
-
-        Args:
-            eval_points (array_like): List of points where the functions are
-                evaluated. If a matrix of shape nsample x eval_points is given
-                each sample is evaluated at the values in the corresponding row
-                in eval_points.
-            derivative (int, optional): Order of the derivative. Defaults to 0.
-
-        Returns:
-            (numpy.darray): Matrix whose rows are the values of the each
-            function at the values specified in eval_points.
-
-        """
+    def _evaluate(self, eval_points, *, aligned=True):
 
-        return self._evaluator.evaluate_composed(eval_points,
-                                                 derivative=derivative)
+        return self.interpolation.evaluate(self, eval_points,
+                                           aligned=aligned)
 
-    def derivative(self, order=1):
+    def derivative(self, *, order=1):
         r"""Differentiate a FDataGrid object.
 
-        It is calculated using lagged differences. If we call :math:`D` the
-        data_matrix, :math:`D^1` the derivative of order 1 and :math:`T` the
-        vector contaning the points of discretisation; :math:`D^1` is
-        calculated as it follows:
-
-        .. math::
-
-            D^{1}_{ij} = \begin{cases}
-            \frac{D_{i1} - D_{i2}}{ T_{1} - T_{2}} & \mbox{if } j = 1 \\
-            \frac{D_{i(m-1)} - D_{im}}{ T_{m-1} - T_m} & \mbox{if }
-            j = m \\
-            \frac{D_{i(j-1)} - D_{i(j+1)}}{ T_{j-1} - T_{j+1}} & \mbox{if }
-            1 < j < m
-            \end{cases}
-
-        Where m is the number of columns of the matrix :math:`D`.
-
-        Order > 1 derivatives are calculated by using derivative recursively.
+        It is calculated using central finite differences where possible.
+        At the two boundary points, forward and backward finite differences
+        of accuracy 2 are used (a worked check follows below).
 
         Args:
             order (int, optional): Order of the derivative. Defaults to one.
@@ -434,11 +395,11 @@ >>> fdata = FDataGrid([1,2,4,5,8], range(5))
         >>> fdata.derivative()
         FDataGrid(
-            array([[[ 1. 
], + array([[[ 0.5], [ 1.5], [ 1.5], [ 2. ], - [ 3. ]]]), + [ 4. ]]]), sample_points=[array([0, 1, 2, 3, 4])], domain_range=array([[0, 4]]), ...) @@ -446,57 +407,40 @@ def derivative(self, order=1): Second order derivative >>> fdata = FDataGrid([1,2,4,5,8], range(5)) - >>> fdata.derivative(2) + >>> fdata.derivative(order=2) FDataGrid( - array([[[ 0.5 ], - [ 0.25], - [ 0.25], - [ 0.75], - [ 1. ]]]), + array([[[ 3.], + [ 1.], + [-1.], + [ 2.], + [ 5.]]]), sample_points=[array([0, 1, 2, 3, 4])], domain_range=array([[0, 4]]), ...) """ - if self.dim_domain != 1: - raise NotImplementedError( - "This method only works when the dimension " - "of the domain of the FDatagrid object is " - "one.") - if order < 1: - raise ValueError("The order of a derivative has to be greater " - "or equal than 1.") - if self.dim_domain > 1 or self.dim_codomain > 1: - raise NotImplementedError("Not implemented for 2 or more" - " dimensional data.") - if np.isnan(self.data_matrix).any(): - raise ValueError("The FDataGrid object cannot contain nan " - "elements.") - data_matrix = self.data_matrix[..., 0] - sample_points = self.sample_points[0] - for _ in range(order): - mdata = [] - for i in range(self.n_samples): - arr = (np.diff(data_matrix[i]) / - (sample_points[1:] - - sample_points[:-1])) - arr = np.append(arr, arr[-1]) - arr[1:-1] += arr[:-2] - arr[1:-1] /= 2 - mdata.append(arr) - data_matrix = np.array(mdata) - - if self.dataset_label: - dataset_label = "{} - {} derivative".format(self.dataset_label, - order) + order_list = np.atleast_1d(order) + if order_list.ndim != 1 or len(order_list) != self.dim_domain: + raise ValueError("The order for each partial should be specified.") + + operator = findiff.FinDiff(*[(1 + i, p, o) + for i, (p, o) in enumerate( + zip(self.sample_points, order_list))]) + data_matrix = operator(self.data_matrix.astype(float)) + + if self.dataset_name: + dataset_name = "{} - {} derivative".format(self.dataset_name, + order) else: - dataset_label = None + dataset_name = None + + fdatagrid = self.copy(data_matrix=data_matrix, + dataset_name=dataset_name) - return self.copy(data_matrix=data_matrix, sample_points=sample_points, - dataset_label=dataset_label) + return fdatagrid def __check_same_dimensions(self, other): - if self.data_matrix.shape[1] != other.data_matrix.shape[1]: + if self.data_matrix.shape[1:-1] != other.data_matrix.shape[1:-1]: raise ValueError("Error in columns dimensions") if not np.array_equal(self.sample_points, other.sample_points): raise ValueError("Sample points for both objects must be equal") @@ -504,7 +448,9 @@ def __check_same_dimensions(self, other): def mean(self, weights=None): """Compute the mean of all the samples. - weights (array-like, optional): List of weights. + Args: + weights (array-like, optional): List of weights. + Returns: FDataGrid : A FDataGrid object with just one sample representing the mean of all the samples in the original object. 
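
To see where the new findiff-based doctest values come from (a worked check, not part of the diff): for the data [1, 2, 4, 5, 8] on the grid 0, 1, 2, 3, 4, the interior points use central differences, e.g. (4 - 1)/2 = 1.5 at t=1, while the endpoints use one-sided differences of accuracy 2: (-3·1 + 4·2 - 4)/2 = 0.5 at t=0 and (3·8 - 4·5 + 4)/2 = 4 at t=4, matching the new expected output [0.5, 1.5, 1.5, 2, 4].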
@@ -540,18 +486,23 @@
 
         """
-        if self.dataset_label is not None:
-            dataset_label = self.dataset_label + ' - covariance'
+        if self.dataset_name is not None:
+            dataset_name = self.dataset_name + ' - covariance'
         else:
-            dataset_label = None
+            dataset_name = None
+
+        if self.dim_domain != 1 or self.dim_codomain != 1:
+            raise NotImplementedError("Covariance only implemented "
+                                      "for univariate functions")
 
-        return self.copy(data_matrix=np.cov(self.data_matrix,
+        return self.copy(data_matrix=np.cov(self.data_matrix[..., 0],
                                             rowvar=False)[np.newaxis, ...],
                          sample_points=[self.sample_points[0],
                                         self.sample_points[0]],
                          domain_range=[self.domain_range[0],
                                        self.domain_range[0]],
-                         dataset_label=dataset_label)
+                         dataset_name=dataset_name,
+                         argument_names=self.argument_names * 2)
 
     def gmean(self):
         """Compute the geometric mean of all samples in the FDataGrid object.
@@ -570,6 +521,9 @@ def __eq__(self, other):
         if not isinstance(other, FDataGrid):
             return NotImplemented
 
+        if not super().__eq__(other):
+            return False
+
         if not np.array_equal(self.data_matrix, other.data_matrix):
             return False
 
@@ -583,41 +537,42 @@
         if not np.array_equal(self.domain_range, other.domain_range):
             return False
 
-        if self.dataset_label != other.dataset_label:
-            return False
-
-        if self.axes_labels is None or other.axes_labels is None:
-            # Both must be None
-            if self.axes_labels is not other.axes_labels:
-                return False
-        else:
-            if len(self.axes_labels) != len(other.axes_labels):
-                return False
-
-            for a, b in zip(self.axes_labels, other.axes_labels):
-                if a != b:
-                    return False
-
-        if self.extrapolation != other.extrapolation:
-            return False
-
-        if self.interpolator != other.interpolator:
+        if self.interpolation != other.interpolation:
             return False
 
         return True
 
+    def _get_op_matrix(self, other):
+        if isinstance(other, numbers.Number):
+            return other
+        elif isinstance(other, np.ndarray):
+            # Product by number or matrix with equal dimensions, or
+            # matrix with same shape but only one sample
+            if(other.shape == () or other.shape == (1,)
+               or other.shape == self.data_matrix.shape
+               or other.shape == self.data_matrix.shape[1:]):
+                return other
+            # Missing last dimension (codomain dimension)
+            elif (other.shape == self.data_matrix.shape[:-1]
+                  or other.shape == self.data_matrix.shape[1:-1]):
+                return other[..., np.newaxis]
+            else:
+                return None
+        elif isinstance(other, FDataGrid):
+            self.__check_same_dimensions(other)
+            return other.data_matrix
+        else:
+            return None
+
     def __add__(self, other):
         """Addition for FDataGrid object.
 
         It supports other FDataGrid objects, numpy.ndarray and numbers.
 
         """
-        if isinstance(other, (np.ndarray, numbers.Number)):
-            data_matrix = other
-        elif isinstance(other, FDataGrid):
-            self.__check_same_dimensions(other)
-            data_matrix = other.data_matrix
-        else:
+
+        data_matrix = self._get_op_matrix(other)
+        if data_matrix is None:
            return NotImplemented
 
         return self.copy(data_matrix=self.data_matrix + data_matrix)
@@ -637,12 +592,8 @@ def __sub__(self, other):
 
         It supports other FDataGrid objects, numpy.ndarray and numbers.
 
         """
-        if isinstance(other, (np.ndarray, numbers.Number)):
-            data_matrix = other
-        elif isinstance(other, FDataGrid):
-            self.__check_same_dimensions(other)
-            data_matrix = other.data_matrix
-        else:
+        data_matrix = self._get_op_matrix(other)
+        if data_matrix is None:
             return NotImplemented
 
         return self.copy(data_matrix=self.data_matrix - data_matrix)
@@ -653,12 +604,8 @@ def __rsub__(self, other):
 
         It supports other FDataGrid objects, numpy.ndarray and numbers.
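+
+        The operand may be a scalar, an array with the shape of
+        ``data_matrix`` or of a single sample, optionally omitting the
+        trailing codomain axis (see ``_get_op_matrix`` above).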
""" - if isinstance(other, (np.ndarray, numbers.Number)): - data_matrix = other - elif isinstance(other, FDataGrid): - self.__check_same_dimensions(other) - data_matrix = other.data_matrix - else: + data_matrix = self._get_op_matrix(other) + if data_matrix is None: return NotImplemented return self.copy(data_matrix=data_matrix - self.data_matrix) @@ -669,12 +616,8 @@ def __mul__(self, other): It supports other FDataGrid objects, numpy.ndarray and numbers. """ - if isinstance(other, (np.ndarray, numbers.Number)): - data_matrix = other - elif isinstance(other, FDataGrid): - self.__check_same_dimensions(other) - data_matrix = other.data_matrix - else: + data_matrix = self._get_op_matrix(other) + if data_matrix is None: return NotImplemented return self.copy(data_matrix=self.data_matrix * data_matrix) @@ -693,12 +636,8 @@ def __truediv__(self, other): It supports other FDataGrid objects, numpy.ndarray and numbers. """ - if isinstance(other, (np.ndarray, numbers.Number)): - data_matrix = other - elif isinstance(other, FDataGrid): - self.__check_same_dimensions(other) - data_matrix = other.data_matrix - else: + data_matrix = self._get_op_matrix(other) + if data_matrix is None: return NotImplemented return self.copy(data_matrix=self.data_matrix / data_matrix) @@ -709,12 +648,8 @@ def __rtruediv__(self, other): It supports other FDataGrid objects, numpy.ndarray and numbers. """ - if isinstance(other, (np.ndarray, numbers.Number)): - data_matrix = other - elif isinstance(other, FDataGrid): - self.__check_same_dimensions(other) - data_matrix = other.data_matrix - else: + data_matrix = self._get_op_matrix(other) + if data_matrix is None: return NotImplemented return self.copy(data_matrix=data_matrix / self.data_matrix) @@ -775,9 +710,12 @@ def concatenate(self, *others, as_coordinates=False): data = [self.data_matrix] + [other.data_matrix for other in others] if as_coordinates: + + coordinate_names = [ + fd.coordinate_names for fd in [self, *others]] + return self.copy(data_matrix=np.concatenate(data, axis=-1), - axes_labels=( - self._join_labels_coordinates(*others))) + coordinate_names=sum(coordinate_names, ())) else: return self.copy(data_matrix=np.concatenate(data, axis=0)) @@ -826,48 +764,40 @@ def to_basis(self, basis, **kwargs): >>> import numpy as np >>> import skfda >>> t = np.linspace(0, 1, 5) - >>> x = np.sin(2 * np.pi * t) + np.cos(2 * np.pi * t) + >>> x = np.sin(2 * np.pi * t) + np.cos(2 * np.pi * t) + 2 >>> x - array([ 1., 1., -1., -1., 1.]) + array([ 3., 3., 1., 1., 3.]) >>> fd = FDataGrid(x, t) >>> basis = skfda.representation.basis.Fourier(n_basis=3) >>> fd_b = fd.to_basis(basis) >>> fd_b.coefficients.round(2) - array([[ 0. , 0.71, 0.71]]) + array([[ 2. 
, 0.71, 0.71]]) """ - if self.dim_domain > 1: - raise NotImplementedError("Only support 1 dimension on the " - "domain.") - elif self.dim_codomain > 1: - raise NotImplementedError("Only support 1 dimension on the " - "image.") + if self.dim_domain != basis.dim_domain: + raise ValueError(f"The domain of the function has " + f"dimension {self.dim_domain} " + f"but the domain of the basis has " + f"dimension {basis.dim_domain}") + elif self.dim_codomain != basis.dim_codomain: + raise ValueError(f"The codomain of the function has " + f"dimension {self.dim_codomain} " + f"but the codomain of the basis has " + f"dimension {basis.dim_codomain}") # Readjust the domain range if there was not an explicit one if basis._domain_range is None: basis = basis.copy() basis.domain_range = self.domain_range - return fdbasis.FDataBasis.from_data(self.data_matrix[..., 0], - self.sample_points[0], + return fdbasis.FDataBasis.from_data(self.data_matrix, + self.sample_points, basis, - keepdims=self.keepdims, **kwargs) def to_grid(self, sample_points=None): - """Return the discrete representation of the object. - - Args: - sample_points (array_like, optional): 2 dimension matrix where - each row contains the points of dicretisation for each axis of - data_matrix. - - Returns: - FDataGrid: Discrete representation of the functional data - object. - """ if sample_points is None: sample_points = self.sample_points @@ -877,9 +807,12 @@ def to_grid(self, sample_points=None): def copy(self, *, deep=False, # For Pandas compatibility data_matrix=None, sample_points=None, - domain_range=None, dataset_label=None, - axes_labels=None, extrapolation=None, - interpolator=None, keepdims=None): + domain_range=None, + dataset_name=None, + argument_names=None, + coordinate_names=None, + extrapolation=None, + interpolation=None): """Returns a copy of the FDataGrid. 
If an argument is provided the corresponding attribute in the new copy @@ -898,26 +831,30 @@ def copy(self, *, if domain_range is None: domain_range = copy.deepcopy(self.domain_range) - if dataset_label is None: - dataset_label = copy.copy(self.dataset_label) + if dataset_name is None: + dataset_name = self.dataset_name + + if argument_names is None: + # Tuple, immutable + argument_names = self.argument_names - if axes_labels is None: - axes_labels = copy.copy(self.axes_labels) + if coordinate_names is None: + # Tuple, immutable + coordinate_names = self.coordinate_names if extrapolation is None: extrapolation = self.extrapolation - if interpolator is None: - interpolator = self.interpolator - - if keepdims is None: - keepdims = self.keepdims + if interpolation is None: + interpolation = self.interpolation return FDataGrid(data_matrix, sample_points=sample_points, domain_range=domain_range, - dataset_label=dataset_label, - axes_labels=axes_labels, extrapolation=extrapolation, - interpolator=interpolator, keepdims=keepdims) + dataset_name=dataset_name, + argument_names=argument_names, + coordinate_names=coordinate_names, + extrapolation=extrapolation, + interpolation=interpolation) def shift(self, shifts, *, restrict_domain=False, extrapolation=None, eval_points=None): @@ -971,6 +908,8 @@ def shift(self, shifts, *, restrict_domain=False, extrapolation=None, if eval_points is None: eval_points = self.sample_points + else: + eval_points = np.atleast_2d(eval_points) if restrict_domain: domain = np.asarray(self.domain_range) @@ -1003,7 +942,7 @@ def shift(self, shifts, *, restrict_domain=False, extrapolation=None, data_matrix = self.evaluate(eval_points_shifted, extrapolation=extrapolation, - aligned_evaluation=False, + aligned=False, grid=True) return self.copy(data_matrix=data_matrix, sample_points=eval_points, @@ -1038,14 +977,14 @@ def compose(self, fd, *, eval_points=None): eval_points = np.linspace(*fd.domain_range[0], constants.N_POINTS_COARSE_MESH) - eval_points_transformation = fd(eval_points, keepdims=False) + eval_points_transformation = fd(eval_points) data_matrix = self(eval_points_transformation, - aligned_evaluation=False) + aligned=False) else: if eval_points is None: eval_points = fd.sample_points - grid_transformation = fd(eval_points, grid=True, keepdims=True) + grid_transformation = fd(eval_points, grid=True) lengths = [len(ax) for ax in eval_points] @@ -1058,15 +997,13 @@ def compose(self, fd, *, eval_points=None): list(map(np.ravel, grid_transformation[i].T)) ).T - data_flatten = self(eval_points_transformation, - aligned_evaluation=False) - - data_matrix = data_flatten.reshape((self.n_samples, *lengths, - self.dim_codomain)) + data_matrix = self(eval_points_transformation, + aligned=False) return self.copy(data_matrix=data_matrix, sample_points=eval_points, - domain_range=fd.domain_range) + domain_range=fd.domain_range, + argument_names=fd.argument_names) def __str__(self): """Return str(self).""" @@ -1077,34 +1014,19 @@ def __str__(self): def __repr__(self): """Return repr(self).""" - if self.axes_labels is None: - axes_labels = None - else: - axes_labels = self.axes_labels.tolist() - return (f"FDataGrid(" f"\n{repr(self.data_matrix)}," f"\nsample_points={repr(self.sample_points)}," f"\ndomain_range={repr(self.domain_range)}," - f"\ndataset_label={repr(self.dataset_label)}," - f"\naxes_labels={repr(axes_labels)}," + f"\ndataset_name={repr(self.dataset_name)}," + f"\nargument_names={repr(self.argument_names)}," + f"\ncoordinate_names={repr(self.coordinate_names)}," 
f"\nextrapolation={repr(self.extrapolation)}," - f"\ninterpolator={repr(self.interpolator)}," - f"\nkeepdims={repr(self.keepdims)})").replace('\n', '\n ') + f"\ninterpolation={repr(self.interpolation)})").replace( + '\n', '\n ') def __getitem__(self, key): """Return self[key].""" - if isinstance(key, tuple): - # If there are not values for every dimension, the remaining ones - # are kept - key += (slice(None),) * (self.dim_domain + 1 - len(key)) - - sample_points = [self.sample_points[i][subkey] - for i, subkey in enumerate( - key[1:1 + self.dim_domain])] - - return self.copy(data_matrix=self.data_matrix[key], - sample_points=sample_points) if isinstance(key, numbers.Integral): # To accept also numpy ints key = int(key) @@ -1120,8 +1042,8 @@ def __getitem__(self, key): def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): for i in inputs: - if isinstance(i, FDataGrid) and not np.all(i.sample_points == - self.sample_points): + if isinstance(i, FDataGrid) and not np.array_equal( + i.sample_points, self.sample_points): return NotImplemented new_inputs = [i.data_matrix if isinstance(i, FDataGrid) diff --git a/skfda/representation/interpolation.py b/skfda/representation/interpolation.py index 1bf6d9390..2967c29a8 100644 --- a/skfda/representation/interpolation.py +++ b/skfda/representation/interpolation.py @@ -3,127 +3,70 @@ """ +import abc + from scipy.interpolate import (PchipInterpolator, UnivariateSpline, RectBivariateSpline, RegularGridInterpolator) import numpy as np -from .evaluator import Evaluator, EvaluatorConstructor - - -# Scipy interpolator methods used internally -class SplineInterpolator(EvaluatorConstructor): - r"""Spline interpolator of :class:`FDataGrid`. - - Spline interpolator of discretized functional objects. Implements different - interpolation methods based in splines, using the sample points of the - grid as nodes to interpolate. - - See the interpolation example to a detailled explanation. - - Attributes: - interpolator_order (int, optional): Order of the interpolation, 1 - for linear interpolation, 2 for cuadratic, 3 for cubic and so - on. In case of curves and surfaces there is available - interpolation up to degree 5. For higher dimensional objects - only linear or nearest interpolation is available. Default - lineal interpolation. - smoothness_parameter (float, optional): Penalisation to perform - smoothness interpolation. Option only available for curves and - surfaces. If 0 the residuals of the interpolation will be 0. - Defaults 0. - monotone (boolean, optional): Performs monotone interpolation in - curves using a PCHIP interpolator. Only valid for curves (domain - dimension equal to 1) and interpolation order equal to 1 or 3. - Defaults false. - - """ - - def __init__(self, interpolation_order=1, smoothness_parameter=0., - monotone=False): - r"""Constructor of the SplineInterpolator. - - Args: - interpolator_order (int, optional): Order of the interpolation, 1 - for linear interpolation, 2 for cuadratic, 3 for cubic and so - on. In case of curves and surfaces there is available - interpolation up to degree 5. For higher dimensional objects - only linear or nearest interpolation is available. Default - lineal interpolation. - smoothness_parameter (float, optional): Penalisation to perform - smoothness interpolation. Option only available for curves and - surfaces. If 0 the residuals of the interpolation will be 0. - Defaults 0. - monotone (boolean, optional): Performs monotone interpolation in - curves using a PCHIP interpolator. 
Only valid for curves - (domain dimension equal to 1) and interpolation order equal - to 1 or 3. - Defaults false. +from .evaluator import Evaluator - """ - self._interpolation_order = interpolation_order - self._smoothness_parameter = smoothness_parameter - self._monotone = monotone - @property - def interpolation_order(self): - "Returns the interpolation order" - return self._interpolation_order +class _SplineList(abc.ABC): + r"""ABC for list of interpolations.""" - @property - def smoothness_parameter(self): - "Returns the smoothness parameter" - return self._smoothness_parameter + def __init__(self, fdatagrid, + interpolation_order=1, + smoothness_parameter=0.): - @property - def monotone(self): - "Returns flag to perform monotone interpolation" - return self._monotone + super().__init__() - def __eq__(self, other): - """Equality operator between SplineInterpolator""" - return (super().__eq__(other) and - self.interpolation_order == other.interpolation_order and - self.smoothness_parameter == other.smoothness_parameter and - self.monotone == other.monotone) + self.fdatagrid = fdatagrid + self.interpolation_order = interpolation_order + self.smoothness_parameter = smoothness_parameter - def evaluator(self, fdatagrid): - """Construct a SplineInterpolatorEvaluator used in the evaluation. + @abc.abstractmethod + def _evaluate_one(self, spl, t, derivative=0): + """Evaluates one spline of the list.""" + pass - Args: - fdatagrid (:class:`FDataGrid`): Functional object where the - evaluator will be used. + def _evaluate_codomain(self, spl_m, t, derivative=0): + """Evaluator of multidimensional sample""" + return np.array([self._evaluate_one(spl, t, derivative) + for spl in spl_m]).T - Returns: - (:class:`SplineInterpolatorEvaluator`): Evaluator of the fdatagrid. + def evaluate(self, fdata, eval_points, *, derivative=0, aligned=True): - """ - return SplineInterpolatorEvaluator(fdatagrid, self.interpolation_order, - self.smoothness_parameter, - self.monotone) + if aligned: + # Points evaluated inside the domain + res = np.apply_along_axis( + self._evaluate_codomain, 1, + self.splines, eval_points, derivative) + res = res.reshape(fdata.n_samples, eval_points.shape[0], + fdata.dim_codomain) - def __repr__(self): - """repr method of the interpolator""" - return (f"{type(self).__name__}(" - f"interpolation_order={self.interpolation_order}, " - f"smoothness_parameter={self.smoothness_parameter}, " - f"monotone={self.monotone})") + else: + res = np.array([self._evaluate_codomain( + s, e, derivative=derivative) + for s, e in zip(self.splines, eval_points)]) + return res -class SplineInterpolatorEvaluator(Evaluator): - r"""Spline interpolator evaluator of :class:`FDataGrid`. - It is generated by the SplineInterpolator, and it is used internally - during the evaluation. +class _SplineList1D(_SplineList): + r"""List of interpolations for curves. - Spline interpolator of discretized functional objects. Implements different - interpolation methods based in splines, using the sample points of the - grid as nodes to interpolate. + List of interpolations for objects with domain + dimension = 1. Calling internally during the creation of the + evaluator. - See the interpolation example to a detailled explanation. + Uses internally the scipy interpolation UnivariateSpline or + PchipInterpolator. - Attributes: - interpolator_order (int, optional): Order of the interpolation, 1 + Args: + fdatagrid (FDatagrid): Fdatagrid to interpolate. 
+ interpolation_order (int, optional): Order of the interpolation, 1 for linear interpolation, 2 for cuadratic, 3 for cubic and so on. In case of curves and surfaces there is available interpolation up to degree 5. For higher dimensional objects @@ -138,356 +81,332 @@ class SplineInterpolatorEvaluator(Evaluator): dimension equal to 1) and interpolation order equal to 1 or 3. Defaults false. - """ - - def __init__(self, fdatagrid, k=1, s=0., monotone=False): - r"""Constructor of the SplineInterpolatorEvaluator. - - Args: - fdatagir (fdatagrid): Grid to be interpolated. - interpolator_order (int, optional): Order of the interpolation, 1 - for linear interpolation, 2 for cuadratic, 3 for cubic and so - on. In case of curves and surfaces there is available - interpolation up to degree 5. For higher dimensional objects - only linear or nearest interpolation is available. Default - lineal interpolation. - smoothness_parameter (float, optional): Penalisation to perform - smoothness interpolation. Option only available for curves and - surfaces. If 0 the residuals of the interpolation will be 0. - Defaults 0. - monotone (boolean, optional): Performs monotone interpolation in - curves using a PCHIP interpolator. Only valid for curves - (domain dimension equal to 1) and interpolation order equal to - 1 or 3. - Defaults false. - - """ - sample_points = fdatagrid.sample_points - data_matrix = fdatagrid.data_matrix - - self._fdatagrid = fdatagrid - self._dim_codomain = fdatagrid.dim_codomain - self._dim_domain = fdatagrid.dim_domain - self._n_samples = fdatagrid.n_samples - self._keepdims = fdatagrid.keepdims - self._domain_range = fdatagrid.domain_range - - if self._dim_domain == 1: - self._splines = self._construct_spline_1_m(sample_points, - data_matrix, - k, s, monotone) - elif monotone: - raise ValueError("Monotone interpolation is only supported with " - "domain dimension equal to 1.") - - elif self._dim_domain == 2: - self._splines = self._construct_spline_2_m(sample_points, - data_matrix, k, s) - - elif s != 0: - raise ValueError("Smoothing interpolation is only supported with " - "domain dimension up to 2, s should be 0.") - - else: - self._splines = self._construct_spline_n_m(sample_points, - data_matrix, k) - - # After the creation of the splines the fdatagrid reference can - # be deleted - self._fdatagrid = None + Returns: + (np.ndarray): Array of size n_samples x dim_codomain with the + corresponding interpolation of the sample i, and image dimension j + in the entry (i,j) of the array. - def _construct_spline_1_m(self, sample_points, data_matrix, - k, s, monotone): - r"""Construct the matrix of interpolators for curves. + Raises: + ValueError: If the value of the interpolation k is not valid. - Constructs the matrix of interpolators for objects with domain - dimension = 1. Calling internally during the creationg of the - evaluator. - - Uses internally the scipy interpolator UnivariateSpline or - PchipInterpolator. + """ - Args: - sample_points (np.ndarray): Sample points of the fdatagrid. - data_matrix (np.ndarray): Data matrix of the fdatagrid. - k (integer): Order of the spline interpolators. + def __init__(self, fdatagrid, + interpolation_order=1, + smoothness_parameter=0., + monotone=False): - Returns: - (np.ndarray): Array of size n_samples x dim_codomain with the - corresponding interpolator of the sample i, and image dimension j - in the entry (i,j) of the array. 
+        super().__init__(
+            fdatagrid=fdatagrid,
+            interpolation_order=interpolation_order,
+            smoothness_parameter=smoothness_parameter)
 
-    Raises:
-        ValueError: If the value of the interpolation k is not valid.
+        self.monotone = monotone
 
-        """
-        if k > 5 or k < 1:
-            raise ValueError(f"Invalid degree of interpolation ({k}). Must be "
+        if self.interpolation_order > 5 or self.interpolation_order < 1:
+            raise ValueError(f"Invalid degree of interpolation "
+                             f"({self.interpolation_order}). Must be "
                              f"an integer greater than 0 and lower or "
                              f"equal than 5.")
 
-        if monotone and s != 0:
+        if self.monotone and self.smoothness_parameter != 0:
             raise ValueError("Smoothing interpolation is not supported with "
                              "monotone interpolation")
 
-        if monotone and (k == 2 or k == 4):
-            raise ValueError(f"monotone interpolation of degree {k}"
+        if self.monotone and (self.interpolation_order == 2
+                              or self.interpolation_order == 4):
+            raise ValueError(f"monotone interpolation of degree "
+                             f"{self.interpolation_order} "
                              f"not supported.")
 
         # Monotone interpolation of degree 1 is performed with linear spline
-        if monotone and k == 1:
+        monotone = self.monotone
+        if self.monotone and self.interpolation_order == 1:
             monotone = False
 
-        # Evaluator of splines called in evaluate
-
-        def _spline_evaluator_1_m(spl, t, der):
-
-            return spl(t, der)
-
-        def _process_derivative_1_m(derivative):
-
-            return derivative
-
-        self._spline_evaluator = _spline_evaluator_1_m
-
-        self._process_derivative = _process_derivative_1_m
-
-        sample_points = sample_points[0]
+        sample_points = fdatagrid.sample_points[0]
 
         if monotone:
             def constructor(data):
-                """Constructs an unidimensional cubic monotone interpolator"""
+                """Constructs a one-dimensional cubic monotone interpolation"""
                 return PchipInterpolator(sample_points, data)
 
         else:
 
             def constructor(data):
-                """Constructs an unidimensional interpolator"""
-                return UnivariateSpline(sample_points, data, s=s, k=k)
+                """Constructs a one-dimensional interpolation"""
+                return UnivariateSpline(
+                    sample_points, data,
+                    s=self.smoothness_parameter,
+                    k=self.interpolation_order)
 
-        return np.apply_along_axis(constructor, 1, data_matrix)
+        self.splines = np.apply_along_axis(
+            constructor, 1, fdatagrid.data_matrix)
 
-    def _construct_spline_2_m(self, sample_points, data_matrix, k, s):
-        r"""Construct the matrix of interpolators for surfaces.
+    def _evaluate_one(self, spl, t, derivative=0):
+        try:
+            return spl(t, derivative)[:, 0]
+        except ValueError:
+            return np.zeros_like(t)
 
-        Constructs the matrix of interpolators for objects with domain
-        dimension = 2. Calling internally during the creationg of the
-        evaluator.
 
-        Uses internally the scipy interpolator RectBivariateSpline.
+class _SplineList2D(_SplineList):
+    r"""List of interpolations for surfaces.
 
-        Args:
-            sample_points (np.ndarray): Sample points of the fdatagrid.
-            data_matrix (np.ndarray): Data matrix of the fdatagrid.
-            k (integer): Order of the spline interpolators.
+    List of interpolations for objects with domain
+    dimension = 2. Called internally during the creation of the
+    evaluator.
+
+    Uses internally the scipy interpolation RectBivariateSpline.
+
+    Args:
+        fdatagrid (FDataGrid): FDataGrid to interpolate.
+        interpolation_order (int, optional): Order of the interpolation, 1
+            for linear interpolation, 2 for quadratic, 3 for cubic and so
+            on. In case of curves and surfaces there is available
+            interpolation up to degree 5. For higher dimensional objects
+            only linear or nearest interpolation is available. Default
+            linear interpolation.
+        smoothness_parameter (float, optional): Penalisation to perform
+            smoothness interpolation. Option only available for curves and
+            surfaces. If 0 the residuals of the interpolation will be 0.
+            Defaults to 0.

-        Returns:
-            (np.ndarray): Array of size n_samples x dim_codomain with the
-            corresponding interpolator of the sample i, and image dimension j
-            in the entry (i,j) of the array.
+    Returns:
+        (np.ndarray): Array of size n_samples x dim_codomain with the
+        corresponding interpolation of sample i and image dimension j in
+        entry (i, j) of the array.

-        Raises:
-            ValueError: If the value of the interpolation k is not valid.
+    Raises:
+        ValueError: If the interpolation order k is not valid.

-        """
-        if np.isscalar(k):
-            kx = ky = k
-        elif len(k) != 2:
+    """
+
+    def __init__(self, fdatagrid,
+                 interpolation_order=1,
+                 smoothness_parameter=0.):
+
+        super().__init__(
+            fdatagrid=fdatagrid,
+            interpolation_order=interpolation_order,
+            smoothness_parameter=smoothness_parameter)
+
+        if np.isscalar(self.interpolation_order):
+            kx = ky = self.interpolation_order
+        elif len(self.interpolation_order) != 2:
            raise ValueError("k should be numeric or a tuple of length 2.")
        else:
-            kx = k[0]
-            ky = k[1]
+            kx = self.interpolation_order[0]
+            ky = self.interpolation_order[1]

        if kx > 5 or kx <= 0 or ky > 5 or ky <= 0:
            raise ValueError(f"Invalid degree of interpolation ({kx},{ky}). "
                             f"Must be an integer greater than 0 and lower or "
                             f"equal than 5.")

-        def _spline_evaluator_2_m(spl, t, der):
+        # Matrix of splines
+        self.splines = np.empty(
+            (fdatagrid.n_samples, fdatagrid.dim_codomain), dtype=object)

-            return spl(t[:, 0], t[:, 1], dx=der[0], dy=der[1], grid=False)
+        for i in range(fdatagrid.n_samples):
+            for j in range(fdatagrid.dim_codomain):
+                self.splines[i, j] = RectBivariateSpline(
+                    fdatagrid.sample_points[0],
+                    fdatagrid.sample_points[1],
+                    fdatagrid.data_matrix[i, :, :, j],
+                    kx=kx, ky=ky,
+                    s=self.smoothness_parameter)

-        def _process_derivative_2_m(derivative):
-            if np.isscalar(derivative):
-                derivative = 2 * [derivative]
-            elif len(derivative) != 2:
-                raise ValueError("derivative should be a numeric value "
-                                 "or a tuple of length 2 with (dx,dy).")
+    def _evaluate_one(self, spl, t, derivative=0):
+        if np.isscalar(derivative):
+            derivative = 2 * [derivative]
+        elif len(derivative) != 2:
+            raise ValueError("derivative should be a numeric value "
+                             "or a tuple of length 2 with (dx,dy).")

-            return derivative
+        return spl(t[:, 0], t[:, 1], dx=derivative[0], dy=derivative[1],
+                   grid=False)

-        # Evaluator of splines called in evaluate
-        self._spline_evaluator = _spline_evaluator_2_m
-        self._process_derivative = _process_derivative_2_m
-        # Matrix of splines
-        spline = np.empty((self._n_samples, self._dim_codomain), dtype=object)
+class _SplineListND(_SplineList):
+    r"""List of interpolations.

-        for i in range(self._n_samples):
-            for j in range(self._dim_codomain):
-                spline[i, j] = RectBivariateSpline(sample_points[0],
-                                                   sample_points[1],
-                                                   data_matrix[i, :, :, j],
-                                                   kx=kx, ky=ky, s=s)
+    List of interpolations for objects with domain
+    dimension > 2. Called internally during the creation of the
+    evaluator.

-        return spline
+    Only linear and nearest interpolations are available for objects with
+    domain dimension >= 3.
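For orientation, the surface path above can be exercised end to end through the public interpolation class introduced later in this changeset. The sketch below uses made-up data; the module path ``skfda.representation.interpolation`` and the settable ``FDataGrid.interpolation`` attribute are assumptions inferred from the surrounding code, not stated in this hunk.

.. code:: python

    # Minimal sketch (hypothetical data; module path assumed).
    import numpy as np
    from skfda import FDataGrid
    from skfda.representation.interpolation import SplineInterpolation

    x = np.linspace(0, 1, 6)
    y = np.linspace(0, 1, 6)
    # One surface sample z = sin(pi x) sin(pi y) on a 6 x 6 grid.
    data = np.sin(np.pi * x)[:, None] * np.sin(np.pi * y)[None, :]
    fd = FDataGrid(data[None, ...], sample_points=[x, y])

    # Bicubic interpolation: with dim_domain == 2 this should dispatch to
    # the RectBivariateSpline-backed _SplineList2D built above.
    fd.interpolation = SplineInterpolation(interpolation_order=3)
    values = fd(np.array([[0.25, 0.25], [0.5, 0.5]]))  # shape (1, 2, 1)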
    Uses internally the scipy interpolator
+    RegularGridInterpolator.

-    def _construct_spline_n_m(self, sample_points, data_matrix, k):
-        r"""Construct the matrix of interpolators.
+    Args:
+        fdatagrid (FDataGrid): FDataGrid to interpolate.
+        interpolation_order (int, optional): Order of the interpolation,
+            0 (nearest) or 1 (linear). Defaults to linear interpolation.
+        smoothness_parameter (float, optional): Not supported for domain
+            dimension greater than 2; must be 0.

-        Constructs the matrix of interpolators for objects with domain
-        dimension > 2. Calling internally during the creationg of the
-        evaluator.
+    Returns:
+        (np.ndarray): Array of size n_samples x dim_codomain with the
+        corresponding interpolation of sample i and image dimension j in
+        entry (i, j) of the array.

-        Only linear and nearest interpolators are available for objects with
-        domain dimension >= 3. Uses internally the scipy interpolator
-        RegularGridInterpolator.
+    Raises:
+        ValueError: If the interpolation order k is not valid.

-        Args:
-            sample_points (np.ndarray): Sample points of the fdatagrid.
-            data_matrix (np.ndarray): Data matrix of the fdatagrid.
-            k (integer): Order of the spline interpolators.
+    """

-        Returns:
-            (np.ndarray): Array of size n_samples x dim_codomain with the
-            corresponding interpolator of the sample i, and image dimension j
-            in the entry (i,j) of the array.
+    def __init__(self, fdatagrid,
+                 interpolation_order=1,
+                 smoothness_parameter=0.):

-        Raises:
-            ValueError: If the value of the interpolation k is not valid.
+        super().__init__(
+            fdatagrid=fdatagrid,
+            interpolation_order=interpolation_order,
+            smoothness_parameter=smoothness_parameter)
+
+        if self.smoothness_parameter != 0:
+            raise ValueError("Smoothing interpolation is only supported with "
+                             "domain dimension up to 2, smoothness_parameter "
+                             "should be 0.")

-        """
        # Parses method of interpolation
-        if k == 0:
+        if self.interpolation_order == 0:
            method = 'nearest'
-        elif k == 1:
+        elif self.interpolation_order == 1:
            method = 'linear'
        else:
            raise ValueError("interpolation order should be 0 (nearest) or 1 "
                             "(linear).")

-        def _process_derivative_n_m(derivative):
-            if derivative != 0:
-                raise ValueError("derivates not suported for functional data "
-                                 " with domain dimension greater than 2.")
+        self.splines = np.empty(
+            (fdatagrid.n_samples, fdatagrid.dim_codomain), dtype=object)

-            return derivative
+        for i in range(fdatagrid.n_samples):
+            for j in range(fdatagrid.dim_codomain):
+                self.splines[i, j] = RegularGridInterpolator(
+                    fdatagrid.sample_points, fdatagrid.data_matrix[i, ..., j],
+                    method, False)

-        def _spline_evaluator_n_m(spl, t, derivative):
+    def _evaluate_one(self, spl, t, derivative=0):

-            return spl(t)
+        if derivative != 0:
+            raise ValueError("derivatives not supported for functional data "
+                             "with domain dimension greater than 2.")

-        # Method to process derivative argument
-        self._process_derivative = _process_derivative_n_m
+        return spl(t)

-        # Evaluator of splines called in evaluate
-        self._spline_evaluator = _spline_evaluator_n_m
-        spline = np.empty((self._n_samples, self._dim_codomain), dtype=object)
+class SplineInterpolation(Evaluator):
+    r"""Spline interpolation of :class:`FDataGrid`.

-        for i in range(self._n_samples):
-            for j in range(self._dim_codomain):
-                spline[i, j] = RegularGridInterpolator(
-                    sample_points, data_matrix[i, ..., j], method, False)
+    Spline interpolation of discretized functional objects. Implements
+    different interpolation methods based on splines, using the sample
+    points of the grid as nodes to interpolate.

-        return spline
+    See the interpolation example for a detailed explanation.
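The higher-dimensional path accepts only nearest or linear interpolation, as the ``ValueError`` in ``_SplineListND`` above enforces. A minimal sketch with synthetic data (same assumed module path and ``interpolation`` attribute as before):

.. code:: python

    # Sketch of the dim_domain > 2 path (hypothetical data; assumed paths).
    import numpy as np
    from skfda import FDataGrid
    from skfda.representation.interpolation import SplineInterpolation

    grid = [np.linspace(0, 1, 4)] * 3  # 3-D domain
    data = np.random.RandomState(0).rand(1, 4, 4, 4)  # one sample, 4x4x4 grid
    fd = FDataGrid(data, sample_points=grid)

    # Only order 0 (nearest) or 1 (linear) is accepted here; other orders
    # raise the ValueError shown above.
    fd.interpolation = SplineInterpolation(interpolation_order=1)
    fd(np.array([[0.5, 0.5, 0.5]]))  # multilinear interpolation at one point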
-    def evaluate(self, eval_points, *, derivative=0):
-        r"""Evaluation method.
+    Attributes:
+        interpolation_order (int, optional): Order of the interpolation, 1
+            for linear interpolation, 2 for quadratic, 3 for cubic and so
+            on. For curves and surfaces, interpolation up to degree 5 is
+            available. For higher dimensional objects
+            only linear or nearest interpolation is available. Defaults to
+            linear interpolation.
+        smoothness_parameter (float, optional): Penalisation to perform
+            smoothness interpolation. Option only available for curves and
+            surfaces. If 0 the residuals of the interpolation will be 0.
+            Defaults to 0.
+        monotone (boolean, optional): Performs monotone interpolation in
+            curves using a PCHIP interpolator. Only valid for curves (domain
+            dimension equal to 1) and interpolation order equal to 1 or 3.
+            Defaults to False.

-        Evaluates the samples at different evaluation points. The evaluation
-        call will receive a 3-d array with the evaluation points for
-        each sample.
+    """

-        This method is called internally by :meth:`evaluate` when the argument
-        `aligned_evaluation` is False.
+    def __init__(self, interpolation_order=1, *, smoothness_parameter=0.,
+                 monotone=False):
+        r"""Constructor of the SplineInterpolation.

        Args:
-            eval_points (np.ndarray): Numpy array with shape
-                `(n_samples, number_eval_points, dim_domain)` with the
-                evaluation points for each sample.
-            derivative (int, optional): Order of the derivative. Defaults to 0.
-
-        Returns:
-            (np.darray): Numpy 3d array with shape `(n_samples,
-                number_eval_points, dim_codomain)` with the result of the
-                evaluation. The entry (i,j,k) will contain the value k-th image
-                dimension of the i-th sample, at the j-th evaluation point.
-
-        Raises:
-            ValueError: In case of an incorrect value of the derivative
-                argument.
+            interpolation_order (int, optional): Order of the interpolation, 1
+                for linear interpolation, 2 for quadratic, 3 for cubic and so
+                on. For curves and surfaces, interpolation up to degree 5 is
+                available. For higher dimensional objects
+                only linear or nearest interpolation is available. Defaults to
+                linear interpolation.
+            smoothness_parameter (float, optional): Penalisation to perform
+                smoothness interpolation. Option only available for curves and
+                surfaces. If 0 the residuals of the interpolation will be 0.
+                Defaults to 0.
+            monotone (boolean, optional): Performs monotone interpolation in
+                curves using a PCHIP interpolator. Only valid for curves
+                (domain dimension equal to 1) and interpolation order equal
+                to 1 or 3.
+                Defaults to False.

        """
-        derivative = self._process_derivative(derivative)
-
-        # Constructs the evaluator for t_eval
-        if self._dim_codomain == 1:
-            def evaluator(spl):
-                """Evaluator of object with image dimension equal to 1."""
-                return self._spline_evaluator(spl[0], eval_points, derivative)
-        else:
-            def evaluator(spl_m):
-                """Evaluator of multimensional object"""
-                return np.dstack(
-                    [self._spline_evaluator(spl, eval_points, derivative)
-                     for spl in spl_m]).flatten()
-
-        # Points evaluated inside the domain
-        res = np.apply_along_axis(evaluator, 1, self._splines)
-        res = res.reshape(self._n_samples, eval_points.shape[0],
-                          self._dim_codomain)
-
-        return res
+        self._interpolation_order = interpolation_order
+        self._smoothness_parameter = smoothness_parameter
+        self._monotone = monotone

-    def evaluate_composed(self, eval_points, *, derivative=0):
-        """Evaluation method.
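The constructor stores its parameters in private attributes exposed through the read-only properties that follow. A short sketch of the constructor semantics, under the same module-path assumption as the earlier examples:

.. code:: python

    # Sketch of monotone construction (assumed module path).
    import numpy as np
    from skfda import FDataGrid
    from skfda.representation.interpolation import SplineInterpolation

    t = np.linspace(0, 1, 5)
    fd = FDataGrid([[0.0, 0.1, 0.5, 0.9, 1.0]], sample_points=t)

    # Monotone (PCHIP) cubic interpolation: per the checks above, only
    # valid for curves with interpolation order 1 or 3.
    fd.interpolation = SplineInterpolation(interpolation_order=3,
                                           monotone=True)
    fd(np.linspace(0, 1, 9))

    # Parameters are read-only properties, reflected by the __repr__ and
    # __eq__ defined below.
    repr(fd.interpolation)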
+ @property + def interpolation_order(self): + "Returns the interpolation order" + return self._interpolation_order - Evaluates the samples at different evaluation points. The evaluation - call will receive a 3-d array with the evaluation points for - each sample. + @property + def smoothness_parameter(self): + "Returns the smoothness parameter" + return self._smoothness_parameter - This method is called internally by :meth:`evaluate` when the argument - `aligned_evaluation` is False. + @property + def monotone(self): + "Returns flag to perform monotone interpolation" + return self._monotone - Args: - eval_points (np.ndarray): Numpy array with shape - `(n_samples, number_eval_points, dim_domain)` with the - evaluation points for each sample. - derivative (int, optional): Order of the derivative. Defaults to 0. + def _build_interpolator(self, fdatagrid): - Returns: - (np.darray): Numpy 3d array with shape `(n_samples, - number_eval_points, dim_codomain)` with the result of the - evaluation. The entry (i,j,k) will contain the value k-th image - dimension of the i-th sample, at the j-th evaluation point. + if fdatagrid.dim_domain == 1: + return _SplineList1D( + fdatagrid=fdatagrid, + interpolation_order=self.interpolation_order, + smoothness_parameter=self.smoothness_parameter, + monotone=self.monotone) - Raises: - ValueError: In case of an incorrect value of the derivative - argument. + elif self.monotone: + raise ValueError("Monotone interpolation is only supported with " + "domain dimension equal to 1.") - """ - shape = (self._n_samples, eval_points.shape[1], self._dim_codomain) - res = np.empty(shape) + elif fdatagrid.dim_domain == 2: + return _SplineList2D( + fdatagrid=fdatagrid, + interpolation_order=self.interpolation_order, + smoothness_parameter=self.smoothness_parameter) - derivative = self._process_derivative(derivative) + else: + return _SplineListND( + fdatagrid=fdatagrid, + interpolation_order=self.interpolation_order, + smoothness_parameter=self.smoothness_parameter) - if self._dim_codomain == 1: - def evaluator(t, spl): - """Evaluator of sample with image dimension equal to 1""" - return self._spline_evaluator(spl[0], t, derivative) + def evaluate(self, fdata, eval_points, *, aligned=True): - for i in range(self._n_samples): - res[i] = evaluator(eval_points[i], self._splines[i]).reshape( - (eval_points.shape[1], self._dim_codomain)) + spline_list = self._build_interpolator(fdata) - else: - def evaluator(t, spl_m): - """Evaluator of multidimensional sample""" - return np.array([self._spline_evaluator(spl, t, derivative) - for spl in spl_m]).T + return spline_list.evaluate(fdata, eval_points, aligned=aligned) - for i in range(self._n_samples): - res[i] = evaluator(eval_points[i], self._splines[i]) + def __repr__(self): + """repr method of the interpolation""" + return (f"{type(self).__name__}(" + f"interpolation_order={self.interpolation_order}, " + f"smoothness_parameter={self.smoothness_parameter}, " + f"monotone={self.monotone})") - return res + def __eq__(self, other): + """Equality operator between SplineInterpolation""" + return (super().__eq__(other) and + self.interpolation_order == other.interpolation_order and + self.smoothness_parameter == other.smoothness_parameter and + self.monotone == other.monotone) diff --git a/tests/test_basis.py b/tests/test_basis.py index 28cef06c5..da2531eaf 100644 --- a/tests/test_basis.py +++ b/tests/test_basis.py @@ -1,8 +1,12 @@ +from skfda import concatenate +import skfda +from skfda.misc import inner_product, inner_product_matrix +from 
skfda.representation.basis import (Basis, FDataBasis, Constant, Monomial, + BSpline, Fourier) +from skfda.representation.grid import FDataGrid import unittest import numpy as np -from skfda.representation.basis import (Basis, FDataBasis, Constant, Monomial, - BSpline, Fourier) class TestBasis(unittest.TestCase): @@ -29,46 +33,6 @@ def test_from_data_qr(self): np.array([[1., 2.78, -3., -0.78, 1.]]) ) - def test_bspline_penalty_special_case(self): - basis = BSpline(n_basis=5) - np.testing.assert_array_almost_equal( - basis.penalty(basis.order - 1), - np.array([[1152., -2016., 1152., -288., 0.], - [-2016., 3600., -2304., 1008., -288.], - [1152., -2304., 2304., -2304., 1152.], - [-288., 1008., -2304., 3600., -2016.], - [0., -288., 1152., -2016., 1152.]])) - - def test_fourier_penalty(self): - basis = Fourier(n_basis=5) - np.testing.assert_array_almost_equal( - basis.penalty(2).round(2), - np.array([[0., 0., 0., 0., 0.], - [0., 1558.55, 0., 0., 0.], - [0., 0., 1558.55, 0., 0.], - [0., 0., 0., 24936.73, 0.], - [0., 0., 0., 0., 24936.73]])) - - def test_bspline_penalty(self): - basis = BSpline(n_basis=5) - np.testing.assert_array_almost_equal( - basis.penalty(2).round(2), - np.array([[96., -132., 24., 12., 0.], - [-132., 192., -48., -24., 12.], - [24., -48., 48., -48., 24.], - [12., -24., -48., 192., -132.], - [0., 12., 24., -132., 96.]])) - - def test_bspline_penalty_numerical(self): - basis = BSpline(n_basis=5) - np.testing.assert_array_almost_equal( - basis.penalty(coefficients=[0, 0, 1]).round(2), - np.array([[96., -132., 24., 12., 0.], - [-132., 192., -48., -24., 12.], - [24., -48., 48., -48., 24.], - [12., -24., -48., 192., -132.], - [0., 12., 24., -132., 96.]])) - def test_basis_product_generic(self): monomial = Monomial(n_basis=5) fourier = Fourier(n_basis=3) @@ -115,49 +79,86 @@ def test_basis_bspline_product(self): self.assertEqual(bspline.basis_of_product(bspline2), prod) def test_basis_inner_matrix(self): - np.testing.assert_array_almost_equal(Monomial(n_basis=3)._inner_matrix(), - [[1, 1 / 2, 1 / 3], [1 / 2, 1 / 3, 1 / 4], [1 / 3, 1 / 4, 1 / 5]]) + np.testing.assert_array_almost_equal( + Monomial(n_basis=3).inner_product_matrix(), + [[1, 1 / 2, 1 / 3], [1 / 2, 1 / 3, 1 / 4], [1 / 3, 1 / 4, 1 / 5]]) - np.testing.assert_array_almost_equal(Monomial(n_basis=3)._inner_matrix(Monomial(n_basis=3)), - [[1, 1 / 2, 1 / 3], [1 / 2, 1 / 3, 1 / 4], [1 / 3, 1 / 4, 1 / 5]]) + np.testing.assert_array_almost_equal( + Monomial(n_basis=3).inner_product_matrix(Monomial(n_basis=3)), + [[1, 1 / 2, 1 / 3], [1 / 2, 1 / 3, 1 / 4], [1 / 3, 1 / 4, 1 / 5]]) - np.testing.assert_array_almost_equal(Monomial(n_basis=3)._inner_matrix(Monomial(n_basis=4)), - [[1, 1 / 2, 1 / 3, 1 / 4], [1 / 2, 1 / 3, 1 / 4, 1 / 5], [1 / 3, 1 / 4, 1 / 5, 1 / 6]]) + np.testing.assert_array_almost_equal( + Monomial(n_basis=3).inner_product_matrix(Monomial(n_basis=4)), + [[1, 1 / 2, 1 / 3, 1 / 4], + [1 / 2, 1 / 3, 1 / 4, 1 / 5], + [1 / 3, 1 / 4, 1 / 5, 1 / 6]]) # TODO testing with other basis - def test_basis_gram_matrix(self): - np.testing.assert_array_almost_equal(Monomial(n_basis=3).gram_matrix(), - [[1, 1 / 2, 1 / 3], [1 / 2, 1 / 3, 1 / 4], [1 / 3, 1 / 4, 1 / 5]]) - np.testing.assert_almost_equal(Fourier(n_basis=3).gram_matrix(), - np.identity(3)) - np.testing.assert_almost_equal(BSpline(n_basis=6).gram_matrix().round(4), - np.array([[4.760e-02, 2.920e-02, 6.200e-03, 4.000e-04, 0.000e+00, 0.000e+00], - [2.920e-02, 7.380e-02, 5.210e-02, - 1.150e-02, 1.000e-04, 0.000e+00], - [6.200e-03, 5.210e-02, 1.090e-01, - 7.100e-02, 1.150e-02, 
4.000e-04], - [4.000e-04, 1.150e-02, 7.100e-02, - 1.090e-01, 5.210e-02, 6.200e-03], - [0.000e+00, 1.000e-04, 1.150e-02, - 5.210e-02, 7.380e-02, 2.920e-02], - [0.000e+00, 0.000e+00, 4.000e-04, 6.200e-03, 2.920e-02, 4.760e-02]])) + def test_basis_gram_matrix_monomial(self): + + basis = Monomial(n_basis=3) + gram_matrix = basis.gram_matrix() + gram_matrix_numerical = basis._gram_matrix_numerical() + gram_matrix_res = np.array([[1, 1 / 2, 1 / 3], + [1 / 2, 1 / 3, 1 / 4], + [1 / 3, 1 / 4, 1 / 5]]) + + np.testing.assert_allclose( + gram_matrix, gram_matrix_res) + np.testing.assert_allclose( + gram_matrix_numerical, gram_matrix_res) + + def test_basis_gram_matrix_fourier(self): + + basis = Fourier(n_basis=3) + gram_matrix = basis.gram_matrix() + gram_matrix_numerical = basis._gram_matrix_numerical() + gram_matrix_res = np.identity(3) + + np.testing.assert_allclose( + gram_matrix, gram_matrix_res) + np.testing.assert_allclose( + gram_matrix_numerical, gram_matrix_res, atol=1e-15, rtol=1e-15) + + def test_basis_gram_matrix_bspline(self): + + basis = BSpline(n_basis=6) + gram_matrix = basis.gram_matrix() + gram_matrix_numerical = basis._gram_matrix_numerical() + gram_matrix_res = np.array( + [[0.04761905, 0.02916667, 0.00615079, + 0.00039683, 0., 0.], + [0.02916667, 0.07380952, 0.05208333, + 0.01145833, 0.00014881, 0.], + [0.00615079, 0.05208333, 0.10892857, 0.07098214, + 0.01145833, 0.00039683], + [0.00039683, 0.01145833, 0.07098214, 0.10892857, + 0.05208333, 0.00615079], + [0., 0.00014881, 0.01145833, 0.05208333, + 0.07380952, 0.02916667], + [0., 0., 0.00039683, 0.00615079, + 0.02916667, 0.04761905]]) + + np.testing.assert_allclose( + gram_matrix, gram_matrix_res, rtol=1e-4) + np.testing.assert_allclose( + gram_matrix_numerical, gram_matrix_res, rtol=1e-4) def test_basis_basis_inprod(self): monomial = Monomial(n_basis=4) bspline = BSpline(n_basis=5, order=4) - np.testing.assert_array_almost_equal( - monomial.inner_product(bspline).round(3), + np.testing.assert_allclose( + monomial.inner_product_matrix(bspline), np.array( [[0.12499983, 0.25000035, 0.24999965, 0.25000035, 0.12499983], [0.01249991, 0.07500017, 0.12499983, 0.17500017, 0.11249991], [0.00208338, 0.02916658, 0.07083342, 0.12916658, 0.10208338], - [0.00044654, 0.01339264, 0.04375022, 0.09910693, 0.09330368]]) - .round(3) - ) + [0.00044654, 0.01339264, 0.04375022, 0.09910693, 0.09330368] + ]), rtol=1e-3) np.testing.assert_array_almost_equal( - monomial.inner_product(bspline), - bspline.inner_product(monomial).T + monomial.inner_product_matrix(bspline), + bspline.inner_product_matrix(monomial).T ) def test_basis_fdatabasis_inprod(self): @@ -165,13 +166,12 @@ def test_basis_fdatabasis_inprod(self): bspline = BSpline(n_basis=5, order=3) bsplinefd = FDataBasis(bspline, np.arange(0, 15).reshape(3, 5)) - np.testing.assert_array_almost_equal( - monomial.inner_product(bsplinefd).round(3), + np.testing.assert_allclose( + inner_product_matrix(monomial, bsplinefd), np.array([[2., 7., 12.], [1.29626206, 3.79626206, 6.29626206], [0.96292873, 2.62959539, 4.29626206], - [0.7682873, 2.0182873, 3.2682873]]).round(3) - ) + [0.7682873, 2.0182873, 3.2682873]]), rtol=1e-4) def test_fdatabasis_fdatabasis_inprod(self): monomial = Monomial(n_basis=4) @@ -183,33 +183,23 @@ def test_fdatabasis_fdatabasis_inprod(self): bspline = BSpline(n_basis=5, order=3) bsplinefd = FDataBasis(bspline, np.arange(0, 15).reshape(3, 5)) - np.testing.assert_array_almost_equal( - monomialfd.inner_product(bsplinefd).round(3), - np.array([[16.14797697, 52.81464364, 89.4813103], - 
[11.55565285, 38.22211951, 64.88878618], - [18.14698361, 55.64698361, 93.14698361], - [15.2495976, 48.9995976, 82.7495976], - [19.70392982, 63.03676315, 106.37009648]]).round(3) - ) - - np.testing.assert_array_almost_equal( - monomialfd._inner_product_integrate( - bsplinefd, None, None).round(3), + np.testing.assert_allclose( + inner_product_matrix(monomialfd, bsplinefd), np.array([[16.14797697, 52.81464364, 89.4813103], [11.55565285, 38.22211951, 64.88878618], [18.14698361, 55.64698361, 93.14698361], [15.2495976, 48.9995976, 82.7495976], - [19.70392982, 63.03676315, 106.37009648]]).round(3) - ) + [19.70392982, 63.03676315, 106.37009648]]), + rtol=1e-4) def test_comutativity_inprod(self): monomial = Monomial(n_basis=4) bspline = BSpline(n_basis=5, order=3) bsplinefd = FDataBasis(bspline, np.arange(0, 15).reshape(3, 5)) - np.testing.assert_array_almost_equal( - bsplinefd.inner_product(monomial).round(3), - np.transpose(monomial.inner_product(bsplinefd).round(3)) + np.testing.assert_allclose( + inner_product_matrix(bsplinefd, monomial), + np.transpose(inner_product_matrix(monomial, bsplinefd)) ) def test_fdatabasis_times_fdatabasis_fdatabasis(self): @@ -268,9 +258,9 @@ def test_fdatabasis__add__(self): FDataBasis(Monomial(n_basis=3), [[2, 2, 3], [5, 4, 5]])) - np.testing.assert_raises(NotImplementedError, monomial2.__add__, - FDataBasis(Fourier(n_basis=3), - [[2, 2, 3], [5, 4, 5]])) + with np.testing.assert_raises(TypeError): + monomial2 + FDataBasis(Fourier(n_basis=3), + [[2, 2, 3], [5, 4, 5]]) def test_fdatabasis__sub__(self): monomial1 = FDataBasis(Monomial(n_basis=3), [1, 2, 3]) @@ -292,9 +282,9 @@ def test_fdatabasis__sub__(self): FDataBasis(Monomial(n_basis=3), [[0, -2, -3], [-1, -4, -5]])) - np.testing.assert_raises(NotImplementedError, monomial2.__sub__, - FDataBasis(Fourier(n_basis=3), - [[2, 2, 3], [5, 4, 5]])) + with np.testing.assert_raises(TypeError): + monomial2 - FDataBasis(Fourier(n_basis=3), + [[2, 2, 3], [5, 4, 5]]) def test_fdatabasis__mul__(self): monomial1 = FDataBasis(Monomial(n_basis=3), [1, 2, 3]) @@ -316,13 +306,14 @@ def test_fdatabasis__mul__(self): FDataBasis(Monomial(n_basis=3), [[1, 2, 3], [6, 8, 10]])) - np.testing.assert_raises(NotImplementedError, monomial2.__mul__, - FDataBasis(Fourier(n_basis=3), - [[2, 2, 3], [5, 4, 5]])) - np.testing.assert_raises(NotImplementedError, monomial2.__mul__, - monomial2) + with np.testing.assert_raises(TypeError): + monomial2 * FDataBasis(Fourier(n_basis=3), + [[2, 2, 3], [5, 4, 5]]) - def test_fdatabasis__mul__(self): + with np.testing.assert_raises(TypeError): + monomial2 * monomial2 + + def test_fdatabasis__mul__2(self): monomial1 = FDataBasis(Monomial(n_basis=3), [1, 2, 3]) monomial2 = FDataBasis(Monomial(n_basis=3), [[1, 2, 3], [3, 4, 5]]) @@ -472,6 +463,43 @@ def test_fdatabasis_derivative_bspline(self): [-120, -18, -60], [-48, 0, 48]]) + def test_concatenate(self): + sample1 = np.arange(0, 10) + sample2 = np.arange(10, 20) + fd1 = FDataGrid([sample1]).to_basis(Fourier(n_basis=5)) + fd2 = FDataGrid([sample2]).to_basis(Fourier(n_basis=5)) + + fd = concatenate([fd1, fd2]) + + np.testing.assert_equal(fd.n_samples, 2) + np.testing.assert_equal(fd.dim_codomain, 1) + np.testing.assert_equal(fd.dim_domain, 1) + np.testing.assert_array_equal(fd.coefficients, np.concatenate( + [fd1.coefficients, fd2.coefficients])) + + def test_vector_valued(self): + X, y = skfda.datasets.fetch_weather(return_X_y=True) + + basis_dim = skfda.representation.basis.Fourier( + n_basis=7, domain_range=X.domain_range) + basis = 
skfda.representation.basis.VectorValued( + [basis_dim] * 2 + ) + + X_basis = X.to_basis(basis) + + self.assertEqual(X_basis.dim_codomain, 2) + + self.assertEqual(X_basis.coordinates[0].basis, basis_dim) + np.testing.assert_allclose( + X_basis.coordinates[0].coefficients, + X.coordinates[0].to_basis(basis_dim).coefficients) + + self.assertEqual(X_basis.coordinates[1].basis, basis_dim) + np.testing.assert_allclose( + X_basis.coordinates[1].coefficients, + X.coordinates[1].to_basis(basis_dim).coefficients) + if __name__ == '__main__': print() diff --git a/tests/test_basis_evaluation.py b/tests/test_basis_evaluation.py index 05a95edf5..0ff5727a0 100644 --- a/tests/test_basis_evaluation.py +++ b/tests/test_basis_evaluation.py @@ -1,30 +1,32 @@ +from skfda.representation.basis import ( + FDataBasis, Monomial, BSpline, Fourier, Constant, VectorValued, Tensor) import unittest import numpy as np -from skfda.representation.basis import FDataBasis, Monomial, BSpline, Fourier class TestBasisEvaluationFourier(unittest.TestCase): def test_evaluation_simple_fourier(self): """Test the evaluation of FDataBasis""" - fourier = Fourier(domain_range=(0, 1), n_basis=3) + fourier = Fourier(domain_range=(0, 2), n_basis=5) - coefficients = np.array([[0.00078238, 0.48857741, 0.63971985], - [0.01778079, 0.73440271, 0.20148638]]) + coefficients = np.array([[1, 2, 3, 4, 5], + [6, 7, 8, 9, 10]]) f = FDataBasis(fourier, coefficients) - t = np.linspace(0, 1, 4) + t = np.linspace(0, 2, 11) - res = np.array([0.905482867989282, 0.146814813180645, -1.04995054116993, - 0.905482867989282, 0.302725561229459, - 0.774764356993855, -1.02414754822331, 0.302725561229459] - ).reshape((2, 4)).round(3) + # Results in R package fda + res = np.array([[8.71, 9.66, 1.84, -4.71, -2.80, 2.71, + 2.45, -3.82, -6.66, -0.30, 8.71], + [22.24, 26.48, 10.57, -4.95, -3.58, 6.24, + 5.31, -7.69, -13.32, 1.13, 22.24]])[..., np.newaxis] - np.testing.assert_array_almost_equal(f(t).round(3), res) - np.testing.assert_array_almost_equal(f.evaluate(t).round(3), res) + np.testing.assert_array_almost_equal(f(t).round(2), res) + np.testing.assert_array_almost_equal(f.evaluate(t).round(2), res) def test_evaluation_point_fourier(self): """Test the evaluation of a single point FDataBasis""" @@ -37,7 +39,7 @@ def test_evaluation_point_fourier(self): # Test different ways of call f with a point res = np.array([-0.903918107989282, -0.267163981229459] - ).reshape((2, 1)).round(4) + ).reshape((2, 1, 1)).round(4) np.testing.assert_array_almost_equal(f([0.5]).round(4), res) np.testing.assert_array_almost_equal(f((0.5,)).round(4), res) @@ -61,10 +63,11 @@ def test_evaluation_derivative_fourier(self): res = np.array([4.34138447771721, -7.09352774867064, 2.75214327095343, 4.34138447771721, 6.52573053999253, -4.81336320468984, -1.7123673353027, 6.52573053999253] - ).reshape((2, 4)).round(3) + ).reshape((2, 4, 1)).round(3) + f_deriv = f.derivative() np.testing.assert_array_almost_equal( - f(t, derivative=1).round(3), res + f_deriv(t).round(3), res ) def test_evaluation_grid_fourier(self): @@ -103,149 +106,21 @@ def test_evaluation_composed_fourier(self): f = FDataBasis(fourier, coefficients) t = np.linspace(0, 1, 4) - res_test = f(t) - # Test same result than evaluation standart - np.testing.assert_array_almost_equal(f([1]), f([[1], [1]], - aligned_evaluation=False)) + np.testing.assert_array_almost_equal(f([1]), + f([[1], [1]], + aligned=False)) np.testing.assert_array_almost_equal(f(t), f(np.vstack((t, t)), - aligned_evaluation=False)) + aligned=False)) # Different 
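The reshaped expected arrays in the updated evaluation tests (``reshape((2, 4, 1))``, ``[..., np.newaxis]``) all reflect one convention change: evaluation now always keeps the codomain axis, since ``keepdims`` is removed. A minimal sketch, grounded in the monomial values used by these tests:

.. code:: python

    # Sketch of the new output-shape convention.
    import numpy as np
    from skfda.representation.basis import FDataBasis, Monomial

    f = FDataBasis(Monomial(n_basis=3), [[1, 2, 3], [0.5, 1.4, 1.3]])
    out = f(np.linspace(0, 1, 4))

    # With keepdims gone, the result always has shape
    # (n_samples, n_points, dim_codomain):
    assert out.shape == (2, 4, 1)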
evaluation times t_multiple = [[0, 0.5], [0.2, 0.7]] np.testing.assert_array_almost_equal(f(t_multiple[0])[0], f(t_multiple, - aligned_evaluation=False)[0]) + aligned=False)[0]) np.testing.assert_array_almost_equal(f(t_multiple[1])[1], f(t_multiple, - aligned_evaluation=False)[1]) - - def test_evaluation_keepdims_fourier(self): - """Test behaviour of keepdims """ - fourier = Fourier(domain_range=(0, 1), n_basis=3) - - coefficients = np.array([[0.00078238, 0.48857741, 0.63971985], - [0.01778079, 0.73440271, 0.20148638]]) - - f = FDataBasis(fourier, coefficients) - f_keepdims = FDataBasis(fourier, coefficients, keepdims=True) - - np.testing.assert_equal(f.keepdims, False) - np.testing.assert_equal(f_keepdims.keepdims, True) - - t = np.linspace(0, 1, 4) - - res = np.array([0.905482867989282, 0.146814813180645, -1.04995054116993, - 0.905482867989282, 0.302725561229459, - 0.774764356993855, -1.02414754822331, 0.302725561229459] - ).reshape((2, 4)).round(3) - - res_keepdims = res.reshape((2, 4, 1)) - - # Case default behaviour keepdims=False - np.testing.assert_array_almost_equal(f(t).round(3), res) - np.testing.assert_array_almost_equal( - f(t, keepdims=False).round(3), res) - np.testing.assert_array_almost_equal(f(t, keepdims=True).round(3), - res_keepdims) - - # Case default behaviour keepdims=True - np.testing.assert_array_almost_equal( - f_keepdims(t).round(3), res_keepdims) - np.testing.assert_array_almost_equal(f_keepdims(t, keepdims=False - ).round(3), - res) - np.testing.assert_array_almost_equal(f_keepdims(t, keepdims=True - ).round(3), - res_keepdims) - - def test_evaluation_composed_keepdims_fourier(self): - """Test behaviour of keepdims with composed evaluation""" - fourier = Fourier(domain_range=(0, 1), n_basis=3) - - coefficients = np.array([[0.00078238, 0.48857741, 0.63971985], - [0.01778079, 0.73440271, 0.20148638]]) - - f = FDataBasis(fourier, coefficients) - f_keepdims = FDataBasis(fourier, coefficients, keepdims=True) - - t = [[0, 0.5, 0.6], [0.2, 0.7, 0.1]] - - res = np.array([[0.69173518, -0.69017042, -1.08997978], - [0.60972512, -0.57416354, 1.02551401]]).round(3) - - res = np.array([0.905482867989282, -0.903918107989282, - -1.13726755517372, 1.09360302608278, - -1.05804144608278, 0.85878105128844] - ).reshape((2, 3)).round(3) - - res_keepdims = res.reshape((2, 3, 1)) - - # Case default behaviour keepdims=False - np.testing.assert_array_almost_equal(f(t, aligned_evaluation=False - ).round(3), - res) - np.testing.assert_array_almost_equal(f(t, aligned_evaluation=False, - keepdims=False).round(3), res) - np.testing.assert_array_almost_equal(f(t, aligned_evaluation=False, - keepdims=True).round(3), - res_keepdims) - - # Case default behaviour keepdims=True - np.testing.assert_array_almost_equal(f_keepdims(t, - aligned_evaluation=False - ).round(3), - res_keepdims) - np.testing.assert_array_almost_equal( - f_keepdims(t, aligned_evaluation=False, keepdims=False).round(3), - res) - np.testing.assert_array_almost_equal( - f_keepdims(t, aligned_evaluation=False, keepdims=True).round(3), - res_keepdims) - - def test_evaluation_grid_keepdims_fourier(self): - """Test behaviour of keepdims with grid evaluation""" - - fourier = Fourier(domain_range=(0, 1), n_basis=3) - - coefficients = np.array([[0.00078238, 0.48857741, 0.63971985], - [0.01778079, 0.73440271, 0.20148638]]) - - f = FDataBasis(fourier, coefficients) - f_keepdims = FDataBasis(fourier, coefficients, keepdims=True) - - np.testing.assert_equal(f.keepdims, False) - np.testing.assert_equal(f_keepdims.keepdims, True) - - t = 
np.linspace(0, 1, 4) - - res = np.array([0.905482867989282, 0.146814813180645, -1.04995054116993, - 0.905482867989282, 0.302725561229459, - 0.774764356993855, -1.02414754822331, 0.302725561229459] - ).reshape((2, 4)).round(3) - - res_keepdims = res.reshape((2, 4, 1)) - - # Case default behaviour keepdims=False - np.testing.assert_array_almost_equal(f(t, grid=True).round(3), res) - np.testing.assert_array_almost_equal(f(t, grid=True, keepdims=False - ).round(3), - res) - - np.testing.assert_array_almost_equal(f(t, grid=True, keepdims=True - ).round(3), - res_keepdims) - - # Case default behaviour keepdims=True - np.testing.assert_array_almost_equal(f_keepdims(t, grid=True - ).round(3), - res_keepdims) - np.testing.assert_array_almost_equal(f_keepdims(t, grid=True, - keepdims=False - ).round(3), res) - np.testing.assert_array_almost_equal(f_keepdims(t, grid=True, - keepdims=True).round(3), - res_keepdims) + aligned=False)[1]) def test_domain_in_list_fourier(self): """Test the evaluation of FDataBasis""" @@ -262,7 +137,7 @@ def test_domain_in_list_fourier(self): t = np.linspace(0, 1, 4) res = np.array([0.905, 0.147, -1.05, 0.905, 0.303, - 0.775, -1.024, 0.303]).reshape((2, 4)) + 0.775, -1.024, 0.303]).reshape((2, 4, 1)) np.testing.assert_array_almost_equal(f(t).round(3), res) np.testing.assert_array_almost_equal(f.evaluate(t).round(3), res) @@ -272,20 +147,23 @@ class TestBasisEvaluationBSpline(unittest.TestCase): def test_evaluation_simple_bspline(self): """Test the evaluation of FDataBasis""" - bspline = BSpline(domain_range=(0, 1), n_basis=5, order=3) + bspline = BSpline(domain_range=(0, 2), n_basis=5) - coefficients = [[0.00078238, 0.48857741, 0.63971985, 0.23, 0.33], - [0.01778079, 0.73440271, 0.20148638, 0.54, 0.12]] + coefficients = np.array([[1, 2, 3, 4, 5], + [6, 7, 8, 9, 10]]) f = FDataBasis(bspline, coefficients) - t = np.linspace(0, 1, 4) + t = np.linspace(0, 2, 11) - res = np.array([[0.001, 0.564, 0.435, 0.33], - [0.018, 0.468, 0.371, 0.12]]) + # Results in R package fda + res = np.array([[1, 1.54, 1.99, 2.37, 2.7, 3, + 3.3, 3.63, 4.01, 4.46, 5], + [6, 6.54, 6.99, 7.37, 7.7, 8, + 8.3, 8.63, 9.01, 9.46, 10]])[..., np.newaxis] - np.testing.assert_array_almost_equal(f(t).round(3), res) - np.testing.assert_array_almost_equal(f.evaluate(t).round(3), res) + np.testing.assert_array_almost_equal(f(t).round(2), res) + np.testing.assert_array_almost_equal(f.evaluate(t).round(2), res) def test_evaluation_point_bspline(self): """Test the evaluation of a single point FDataBasis""" @@ -297,7 +175,7 @@ def test_evaluation_point_bspline(self): f = FDataBasis(bspline, coefficients) # Test different ways of call f with a point - res = np.array([[0.5696], [0.3104]]) + res = np.array([[0.5696], [0.3104]])[..., np.newaxis] np.testing.assert_array_almost_equal(f([0.5]).round(4), res) np.testing.assert_array_almost_equal(f((0.5,)).round(4), res) @@ -318,10 +196,11 @@ def test_evaluation_derivative_bspline(self): t = np.linspace(0, 1, 4) + f_deriv = f.derivative() np.testing.assert_array_almost_equal( - f(t, derivative=1).round(3), + f_deriv(t).round(3), np.array([[2.927, 0.453, -1.229, 0.6], - [4.3, -1.599, 1.016, -2.52]]) + [4.3, -1.599, 1.016, -2.52]])[..., np.newaxis] ) def test_evaluation_grid_bspline(self): @@ -360,139 +239,21 @@ def test_evaluation_composed_bspline(self): f = FDataBasis(bspline, coefficients) t = np.linspace(0, 1, 4) - res_test = f(t) - # Test same result than evaluation standart np.testing.assert_array_almost_equal(f([1]), f([[1], [1]], - aligned_evaluation=False)) + 
aligned=False)) np.testing.assert_array_almost_equal(f(t), f(np.vstack((t, t)), - aligned_evaluation=False)) + aligned=False)) # Different evaluation times t_multiple = [[0, 0.5], [0.2, 0.7]] np.testing.assert_array_almost_equal(f(t_multiple[0])[0], f(t_multiple, - aligned_evaluation=False)[0]) + aligned=False)[0]) np.testing.assert_array_almost_equal(f(t_multiple[1])[1], f(t_multiple, - aligned_evaluation=False)[1]) - - def test_evaluation_keepdims_bspline(self): - """Test behaviour of keepdims """ - bspline = BSpline(domain_range=(0, 1), n_basis=5, order=3) - - coefficients = [[0.00078238, 0.48857741, 0.63971985, 0.23, 0.33], - [0.01778079, 0.73440271, 0.20148638, 0.54, 0.12]] - - f = FDataBasis(bspline, coefficients) - f_keepdims = FDataBasis(bspline, coefficients, keepdims=True) - - np.testing.assert_equal(f.keepdims, False) - np.testing.assert_equal(f_keepdims.keepdims, True) - - t = np.linspace(0, 1, 4) - - res = np.array([[0.001, 0.564, 0.435, 0.33], - [0.018, 0.468, 0.371, 0.12]]) - - res_keepdims = res.reshape((2, 4, 1)) - - # Case default behaviour keepdims=False - np.testing.assert_array_almost_equal(f(t).round(3), res) - np.testing.assert_array_almost_equal( - f(t, keepdims=False).round(3), res) - np.testing.assert_array_almost_equal(f(t, keepdims=True).round(3), - res_keepdims) - - # Case default behaviour keepdims=True - np.testing.assert_array_almost_equal( - f_keepdims(t).round(3), res_keepdims) - np.testing.assert_array_almost_equal(f_keepdims(t, keepdims=False - ).round(3), - res) - np.testing.assert_array_almost_equal(f_keepdims(t, keepdims=True - ).round(3), - res_keepdims) - - def test_evaluation_composed_keepdims_bspline(self): - """Test behaviour of keepdims with composed evaluation""" - bspline = BSpline(domain_range=(0, 1), n_basis=5, order=3) - - coefficients = [[0.00078238, 0.48857741, 0.63971985, 0.23, 0.33], - [0.01778079, 0.73440271, 0.20148638, 0.54, 0.12]] - - f = FDataBasis(bspline, coefficients) - f_keepdims = FDataBasis(bspline, coefficients, keepdims=True) - - t = [[0, 0.5, 0.6], [0.2, 0.7, 0.1]] - - res = np.array([[0.001, 0.57, 0.506], - [0.524, 0.399, 0.359]]) - - res_keepdims = res.reshape((2, 3, 1)) - - # Case default behaviour keepdims=False - np.testing.assert_array_almost_equal(f(t, aligned_evaluation=False - ).round(3), - res) - np.testing.assert_array_almost_equal(f(t, aligned_evaluation=False, - keepdims=False).round(3), - res) - np.testing.assert_array_almost_equal(f(t, aligned_evaluation=False, - keepdims=True).round(3), - res_keepdims) - - # Case default behaviour keepdims=True - np.testing.assert_array_almost_equal(f_keepdims(t, - aligned_evaluation=False - ).round(3), - res_keepdims) - np.testing.assert_array_almost_equal( - f_keepdims(t, aligned_evaluation=False, keepdims=False).round(3), - res) - np.testing.assert_array_almost_equal( - f_keepdims(t, aligned_evaluation=False, keepdims=True).round(3), - res_keepdims) - - def test_evaluation_grid_keepdims_bspline(self): - """Test behaviour of keepdims with grid evaluation""" - - bspline = BSpline(domain_range=(0, 1), n_basis=5, order=3) - - coefficients = [[0.00078238, 0.48857741, 0.63971985, 0.23, 0.33], - [0.01778079, 0.73440271, 0.20148638, 0.54, 0.12]] - - f = FDataBasis(bspline, coefficients) - f_keepdims = FDataBasis(bspline, coefficients, keepdims=True) - - np.testing.assert_equal(f.keepdims, False) - np.testing.assert_equal(f_keepdims.keepdims, True) - - t = np.linspace(0, 1, 4) - - res = np.array([[0.001, 0.564, 0.435, 0.33], - [0.018, 0.468, 0.371, 0.12]]) - - res_keepdims = 
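Throughout these tests the old ``aligned_evaluation`` keyword is replaced by ``aligned``. A short sketch of per-sample (unaligned) evaluation, using illustrative coefficients rather than the test fixtures:

.. code:: python

    # Sketch of unaligned evaluation with the renamed keyword.
    import numpy as np
    from skfda.representation.basis import BSpline, FDataBasis

    f = FDataBasis(BSpline(domain_range=(0, 1), n_basis=5, order=3),
                   [[0.1, 0.5, 0.6, 0.2, 0.3],
                    [0.0, 0.7, 0.2, 0.5, 0.1]])

    # One evaluation grid per sample; `aligned=False` replaces the old
    # `aligned_evaluation=False` used in the deleted tests.
    out = f([[0.0, 0.5], [0.2, 0.7]], aligned=False)
    assert out.shape == (2, 2, 1)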
res.reshape((2, 4, 1)) - - # Case default behaviour keepdims=False - np.testing.assert_array_almost_equal(f(t, grid=True).round(3), res) - np.testing.assert_array_almost_equal( - f(t, grid=True, keepdims=False).round(3), res) - - np.testing.assert_array_almost_equal( - f(t, grid=True, keepdims=True).round(3), - res_keepdims) - - # Case default behaviour keepdims=True - np.testing.assert_array_almost_equal(f_keepdims(t, grid=True).round(3), - res_keepdims) - np.testing.assert_array_almost_equal( - f_keepdims(t, grid=True, keepdims=False).round(3), res) - np.testing.assert_array_almost_equal( - f_keepdims(t, grid=True, keepdims=True).round(3), - res_keepdims) + aligned=False)[1]) def test_domain_in_list_bspline(self): """Test the evaluation of FDataBasis""" @@ -513,7 +274,7 @@ def test_domain_in_list_bspline(self): t = np.linspace(0, 1, 4) res = np.array([[0.001, 0.564, 0.435, 0.33], - [0.018, 0.468, 0.371, 0.12]]) + [0.018, 0.468, 0.371, 0.12]])[..., np.newaxis] np.testing.assert_array_almost_equal(f(t).round(3), res) np.testing.assert_array_almost_equal(f.evaluate(t).round(3), res) @@ -528,19 +289,24 @@ class TestBasisEvaluationMonomial(unittest.TestCase): def test_evaluation_simple_monomial(self): """Test the evaluation of FDataBasis""" - monomial = Monomial(domain_range=(0, 1), n_basis=3) + monomial = Monomial(domain_range=(0, 2), n_basis=5) - coefficients = [[1, 2, 3], [0.5, 1.4, 1.3]] + coefficients = np.array([[1, 2, 3, 4, 5], + [6, 7, 8, 9, 10]]) f = FDataBasis(monomial, coefficients) - t = np.linspace(0, 1, 4) + t = np.linspace(0, 2, 11) - res = np.array([[1., 2., 3.667, 6.], - [0.5, 1.111, 2.011, 3.2]]) + # Results in R package fda + res = np.array( + [[1.00, 1.56, 2.66, 4.79, 8.62, 15.00, + 25.00, 39.86, 61.03, 90.14, 129.00], + [6.00, 7.81, 10.91, 16.32, 25.42, 40.00, + 62.21, 94.59, 140.08, 201.98, 284.00]])[..., np.newaxis] - np.testing.assert_array_almost_equal(f(t).round(3), res) - np.testing.assert_array_almost_equal(f.evaluate(t).round(3), res) + np.testing.assert_array_almost_equal(f(t).round(2), res) + np.testing.assert_array_almost_equal(f.evaluate(t).round(2), res) def test_evaluation_point_monomial(self): """Test the evaluation of a single point FDataBasis""" @@ -551,7 +317,7 @@ def test_evaluation_point_monomial(self): f = FDataBasis(monomial, coefficients) # Test different ways of call f with a point - res = np.array([[2.75], [1.525]]) + res = np.array([[2.75], [1.525]])[..., np.newaxis] np.testing.assert_array_almost_equal(f([0.5]).round(4), res) np.testing.assert_array_almost_equal(f((0.5,)).round(4), res) @@ -571,10 +337,11 @@ def test_evaluation_derivative_monomial(self): t = np.linspace(0, 1, 4) + f_deriv = f.derivative() np.testing.assert_array_almost_equal( - f(t, derivative=1).round(3), + f_deriv(t).round(3), np.array([[2., 4., 6., 8.], - [1.4, 2.267, 3.133, 4.]]) + [1.4, 2.267, 3.133, 4.]])[..., np.newaxis] ) def test_evaluation_grid_monomial(self): @@ -611,149 +378,103 @@ def test_evaluation_composed_monomial(self): f = FDataBasis(monomial, coefficients) t = np.linspace(0, 1, 4) - res_test = f(t) - # Test same result than evaluation standart - np.testing.assert_array_almost_equal(f([1]), f([[1], [1]], - aligned_evaluation=False)) + np.testing.assert_array_almost_equal(f([1]), + f([[1], [1]], + aligned=False)) np.testing.assert_array_almost_equal(f(t), f(np.vstack((t, t)), - aligned_evaluation=False)) + aligned=False)) # Different evaluation times t_multiple = [[0, 0.5], [0.2, 0.7]] np.testing.assert_array_almost_equal(f(t_multiple[0])[0], f(t_multiple, - 
aligned_evaluation=False)[0]) + aligned=False)[0]) np.testing.assert_array_almost_equal(f(t_multiple[1])[1], f(t_multiple, - aligned_evaluation=False)[1]) + aligned=False)[1]) - def test_evaluation_keepdims_monomial(self): - """Test behaviour of keepdims """ - monomial = Monomial(domain_range=(0, 1), n_basis=3) + def test_domain_in_list_monomial(self): + """Test the evaluation of FDataBasis""" - coefficients = [[1, 2, 3], [0.5, 1.4, 1.3]] + for monomial in (Monomial(domain_range=[(0, 1)], n_basis=3), + Monomial(domain_range=((0, 1),), n_basis=3), + Monomial(domain_range=np.array((0, 1)), n_basis=3), + Monomial(domain_range=np.array([(0, 1)]), n_basis=3)): - f = FDataBasis(monomial, coefficients) - f_keepdims = FDataBasis(monomial, coefficients, keepdims=True) + coefficients = [[1, 2, 3], [0.5, 1.4, 1.3]] - np.testing.assert_equal(f.keepdims, False) - np.testing.assert_equal(f_keepdims.keepdims, True) + f = FDataBasis(monomial, coefficients) - t = np.linspace(0, 1, 4) + t = np.linspace(0, 1, 4) - res = np.array([[1., 2., 3.667, 6.], - [0.5, 1.111, 2.011, 3.2]]) + res = np.array([[1., 2., 3.667, 6.], + [0.5, 1.111, 2.011, 3.2]])[..., np.newaxis] - res_keepdims = res.reshape((2, 4, 1)) + np.testing.assert_array_almost_equal(f(t).round(3), res) + np.testing.assert_array_almost_equal(f.evaluate(t).round(3), res) - # Case default behaviour keepdims=False - np.testing.assert_array_almost_equal(f(t).round(3), res) - np.testing.assert_array_almost_equal( - f(t, keepdims=False).round(3), res) - np.testing.assert_array_almost_equal(f(t, keepdims=True).round(3), - res_keepdims) - # Case default behaviour keepdims=True - np.testing.assert_array_almost_equal( - f_keepdims(t).round(3), res_keepdims) - np.testing.assert_array_almost_equal( - f_keepdims(t, keepdims=False).round(3), res) - np.testing.assert_array_almost_equal( - f_keepdims(t, keepdims=True).round(3), res_keepdims) +class TestBasisEvaluationVectorValued(unittest.TestCase): - def test_evaluation_composed_keepdims_monomial(self): - """Test behaviour of keepdims with composed evaluation""" - monomial = Monomial(domain_range=(0, 1), n_basis=3) + def test_vector_valued_constant(self): - coefficients = [[1, 2, 3], [0.5, 1.4, 1.3]] + basis_first = Constant() + basis_second = Constant() - f = FDataBasis(monomial, coefficients) - f_keepdims = FDataBasis(monomial, coefficients, keepdims=True) + basis = VectorValued([basis_first, basis_second]) - t = [[0, 0.5, 0.6], [0.2, 0.7, 0.1]] + fd = FDataBasis(basis=basis, coefficients=[[1, 2], [3, 4]]) - res = np.array([[1., 2.75, 3.28], - [0.832, 2.117, 0.653]]) + self.assertEqual(fd.dim_codomain, 2) - res_keepdims = res.reshape((2, 3, 1)) + res = np.array([[[1, 2]], [[3, 4]]]) - # Case default behaviour keepdims=False - np.testing.assert_array_almost_equal( - f(t, aligned_evaluation=False).round(3), res) - np.testing.assert_array_almost_equal(f(t, aligned_evaluation=False, - keepdims=False).round(3), res) - np.testing.assert_array_almost_equal(f(t, aligned_evaluation=False, - keepdims=True).round(3), - res_keepdims) - - # Case default behaviour keepdims=True - np.testing.assert_array_almost_equal( - f_keepdims(t, aligned_evaluation=False).round(3), - res_keepdims) - np.testing.assert_array_almost_equal( - f_keepdims(t, aligned_evaluation=False, keepdims=False).round(3), - res) - np.testing.assert_array_almost_equal( - f_keepdims(t, aligned_evaluation=False, keepdims=True).round(3), - res_keepdims) + np.testing.assert_allclose(fd(0), res) - def test_evaluation_grid_keepdims_monomial(self): - """Test behaviour 
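The derivative tests above show the other API change in this file: ``f(t, derivative=1)`` is gone, and derivatives are taken explicitly with ``derivative()`` before evaluating. A minimal sketch:

.. code:: python

    # Sketch of the new derivative workflow used by the updated tests.
    import numpy as np
    from skfda.representation.basis import FDataBasis, Monomial

    f = FDataBasis(Monomial(n_basis=3), [[1, 2, 3]])  # 1 + 2t + 3t^2

    # Derivatives are functional objects of their own now.
    f_deriv = f.derivative()
    f_deriv(np.linspace(0, 1, 4))  # values of 2 + 6t, shape (1, 4, 1)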
of keepdims with grid evaluation""" + def test_vector_valued_constant_monomial(self): - monomial = Monomial(domain_range=(0, 1), n_basis=3) + basis_first = Constant(domain_range=(0, 5)) + basis_second = Monomial(n_basis=3, domain_range=(0, 5)) - coefficients = [[1, 2, 3], [0.5, 1.4, 1.3]] + basis = VectorValued([basis_first, basis_second]) - f = FDataBasis(monomial, coefficients) - f_keepdims = FDataBasis(monomial, coefficients, keepdims=True) + fd = FDataBasis(basis=basis, coefficients=[ + [1, 2, 3, 4], [3, 4, 5, 6]]) - np.testing.assert_equal(f.keepdims, False) - np.testing.assert_equal(f_keepdims.keepdims, True) + self.assertEqual(fd.dim_codomain, 2) - t = np.linspace(0, 1, 4) + np.testing.assert_allclose(fd.domain_range[0], (0, 5)) - res = np.array([[1., 2., 3.667, 6.], - [0.5, 1.111, 2.011, 3.2]]) + res = np.array([[[1, 2], [1, 9], [1, 24]], + [[3, 4], [3, 15], [3, 38]]]) - res_keepdims = res.reshape((2, 4, 1)) + np.testing.assert_allclose(fd([0, 1, 2]), res) - # Case default behaviour keepdims=False - np.testing.assert_array_almost_equal(f(t, grid=True).round(3), res) - np.testing.assert_array_almost_equal( - f(t, grid=True, keepdims=False).round(3), - res) - np.testing.assert_array_almost_equal( - f(t, grid=True, keepdims=True).round(3), res_keepdims) +class TestBasisEvaluationTensor(unittest.TestCase): - # Case default behaviour keepdims=True - np.testing.assert_array_almost_equal(f_keepdims(t, grid=True).round(3), - res_keepdims) - np.testing.assert_array_almost_equal( - f_keepdims(t, grid=True, keepdims=False).round(3), res) - np.testing.assert_array_almost_equal( - f_keepdims(t, grid=True, keepdims=True).round(3), res_keepdims) + def test_tensor_monomial_constant(self): - def test_domain_in_list_monomial(self): - """Test the evaluation of FDataBasis""" + basis = Tensor([Monomial(n_basis=2), Constant()]) - for monomial in (Monomial(domain_range=[(0, 1)], n_basis=3), - Monomial(domain_range=((0, 1),), n_basis=3), - Monomial(domain_range=np.array((0, 1)), n_basis=3), - Monomial(domain_range=np.array([(0, 1)]), n_basis=3)): + fd = FDataBasis(basis=basis, coefficients=[1, 1]) - coefficients = [[1, 2, 3], [0.5, 1.4, 1.3]] + self.assertEqual(fd.dim_domain, 2) + self.assertEqual(fd.dim_codomain, 1) - f = FDataBasis(monomial, coefficients) + np.testing.assert_allclose(fd([0., 0.]), [[[1.]]]) - t = np.linspace(0, 1, 4) + np.testing.assert_allclose(fd([0.5, 0.5]), [[[1.5]]]) - res = np.array([[1., 2., 3.667, 6.], - [0.5, 1.111, 2.011, 3.2]]) + np.testing.assert_allclose( + fd([(0., 0.), (0.5, 0.5)]), [[[1.0], [1.5]]]) - np.testing.assert_array_almost_equal(f(t).round(3), res) - np.testing.assert_array_almost_equal(f.evaluate(t).round(3), res) + fd_grid = fd.to_grid() + + fd2 = fd_grid.to_basis(basis) + + np.testing.assert_allclose(fd.coefficients, fd2.coefficients) if __name__ == '__main__': diff --git a/tests/test_clustering.py b/tests/test_clustering.py index 8de97af24..3bdc5bbbd 100644 --- a/tests/test_clustering.py +++ b/tests/test_clustering.py @@ -1,8 +1,8 @@ +from skfda.ml.clustering import KMeans, FuzzyCMeans +from skfda.representation.grid import FDataGrid import unittest -import numpy as np -from skfda.representation.grid import FDataGrid -from skfda.ml.clustering.base_kmeans import KMeans, FuzzyKMeans +import numpy as np class TestClustering(unittest.TestCase): @@ -70,28 +70,26 @@ def test_fuzzy_kmeans_univariate(self): [-0.5, -0.5, -0.5, -1, -1, -1]] sample_points = [0, 2, 4, 6, 8, 10] fd = FDataGrid(data_matrix, sample_points) - fuzzy_kmeans = FuzzyKMeans() + fuzzy_kmeans = 
FuzzyCMeans() fuzzy_kmeans.fit(fd) - np.testing.assert_array_equal(fuzzy_kmeans.predict(fd), + np.testing.assert_array_equal(fuzzy_kmeans.predict(fd).round(3), np.array([[0.965, 0.035], [0.94, 0.06], [0.227, 0.773], [0.049, 0.951]])) - np.testing.assert_allclose(fuzzy_kmeans.transform(fd), - np.array([[1.49228858, 7.87898791], - [1.29380155, 5.12696975], - [4.85542339, 2.63309793], - [7.77455633, 1.75920889]])) - centers = FDataGrid(data_matrix=np.array( - [[0.7065078, 0.7065078, 1.45508111, 2.46698825, - 1.98143302, 1.48206743], - [-0.69456401, -0.69456401, -0.49444239, -0.19713489, - -0.19872214, -0.39844583]]), sample_points=sample_points) - np.testing.assert_allclose(fuzzy_kmeans.cluster_centers_.data_matrix, - centers.data_matrix) + np.testing.assert_allclose(fuzzy_kmeans.transform(fd).round(3), + np.array([[1.492, 7.879], + [1.294, 5.127], + [4.856, 2.633], + [7.775, 1.759]])) + centers = np.array([[0.707, 0.707, 1.455, 2.467, 1.981, 1.482], + [-0.695, -0.695, -0.494, -0.197, -0.199, -0.398]]) + np.testing.assert_allclose( + fuzzy_kmeans.cluster_centers_.data_matrix[..., 0].round(3), + centers) np.testing.assert_allclose(fuzzy_kmeans.score(fd), - np.array([-13.928868250627902])) - np.testing.assert_array_equal(fuzzy_kmeans.n_iter_, np.array([18.])) + np.array([-12.025179])) + self.assertEqual(fuzzy_kmeans.n_iter_, 19) # def test_fuzzy_kmeans_multivariate(self): # data_matrix = [[[1, 0.3], [2, 0.4], [3, 0.5], [4, 0.6]], diff --git a/tests/test_covariances.py b/tests/test_covariances.py index 8eccd8066..a4e29024d 100644 --- a/tests/test_covariances.py +++ b/tests/test_covariances.py @@ -13,9 +13,8 @@ def setUp(self): def _test_compare_sklearn(self, cov: skfda.misc.covariances.Covariance): cov_sklearn = cov.to_sklearn() - cov_matrix = cov(self.x, self.x) - cov_sklearn_matrix = cov_sklearn(self.x, self.x) + cov_sklearn_matrix = cov_sklearn(self.x) np.testing.assert_array_almost_equal(cov_matrix, cov_sklearn_matrix) @@ -62,3 +61,10 @@ def test_exponential(self): cov = skfda.misc.covariances.Exponential( variance=variance, length_scale=length_scale) self._test_compare_sklearn(cov) + + def test_white_noise(self): + + for variance in [1, 2]: + with self.subTest(variance=variance): + cov = skfda.misc.covariances.WhiteNoise(variance=variance) + self._test_compare_sklearn(cov) diff --git a/tests/test_elastic.py b/tests/test_elastic.py index 5552aaf44..ea980d882 100644 --- a/tests/test_elastic.py +++ b/tests/test_elastic.py @@ -1,15 +1,17 @@ -import unittest - -import numpy as np - from skfda import FDataGrid -from skfda.datasets import make_multimodal_samples +from skfda.datasets import make_multimodal_samples, make_random_warping from skfda.misc.metrics import (fisher_rao_distance, amplitude_distance, phase_distance, pairwise_distance, lp_distance, warping_distance) -from skfda.preprocessing.registration import ( - elastic_registration, to_srsf, from_srsf, - elastic_registration_warping, invert_warping, normalize_warping) +from skfda.preprocessing.registration import (ElasticRegistration, + invert_warping, + normalize_warping) +from skfda.preprocessing.registration.elastic import (SRSF, elastic_mean, + warping_mean) +import unittest + +import numpy as np + metric = pairwise_distance(lp_distance) pairwise_fisher_rao = pairwise_distance(fisher_rao_distance) @@ -18,90 +20,175 @@ class TestElasticRegistration(unittest.TestCase): """Test elastic registration""" - def setUp(self): """Initialization of samples""" template = make_multimodal_samples(n_samples=1, std=0, random_state=1) self.template = 
template - self.template_rep = template.concatenate(template).concatenate(template) + self.template_rep = template.concatenate( + template).concatenate(template) self.unimodal_samples = make_multimodal_samples(n_samples=3, random_state=1) t = np.linspace(-3, 3, 9) self.dummy_sample = FDataGrid([np.sin(t)], t) - def test_to_srsf(self): """Test to srsf""" # Checks SRSF conversion - srsf = to_srsf(self.dummy_sample) - data_matrix = [[[-0.92155896], [-0.75559027], [ 0.25355399], - [ 0.81547327], [ 0.95333713], [ 0.81547327], - [ 0.25355399], [-0.75559027], [-0.92155896]]] + srsf = SRSF().fit_transform(self.dummy_sample) - np.testing.assert_almost_equal(data_matrix, srsf.data_matrix) + data_matrix = [[[-1.061897], [-0.75559027], [0.25355399], + [0.81547327], [0.95333713], [0.81547327], + [0.25355399], [-0.75559027], [-1.06189697]]] + np.testing.assert_almost_equal(data_matrix, srsf.data_matrix) def test_from_srsf(self): """Test from srsf""" # Checks SRSF conversion - srsf = from_srsf(self.dummy_sample) + srsf = SRSF(initial_value=0).inverse_transform(self.dummy_sample) - data_matrix = [[[ 0. ], [-0.23449228], [-0.83464009], + data_matrix = [[[0.], [-0.23449228], [-0.83464009], [-1.38200046], [-1.55623723], [-1.38200046], - [-0.83464009], [-0.23449228], [ 0. ]]] + [-0.83464009], [-0.23449228], [0.]]] np.testing.assert_almost_equal(data_matrix, srsf.data_matrix) + def test_from_srsf_with_output_points(self): + """Test from srsf""" + + # Checks SRSF conversion + srsf_transformer = SRSF( + initial_value=0, + output_points=self.dummy_sample.sample_points[0]) + srsf = srsf_transformer.inverse_transform(self.dummy_sample) + + data_matrix = [[[0.], [-0.23449228], [-0.83464009], + [-1.38200046], [-1.55623723], [-1.38200046], + [-0.83464009], [-0.23449228], [0.]]] + + np.testing.assert_almost_equal(data_matrix, srsf.data_matrix) def test_srsf_conversion(self): """Converts to srsf and pull backs""" - initial = self.unimodal_samples(-1) - converted = from_srsf(to_srsf(self.unimodal_samples), initial=initial) + + srsf = SRSF() + + converted = srsf.fit_transform(self.unimodal_samples) + converted = srsf.inverse_transform(converted) # Distances between original samples and s -> to_srsf -> from_srsf distances = np.diag(metric(converted, self.unimodal_samples)) np.testing.assert_allclose(distances, 0, atol=8e-3) - def test_template_alignment(self): """Test alignment to 1 template""" - register = elastic_registration(self.unimodal_samples, self.template) + reg = ElasticRegistration(template=self.template) + register = reg.fit_transform(self.unimodal_samples) distances = metric(self.template, register) np.testing.assert_allclose(distances, 0, atol=12e-3) def test_one_to_one_alignment(self): """Test alignment to 1 sample to a template""" - register = elastic_registration(self.unimodal_samples[0], self.template) + reg = ElasticRegistration(template=self.template) + register = reg.fit_transform(self.unimodal_samples[0]) distances = metric(self.template, register) np.testing.assert_allclose(distances, 0, atol=12e-3) - def test_set_alignment(self): """Test alignment 3 curves to set with 3 templates""" # Should give same result than test_template_alignment - register = elastic_registration(self.unimodal_samples, - self.template_rep) + reg = ElasticRegistration(template=self.template_rep) + register = reg.fit_transform(self.unimodal_samples) distances = metric(self.template, register) np.testing.assert_allclose(distances, 0, atol=12e-3) + def test_default_alignment(self): + """Test alignment by default""" + # Should 
give the same result as test_template_alignment
+        reg = ElasticRegistration()
+        register = reg.fit_transform(self.unimodal_samples)
+
+        values = register([-.25, -.1, 0, .1, .25])
+
+        expected = [[[0.599058], [0.997427], [0.772248],
+                     [0.412342], [0.064725]],
+                    [[0.626875], [0.997155], [0.791649],
+                     [0.382181], [0.050098]],
+                    [[0.620992], [0.997369], [0.785886],
+                     [0.376556], [0.048804]]]
+
+        np.testing.assert_allclose(values, expected, atol=1e-4)

-    def test_simetry_of_aligment(self):
+    def test_callable_alignment(self):
+        """Test alignment with a callable template"""
+        # Should give the same result as test_template_alignment
+        reg = ElasticRegistration(template=elastic_mean)
+        register = reg.fit_transform(self.unimodal_samples)
+
+        values = register([-.25, -.1, 0, .1, .25])
+        expected = [[[0.599058], [0.997427], [0.772248],
+                     [0.412342], [0.064725]],
+                    [[0.626875], [0.997155], [0.791649],
+                     [0.382181], [0.050098]],
+                    [[0.620992], [0.997369], [0.785886],
+                     [0.376556], [0.048804]]]
+
+        np.testing.assert_allclose(values, expected, atol=1e-4)
+
+    def test_simmetry_of_aligment(self):
        """Check registration using inverse composition"""
-        warping = elastic_registration_warping(self.unimodal_samples,
-                                               self.template)
+        reg = ElasticRegistration(template=self.template)
+        reg.fit_transform(self.unimodal_samples)
+        warping = reg.warping_
        inverse = invert_warping(warping)
        register = self.template_rep.compose(inverse)
        distances = np.diag(metric(self.unimodal_samples, register))

        np.testing.assert_allclose(distances, 0, atol=12e-3)

+    def test_raises(self):
+        reg = ElasticRegistration()
+
+        # X not passed to fit and the template is not an FDataGrid
+        with np.testing.assert_raises(ValueError):
+            reg.fit()
+
+        # Inverse transform without previous transform
+        with np.testing.assert_raises(ValueError):
+            reg.inverse_transform(self.unimodal_samples)
+
+        # Inverse transform with different number of samples than transform
+        reg.fit_transform(self.unimodal_samples)
+        with np.testing.assert_raises(ValueError):
+            reg.inverse_transform(self.unimodal_samples[0])
+
+        # FDataGrid as template with n != 1 and n != n_samples to transform
+        reg = ElasticRegistration(template=self.unimodal_samples).fit()
+        with np.testing.assert_raises(ValueError):
+            reg.transform(self.unimodal_samples[0])
+
+    def test_score(self):
+        """Test score method of the transformer"""
+        reg = ElasticRegistration()
+        reg.fit(self.unimodal_samples)
+        score = reg.score(self.unimodal_samples)
+        np.testing.assert_almost_equal(score, 0.9994225)
+
+    def test_warping_mean(self):
+        warping = make_random_warping(start=-1, random_state=0)
+        mean = warping_mean(warping)
+        values = mean([-1, -.5, 0, .5, 1])
+        expected = [[[-1.], [-0.376241], [0.136193], [0.599291], [1.]]]
+        np.testing.assert_array_almost_equal(values, expected)
+
+
class TestElasticDistances(unittest.TestCase):
    """Test elastic distances"""

@@ -109,11 +196,11 @@ def test_fisher_rao(self):
        """Test fisher rao distance"""

        t = np.linspace(0, 1, 100)
-        sample = FDataGrid([t, 1-t], t)
+        sample = FDataGrid([t, 1 - t], t)
        f = np.square(sample)
        g = np.power(sample, 0.5)

-        distance = [[0.62825868, 1.98009242], [1.98009242, 0.62825868]]
+        distance = [[0.64, 1.984], [1.984, 0.64]]
        res = pairwise_fisher_rao(f, g)

        np.testing.assert_almost_equal(res, distance, decimal=3)

    def test_fisher_rao_invariance(self):
        """Test invariance of fisher rao metric: d(f,g)= d(foh, goh)"""

-        t = np.linspace(0, np.pi)
+        t = np.linspace(0, np.pi, 1000)
        id = FDataGrid([t], t)
        cos = np.cos(id)
        sin = np.sin(id)
@@
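These tests exercise the new transformer-style registration API: ``ElasticRegistration`` and ``SRSF`` follow the scikit-learn ``fit``/``transform`` protocol instead of the old free functions. A minimal sketch built only from calls that appear in the tests above:

.. code:: python

    # Sketch of the transformer-style elastic registration API.
    from skfda.datasets import make_multimodal_samples
    from skfda.preprocessing.registration import ElasticRegistration
    from skfda.preprocessing.registration.elastic import SRSF

    X = make_multimodal_samples(n_samples=3, random_state=1)

    reg = ElasticRegistration()      # registers to an elastic mean by default
    X_reg = reg.fit_transform(X)
    warpings = reg.warping_          # fitted warping functions

    srsf = SRSF()                    # SRSF conversion is a transformer too
    q = srsf.fit_transform(X)
    X_back = srsf.inverse_transform(q)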
-134,14 +221,14 @@ def test_fisher_rao_invariance(self): distance_warping = fisher_rao_distance(cos.compose(gamma), sin.compose(gamma)) distance_warping2 = fisher_rao_distance(cos.compose(gamma2), - sin.compose(gamma2)) + sin.compose(gamma2)) # The error ~0.001 due to the derivation - np.testing.assert_almost_equal(distance_original, distance_warping, - decimal=2) + np.testing.assert_allclose(distance_original, distance_warping, + atol=0.01) - np.testing.assert_almost_equal(distance_original, distance_warping2, - decimal=2) + np.testing.assert_allclose(distance_original, distance_warping2, + atol=0.01) def test_amplitude_distance_limit(self): """Test limit of amplitude distance penalty""" @@ -154,30 +241,27 @@ def test_amplitude_distance_limit(self): np.testing.assert_almost_equal(amplitude_limit, fr_distance) - def test_phase_distance_id(self): """Test of phase distance invariance""" f = make_multimodal_samples(n_samples=1, random_state=1) - phase = phase_distance(f, 2*f) + phase = phase_distance(f, 2 * f) np.testing.assert_allclose(phase, 0, atol=1e-7) def test_warping_distance(self): """Test of warping distance""" - t = np.linspace(0, 1) + t = np.linspace(0, 1, 1000) w1 = FDataGrid([t**5], t) w2 = FDataGrid([t**3], t) d = warping_distance(w1, w2) - np.testing.assert_allclose(d, np.arccos(np.sqrt(15)/4), atol=1e-3) + np.testing.assert_allclose(d, np.arccos(np.sqrt(15) / 4), atol=1e-3) d = warping_distance(w2, w2) np.testing.assert_allclose(d, 0, atol=2e-2) - - if __name__ == '__main__': print() unittest.main() diff --git a/tests/test_extrapolation.py b/tests/test_extrapolation.py index 993da57db..56281e702 100644 --- a/tests/test_extrapolation.py +++ b/tests/test_extrapolation.py @@ -1,14 +1,14 @@ """Test to check the extrapolation module""" -import unittest - -import numpy as np from skfda import FDataGrid, FDataBasis from skfda.datasets import make_sinusoidal_process from skfda.representation.basis import Fourier from skfda.representation.extrapolation import ( PeriodicExtrapolation, BoundaryExtrapolation, ExceptionExtrapolation, FillExtrapolation) +import unittest + +import numpy as np class TestBasis(unittest.TestCase): @@ -106,27 +106,31 @@ def test_periodic(self): self.grid.extrapolation = PeriodicExtrapolation() data = self.grid([-.5, 0, 1.5]).round(3) - np.testing.assert_almost_equal(data, [[-0.724, 0.976, -0.724], - [-1.086, 0.759, -1.086]]) + np.testing.assert_almost_equal(data[..., 0], + [[-0.724, 0.976, -0.724], + [-1.086, 0.759, -1.086]]) self.basis.extrapolation = "periodic" data = self.basis([-.5, 0, 1.5]).round(3) - np.testing.assert_almost_equal(data, [[-0.69, 0.692, -0.69], - [-1.021, 1.056, -1.021]]) + np.testing.assert_almost_equal(data[..., 0], + [[-0.69, 0.692, -0.69], + [-1.021, 1.056, -1.021]]) def test_boundary(self): self.grid.extrapolation = "bounds" data = self.grid([-.5, 0, 1.5]).round(3) - np.testing.assert_almost_equal(data, [[0.976, 0.976, 0.797], - [0.759, 0.759, 1.125]]) + np.testing.assert_almost_equal(data[..., 0], + [[0.976, 0.976, 0.797], + [0.759, 0.759, 1.125]]) self.basis.extrapolation = "bounds" data = self.basis([-.5, 0, 1.5]).round(3) - np.testing.assert_almost_equal(data, [[0.692, 0.692, 0.692], - [1.056, 1.056, 1.056]]) + np.testing.assert_almost_equal(data[..., 0], + [[0.692, 0.692, 0.692], + [1.056, 1.056, 1.056]]) def test_exception(self): self.grid.extrapolation = "exception" @@ -143,27 +147,31 @@ def test_zeros(self): self.grid.extrapolation = "zeros" data = self.grid([-.5, 0, 1.5]).round(3) - np.testing.assert_almost_equal(data, [[0., 
0.976, 0.], - [0., 0.759, 0.]]) + np.testing.assert_almost_equal(data[..., 0], + [[0., 0.976, 0.], + [0., 0.759, 0.]]) self.basis.extrapolation = "zeros" data = self.basis([-.5, 0, 1.5]).round(3) - np.testing.assert_almost_equal(data, [[0, 0.692, 0], - [0, 1.056, 0]]) + np.testing.assert_almost_equal(data[..., 0], + [[0, 0.692, 0], + [0, 1.056, 0]]) def test_nan(self): self.grid.extrapolation = "nan" data = self.grid([-.5, 0, 1.5]).round(3) - np.testing.assert_almost_equal(data, [[np.nan, 0.976, np.nan], - [np.nan, 0.759, np.nan]]) + np.testing.assert_almost_equal(data[..., 0], + [[np.nan, 0.976, np.nan], + [np.nan, 0.759, np.nan]]) self.basis.extrapolation = "nan" data = self.basis([-.5, 0, 1.5]).round(3) - np.testing.assert_almost_equal(data, [[np.nan, 0.692, np.nan], - [np.nan, 1.056, np.nan]]) + np.testing.assert_almost_equal(data[..., 0], + [[np.nan, 0.692, np.nan], + [np.nan, 1.056, np.nan]]) if __name__ == '__main__': diff --git a/tests/test_fdatagrid_numpy.py b/tests/test_fdatagrid_numpy.py new file mode 100644 index 000000000..b1a3b13cb --- /dev/null +++ b/tests/test_fdatagrid_numpy.py @@ -0,0 +1,47 @@ +from skfda import FDataGrid +import unittest +import numpy as np + + +class TestFDataGridNumpy(unittest.TestCase): + + def test_monary_ufunc(self): + data_matrix = np.arange(15).reshape(3, 5) + + fd = FDataGrid(data_matrix) + + fd_sqrt = np.sqrt(fd) + + fd_sqrt_build = FDataGrid(np.sqrt(data_matrix)) + + self.assertEqual(fd_sqrt, fd_sqrt_build) + + def test_binary_ufunc(self): + data_matrix = np.arange(15).reshape(3, 5) + data_matrix2 = 2 * np.arange(15).reshape(3, 5) + + fd = FDataGrid(data_matrix) + fd2 = FDataGrid(data_matrix2) + + fd_mul = np.multiply(fd, fd2) + + fd_mul_build = FDataGrid(data_matrix * data_matrix2) + + self.assertEqual(fd_mul, fd_mul_build) + + def test_out_ufunc(self): + data_matrix = np.arange(15.).reshape(3, 5) + data_matrix_copy = np.copy(data_matrix) + + fd = FDataGrid(data_matrix) + + np.sqrt(fd, out=fd) + + fd_sqrt_build = FDataGrid(np.sqrt(data_matrix_copy)) + + self.assertEqual(fd, fd_sqrt_build) + + +if __name__ == '__main__': + print() + unittest.main() diff --git a/tests/test_fpca.py b/tests/test_fpca.py new file mode 100644 index 000000000..98f3c499f --- /dev/null +++ b/tests/test_fpca.py @@ -0,0 +1,417 @@ +from skfda import FDataGrid, FDataBasis +from skfda.datasets import fetch_weather +from skfda.misc.operators import LinearDifferentialOperator +from skfda.misc.regularization import TikhonovRegularization +from skfda.preprocessing.dim_reduction.projection import FPCA +from skfda.representation.basis import Fourier +import unittest + +import numpy as np + + +class FPCATestCase(unittest.TestCase): + + def test_basis_fpca_fit_attributes(self): + fpca = FPCA() + with self.assertRaises(AttributeError): + fpca.fit(None) + + basis = Fourier(n_basis=1) + # check that if n_components is bigger than the number of samples then + # an exception should be thrown + fd = FDataBasis(basis, [[0.9]]) + with self.assertRaises(AttributeError): + fpca.fit(fd) + + # check that n_components must be smaller than the number of elements + # of target basis + fd = FDataBasis(basis, [[0.9], [0.7], [0.5]]) + with self.assertRaises(AttributeError): + fpca.fit(fd) + + def test_discretized_fpca_fit_attributes(self): + fpca = FPCA() + with self.assertRaises(AttributeError): + fpca.fit(None) + + # check that if n_components is bigger than the number of samples then + # an exception should be thrown + fd = FDataGrid([[0.5], [0.1]], sample_points=[0]) + with 
self.assertRaises(AttributeError): + fpca.fit(fd) + + # check that n_components must be smaller than the number of attributes + # in the FDataGrid object + fd = FDataGrid([[0.9], [0.7], [0.5]], sample_points=[0]) + with self.assertRaises(AttributeError): + fpca.fit(fd) + + def test_basis_fpca_fit_result(self): + + n_basis = 9 + n_components = 3 + + fd_data = fetch_weather()['data'].coordinates[0] + fd_data = FDataGrid(np.squeeze(fd_data.data_matrix), + np.arange(0.5, 365, 1)) + + # initialize basis data + basis = Fourier(n_basis=n_basis, domain_range=(0, 365)) + fd_basis = fd_data.to_basis(basis) + + fpca = FPCA(n_components=n_components, + regularization=TikhonovRegularization( + LinearDifferentialOperator(2), + regularization_parameter=1e5)) + fpca.fit(fd_basis) + + # results obtained using Ramsay's R package + results = [[0.92407552, 0.13544888, 0.35399023, 0.00805966, + -0.02148108, + -0.01709549, -0.00208469, -0.00297439, -0.00308224], + [-0.33314436, -0.05116842, 0.89443418, 0.14673902, + 0.21559073, + 0.02046924, 0.02203431, -0.00787185, 0.00247492], + [-0.14241092, 0.92131899, 0.00514715, 0.23391411, + -0.19497613, + 0.09800817, 0.01754439, -0.00205874, 0.01438185]] + results = np.array(results) + + # compare results obtained using this library. There are slight + # variations due to the fact that we are in two different packages + for i in range(n_components): + if np.sign(fpca.components_.coefficients[i][0]) != np.sign( + results[i][0]): + results[i, :] *= -1 + np.testing.assert_allclose(fpca.components_.coefficients, results, + atol=1e-7) + + def test_basis_fpca_transform_result(self): + + n_basis = 9 + n_components = 3 + + fd_data = fetch_weather()['data'].coordinates[0] + fd_data = FDataGrid(np.squeeze(fd_data.data_matrix), + np.arange(0.5, 365, 1)) + + # initialize basis data + basis = Fourier(n_basis=n_basis, domain_range=(0, 365)) + fd_basis = fd_data.to_basis(basis) + + fpca = FPCA(n_components=n_components, + regularization=TikhonovRegularization( + LinearDifferentialOperator(2), + regularization_parameter=1e5)) + fpca.fit(fd_basis) + scores = fpca.transform(fd_basis) + + # results obtained using Ramsay's R package + results = [[-7.68307641e+01, 5.69034443e+01, -1.22440149e+01], + [-9.02873996e+01, 1.46262257e+01, -1.78574536e+01], + [-8.21155683e+01, 3.19159491e+01, -2.56212328e+01], + [-1.14163637e+02, 3.66425562e+01, -1.00810836e+01], + [-6.97263223e+01, 1.22817168e+01, -2.39417618e+01], + [-6.41886364e+01, -1.07261045e+01, -1.10587407e+01], + [1.35824412e+02, 2.03484658e+01, -9.04815324e+00], + [-1.46816399e+01, -2.66867491e+01, -1.20233465e+01], + [1.02507511e+00, -2.29840736e+01, -9.06081296e+00], + [-3.62936903e+01, -2.09520442e+01, -1.14799951e+01], + [-4.20649313e+01, -1.13618094e+01, -6.24909009e+00], + [-7.38115985e+01, -3.18423866e+01, -1.50298626e+01], + [-6.69822456e+01, -3.35518632e+01, -1.25167352e+01], + [-1.03534763e+02, -1.29513941e+01, -1.49103879e+01], + [-1.04542036e+02, -1.36794907e+01, -1.41555965e+01], + [-7.35863347e+00, -1.41171956e+01, -2.97562788e+00], + [7.28804530e+00, -5.34421830e+01, -3.39823418e+00], + [5.59974094e+01, -4.02154080e+01, 3.78800103e-01], + [1.80778702e+02, 1.87798201e+01, -1.99043247e+01], + [-3.69700617e+00, -4.19441020e+01, 6.45820740e+00], + [3.76527216e+01, -4.23056953e+01, 1.04221757e+01], + [1.23850646e+02, -4.24648130e+01, -2.22336786e-01], + [-7.23588457e+00, -1.20579536e+01, 2.07502089e+01], + [-4.96871011e+01, 8.88483448e+00, 2.02882768e+01], + [-1.36726355e+02, -1.86472599e+01, 1.89076217e+01], + 
[-1.83878661e+02, 4.12118550e+01, 1.78960356e+01], + [-1.81568820e+02, 5.20817910e+01, 2.01078870e+01], + [-5.08775852e+01, 1.34600555e+01, 3.18602712e+01], + [-1.37633866e+02, 7.50809631e+01, 2.42320782e+01], + [4.98276375e+01, 1.33401270e+00, 3.50611066e+01], + [1.51149934e+02, -5.47417776e+01, 3.97592325e+01], + [1.58366096e+02, -3.80762686e+01, -5.62415023e+00], + [2.17139548e+02, 6.34055987e+01, -1.98853635e+01], + [2.33615480e+02, -7.90787574e-02, 2.69069525e+00], + [3.45371437e+02, 9.58703622e+01, 8.47570770e+00]] + results = np.array(results) + + # compare results + np.testing.assert_allclose(scores, results, atol=1e-7) + + def test_basis_fpca_regularization_fit_result(self): + + n_basis = 9 + n_components = 3 + + fd_data = fetch_weather()['data'].coordinates[0] + fd_data = FDataGrid(np.squeeze(fd_data.data_matrix), + np.arange(0.5, 365, 1)) + + # initialize basis data + basis = Fourier(n_basis=n_basis, domain_range=(0, 365)) + fd_basis = fd_data.to_basis(basis) + + fpca = FPCA(n_components=n_components) + fpca.fit(fd_basis) + + # results obtained using Ramsay's R package + results = [[0.9231551, 0.1364966, 0.3569451, 0.0092012, -0.0244525, + -0.02923873, -0.003566887, -0.009654571, -0.0100063], + [-0.3315211, -0.0508643, 0.89218521, 0.1669182, 0.2453900, + 0.03548997, 0.037938051, -0.025777507, 0.008416904], + [-0.1379108, 0.9125089, 0.00142045, 0.2657423, -0.2146497, + 0.16833314, 0.031509179, -0.006768189, 0.047306718]] + results = np.array(results) + + # compare results obtained using this library. There are slight + # variations due to the fact that we are in two different packages + for i in range(n_components): + if np.sign(fpca.components_.coefficients[i][0]) != np.sign( + results[i][0]): + results[i, :] *= -1 + np.testing.assert_allclose(fpca.components_.coefficients, results, + atol=1e-7) + + def test_grid_fpca_fit_result(self): + + n_components = 1 + + fd_data = fetch_weather()['data'].coordinates[0] + + fpca = FPCA(n_components=n_components, weights=[1] * 365) + fpca.fit(fd_data) + + # results obtained using fda.usc for the first component + results = [ + [-0.06958281, -0.07015412, -0.07095115, -0.07185632, -0.07128256, + -0.07124209, -0.07364828, -0.07297663, -0.07235438, -0.07307498, + -0.07293423, -0.07449293, -0.07647909, -0.07796823, -0.07582476, + -0.07263243, -0.07241871, -0.0718136, -0.07015477, -0.07132331, + -0.0711527, -0.07435933, -0.07602666, -0.0769783, -0.07707199, + -0.07503802, -0.0770302, -0.07705581, -0.07633515, -0.07624817, + -0.07631568, -0.07619913, -0.07568, -0.07595155, -0.07506939, + -0.07181941, -0.06907624, -0.06735476, -0.06853985, -0.06902363, + -0.07098882, -0.07479412, -0.07425241, -0.07555835, -0.0765903, + -0.07651853, -0.07682536, -0.07458996, -0.07631711, -0.07726509, + -0.07641246, -0.0744066, -0.07501397, -0.07302722, -0.07045571, + -0.06912529, -0.06792186, -0.06830739, -0.06898433, -0.07000192, + -0.07014513, -0.06994886, -0.07115909, -0.073999, -0.07292669, + -0.07139879, -0.07226865, -0.07187915, -0.07122995, -0.06975022, + -0.06800613, -0.06900793, -0.07186378, -0.07114479, -0.07015252, + -0.06944782, -0.068291, -0.06905348, -0.06925773, -0.06834624, + -0.06837319, -0.06824067, -0.06644614, -0.06637313, -0.06626312, + -0.06470209, -0.0645058, -0.06477729, -0.06411049, -0.06158499, + -0.06305197, -0.06398006, -0.06277579, -0.06282124, -0.06317684, + -0.0614125, -0.05961922, -0.05875443, -0.05845781, -0.05828608, + -0.05666474, -0.05495706, -0.05446301, -0.05468254, -0.05478609, + -0.05440798, -0.05312339, -0.05102368, 
-0.05160285, -0.05077954, + -0.04979648, -0.04890853, -0.04745462, -0.04496763, -0.0448713, + -0.04599596, -0.04688998, -0.04488872, -0.04404507, -0.04420729, + -0.04368153, -0.04254381, -0.0411764, -0.04022811, -0.03999746, + -0.03963634, -0.03832502, -0.0383956, -0.04015374, -0.0387544, + -0.03777315, -0.03830728, -0.03768616, -0.03714081, -0.03781918, + -0.03739374, -0.03659894, -0.03563342, -0.03658407, -0.03686991, + -0.03543746, -0.03518799, -0.03361226, -0.0321534, -0.03050438, + -0.02958411, -0.02855023, -0.02913402, -0.02992464, -0.02899548, + -0.02891629, -0.02809554, -0.02702642, -0.02672194, -0.02678648, + -0.02698471, -0.02628085, -0.02674285, -0.02658515, -0.02604447, + -0.0245711, -0.02413174, -0.02342496, -0.022898, -0.02216152, + -0.02272283, -0.02199741, -0.02305362, -0.02371371, -0.02320865, + -0.02234777, -0.0225018, -0.02104359, -0.02203346, -0.02052545, + -0.01987457, -0.01947911, -0.01986949, -0.02012196, -0.01958515, + -0.01906753, -0.01857869, -0.01874101, -0.01827973, -0.017752, + -0.01702056, -0.01759611, -0.01888485, -0.01988159, -0.01951675, + -0.01872967, -0.01866667, -0.0183576, -0.01909758, -0.018599, + -0.01910036, -0.01930315, -0.01958856, -0.02129936, -0.0216614, + -0.0204397, -0.02002368, -0.02058828, -0.02149915, -0.02167326, + -0.02238569, -0.02211907, -0.02168336, -0.02124387, -0.02131655, + -0.02130508, -0.02181227, -0.02230632, -0.02223732, -0.0228216, + -0.02355137, -0.02275145, -0.02286893, -0.02437776, -0.02523897, + -0.0248354, -0.02319174, -0.02335831, -0.02405789, -0.02483273, + -0.02428119, -0.02395295, -0.02437185, -0.02476434, -0.02347973, + -0.02385957, -0.02451257, -0.02414586, -0.02439035, -0.02357782, + -0.02417295, -0.02504764, -0.02682569, -0.02807111, -0.02886335, + -0.02943406, -0.02956806, -0.02893096, -0.02903812, -0.02999862, + -0.029421, -0.03016203, -0.03118823, -0.03076205, -0.03005985, + -0.03079187, -0.03215188, -0.03271075, -0.03146124, -0.03040965, + -0.03008436, -0.03085897, -0.03015341, -0.03014661, -0.03110255, + -0.03271278, -0.03217399, -0.0331721, -0.03459221, -0.03572073, + -0.03560707, -0.03531492, -0.03687657, -0.03800143, -0.0373808, + -0.03729927, -0.03748666, -0.03754171, -0.03790408, -0.03963726, + -0.03992153, -0.03812243, -0.0373844, -0.0385394, -0.03849716, + -0.03826345, -0.03743958, -0.0380861, -0.03857622, -0.04099357, + -0.04102509, -0.04170207, -0.04283573, -0.04320618, -0.04269438, + -0.04467527, -0.04470603, -0.04496092, -0.04796417, -0.04796633, + -0.047863, -0.04883668, -0.0505939, -0.05112441, -0.04960962, + -0.05000041, -0.04962112, -0.05087008, -0.0521671, -0.05369792, + -0.05478139, -0.05559221, -0.05669698, -0.05654505, -0.05731113, + -0.05783543, -0.05766056, -0.05754354, -0.05724272, -0.05831026, + -0.05847512, -0.05804533, -0.05875046, -0.06021703, -0.06147975, + -0.06213918, -0.0645805, -0.06500849, -0.06361716, -0.06315227, + -0.06306436, -0.06425743, -0.06626847, -0.06615213, -0.06881004, + -0.06942296, -0.06889225, -0.06868663, -0.0678667, -0.06720133, + -0.06771172, -0.06885042, -0.06896979, -0.06961627, -0.07211988, + -0.07252956, -0.07265559, -0.07264195, -0.07306334, -0.07282035, + -0.07196505, -0.07210595, -0.07203942, -0.07105821, -0.06920599, + -0.06892264, -0.06699939, -0.06537829, -0.06543323, -0.06913186, + -0.07210039, -0.07219987, -0.07124228, -0.07065497, -0.06996833, + -0.0674457, -0.06800847, -0.06784175, -0.06592871, -0.06723401]] + + results = np.array(results) + + # compare results obtained using this library. 
There are slight + # variations due to the fact that we are in two different packages + for i in range(n_components): + if np.sign(fpca.components_.data_matrix[i][0]) != np.sign( + results[i][0]): + results[i, :] *= -1 + np.testing.assert_allclose( + fpca.components_.data_matrix.reshape( + fpca.components_.data_matrix.shape[:-1]), + results, + rtol=1e-6) + + def test_grid_fpca_transform_result(self): + + n_components = 1 + + fd_data = fetch_weather()['data'].coordinates[0] + + fpca = FPCA(n_components=n_components, weights=[1] * 365) + fpca.fit(fd_data) + scores = fpca.transform(fd_data) + + # results obtained + results = [[-77.05020176], [-90.56072204], [-82.39565947], + [-114.45375934], [-69.99735931], [-64.44894047], + [135.58336775], [-14.93460852], [0.75024737], + [-36.4781038], [-42.35637749], [-73.98910492], + [-67.11253749], [-103.68269798], [-104.65948079], + [-7.42817782], [7.48125036], [56.29792942], + [181.00258791], [-3.53294736], [37.94673912], + [124.43819913], [-7.04274676], [-49.61134859], + [-136.86256785], [-184.03502398], [-181.72835749], + [-51.06323208], [-137.85606731], [50.10941466], + [151.68118097], [159.01360046], [217.17981302], + [234.40195237], [345.39374006]] + results = np.array(results) + + np.testing.assert_allclose(scores, results, rtol=1e-6) + + def test_grid_fpca_regularization_fit_result(self): + + n_components = 1 + + fd_data = fetch_weather()['data'].coordinates[0] + + fd_data = FDataGrid(np.squeeze(fd_data.data_matrix), + np.arange(0.5, 365, 1)) + + fpca = FPCA( + n_components=n_components, weights=[1] * 365, + regularization=TikhonovRegularization( + LinearDifferentialOperator(2))) + fpca.fit(fd_data) + + # results obtained using fda.usc for the first component + results = [ + [-0.06961236, -0.07027042, -0.07090496, -0.07138247, -0.07162215, + -0.07202264, -0.07264893, -0.07279174, -0.07274672, -0.07300075, + -0.07365471, -0.07489002, -0.07617455, -0.07658708, -0.07551923, + -0.07375128, -0.0723776, -0.07138373, -0.07080555, -0.07111745, + -0.0721514, -0.07395427, -0.07558341, -0.07650959, -0.0766541, + -0.07641352, -0.07660864, -0.07669081, -0.0765396, -0.07640671, + -0.07634668, -0.07626304, -0.07603638, -0.07549114, -0.07410347, + -0.07181791, -0.06955356, -0.06824034, -0.06834077, -0.06944125, + -0.07133598, -0.07341109, -0.07471501, -0.07568844, -0.07631904, + -0.07647264, -0.07629453, -0.07598431, -0.07628157, -0.07654062, + -0.07616026, -0.07527189, -0.07426683, -0.07267961, -0.07079998, + -0.06927394, -0.068412, -0.06838534, -0.06888439, -0.0695309, + -0.07005508, -0.07066637, -0.07167196, -0.07266978, -0.07275299, + -0.07235183, -0.07207819, -0.07159814, -0.07077697, -0.06977026, + -0.0691952, -0.06965756, -0.07058327, -0.07075751, -0.07025415, + -0.06954233, -0.06899785, -0.06891026, -0.06887079, -0.06862183, + -0.06830082, -0.06777765, -0.06700202, -0.06639394, -0.06582435, + -0.06514987, -0.06467236, -0.06425272, -0.06359187, -0.062922, + -0.06300068, -0.06325494, -0.06316979, -0.06296254, -0.06246343, + -0.06136836, -0.0600936, -0.05910688, -0.05840872, -0.0576547, + -0.05655684, -0.05546518, -0.05484433, -0.05465746, -0.05449286, + -0.05397004, -0.05300742, -0.05196686, -0.05133129, -0.05064617, + -0.04973418, -0.04855687, -0.04714356, -0.04588103, -0.04547284, + -0.04571493, -0.04580704, -0.04523509, -0.04457293, -0.04405309, + -0.04338468, -0.04243512, -0.04137278, -0.04047946, -0.03984531, + -0.03931376, -0.0388847, -0.03888507, -0.03908662, -0.03877577, + -0.03830952, -0.03802713, -0.03773521, -0.03752388, -0.03743759, + 
-0.03714113, -0.03668387, -0.0363703, -0.03642288, -0.03633051, + -0.03574618, -0.03486536, -0.03357797, -0.03209969, -0.0306837, + -0.02963987, -0.029102, -0.0291513, -0.02932013, -0.02912619, + -0.02869407, -0.02801974, -0.02732363, -0.02690451, -0.02676622, + -0.0267323, -0.02664896, -0.02661708, -0.02637166, -0.02577496, + -0.02490428, -0.02410813, -0.02340367, -0.02283356, -0.02246305, + -0.0224229, -0.0225435, -0.02295603, -0.02324663, -0.02310005, + -0.02266893, -0.02221522, -0.02168056, -0.02129419, -0.02064909, + -0.02007801, -0.01979083, -0.01979541, -0.01978879, -0.01954269, + -0.0191623, -0.01879572, -0.01849678, -0.01810297, -0.01769666, + -0.01753802, -0.01794351, -0.01871307, -0.01930005, -0.01933, + -0.01901017, -0.01873486, -0.01861838, -0.01870777, -0.01879, + -0.01904219, -0.01945078, -0.0200607, -0.02076936, -0.02100213, + -0.02071439, -0.02052113, -0.02076313, -0.02128468, -0.02175631, + -0.02206387, -0.02201054, -0.02172142, -0.02143092, -0.02133647, + -0.02144956, -0.02176286, -0.02212579, -0.02243861, -0.02278316, + -0.02304113, -0.02313356, -0.02349275, -0.02417028, -0.0245954, + -0.0244062, -0.02388557, -0.02374682, -0.02401071, -0.02431126, + -0.02433125, -0.02427656, -0.02430442, -0.02424977, -0.02401619, + -0.02402294, -0.02415424, -0.02413262, -0.02404076, -0.02397651, + -0.0243893, -0.0253322, -0.02664395, -0.0278802, -0.02877936, + -0.02927182, -0.02937318, -0.02926277, -0.02931632, -0.02957945, + -0.02982133, -0.03023224, -0.03060406, -0.03066011, -0.03070932, + -0.03116429, -0.03179009, -0.03198094, -0.03149462, -0.03082037, + -0.03041594, -0.0303307, -0.03028465, -0.03052841, -0.0311837, + -0.03199307, -0.03262025, -0.03345083, -0.03442665, -0.03521313, + -0.0356433, -0.03606037, -0.03677406, -0.03735165, -0.03746578, + -0.03744154, -0.03752143, -0.03780898, -0.03837639, -0.03903232, + -0.03911629, -0.03857567, -0.03816592, -0.03819285, -0.03818405, + -0.03801684, -0.03788493, -0.03823232, -0.03906142, -0.04023251, + -0.04112434, -0.04188011, -0.04254759, -0.043, -0.04340181, + -0.04412687, -0.04484482, -0.04577669, -0.04700832, -0.04781373, + -0.04842662, -0.04923723, -0.05007637, -0.05037817, -0.05009794, + -0.04994083, -0.05012712, -0.05094001, -0.05216065, -0.05350458, + -0.05469781, -0.05566309, -0.05641011, -0.05688106, -0.05730818, + -0.05759156, -0.05763771, -0.05760073, -0.05766117, -0.05794587, + -0.05816696, -0.0584046, -0.05905105, -0.06014331, -0.06142231, + -0.06270788, -0.06388225, -0.06426245, -0.06386721, -0.0634656, + -0.06358049, -0.06442514, -0.06570047, -0.06694328, -0.0682621, + -0.06897846, -0.06896583, -0.06854621, -0.06797142, -0.06763755, + -0.06784024, -0.06844314, -0.06918567, -0.07021928, -0.07148473, + -0.07232504, -0.07272276, -0.07287021, -0.07289836, -0.07271531, + -0.07239956, -0.07214086, -0.07170078, -0.07081195, -0.06955202, + -0.06825156, -0.06690167, -0.06617102, -0.06683291, -0.06887539, + -0.07089424, -0.07174837, -0.07150888, -0.07070378, -0.06960066, + -0.06842496, -0.06777666, -0.06728403, -0.06681262, -0.06679066]] + + results = np.array(results) + + # compare results obtained using this library. 
There are slight + # variations due to the fact that we are in two different packages + for i in range(n_components): + if np.sign(fpca.components_.data_matrix[i][0]) != np.sign( + results[i][0]): + results[i, :] *= -1 + np.testing.assert_allclose( + fpca.components_.data_matrix.reshape( + fpca.components_.data_matrix.shape[:-1]), + results, + rtol=1e-2) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_grid.py b/tests/test_grid.py index daaeb054e..e39db303a 100644 --- a/tests/test_grid.py +++ b/tests/test_grid.py @@ -1,10 +1,11 @@ +from skfda import FDataGrid, concatenate +from skfda.exploratory import stats import unittest +from mpl_toolkits.mplot3d import axes3d import scipy.stats.mstats import numpy as np -from skfda import FDataGrid -from skfda.exploratory import stats class TestFDataGrid(unittest.TestCase): @@ -48,21 +49,19 @@ def test_gmean(self): np.array([[0., 0.25, 0.5, 0.75, 1.]])) def test_slice(self): - t = 10 + t = (5, 3) fd = FDataGrid(data_matrix=np.ones(t)) - fd = fd[:, 0] + fd = fd[1:3] np.testing.assert_array_equal( fd.data_matrix[..., 0], - np.array([[1]])) - np.testing.assert_array_equal( - fd.sample_points, - np.array([[0]])) + np.array([[1, 1, 1], [1, 1, 1]])) def test_concatenate(self): fd1 = FDataGrid([[1, 2, 3, 4, 5], [2, 3, 4, 5, 6]]) fd2 = FDataGrid([[3, 4, 5, 6, 7], [4, 5, 6, 7, 8]]) - fd1.axes_labels = ["x", "y"] + fd1.argument_names = ["x"] + fd1.coordinate_names = ["y"] fd = fd1.concatenate(fd2) np.testing.assert_equal(fd.n_samples, 4) @@ -71,14 +70,18 @@ def test_concatenate(self): np.testing.assert_array_equal(fd.data_matrix[..., 0], [[1, 2, 3, 4, 5], [2, 3, 4, 5, 6], [3, 4, 5, 6, 7], [4, 5, 6, 7, 8]]) - np.testing.assert_array_equal(fd1.axes_labels, fd.axes_labels) + np.testing.assert_array_equal(fd1.argument_names, fd.argument_names) + np.testing.assert_array_equal( + fd1.coordinate_names, fd.coordinate_names) def test_concatenate_coordinates(self): fd1 = FDataGrid([[1, 2, 3, 4], [2, 3, 4, 5]]) fd2 = FDataGrid([[3, 4, 5, 6], [4, 5, 6, 7]]) - fd1.axes_labels = ["x", "y"] - fd2.axes_labels = ["w", "t"] + fd1.argument_names = ["x"] + fd1.coordinate_names = ["y"] + fd2.argument_names = ["w"] + fd2.coordinate_names = ["t"] fd = fd1.concatenate(fd2, as_coordinates=True) np.testing.assert_equal(fd.n_samples, 2) @@ -90,18 +93,37 @@ def test_concatenate_coordinates(self): [[2, 4], [3, 5], [4, 6], [5, 7]]]) # Testing labels - np.testing.assert_array_equal(["x", "y", "t"], fd.axes_labels) - fd1.axes_labels = ["x", "y"] - fd2.axes_labels = None + np.testing.assert_array_equal(("y", "t"), fd.coordinate_names) + fd2.coordinate_names = None fd = fd1.concatenate(fd2, as_coordinates=True) - np.testing.assert_array_equal(["x", "y", None], fd.axes_labels) - fd1.axes_labels = None + np.testing.assert_array_equal(("y", None), fd.coordinate_names) + fd1.coordinate_names = None fd = fd1.concatenate(fd2, as_coordinates=True) - np.testing.assert_equal(None, fd.axes_labels) + np.testing.assert_equal((None, None), fd.coordinate_names) + + def test_concatenate2(self): + sample1 = np.arange(0, 10) + sample2 = np.arange(10, 20) + fd1 = FDataGrid([sample1]) + fd2 = FDataGrid([sample2]) + + fd1.argument_names = ["x"] + fd1.coordinate_names = ["y"] + fd = concatenate([fd1, fd2]) + + np.testing.assert_equal(fd.n_samples, 2) + np.testing.assert_equal(fd.dim_codomain, 1) + np.testing.assert_equal(fd.dim_domain, 1) + np.testing.assert_array_equal(fd.data_matrix[..., 0], [sample1, + sample2]) + np.testing.assert_array_equal(fd1.argument_names, fd.argument_names) + 
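The test_grid.py hunks around this point track an API split: the single axes_labels attribute becomes separate argument_names (domain) and coordinate_names (codomain), and a module-level concatenate function is introduced. A short sketch of the renamed label API, using only calls that test_concatenate2 exercises:

    import numpy as np
    from skfda import FDataGrid, concatenate

    fd1 = FDataGrid([np.arange(0, 10)])
    fd2 = FDataGrid([np.arange(10, 20)])

    fd1.argument_names = ["x"]    # one label per domain dimension
    fd1.coordinate_names = ["y"]  # one label per codomain dimension

    # Labels propagate from the first argument to the result.
    fd = concatenate([fd1, fd2])
    assert fd.n_samples == 2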
np.testing.assert_array_equal( + fd1.coordinate_names, fd.coordinate_names) def test_coordinates(self): fd1 = FDataGrid([[1, 2, 3, 4], [2, 3, 4, 5]]) - fd1.axes_labels = ["x", "y"] + fd1.argument_names = ["x"] + fd1.coordinate_names = ["y"] fd2 = FDataGrid([[3, 4, 5, 6], [4, 5, 6, 7]]) fd = fd1.concatenate(fd2, as_coordinates=True) @@ -124,9 +146,145 @@ def test_coordinates(self): np.testing.assert_array_equal(fd3.coordinates[-2:].data_matrix, fd.data_matrix) np.testing.assert_array_equal( - fd3.coordinates[(False, False, True, False, True)].data_matrix, + fd3.coordinates[np.array( + (False, False, True, False, True))].data_matrix, fd.data_matrix) + def test_add(self): + fd1 = FDataGrid([[1, 2, 3, 4], [2, 3, 4, 5]]) + + fd2 = fd1 + fd1 + np.testing.assert_array_equal(fd2.data_matrix[..., 0], + [[2, 4, 6, 8], [4, 6, 8, 10]]) + + fd2 = fd1 + 2 + np.testing.assert_array_equal(fd2.data_matrix[..., 0], + [[3, 4, 5, 6], [4, 5, 6, 7]]) + + fd2 = fd1 + np.array(2) + np.testing.assert_array_equal(fd2.data_matrix[..., 0], + [[3, 4, 5, 6], [4, 5, 6, 7]]) + + fd2 = fd1 + np.array([2]) + np.testing.assert_array_equal(fd2.data_matrix[..., 0], + [[3, 4, 5, 6], [4, 5, 6, 7]]) + + fd2 = fd1 + np.array([1, 2, 3, 4]) + np.testing.assert_array_equal(fd2.data_matrix[..., 0], + [[2, 4, 6, 8], [3, 5, 7, 9]]) + + fd2 = fd1 + fd1.data_matrix + np.testing.assert_array_equal(fd2.data_matrix[..., 0], + [[2, 4, 6, 8], [4, 6, 8, 10]]) + + fd2 = fd1 + fd1.data_matrix[..., 0] + np.testing.assert_array_equal(fd2.data_matrix[..., 0], + [[2, 4, 6, 8], [4, 6, 8, 10]]) + + def test_composition(self): + X, Y, Z = axes3d.get_test_data(1.2) + + data_matrix = [Z.T] + sample_points = [X[0, :], Y[:, 0]] + + g = FDataGrid(data_matrix, sample_points) + self.assertEqual(g.dim_domain, 2) + self.assertEqual(g.dim_codomain, 1) + + t = np.linspace(0, 2 * np.pi, 100) + + data_matrix = [10 * np.array([np.cos(t), np.sin(t)]).T] + f = FDataGrid(data_matrix, t) + self.assertEqual(f.dim_domain, 1) + self.assertEqual(f.dim_codomain, 2) + + gof = g.compose(f) + self.assertEqual(gof.dim_domain, 1) + self.assertEqual(gof.dim_codomain, 1) + + +class TestEvaluateFDataGrid(unittest.TestCase): + + def setUp(self): + data_matrix = np.array( + [ + [ + [[0, 1, 2], [0, 1, 2]], + [[0, 1, 2], [0, 1, 2]] + ], + [ + [[3, 4, 5], [3, 4, 5]], + [[3, 4, 5], [3, 4, 5]] + ] + ]) + + sample_points = [[0, 1], [0, 1]] + + fd = FDataGrid(data_matrix, sample_points=sample_points) + self.assertEqual(fd.n_samples, 2) + self.assertEqual(fd.dim_domain, 2) + self.assertEqual(fd.dim_codomain, 3) + + self.fd = fd + + def test_evaluate_aligned(self): + + res = self.fd([(0, 0), (1, 1), (2, 2), (3, 3)]) + expected = np.array([[[0, 1, 2], [0, 1, 2], [0, 1, 2], [0, 1, 2]], + [[3, 4, 5], [3, 4, 5], [3, 4, 5], [3, 4, 5]]]) + + np.testing.assert_allclose(res, expected) + + def test_evaluate_unaligned(self): + + res = self.fd([[(0, 0), (1, 1), (2, 2), (3, 3)], + [(1, 7), (5, 2), (3, 4), (6, 1)]], + aligned=False) + expected = np.array([[[0, 1, 2], [0, 1, 2], [0, 1, 2], [0, 1, 2]], + [[3, 4, 5], [3, 4, 5], [3, 4, 5], [3, 4, 5]]]) + + np.testing.assert_allclose(res, expected) + + def test_evaluate_unaligned_ragged(self): + + res = self.fd([[(0, 0), (1, 1), (2, 2), (3, 3)], + [(1, 7), (5, 2), (3, 4)]], + aligned=False) + expected = ([[[0, 1, 2], [0, 1, 2], [0, 1, 2], [0, 1, 2]], + [[3, 4, 5], [3, 4, 5], [3, 4, 5]]]) + + self.assertEqual(len(res), self.fd.n_samples) + + for r, e in zip(res, expected): + np.testing.assert_allclose(r, e) + + def test_evaluate_grid_aligned(self): + + res = 
self.fd([[0, 1], [1, 2]], grid=True) + expected = np.array([[[[0, 1, 2], [0, 1, 2]], [[0, 1, 2], [0, 1, 2]]], + [[[3, 4, 5], [3, 4, 5]], [[3, 4, 5], [3, 4, 5]]]]) + + np.testing.assert_allclose(res, expected) + + def test_evaluate_grid_unaligned(self): + + res = self.fd([[[0, 1], [1, 2]], [[3, 4], [5, 6]]], + grid=True, aligned=False) + expected = np.array([[[[0, 1, 2], [0, 1, 2]], [[0, 1, 2], [0, 1, 2]]], + [[[3, 4, 5], [3, 4, 5]], [[3, 4, 5], [3, 4, 5]]]]) + + np.testing.assert_allclose(res, expected) + + def test_evaluate_grid_unaligned_ragged(self): + + res = self.fd([[[0, 1], [1, 2]], [[3, 4], [5]]], + grid=True, aligned=False) + expected = ([[[[0, 1, 2], [0, 1, 2]], [[0, 1, 2], [0, 1, 2]]], + [[[3, 4, 5]], [[3, 4, 5]]]]) + + for r, e in zip(res, expected): + np.testing.assert_allclose(r, e) + if __name__ == '__main__': print() diff --git a/tests/test_hotelling.py b/tests/test_hotelling.py new file mode 100644 index 000000000..fdea10d27 --- /dev/null +++ b/tests/test_hotelling.py @@ -0,0 +1,62 @@ +from skfda.inference.hotelling import hotelling_t2, hotelling_test_ind +from skfda.representation import FDataGrid +from skfda.representation.basis import Fourier +import unittest + +import pytest + + +class HotellingTests(unittest.TestCase): + + def test_hotelling_test_ind_args(self): + fd1 = FDataGrid([[1, 1, 1]]) + with self.assertRaises(TypeError): + hotelling_test_ind(fd1, []) + with self.assertRaises(TypeError): + hotelling_test_ind([], fd1) + with self.assertRaises(TypeError): + hotelling_test_ind(fd1.to_basis(Fourier(n_basis=3)), fd1) + with self.assertRaises(TypeError): + hotelling_test_ind(fd1, fd1.to_basis(Fourier(n_basis=3))) + with self.assertRaises(ValueError): + hotelling_test_ind(fd1, fd1, n_reps=0) + + def test_hotelling_t2_args(self): + fd1 = FDataGrid([[1, 1, 1]]) + with self.assertRaises(TypeError): + hotelling_t2(fd1, []) + with self.assertRaises(TypeError): + hotelling_t2([], fd1) + with self.assertRaises(TypeError): + hotelling_t2(fd1.to_basis(Fourier(n_basis=3)), fd1) + with self.assertRaises(TypeError): + hotelling_t2(fd1, fd1.to_basis(Fourier(n_basis=3))) + + def test_hotelling_t2(self): + fd1 = FDataGrid([[1, 1, 1], [1, 1, 1]]) + fd2 = FDataGrid([[1, 1, 1], [2, 2, 2]]) + self.assertAlmostEqual(hotelling_t2(fd1, fd1), 0) + self.assertAlmostEqual(hotelling_t2(fd1, fd2), 1) + + fd1 = fd1.to_basis(Fourier(n_basis=3)) + fd2 = fd2.to_basis(Fourier(n_basis=3)) + self.assertAlmostEqual(hotelling_t2(fd1, fd1), 0) + self.assertAlmostEqual(hotelling_t2(fd1, fd2), 1) + + def test_hotelling_test(self): + fd1 = FDataGrid([[1, 1, 1], [1, 1, 1]]) + fd2 = FDataGrid([[3, 3, 3], [2, 2, 2]]) + t2, pval, dist = hotelling_test_ind(fd1, fd2, return_dist=True, + random_state=0) + self.assertAlmostEqual(t2, 9) + self.assertAlmostEqual(pval, 0) + self.assertEqual(len(dist), 6) + reps = 5 + t2, pval, dist = hotelling_test_ind(fd1, fd2, return_dist=True, + n_reps=reps, random_state=1) + self.assertEqual(len(dist), reps) + + +if __name__ == '__main__': + print() + unittest.main() diff --git a/tests/test_interpolation.py b/tests/test_interpolation.py index 369f21242..170a6204f 100644 --- a/tests/test_interpolation.py +++ b/tests/test_interpolation.py @@ -1,13 +1,14 @@ -import unittest from skfda import FDataGrid -from skfda.representation.interpolation import SplineInterpolator +from skfda.representation.interpolation import SplineInterpolation +import unittest + import numpy as np -# TODO: Unitest for grids with domain dimension > 1 +# TODO: Unitest for grids with domain dimension > 1 class 
TestEvaluationSpline1_1(unittest.TestCase): - """Test the evaluation of a grid spline interpolator with + """Test the evaluation of a grid spline interpolation with domain and image dimension equal to 1. """ @@ -15,11 +16,10 @@ def setUp(self): # Data matrix of a datagrid with a dimension of domain and image equal # to 1. - # Matrix of functions (x**2, (9-x)**2) + # Matrix of functions (x**2, (9-x)**2) self.data_matrix_1_1 = [np.arange(10)**2, np.arange(start=9, stop=-1, step=-1)**2] - def test_evaluation_linear_simple(self): """Test basic usage of evaluation""" @@ -27,12 +27,13 @@ def test_evaluation_linear_simple(self): # Test interpolation in nodes np.testing.assert_array_almost_equal( - f(np.arange(10)), self.data_matrix_1_1) + f(np.arange(10))[..., 0], self.data_matrix_1_1) # Test evaluation in a list of times - np.testing.assert_array_almost_equal(f([0.5,1.5,2.5]), - np.array([[ 0.5, 2.5, 6.5], - [72.5, 56.5, 42.5]])) + np.testing.assert_array_almost_equal( + f([0.5, 1.5, 2.5]), + np.array([[[0.5], [2.5], [6.5]], + [[72.5], [56.5], [42.5]]])) def test_evaluation_linear_point(self): """Test the evaluation of a single point""" @@ -41,20 +42,11 @@ def test_evaluation_linear_point(self): # Test a single point np.testing.assert_array_almost_equal(f(5.3).round(1), - np.array([[28.3], [13.9]])) - np.testing.assert_array_almost_equal(f([3]), np.array([[9.], [36.]])) - np.testing.assert_array_almost_equal(f((2,)), np.array([[4.], [49.]])) - - - def test_evaluation_linear_derivative(self): - """Test derivative""" - f = FDataGrid(self.data_matrix_1_1, sample_points=np.arange(10)) - - # Derivate = [2*x, 2*(9-x)] + np.array([[[28.3]], [[13.9]]])) + np.testing.assert_array_almost_equal( + f([3]), np.array([[[9.]], [[36.]]])) np.testing.assert_array_almost_equal( - f([0.5,1.5,2.5], derivative=1).round(3), - np.array([[ 1., 3., 5.], - [-17., -15., -13.]])) + f((2,)), np.array([[[4.]], [[49.]]])) def test_evaluation_linear_grid(self): """Test grid evaluation. With domain dimension = 1""" @@ -62,247 +54,151 @@ def test_evaluation_linear_grid(self): f = FDataGrid(self.data_matrix_1_1, sample_points=np.arange(10)) # Test interpolation in nodes - np.testing.assert_array_almost_equal(f(np.arange(10)), + np.testing.assert_array_almost_equal(f(np.arange(10))[..., 0], self.data_matrix_1_1) - res = np.array([[ 0.5, 2.5, 6.5], [72.5, 56.5, 42.5]]) - t = [0.5,1.5,2.5] + res = np.array([[[0.5], [2.5], [6.5]], [[72.5], [56.5], [42.5]]]) + t = [0.5, 1.5, 2.5] # Test evaluation in a list of times np.testing.assert_array_almost_equal(f(t, grid=True), res) np.testing.assert_array_almost_equal(f((t,), grid=True), res) np.testing.assert_array_almost_equal(f([t], grid=True), res) - # Single point with grid + # Single point with grid np.testing.assert_array_almost_equal(f(3, grid=True), - np.array([[9.], [36.]])) + np.array([[[9.]], [[36.]]])) # Check erroneous axis - with np.testing.assert_raises(ValueError): f((t,t), grid=True) + with np.testing.assert_raises(ValueError): + f((t, t), grid=True) def test_evaluation_linear_composed(self): f = FDataGrid(self.data_matrix_1_1, sample_points=np.arange(10)) # Evaluate (x**2, (9-x)**2) in (1,8) - np.testing.assert_array_almost_equal(f([[1],[8]], - aligned_evaluation=False), - np.array([[1.], [1.]])) + np.testing.assert_array_almost_equal(f([[1], [8]], + aligned=False), + np.array([[[1.]], [[1.]]])) - t = np.linspace(4,6,4) + t = np.linspace(4, 6, 4) np.testing.assert_array_almost_equal( - f([t,9-t], aligned_evaluation=False).round(2), - np.array([[16. , 22. , 28.67, 36. 
], - [16. , 22. , 28.67, 36. ]])) + f([t, 9 - t], aligned=False).round(2), + np.array([[[16.], [22.], [28.67], [36.]], + [[16.], [22.], [28.67], [36.]]])) # Same length than nsample - t = np.linspace(4,6,2) + t = np.linspace(4, 6, 2) np.testing.assert_array_almost_equal( - f([t,9-t], aligned_evaluation=False).round(2), - np.array([[16. , 36.], [16. , 36.]])) - - def test_evaluation_linear_keepdims(self): - """Test parameter keepdims""" - - # Default keepdims = False - f = FDataGrid(self.data_matrix_1_1, sample_points=np.arange(10), - keepdims=False) - - # Default keepdims = True - fk = FDataGrid(self.data_matrix_1_1, sample_points=np.arange(10), - keepdims=True) - - t = [0.5,1.5,2.5] - res = np.array([[ 0.5, 2.5, 6.5], [72.5, 56.5, 42.5]]) - res_keepdims = res.reshape((2,3,1)) - - - # Test combinations of keepdims with list - np.testing.assert_array_almost_equal(f(t), res) - np.testing.assert_array_almost_equal(f(t, keepdims=False), res) - np.testing.assert_array_almost_equal(f(t, keepdims=True), res_keepdims) - - np.testing.assert_array_almost_equal(fk(t), res_keepdims) - np.testing.assert_array_almost_equal(fk(t, keepdims=False), res) - np.testing.assert_array_almost_equal(fk(t, keepdims=True), res_keepdims) - - t2 = 4 - res2 = np.array([[16.], [25.]]) - res2_keepdims = res2.reshape(2,1,1) - - # Test combinations of keepdims with a single point - np.testing.assert_array_almost_equal(f(t2), res2) - np.testing.assert_array_almost_equal(f(t2, keepdims=False), res2) - np.testing.assert_array_almost_equal(f(t2, keepdims=True), res2_keepdims) - - np.testing.assert_array_almost_equal(fk(t2), res2_keepdims) - np.testing.assert_array_almost_equal(fk(t2, keepdims=False), res2) - np.testing.assert_array_almost_equal(fk(t2, keepdims=True), res2_keepdims) - - def test_evaluation_composed_linear_keepdims(self): - """Test parameter keepdims with composed evaluation""" - - # Default keepdims = False - f = FDataGrid(self.data_matrix_1_1, sample_points=np.arange(10), - keepdims=False) - - # Default keepdims = True - fk = FDataGrid(self.data_matrix_1_1, sample_points=np.arange(10), - keepdims=True) - - t = np.array([1, 2, 3]) - t = [t, 9 - t] - res = np.array([[ 1., 4., 9.], [ 1., 4., 9.]]) - res_keepdims = res.reshape((2,3,1)) - - # Test combinations of keepdims with list - np.testing.assert_array_almost_equal(f(t, aligned_evaluation=False), res) - np.testing.assert_array_almost_equal(f(t, aligned_evaluation=False, - keepdims=False), res) - np.testing.assert_array_almost_equal(f(t, aligned_evaluation=False, - keepdims=True), res_keepdims) - - np.testing.assert_array_almost_equal(fk(t, aligned_evaluation=False), - res_keepdims) - np.testing.assert_array_almost_equal(fk(t, aligned_evaluation=False, - keepdims=False), res) - np.testing.assert_array_almost_equal(fk(t, aligned_evaluation=False, - keepdims=True), res_keepdims) - - def test_evaluation_grid_linear_keepdims(self): - """Test grid evaluation with keepdims""" - - # Default keepdims = False - f = FDataGrid(self.data_matrix_1_1, sample_points=np.arange(10), - keepdims=False) - - # Default keepdims = True - fk = FDataGrid(self.data_matrix_1_1, sample_points=np.arange(10), - keepdims=True) - - t = [0.5,1.5,2.5] - res = np.array([[ 0.5, 2.5, 6.5], [72.5, 56.5, 42.5]]) - res_keepdims = res.reshape(2,3,1) - - np.testing.assert_array_almost_equal(f(t, grid=True), res) - np.testing.assert_array_almost_equal(f((t,), grid=True, keepdims=True), - res_keepdims) - np.testing.assert_array_almost_equal(f([t], grid=True, keepdims=False), res) - - 
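The deleted keepdims tests around this point reflect a broader change in this patch: evaluating an FDataGrid now always returns an array of shape (n_samples, n_points, dim_codomain), so the old keepdims switch has no role. A sketch of the convention the updated expected arrays encode (the shape is inferred from those expectations, not from separate documentation):

    import numpy as np
    from skfda import FDataGrid

    # Two scalar curves, x**2 and (9 - x)**2, as in these tests.
    x = np.arange(10)
    fd = FDataGrid([x**2, (9 - x)**2], sample_points=x)

    res = fd([0.5, 1.5, 2.5])
    assert res.shape == (2, 3, 1)  # trailing codomain axis is always kept

    # The former keepdims=False layout is one indexing step away:
    flat = res[..., 0]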
np.testing.assert_array_almost_equal(fk(t, grid=True), res_keepdims) - np.testing.assert_array_almost_equal(fk((t,), grid=True, keepdims=True), - res_keepdims) - np.testing.assert_array_almost_equal(fk([t], grid=True, keepdims=False), res) + f([t, 9 - t], aligned=False).round(2), + np.array([[[16.], [36.]], [[16.], [36.]]])) def test_evaluation_cubic_simple(self): """Test basic usage of evaluation""" f = FDataGrid(self.data_matrix_1_1, sample_points=np.arange(10), - interpolator=SplineInterpolator(3)) + interpolation=SplineInterpolation(3)) # Test interpolation in nodes - np.testing.assert_array_almost_equal(f(np.arange(10)).round(1), - self.data_matrix_1_1) + np.testing.assert_array_almost_equal(f(np.arange(10)).round(1)[..., 0], + self.data_matrix_1_1) # Test evaluation in a list of times - np.testing.assert_array_almost_equal(f([0.5,1.5,2.5]).round(2), - np.array([[ 0.25, 2.25, 6.25], - [72.25, 56.25, 42.25]])) + np.testing.assert_array_almost_equal( + f([0.5, 1.5, 2.5]).round(2), + np.array([[[0.25], [2.25], [6.25]], + [[72.25], [56.25], [42.25]]])) def test_evaluation_cubic_point(self): """Test the evaluation of a single point""" f = FDataGrid(self.data_matrix_1_1, sample_points=np.arange(10), - interpolator=SplineInterpolator(3)) + interpolation=SplineInterpolation(3)) # Test a single point - np.testing.assert_array_almost_equal(f(5.3).round(3), np.array([[28.09], - [13.69]])) + np.testing.assert_array_almost_equal(f(5.3).round(3), + np.array([[[28.09]], [[13.69]]])) - np.testing.assert_array_almost_equal(f([3]).round(3), np.array([[9.], [36.]])) - np.testing.assert_array_almost_equal(f((2,)).round(3), np.array([[4.], [49.]])) - - - def test_evaluation_cubic_derivative(self): - """Test derivative""" - f = FDataGrid(self.data_matrix_1_1, sample_points=np.arange(10), - interpolator=SplineInterpolator(3)) - - # Derivate = [2*x, 2*(9-x)] - np.testing.assert_array_almost_equal(f([0.5,1.5,2.5], derivative=1).round(3), - np.array([[ 1., 3., 5.], - [-17., -15., -13.]])) + np.testing.assert_array_almost_equal( + f([3]).round(3), np.array([[[9.]], [[36.]]])) + np.testing.assert_array_almost_equal( + f((2,)).round(3), np.array([[[4.]], [[49.]]])) def test_evaluation_cubic_grid(self): """Test grid evaluation. 
With domain dimension = 1""" f = FDataGrid(self.data_matrix_1_1, sample_points=np.arange(10), - interpolator=SplineInterpolator(3)) - - t = [0.5,1.5,2.5] - res = np.array([[ 0.25, 2.25, 6.25], [72.25, 56.25, 42.25]]) + interpolation=SplineInterpolation(3)) + t = [0.5, 1.5, 2.5] + res = np.array([[[0.25], [2.25], [6.25]], + [[72.25], [56.25], [42.25]]]) # Test evaluation in a list of times np.testing.assert_array_almost_equal(f(t, grid=True).round(3), res) np.testing.assert_array_almost_equal(f((t,), grid=True).round(3), res) np.testing.assert_array_almost_equal(f([t], grid=True).round(3), res) - # Single point with grid - np.testing.assert_array_almost_equal(f(3, grid=True), np.array([[9.], [36.]])) + # Single point with grid + np.testing.assert_array_almost_equal( + f(3, grid=True), np.array([[[9.]], [[36.]]])) # Check erroneous axis - with np.testing.assert_raises(ValueError): f((t,t), grid=True) + with np.testing.assert_raises(ValueError): + f((t, t), grid=True) def test_evaluation_cubic_composed(self): f = FDataGrid(self.data_matrix_1_1, sample_points=np.arange(10), - interpolator=SplineInterpolator(3)) - + interpolation=SplineInterpolation(3)) # Evaluate (x**2, (9-x)**2) in (1,8) - np.testing.assert_array_almost_equal(f([[1],[8]], aligned_evaluation=False).round(3) - ,np.array([[1.], [1.]])) + np.testing.assert_array_almost_equal( + f([[1], [8]], aligned=False).round(3), + np.array([[[1.]], [[1.]]])) - t = np.linspace(4,6,4) - np.testing.assert_array_almost_equal(f([t,9-t], aligned_evaluation=False).round(2), - np.array([[16. , 21.78, 28.44, 36. ], - [16. , 21.78, 28.44, 36. ]])) + t = np.linspace(4, 6, 4) + np.testing.assert_array_almost_equal( + f([t, 9 - t], aligned=False).round(2), + np.array([[[16.], [21.78], [28.44], [36.]], + [[16.], [21.78], [28.44], [36.]]])) # Same length than nsample - t = np.linspace(4,6,2) - np.testing.assert_array_almost_equal(f([t,9-t], aligned_evaluation=False).round(3), - np.array([[16. , 36.], [16. , 36.]])) + t = np.linspace(4, 6, 2) + np.testing.assert_array_almost_equal( + f([t, 9 - t], aligned=False).round(3), + np.array([[[16.], [36.]], [[16.], [36.]]])) def test_evaluation_nodes(self): """Test interpolation in nodes for all dimensions""" - for degree in range(1,6): - interpolator = SplineInterpolator(degree) + for degree in range(1, 6): + interpolation = SplineInterpolation(degree) f = FDataGrid(self.data_matrix_1_1, sample_points=np.arange(10), - interpolator=interpolator) + interpolation=interpolation) # Test interpolation in nodes - np.testing.assert_array_almost_equal(f(np.arange(10)).round(5), - self.data_matrix_1_1) + np.testing.assert_array_almost_equal( + f(np.arange(10)).round(5)[..., 0], + self.data_matrix_1_1) def test_error_degree(self): - with np.testing.assert_raises(ValueError): - interpolator = SplineInterpolator(7) + interpolation = SplineInterpolation(7) f = FDataGrid(self.data_matrix_1_1, sample_points=np.arange(10), - interpolator=interpolator) + interpolation=interpolation) f(1) with np.testing.assert_raises(ValueError): - interpolator = SplineInterpolator(0) + interpolation = SplineInterpolation(0) f = FDataGrid(self.data_matrix_1_1, sample_points=np.arange(10), - interpolator=interpolator) + interpolation=interpolation) f(1) class TestEvaluationSpline1_n(unittest.TestCase): - """Test the evaluation of a grid spline interpolator with + """Test the evaluation of a grid spline interpolation with domain dimension equal to 1 and arbitary image dimension. 
""" @@ -310,35 +206,34 @@ def setUp(self): # Data matrix of a datagrid with a dimension of domain and image equal # to 1. - # Matrix of functions (x**2, (9-x)**2) + # Matrix of functions (x**2, (9-x)**2) self.t = np.arange(10) data_1 = np.array([np.arange(10)**2, np.arange(start=9, stop=-1, step=-1)**2]) - data_2 = np.sin(np.pi/81 * data_1) + data_2 = np.sin(np.pi / 81 * data_1) - self.data_matrix_1_n = np.dstack((data_1,data_2)) - - self.interpolator = SplineInterpolator(interpolation_order=2) + self.data_matrix_1_n = np.dstack((data_1, data_2)) + self.interpolation = SplineInterpolation(interpolation_order=2) def test_evaluation_simple(self): """Test basic usage of evaluation""" f = FDataGrid(self.data_matrix_1_n, sample_points=np.arange(10), - interpolator=self.interpolator) + interpolation=self.interpolation) # Test interpolation in nodes np.testing.assert_array_almost_equal(f(self.t), self.data_matrix_1_n) # Test evaluation in a list of times - np.testing.assert_array_almost_equal(f([1.5,2.5,3.5]), - np.array([[[ 2.25 , 0.087212], - [ 6.25 , 0.240202], - [12.25 , 0.45773 ]], - [[56.25 , 0.816142], - [42.25 , 0.997589], - [30.25 , 0.922146]]] + np.testing.assert_array_almost_equal(f([1.5, 2.5, 3.5]), + np.array([[[2.25, 0.087212], + [6.25, 0.240202], + [12.25, 0.45773]], + [[56.25, 0.816142], + [42.25, 0.997589], + [30.25, 0.922146]]] ) ) @@ -346,42 +241,28 @@ def test_evaluation_point(self): """Test the evaluation of a single point""" f = FDataGrid(self.data_matrix_1_n, sample_points=np.arange(10), - interpolator=self.interpolator) + interpolation=self.interpolation) # Test a single point np.testing.assert_array_almost_equal(f(5.3), - np.array([[[28.09 , 0.885526]], - [[13.69 , 0.50697 ]]] + np.array([[[28.09, 0.885526]], + [[13.69, 0.50697]]] ) ) - def test_evaluation_derivative(self): - """Test derivative""" - f = FDataGrid(self.data_matrix_1_n, sample_points=self.t, - interpolator=self.interpolator) - - # [(2*x, d/dx sin(pi/81*x**2)), (2*(9-x), d/dx sin(pi/81*(9-x)**2))] - np.testing.assert_array_almost_equal(f([1.5,2.5,3.5], derivative=1), - np.array([[[ 3. , 0.1162381], - [ 5. , 0.1897434], - [ 7. , 0.2453124]], - [[-15. , 0.3385772], - [-13. , 0.0243172], - [-11. ,-0.1752035]]])) - def test_evaluation_grid(self): """Test grid evaluation. 
With domain dimension = 1""" f = FDataGrid(self.data_matrix_1_n, sample_points=np.arange(10), - interpolator=SplineInterpolator(2)) + interpolation=SplineInterpolation(2)) - t = [1.5,2.5,3.5] - res = np.array([[[ 2.25 , 0.08721158], - [ 6.25 , 0.24020233], - [12.25 , 0.4577302 ]], - [[56.25 , 0.81614206], - [42.25 , 0.99758925], - [30.25 , 0.92214607]]]) + t = [1.5, 2.5, 3.5] + res = np.array([[[2.25, 0.08721158], + [6.25, 0.24020233], + [12.25, 0.4577302]], + [[56.25, 0.81614206], + [42.25, 0.99758925], + [30.25, 0.92214607]]]) # Test evaluation in a list of times np.testing.assert_array_almost_equal(f(t, grid=True), res) @@ -389,57 +270,70 @@ def test_evaluation_grid(self): np.testing.assert_array_almost_equal(f([t], grid=True), res) # Check erroneous axis - with np.testing.assert_raises(ValueError): f((t,t), grid=True) + with np.testing.assert_raises(ValueError): + f((t, t), grid=True) def test_evaluation_composed(self): f = FDataGrid(self.data_matrix_1_n, sample_points=self.t, - interpolator=self.interpolator) - + interpolation=self.interpolation) # Evaluate (x**2, (9-x)**2) in (1,8) - np.testing.assert_array_almost_equal(f([[1],[4]], - aligned_evaluation=False)[0], + np.testing.assert_array_almost_equal(f([[1], [4]], + aligned=False)[0], f(1)[0]) - np.testing.assert_array_almost_equal(f([[1],[4]], - aligned_evaluation=False)[1], + np.testing.assert_array_almost_equal(f([[1], [4]], + aligned=False)[1], f(4)[1]) - - def test_evaluation_keepdims(self): - """Test keepdims""" - - f = FDataGrid(self.data_matrix_1_n, sample_points=np.arange(10), - interpolator=self.interpolator, keepdims=True) - - fk = FDataGrid(self.data_matrix_1_n, sample_points=np.arange(10), - interpolator=self.interpolator, keepdims=False) - - res = f(self.t) - # Test interpolation in nodes - np.testing.assert_array_almost_equal(f(self.t, keepdims=False), res) - np.testing.assert_array_almost_equal(f(self.t, keepdims=True), res) - np.testing.assert_array_almost_equal(fk(self.t), res) - np.testing.assert_array_almost_equal(fk(self.t, keepdims=False), res) - np.testing.assert_array_almost_equal(fk(self.t, keepdims=True), res) - - def test_evaluation_nodes(self): """Test interpolation in nodes for all dimensions""" - for degree in range(1,6): - interpolator = SplineInterpolator(degree) + for degree in range(1, 6): + interpolation = SplineInterpolation(degree) f = FDataGrid(self.data_matrix_1_n, sample_points=np.arange(10), - interpolator=interpolator) + interpolation=interpolation) # Test interpolation in nodes np.testing.assert_array_almost_equal(f(np.arange(10)), self.data_matrix_1_n) +class TestEvaluationSplinem_n(unittest.TestCase): + """Test the evaluation of a grid spline interpolation with + arbitrary domain dimension and arbitary image dimension. + """ + + def test_evaluation_center_and_extreme_points_linear(self): + """Test linear interpolation in the middle point of a grid square.""" + + dim_codomain = 4 + n_samples = 2 + + @np.vectorize + def coordinate_function(*args): + _, *domain_indexes, _ = args + return np.sum(domain_indexes) + + for dim_domain in range(1, 6): + sample_points = [np.array([0, 1]) for _ in range(dim_domain)] + data_matrix = np.fromfunction( + function=coordinate_function, + shape=(n_samples,) + (2,) * dim_domain + (dim_codomain,)) + + f = FDataGrid(data_matrix, sample_points=sample_points) + + evaluation = f([[0.] * dim_domain, [0.5] * + dim_domain, [1.] 
* dim_domain]) + self.assertEqual(evaluation.shape, (n_samples, 3, dim_codomain)) + for i in range(n_samples): + for j in range(dim_codomain): + np.testing.assert_array_almost_equal( + evaluation[i, ..., j], + [0, dim_domain * 0.5, dim_domain]) if __name__ == '__main__': diff --git a/tests/test_lfd.py b/tests/test_lfd.py deleted file mode 100644 index 3a0f6e920..000000000 --- a/tests/test_lfd.py +++ /dev/null @@ -1,77 +0,0 @@ -import unittest - -import numpy as np -from skfda.misc import LinearDifferentialOperator -from skfda.representation.basis import FDataBasis, Constant, Monomial - - -class TestBasis(unittest.TestCase): - - def test_init_integer(self): - # Checks for a zero order Lfd object - lfd_0 = LinearDifferentialOperator(order=0) - weightfd = [FDataBasis(Constant((0, 1)), 1)] - - np.testing.assert_equal(lfd_0.order, 0, - "Wrong deriv order of the linear operator") - np.testing.assert_equal( - lfd_0.weights, weightfd, - "Wrong list of weight functions of the linear operator") - - # Checks for a non zero order Lfd object - lfd_3 = LinearDifferentialOperator(3) - consfd = FDataBasis(Constant((0, 1)), np.identity(4)[3].reshape(-1, 1)) - bwtlist3 = consfd.to_list() - - np.testing.assert_equal(lfd_3.order, 3, - "Wrong deriv order of the linear operator") - np.testing.assert_equal( - lfd_3.weights, bwtlist3, - "Wrong list of weight functions of the linear operator") - - np.testing.assert_raises(ValueError, LinearDifferentialOperator, -1) - - def test_init_list_int(self): - coefficients = [1, 3, 4, 5, 6, 7] - - constant = Constant((0, 1)) - fd = FDataBasis(constant, np.array(coefficients).reshape(-1, 1)) - lfd = LinearDifferentialOperator(weights=coefficients) - - np.testing.assert_equal(lfd.order, 5, - "Wrong deriv order of the linear operator") - np.testing.assert_equal( - lfd.weights, fd.to_list(), - "Wrong list of weight functions of the linear operator") - - def test_init_list_fdatabasis(self): - weights = np.arange(4 * 5).reshape((5, 4)) - monomial = Monomial((0, 1), n_basis=4) - fd = FDataBasis(monomial, weights) - - fdlist = [FDataBasis(monomial, weights[i]) - for i in range(len(weights))] - - lfd = LinearDifferentialOperator(weights=fdlist) - - np.testing.assert_equal(lfd.order, 4, - "Wrong deriv order of the linear operator") - np.testing.assert_equal( - lfd.weights, fd.to_list(), - "Wrong list of weight functions of the linear operator") - - contant = Constant((0, 2)) - fdlist.append(FDataBasis(contant, 1)) - np.testing.assert_raises(ValueError, LinearDifferentialOperator, - None, fdlist) - - def test_init_wrong_params(self): - np.testing.assert_raises(ValueError, - LinearDifferentialOperator, 0, ['a']) - np.testing.assert_raises(ValueError, - LinearDifferentialOperator, 0, 'a') - - -if __name__ == '__main__': - print() - unittest.main() diff --git a/tests/test_linear_differential_operator.py b/tests/test_linear_differential_operator.py new file mode 100644 index 000000000..9bdd506a5 --- /dev/null +++ b/tests/test_linear_differential_operator.py @@ -0,0 +1,106 @@ +from skfda.misc.operators import LinearDifferentialOperator +from skfda.representation.basis import FDataBasis, Constant, Monomial +import unittest + +import numpy as np + + +class TestLinearDifferentialOperator(unittest.TestCase): + + def test_init_default(self): + """Tests default initialization (do not penalize).""" + lfd = LinearDifferentialOperator() + weightfd = [FDataBasis(Constant((0, 1)), 0)] + + np.testing.assert_equal( + lfd.weights, weightfd, + "Wrong list of weight functions of the linear operator") + + 
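The new test module around this point pins down the LinearDifferentialOperator constructor: a no-argument default that does not penalize, an integer order, constant weights, or functional weights over a common domain. A short sketch of these construction styles, paired with the TikhonovRegularization usage shown in test_fpca.py above:

    from skfda.misc.operators import LinearDifferentialOperator
    from skfda.misc.regularization import TikhonovRegularization
    from skfda.representation.basis import FDataBasis, Monomial

    lfd_2 = LinearDifferentialOperator(2)                  # L f = f''
    lfd_w = LinearDifferentialOperator(weights=[1, 3, 4])  # f + 3f' + 4f''

    # Functional weights must share the operator's domain.
    monomial = Monomial((0, 1), n_basis=3)
    lfd_fd = LinearDifferentialOperator(
        weights=[FDataBasis(monomial, [1, 2, 3])])

    # As a roughness penalty, e.g. for FPCA (cf. test_fpca.py):
    regularization = TikhonovRegularization(
        lfd_2, regularization_parameter=1e5)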
def test_init_integer(self): + """Tests initializations that only specify the order.""" + + # Checks for a zero order Lfd object + lfd_0 = LinearDifferentialOperator(order=0) + weightfd = [FDataBasis(Constant((0, 1)), 1)] + + np.testing.assert_equal( + lfd_0.weights, weightfd, + "Wrong list of weight functions of the linear operator") + + # Checks for a non-zero order Lfd object + lfd_3 = LinearDifferentialOperator(3) + consfd = FDataBasis(Constant((0, 1)), [[0], [0], [0], [1]]) + bwtlist3 = list(consfd) + + np.testing.assert_equal( + lfd_3.weights, bwtlist3, + "Wrong list of weight functions of the linear operator") + + # Negative order must fail + with np.testing.assert_raises(ValueError): + LinearDifferentialOperator(-1) + + def test_init_list_int(self): + """Tests initializations with integer weights.""" + + coefficients = [1, 3, 4, 5, 6, 7] + + constant = Constant((0, 1)) + fd = FDataBasis(constant, np.array(coefficients).reshape(-1, 1)) + + lfd = LinearDifferentialOperator(weights=coefficients) + + np.testing.assert_equal( + lfd.weights, list(fd), + "Wrong list of weight functions of the linear operator") + + def test_init_list_fdatabasis(self): + """Test initialization with functional weights.""" + + n_basis = 4 + n_weights = 6 + + monomial = Monomial((0, 1), n_basis=n_basis) + + weights = np.arange(n_basis * n_weights).reshape((n_weights, n_basis)) + + fd = FDataBasis(monomial, weights) + + fdlist = [FDataBasis(monomial, w) for w in weights] + lfd = LinearDifferentialOperator(weights=fdlist) + + np.testing.assert_equal( + lfd.weights, list(fd), + "Wrong list of weight functions of the linear operator") + + # Check failure if intervals do not match + constant = Constant((0, 2)) + fdlist.append(FDataBasis(constant, 1)) + with np.testing.assert_raises(ValueError): + LinearDifferentialOperator(weights=fdlist) + + def test_init_wrong_params(self): + + # Check that specifying both arguments fails + with np.testing.assert_raises(ValueError): + LinearDifferentialOperator(1, weights=[1, 1]) + + # Check invalid domain range + monomial = Monomial((0, 1), n_basis=3) + fdlist = [FDataBasis(monomial, [1, 2, 3])] + + with np.testing.assert_raises(ValueError): + LinearDifferentialOperator(weights=fdlist, + domain_range=(0, 2)) + + # Check wrong types fail + with np.testing.assert_raises(ValueError): + LinearDifferentialOperator(weights=['a']) + + with np.testing.assert_raises(ValueError): + LinearDifferentialOperator(weights='a') + + +if __name__ == '__main__': + print() + unittest.main() diff --git a/tests/test_magnitude_shape.py b/tests/test_magnitude_shape.py index 77fd0a4d5..50509e483 100644 --- a/tests/test_magnitude_shape.py +++ b/tests/test_magnitude_shape.py @@ -1,20 +1,17 @@ -import unittest - -import numpy as np from skfda import FDataGrid from skfda.datasets import fetch_weather from skfda.exploratory.depth import modified_band_depth from skfda.exploratory.visualization import MagnitudeShapePlot +import unittest + +import numpy as np class TestMagnitudeShapePlot(unittest.TestCase): def test_magnitude_shape_plot(self): fd = fetch_weather()["data"] - fd_temperatures = FDataGrid(data_matrix=fd.data_matrix[:, :, 0], - sample_points=fd.sample_points, - dataset_label=fd.dataset_label, - axes_labels=fd.axes_labels[0:2]) + fd_temperatures = fd.coordinates[0] msplot = MagnitudeShapePlot( fd_temperatures, depth_method=modified_band_depth) np.testing.assert_allclose(msplot.points, diff --git a/tests/test_math.py b/tests/test_math.py new file mode 100644 index 000000000..a53729c8c --- /dev/null +++ 
b/tests/test_math.py @@ -0,0 +1,75 @@ +import skfda +from skfda.representation.basis import Monomial, Tensor, VectorValued +import unittest +import numpy as np + + +def ndm(*args): + return [x[(None,) * i + (slice(None),) + (None,) * (len(args) - i - 1)] + for i, x in enumerate(args)] + + +class InnerProductTest(unittest.TestCase): + + def test_several_variables(self): + + def f(x, y, z): + return x * y * z + + t = np.linspace(0, 1, 100) + + x2, y2, z2 = ndm(t, 2 * t, 3 * t) + + data_matrix = f(x2, y2, z2) + + sample_points = [t, 2 * t, 3 * t] + + fd = skfda.FDataGrid( + data_matrix[np.newaxis, ...], sample_points=sample_points) + + basis = Tensor([Monomial(n_basis=5, domain_range=(0, 1)), + Monomial(n_basis=5, domain_range=(0, 2)), + Monomial(n_basis=5, domain_range=(0, 3))]) + + fd_basis = fd.to_basis(basis) + + res = 8 + + np.testing.assert_allclose( + skfda.misc.inner_product(fd, fd), res, rtol=1e-5) + np.testing.assert_allclose( + skfda.misc.inner_product(fd_basis, fd_basis), res, rtol=1e-5) + + def test_vector_valued(self): + + def f(x): + return x**2 + + def g(y): + return 3 * y + + t = np.linspace(0, 1, 100) + + data_matrix = np.array([np.array([f(t), g(t)]).T]) + + sample_points = [t] + + fd = skfda.FDataGrid( + data_matrix, sample_points=sample_points) + + basis = VectorValued([Monomial(n_basis=5), + Monomial(n_basis=5)]) + + fd_basis = fd.to_basis(basis) + + res = 1 / 5 + 3 + + np.testing.assert_allclose( + skfda.misc.inner_product(fd, fd), res, rtol=1e-5) + np.testing.assert_allclose( + skfda.misc.inner_product(fd_basis, fd_basis), res, rtol=1e-5) + + +if __name__ == "__main__": + #import sys;sys.argv = ['', 'Test.testName'] + unittest.main() diff --git a/tests/test_metrics.py b/tests/test_metrics.py index aa6dc39f8..e95371f1b 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -1,13 +1,13 @@ +from skfda import FDataGrid, FDataBasis +from skfda.datasets import make_multimodal_samples +from skfda.exploratory import stats +from skfda.misc.metrics import lp_distance, lp_norm +from skfda.representation.basis import Monomial import unittest import scipy.stats.mstats import numpy as np -from skfda import FDataGrid, FDataBasis -from skfda.datasets import make_multimodal_samples -from skfda.exploratory import stats -from skfda.misc.metrics import lp_distance, norm_lp, vectorial_norm -from skfda.representation.basis import Monomial class TestLpMetrics(unittest.TestCase): @@ -22,48 +22,26 @@ def setUp(self): self.fd_surface = make_multimodal_samples(n_samples=3, dim_domain=2, random_state=0) - def test_vectorial_norm(self): + def test_lp_norm(self): - vec = vectorial_norm(self.fd_curve, p=2) - np.testing.assert_array_almost_equal(vec.data_matrix, - np.sqrt(2) * self.fd.data_matrix) + np.testing.assert_allclose(lp_norm(self.fd, p=1), [16., 41.33333333]) + np.testing.assert_allclose(lp_norm(self.fd, p='inf'), [6, 25]) - vec = vectorial_norm(self.fd_curve, p='inf') - np.testing.assert_array_almost_equal(vec.data_matrix, - self.fd.data_matrix) + def test_lp_norm_curve(self): - def test_vectorial_norm_surface(self): - - fd_surface_curve = self.fd_surface.concatenate(self.fd_surface, - as_coordinates=True) - vec = vectorial_norm(fd_surface_curve, p=2) - np.testing.assert_array_almost_equal( - vec.data_matrix, np.sqrt(2) * self.fd_surface.data_matrix) - - vec = vectorial_norm(fd_surface_curve, p='inf') - np.testing.assert_array_almost_equal(vec.data_matrix, - self.fd_surface.data_matrix) - - def test_norm_lp(self): - - np.testing.assert_allclose(norm_lp(self.fd, p=1), [16., 
41.33333333]) - np.testing.assert_allclose(norm_lp(self.fd, p='inf'), [6, 25]) - - def test_norm_lp_curve(self): - - np.testing.assert_allclose(norm_lp(self.fd_curve, p=1, p2=1), + np.testing.assert_allclose(lp_norm(self.fd_curve, p=1, p2=1), [32., 82.666667]) - np.testing.assert_allclose(norm_lp(self.fd_curve, p='inf', p2='inf'), + np.testing.assert_allclose(lp_norm(self.fd_curve, p='inf', p2='inf'), [6, 25]) - def test_norm_lp_surface_inf(self): - np.testing.assert_allclose(norm_lp(self.fd_surface, p='inf').round(5), + def test_lp_norm_surface_inf(self): + np.testing.assert_allclose(lp_norm(self.fd_surface, p='inf').round(5), [0.99994, 0.99793, 0.99868]) - def test_norm_lp_surface(self): + def test_lp_norm_surface(self): # Integration of surfaces not implemented, add test case after # implementation - self.assertEqual(norm_lp(self.fd_surface), NotImplemented) + self.assertEqual(lp_norm(self.fd_surface, p=1), NotImplemented) def test_lp_error_dimensions(self): # Case internal arrays @@ -92,16 +70,6 @@ def test_lp_error_sample_points(self): with np.testing.assert_raises(ValueError): lp_distance(self.fd, fd2) - def test_lp_grid_basis(self): - - np.testing.assert_allclose(lp_distance(self.fd, self.fd_basis), 0) - np.testing.assert_allclose(lp_distance(self.fd_basis, self.fd), 0) - np.testing.assert_allclose( - lp_distance(self.fd_basis, - self.fd_basis, eval_points=[1, 2, 3, 4, 5]), 0) - np.testing.assert_allclose(lp_distance(self.fd_basis, self.fd_basis), - 0) - if __name__ == '__main__': print() diff --git a/tests/test_neighbors.py b/tests/test_neighbors.py index 98199da0e..22cded6c3 100644 --- a/tests/test_neighbors.py +++ b/tests/test_neighbors.py @@ -3,14 +3,16 @@ import unittest import numpy as np -from skfda.datasets import make_multimodal_samples +from skfda.datasets import make_multimodal_samples, make_sinusoidal_process from skfda.exploratory.stats import mean as l2_mean from skfda.misc.metrics import lp_distance, pairwise_distance from skfda.ml.classification import (KNeighborsClassifier, RadiusNeighborsClassifier, - NearestCentroids) + NearestCentroid) from skfda.ml.clustering import NearestNeighbors from skfda.ml.regression import KNeighborsRegressor, RadiusNeighborsRegressor +#from skfda.exploratory.outliers import LocalOutlierFactor +from skfda._neighbors.outlier import LocalOutlierFactor # Pending theory from skfda.representation.basis import Fourier @@ -41,13 +43,20 @@ def setUp(self): self.probs = np.array(15 * [[1., 0.]] + 15 * [[0., 1.]])[idx] + # Dataset with outliers + fd_clean = make_sinusoidal_process(n_samples=25, error_std=0, + phase_std=0.1, random_state=0) + fd_outliers = make_sinusoidal_process(n_samples=2, error_std=0, + phase_mean=0.5, random_state=5) + self.fd_lof = fd_outliers.concatenate(fd_clean) + def test_predict_classifier(self): """Tests predict for neighbors classifier""" for neigh in (KNeighborsClassifier(), RadiusNeighborsClassifier(radius=.1), - NearestCentroids(), - NearestCentroids(metric=lp_distance, mean=l2_mean)): + NearestCentroid(), + NearestCentroid(metric=lp_distance, mean=l2_mean)): neigh.fit(self.X, self.y) pred = neigh.predict(self.X) @@ -86,13 +95,16 @@ def test_kneighbors(self): nn = NearestNeighbors() nn.fit(self.X) + lof = LocalOutlierFactor(n_neighbors=5) + lof.fit(self.X) + knn = KNeighborsClassifier() knn.fit(self.X, self.y) knnr = KNeighborsRegressor() knnr.fit(self.X, self.modes_location) - for neigh in [nn, knn, knnr]: + for neigh in [nn, knn, knnr, lof]: dist, links = neigh.kneighbors(self.X[:4]) @@ -101,12 +113,12 @@ def 
test_kneighbors(self): [2, 17, 22, 27, 26], [3, 4, 9, 5, 25]]) + graph = neigh.kneighbors_graph(self.X[:4]) + dist_kneigh = lp_distance(self.X[0], self.X[7]) np.testing.assert_array_almost_equal(dist[0, 1], dist_kneigh) - graph = neigh.kneighbors_graph(self.X[:4]) - for i in range(30): self.assertEqual(graph[0, i] == 1.0, i in links[0]) self.assertEqual(graph[0, i] == 0.0, i not in links[0]) @@ -243,12 +255,12 @@ def test_radius_outlier_functional_response(self): def test_nearest_centroids_exceptions(self): # Test more than one class - nn = NearestCentroids() + nn = NearestCentroid() with np.testing.assert_raises(ValueError): nn.fit(self.X[0:3], 3 * [0]) # Precomputed not supported - nn = NearestCentroids(metric='precomputed') + nn = NearestCentroid(metric='precomputed') with np.testing.assert_raises(ValueError): nn.fit(self.X[0:3], 3 * [0]) @@ -324,6 +336,91 @@ def test_multivariate_response_score(self): with np.testing.assert_raises(ValueError): neigh.score(self.X[:5], y) + def test_lof_fit_predict(self): + """Test that different ways of calling fit_predict give the same results""" + + # Outliers + expected = np.ones(len(self.fd_lof)) + expected[0:2] = -1 + + # With default l2 distance + lof = LocalOutlierFactor() + res = lof.fit_predict(self.fd_lof) + np.testing.assert_array_equal(expected, res) + + # With explicit l2 distance + lof2 = LocalOutlierFactor(metric=lp_distance) + res2 = lof2.fit_predict(self.fd_lof) + np.testing.assert_array_equal(expected, res2) + + d = pairwise_distance(lp_distance) + distances = d(self.fd_lof, self.fd_lof) + + # With precomputed distances + lof3 = LocalOutlierFactor(metric="precomputed") + res3 = lof3.fit_predict(distances) + np.testing.assert_array_equal(expected, res3) + + # With multivariate sklearn + lof4 = LocalOutlierFactor(metric="euclidean", multivariate_metric=True) + res4 = lof4.fit_predict(self.fd_lof) + np.testing.assert_array_equal(expected, res4) + + # Another way to call fit_predict, undocumented in sklearn + lof5 = LocalOutlierFactor(novelty=True) + res5 = lof5.fit(self.fd_lof).predict() + np.testing.assert_array_equal(expected, res5) + + # Check values of negative outlier factor + negative_lof = [-7.1068, -1.5412, -0.9961, -0.9854, -0.9896, -1.0993, + -1.065, -0.9871, -0.9821, -0.9955, -1.0385, -1.0072, + -0.9832, -1.0134, -0.9939, -1.0074, -0.992, -0.992, + -0.9883, -1.0012, -1.1149, -1.002, -0.9994, -0.9869, + -0.9726, -0.9989, -0.9904] + + np.testing.assert_array_almost_equal( + lof.negative_outlier_factor_.round(4), negative_lof) + + # Check same negative outlier factor + np.testing.assert_array_almost_equal(lof.negative_outlier_factor_, + lof2.negative_outlier_factor_) + + np.testing.assert_array_almost_equal(lof.negative_outlier_factor_, + lof3.negative_outlier_factor_) + + def test_lof_decision_function(self): + """Test decision_function and score_samples of LOF""" + + lof = LocalOutlierFactor(novelty=True) + lof.fit(self.fd_lof[5:]) + + score = lof.score_samples(self.fd_lof[:5]) + + np.testing.assert_array_almost_equal( + score.round(4), [-5.9726, -1.3445, -0.9853, -0.9817, -0.985], + err_msg='Error in LocalOutlierFactor.score_samples') + + # Test decision_function = score_samples - offset + np.testing.assert_array_almost_equal( + lof.decision_function(self.fd_lof[:5]), score - lof.offset_, + err_msg='Error in LocalOutlierFactor.decision_function') + + def test_lof_exceptions(self): + """Test errors due to the novelty attribute""" + + lof = LocalOutlierFactor(novelty=True) + + # Error in fit_predict function + with 
np.testing.assert_raises(AttributeError): + lof.fit_predict(self.fd_lof[5:]) + + lof.set_params(novelty=False) + lof.fit(self.fd_lof[5:]) + + # Error in predict function + with np.testing.assert_raises(AttributeError): + lof.predict(self.fd_lof[5:]) + if __name__ == '__main__': print() diff --git a/tests/test_oneway_anova.py b/tests/test_oneway_anova.py new file mode 100644 index 000000000..31eed81b7 --- /dev/null +++ b/tests/test_oneway_anova.py @@ -0,0 +1,82 @@ +from skfda.datasets import fetch_gait +from skfda.inference.anova import oneway_anova, v_asymptotic_stat, \ + v_sample_stat +from skfda.representation import FDataGrid +from skfda.representation.basis import Fourier +import unittest + +import pytest + +import numpy as np + + +class OnewayAnovaTests(unittest.TestCase): + + def test_oneway_anova_args(self): + with self.assertRaises(ValueError): + oneway_anova() + with self.assertRaises(ValueError): + oneway_anova(1, '2') + with self.assertRaises(ValueError): + oneway_anova(FDataGrid([0]), n_reps=-2) + + def test_v_stats_args(self): + with self.assertRaises(ValueError): + v_sample_stat(1, [1]) + with self.assertRaises(ValueError): + v_sample_stat(FDataGrid([0]), [0, 1]) + with self.assertRaises(ValueError): + v_asymptotic_stat(1, [1]) + with self.assertRaises(ValueError): + v_asymptotic_stat(FDataGrid([0]), [0, 1]) + with self.assertRaises(ValueError): + v_asymptotic_stat(FDataGrid([[1, 1, 1], [1, 1, 1]]), [0, 0]) + + def test_v_stats(self): + n_features = 50 + weights = [1, 2, 3] + t = np.linspace(0, 1, n_features) + m1 = [1 for _ in range(n_features)] + m2 = [2 for _ in range(n_features)] + m3 = [3 for _ in range(n_features)] + fd = FDataGrid([m1, m2, m3], sample_points=t) + self.assertEqual(v_sample_stat(fd, weights), 7.0) + self.assertAlmostEqual(v_sample_stat(fd.to_basis(Fourier(n_basis=5)), + weights), 7.0) + res = (1 - 2 * np.sqrt(1 / 2)) ** 2 + (1 - 3 * np.sqrt(1 / 3)) ** 2 \ + + (2 - 3 * np.sqrt(2 / 3)) ** 2 + self.assertAlmostEqual(v_asymptotic_stat(fd, weights), res) + self.assertAlmostEqual(v_asymptotic_stat(fd.to_basis(Fourier( + n_basis=5)), weights), res) + + def test_asymptotic_behaviour(self): + dataset = fetch_gait() + fd = dataset['data'].coordinates[1] + fd1 = fd[0:5] + fd2 = fd[5:10] + fd3 = fd[10:15] + + n_little_sim = 10 + + sims = np.array([oneway_anova( + fd1, fd2, fd3, n_reps=500, random_state=i)[1] + for i in range(n_little_sim)]) + little_sim = np.mean(sims) + big_sim = oneway_anova(fd1, fd2, fd3, n_reps=2000, random_state=100)[1] + self.assertAlmostEqual(little_sim, big_sim, delta=0.05) + + fd = fd.to_basis(Fourier(n_basis=5)) + fd1 = fd[0:5] + fd2 = fd[5:10] + + sims = np.array([oneway_anova( + fd1, fd2, n_reps=500, random_state=i)[1] + for i in range(n_little_sim)]) + little_sim = np.mean(sims) + big_sim = oneway_anova(fd1, fd2, n_reps=2000, random_state=100)[1] + self.assertAlmostEqual(little_sim, big_sim, delta=0.05) + + +if __name__ == '__main__': + print() + unittest.main() diff --git a/tests/test_registration.py b/tests/test_registration.py index f23e86690..411b0cacc 100644 --- a/tests/test_registration.py +++ b/tests/test_registration.py @@ -1,16 +1,21 @@ -import unittest - -import numpy as np - from skfda import FDataGrid -from skfda.representation.interpolation import SplineInterpolator -from skfda.representation.basis import Fourier +from skfda._utils import _check_estimator from skfda.datasets import (make_multimodal_samples, make_multimodal_landmarks, make_sinusoidal_process) +from skfda.exploratory.stats import mean from 
skfda.preprocessing.registration import ( normalize_warping, invert_warping, landmark_shift_deltas, landmark_shift, - landmark_registration_warping, landmark_registration, mse_decomposition, - shift_registration_deltas, shift_registration) + landmark_registration_warping, landmark_registration, ShiftRegistration) +from skfda.preprocessing.registration.validation import ( + AmplitudePhaseDecomposition, LeastSquares, + SobolevLeastSquares, PairwiseCorrelation) +from skfda.representation.basis import Fourier +from skfda.representation.interpolation import SplineInterpolation +import unittest + +from sklearn.exceptions import NotFittedError + +import numpy as np class TestWarping(unittest.TestCase): @@ -20,9 +25,9 @@ def setUp(self): """Initialization of samples""" self.time = np.linspace(-1, 1, 50) - interpolator = SplineInterpolator(3, monotone=True) + interpolation = SplineInterpolation(3, monotone=True) self.polynomial = FDataGrid([self.time**3, self.time**5], - self.time, interpolator=interpolator) + self.time, interpolation=interpolation) def test_invert_warping(self): @@ -46,11 +51,30 @@ def test_standard_normalize_warping(self): np.testing.assert_array_almost_equal(normalized.sample_points[0], np.linspace(0, 1, 50)) - np.testing.assert_array_almost_equal(normalized(0), [[0.], [0.]]) + np.testing.assert_array_almost_equal( + normalized(0)[..., 0], [[0.], [0.]]) - np.testing.assert_array_almost_equal(normalized(1), [[1.], [1.]]) + np.testing.assert_array_almost_equal( + normalized(1)[..., 0], [[1.], [1.]]) - def test_normalize_warpig(self): + def test_standard_normalize_warping_default_value(self): + """Test normalization with default values""" + + normalized = normalize_warping(self.polynomial) + + # Test that the domain range (-1, 1) is kept + np.testing.assert_array_equal(normalized.domain_range, [(-1, 1)]) + + np.testing.assert_array_almost_equal(normalized.sample_points[0], + np.linspace(-1, 1, 50)) + + np.testing.assert_array_almost_equal( + normalized(-1)[..., 0], [[-1], [-1]]) + + np.testing.assert_array_almost_equal( + normalized(1)[..., 0], [[1.], [1.]]) + + def test_normalize_warping(self): """Test normalization to (a, b)""" a = -4 b = 3 @@ -63,9 +87,9 @@ def test_normalize_warpig(self): np.testing.assert_array_almost_equal(normalized.sample_points[0], np.linspace(*domain, 50)) - np.testing.assert_array_equal(normalized(a), [[a], [a]]) + np.testing.assert_array_equal(normalized(a)[..., 0], [[a], [a]]) - np.testing.assert_array_equal(normalized(b), [[b], [b]]) + np.testing.assert_array_equal(normalized(b)[..., 0], [[b], [b]]) def test_landmark_shift_deltas(self): @@ -83,13 +107,13 @@ def test_landmark_shift(self): landmarks = landmarks.squeeze() original_modes = fd(landmarks.reshape((3, 1, 1)), - aligned_evaluation=False) - # Test default location + aligned=False) + # Test default location fd_registered = landmark_shift(fd, landmarks) - center = (landmarks.max() + landmarks.min())/2 + center = (landmarks.max() + landmarks.min()) / 2 reg_modes = fd_registered(center) - # Test callable location + # Test callable location np.testing.assert_almost_equal(reg_modes, original_modes, decimal=2) fd_registered = landmark_shift(fd, landmarks, location=np.mean) @@ -105,9 +129,9 @@ def test_landmark_shift(self): np.testing.assert_almost_equal(reg_modes, original_modes, decimal=2) - # Test array location + # Test array location fd_registered = landmark_shift(fd, landmarks, location=[0, 0.1, 0.2]) - reg_modes = fd_registered([[0], [.1], [.2]], aligned_evaluation=False) + reg_modes = fd_registered([[0], [.1], [.2]], 
aligned=False) np.testing.assert_almost_equal(reg_modes, original_modes, decimal=2) @@ -120,12 +144,14 @@ def test_landmark_registration_warping(self): # Default location warping = landmark_registration_warping(fd, landmarks) center = (landmarks.max(axis=0) + landmarks.min(axis=0)) / 2 - np.testing.assert_almost_equal(warping(center), landmarks, decimal=1) + np.testing.assert_almost_equal( + warping(center)[..., 0], landmarks, decimal=1) # Fixed location center = [.3, .6] warping = landmark_registration_warping(fd, landmarks, location=center) - np.testing.assert_almost_equal(warping(center), landmarks, decimal=3) + np.testing.assert_almost_equal( + warping(center)[..., 0], landmarks, decimal=3) def test_landmark_registration(self): fd = make_multimodal_samples(n_samples=3, n_modes=2, random_state=9) @@ -133,7 +159,7 @@ def test_landmark_registration(self): random_state=9) landmarks = landmarks.squeeze() - original_values = fd(landmarks.reshape(3, 2), aligned_evaluation=False) + original_values = fd(landmarks.reshape(3, 2), aligned=False) # Default location fd_reg = landmark_registration(fd, landmarks) @@ -147,39 +173,228 @@ def test_landmark_registration(self): np.testing.assert_array_almost_equal(fd_reg(center), original_values, decimal=2) - def test_mse_decomposition(self): - fd = make_multimodal_samples(n_samples=3, random_state=1) - landmarks = make_multimodal_landmarks(n_samples=3, random_state=1) - landmarks = landmarks.squeeze() - warping = landmark_registration_warping(fd, landmarks) - fd_registered = fd.compose(warping) - ret = mse_decomposition(fd, fd_registered, warping) - np.testing.assert_almost_equal(ret.mse_amp, 0.0009866997121476962) - np.testing.assert_almost_equal(ret.mse_pha, 0.11576861468435257) - np.testing.assert_almost_equal(ret.rsq, 0.9915489952877273) - np.testing.assert_almost_equal(ret.cr, 0.9999963424653829) +class TestShiftRegistration(unittest.TestCase): + """Test shift registration""" + + def setUp(self): + """Initialization of samples""" + self.fd = make_sinusoidal_process(n_samples=2, error_std=0, + random_state=1) + self.fd.extrapolation = "periodic" + + def test_fit_transform(self): - def test_shift_registration_deltas(self): + reg = ShiftRegistration() - fd = make_sinusoidal_process(n_samples=2, error_std=0, random_state=1) + # Test fit transform with FDataGrid + fd_reg = reg.fit_transform(self.fd) - deltas = shift_registration_deltas(fd).round(3) - np.testing.assert_array_almost_equal(deltas, [-0.022, 0.03]) + # Check attributes fitted + self.assertTrue(hasattr(reg, 'deltas_')) + self.assertTrue(hasattr(reg, 'template_')) + self.assertTrue(hasattr(reg, 'n_iter_')) + self.assertTrue(isinstance(fd_reg, FDataGrid)) - fd = fd.to_basis(Fourier()) - deltas = shift_registration_deltas(fd).round(3) - np.testing.assert_array_almost_equal(deltas, [-0.022, 0.03]) + deltas = reg.deltas_.round(3) + np.testing.assert_array_almost_equal(deltas, [-0.022, 0.03]) - def test_shift_registration(self): + # Test with Basis + fd = self.fd.to_basis(Fourier()) + reg.fit_transform(fd) + deltas = reg.deltas_.round(3) + np.testing.assert_array_almost_equal(deltas, [-0.022, 0.03]) + + def test_fit_and_transform(self): """Test wrapper of shift_registration_deltas""" - fd = make_sinusoidal_process(n_samples=2, error_std=0, random_state=1) + fd = make_sinusoidal_process(n_samples=2, error_std=0, random_state=10) + + reg = ShiftRegistration() + response = reg.fit(self.fd) + + # Check attributes and returned value + self.assertTrue(hasattr(reg, 'template_')) + self.assertTrue(response 
is reg) + + fd_registered = reg.transform(fd) + deltas = reg.deltas_.round(3) + np.testing.assert_allclose(deltas, [0.071, -0.072]) + + def test_inverse_transform(self): + + reg = ShiftRegistration() + fd = reg.fit_transform(self.fd) + fd = reg.inverse_transform(fd) + + np.testing.assert_array_almost_equal(fd.data_matrix, + self.fd.data_matrix, decimal=3) + + def test_raises(self): + + reg = ShiftRegistration() + + # Test not fitted + with np.testing.assert_raises(NotFittedError): + reg.transform(self.fd) + + reg.fit(self.fd) + reg.set_params(restrict_domain=True) - # Test use fit or transform with restrict_domain=True + with np.testing.assert_raises(AttributeError): + reg.transform(self.fd) + + with np.testing.assert_raises(AttributeError): + reg.fit(self.fd) + + # Test inverse_transform without previous transformation + with np.testing.assert_raises(AttributeError): + reg.inverse_transform(self.fd) + + reg.fit_transform(self.fd) + + # Test inverse transform with a different number of samples + with np.testing.assert_raises(ValueError): + reg.inverse_transform(self.fd[:1]) + + fd = make_multimodal_samples(dim_domain=2, random_state=0) + + with np.testing.assert_raises(ValueError): + reg.fit_transform(fd) + + reg.set_params(initial=[0.]) + + # Wrong initial estimation + with np.testing.assert_raises(ValueError): + reg.fit_transform(self.fd) + + def test_template(self): + + reg = ShiftRegistration() + fd_registered_1 = reg.fit_transform(self.fd) + + reg_2 = ShiftRegistration(template=reg.template_) + fd_registered_2 = reg_2.fit_transform(self.fd) + + reg_3 = ShiftRegistration(template=mean) + fd_registered_3 = reg_3.fit_transform(self.fd) + + reg_4 = ShiftRegistration(template=reg.template_) + fd_registered_4 = reg_4.fit(self.fd).transform(self.fd) + + np.testing.assert_array_almost_equal(fd_registered_1.data_matrix, + fd_registered_3.data_matrix) + + # With the template fixed, the convergence may vary + np.testing.assert_array_almost_equal(fd_registered_1.data_matrix, + fd_registered_2.data_matrix, + decimal=3) + + np.testing.assert_array_almost_equal(fd_registered_2.data_matrix, + fd_registered_4.data_matrix) + + def test_restrict_domain(self): + reg = ShiftRegistration(restrict_domain=True) + fd_registered_1 = reg.fit_transform(self.fd) + + np.testing.assert_array_almost_equal( + fd_registered_1.domain_range.round(3), [[0.022, 0.969]]) + + reg2 = ShiftRegistration(restrict_domain=True, template=reg.template_) + fd_registered_2 = reg2.fit_transform(self.fd) + + np.testing.assert_array_almost_equal( + fd_registered_2.data_matrix, fd_registered_1.data_matrix, + decimal=3) + + reg3 = ShiftRegistration(restrict_domain=True, template=mean) + fd_registered_3 = reg3.fit_transform(self.fd) + + np.testing.assert_array_almost_equal( + fd_registered_3.data_matrix, fd_registered_1.data_matrix) + + def test_initial_estimation(self): + reg = ShiftRegistration(initial=[-0.02161235, 0.03032652]) + reg.fit_transform(self.fd) + + # Only 1 iteration is needed until convergence + self.assertEqual(reg.n_iter_, 1) + + def test_custom_output_points(self): + reg = ShiftRegistration(output_points=np.linspace(0, 1, 50)) + reg.fit_transform(self.fd) + + +class TestRegistrationValidation(unittest.TestCase): + """Test registration validation criteria""" + + def setUp(self): + """Initialization of samples""" + self.X = make_sinusoidal_process(error_std=0, random_state=0) + 
self.shift_registration = ShiftRegistration().fit(self.X) + + def test_amplitude_phase_score(self): + scorer = AmplitudePhaseDecomposition() + score = scorer(self.shift_registration, self.X) + np.testing.assert_allclose(score, 0.972095, rtol=1e-6) + + def test_amplitude_phase_score_with_output_points(self): + eval_points = self.X.sample_points[0] + scorer = AmplitudePhaseDecomposition(eval_points=eval_points) + score = scorer(self.shift_registration, self.X) + np.testing.assert_allclose(score, 0.972095, rtol=1e-6) + + def test_amplitude_phase_score_with_basis(self): + scorer = AmplitudePhaseDecomposition() + X = self.X.to_basis(Fourier()) + score = scorer(self.shift_registration, X) + np.testing.assert_allclose(score, 0.995087, rtol=1e-6) + + def test_default_score(self): + + score = self.shift_registration.score(self.X) + np.testing.assert_allclose(score, 0.972095, rtol=1e-6) + + def test_least_squares_score(self): + scorer = LeastSquares() + score = scorer(self.shift_registration, self.X) + np.testing.assert_allclose(score, 0.795933, rtol=1e-6) + + def test_sobolev_least_squares_score(self): + scorer = SobolevLeastSquares() + score = scorer(self.shift_registration, self.X) + np.testing.assert_allclose(score, 0.76124, rtol=1e-6) + + def test_pairwise_correlation(self): + scorer = PairwiseCorrelation() + score = scorer(self.shift_registration, self.X) + np.testing.assert_allclose(score, 1.816228, rtol=1e-6) + + def test_mse_decomposition(self): + + fd = make_multimodal_samples(n_samples=3, random_state=1) + landmarks = make_multimodal_landmarks(n_samples=3, random_state=1) + landmarks = landmarks.squeeze() + warping = landmark_registration_warping(fd, landmarks) + fd_registered = fd.compose(warping) + scorer = AmplitudePhaseDecomposition(return_stats=True) + ret = scorer.score_function(fd, fd_registered, warping=warping) + np.testing.assert_allclose(ret.mse_amp, 0.0009866997121476962) + np.testing.assert_allclose(ret.mse_pha, 0.11576935495450151) + np.testing.assert_allclose(ret.r_squared, 0.9915489952877273) + np.testing.assert_allclose(ret.c_r, 0.999999, rtol=1e-6) + + def test_raises_amplitude_phase(self): + scorer = AmplitudePhaseDecomposition() + + # Inconsistent number of functions registered + with np.testing.assert_raises(ValueError): + scorer.score_function(self.X, self.X[:2]) + + # Inconsistent number of warpings + with np.testing.assert_raises(ValueError): + scorer.score_function(self.X, self.X, warping=self.X[:2]) if __name__ == '__main__': diff --git a/tests/test_regression.py b/tests/test_regression.py index 3531df513..edd661582 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -1,40 +1,160 @@ +from skfda.misc.operators import LinearDifferentialOperator +from skfda.misc.regularization import TikhonovRegularization +from skfda.ml.regression import LinearRegression +from skfda.representation.basis import (FDataBasis, Monomial, + Fourier, BSpline) import unittest import numpy as np -from skfda.ml.regression import LinearScalarRegression -from skfda.representation.basis import (FDataBasis, Constant, Monomial, - Fourier, BSpline) -class TestLinearScalarRegression(unittest.TestCase): +class TestScalarLinearRegression(unittest.TestCase): - def test_regression_fit(self): + def test_regression_single_explanatory(self): x_basis = Monomial(n_basis=7) x_fd = FDataBasis(x_basis, np.identity(7)) beta_basis = Fourier(n_basis=5) beta_fd = FDataBasis(beta_basis, [1, 1, 1, 1, 1]) - y = [1.0000684777229512, - 0.1623672257830915, - 0.08521053851548224, - 
0.08514200869281137, - 0.09529138749665378, - 0.10549625973303875, - 0.11384314859153018] + y = [0.9999999999999993, + 0.162381381441085, + 0.08527083481359901, + 0.08519946930844623, + 0.09532291032042489, + 0.10550022969639987, + 0.11382675064746171] + + scalar = LinearRegression(coef_basis=[beta_basis]) + scalar.fit(x_fd, y) + np.testing.assert_allclose(scalar.coef_[0].coefficients, + beta_fd.coefficients) + np.testing.assert_allclose(scalar.intercept_, + 0.0, atol=1e-6) + + y_pred = scalar.predict(x_fd) + np.testing.assert_allclose(y_pred, y) + + scalar = LinearRegression(coef_basis=[beta_basis], + fit_intercept=False) + scalar.fit(x_fd, y) + np.testing.assert_allclose(scalar.coef_[0].coefficients, + beta_fd.coefficients) + np.testing.assert_equal(scalar.intercept_, + 0.0) + + y_pred = scalar.predict(x_fd) + np.testing.assert_allclose(y_pred, y) + + def test_regression_multiple_explanatory(self): + y = [1, 2, 3, 4, 5, 6, 7] + + X = FDataBasis(Monomial(n_basis=7), np.identity(7)) + + beta1 = BSpline(domain_range=(0, 1), n_basis=5) + + scalar = LinearRegression(coef_basis=[beta1]) + + scalar.fit(X, y) + + np.testing.assert_allclose(scalar.intercept_.round(4), + np.array([32.65]), rtol=1e-3) + + np.testing.assert_allclose( + scalar.coef_[0].coefficients.round(4), + np.array([[-28.6443, + 80.3996, + -188.587, + 236.5832, + -481.3449]]), rtol=1e-3) + + y_pred = scalar.predict(X) + np.testing.assert_allclose(y_pred, y, atol=0.01) + + def test_regression_mixed(self): + + multivariate = np.array([[0, 0], [2, 7], [1, 7], [3, 9], + [4, 16], [2, 14], [3, 5]]) + + X = [multivariate, + FDataBasis(Monomial(n_basis=3), [[1, 0, 0], [0, 1, 0], [0, 0, 1], + [1, 0, 1], [1, 0, 0], [0, 1, 0], + [0, 0, 1]])] + + # y = 2 + sum([3, 1] * array) + int(3 * function) + intercept = 2 + coefs_multivariate = np.array([3, 1]) + coefs_functions = FDataBasis( + Monomial(n_basis=3), [[3, 0, 0]]) + y_integral = np.array([3, 3 / 2, 1, 4, 3, 3 / 2, 1]) + y_sum = multivariate @ coefs_multivariate + y = 2 + y_sum + y_integral + + scalar = LinearRegression() + scalar.fit(X, y) - scalar = LinearScalarRegression([beta_basis]) - scalar.fit([x_fd], y) - np.testing.assert_array_almost_equal(scalar.beta_[0].coefficients, - beta_fd.coefficients) + np.testing.assert_allclose(scalar.intercept_, + intercept, atol=0.01) - def test_regression_predict_single_explanatory(self): + np.testing.assert_allclose( + scalar.coef_[0], + coefs_multivariate, atol=0.01) + + np.testing.assert_allclose( + scalar.coef_[1].coefficients, + coefs_functions.coefficients, atol=0.01) + + y_pred = scalar.predict(X) + np.testing.assert_allclose(y_pred, y, atol=0.01) + + def test_regression_mixed_regularization(self): + + multivariate = np.array([[0, 0], [2, 7], [1, 7], [3, 9], + [4, 16], [2, 14], [3, 5]]) + + X = [multivariate, + FDataBasis(Monomial(n_basis=3), [[1, 0, 0], [0, 1, 0], [0, 0, 1], + [1, 0, 1], [1, 0, 0], [0, 1, 0], + [0, 0, 1]])] + + # y = 2 + sum([3, 1] * array) + int(3 * function) + intercept = 2 + coefs_multivariate = np.array([3, 1]) + y_integral = np.array([3, 3 / 2, 1, 4, 3, 3 / 2, 1]) + y_sum = multivariate @ coefs_multivariate + y = 2 + y_sum + y_integral + + scalar = LinearRegression( + regularization=[TikhonovRegularization(lambda x: x), + TikhonovRegularization( + LinearDifferentialOperator(2))]) + scalar.fit(X, y) + + np.testing.assert_allclose(scalar.intercept_, + intercept, atol=0.01) + + np.testing.assert_allclose( + scalar.coef_[0], + [2.536739, 1.072186], atol=0.01) + + np.testing.assert_allclose( + scalar.coef_[1].coefficients, + 
[[2.125676, 2.450782, 5.808745e-4]], atol=0.01) + + y_pred = scalar.predict(X) + np.testing.assert_allclose( + y_pred, + [5.349035, 16.456464, 13.361185, 23.930295, + 32.650965, 23.961766, 16.29029], + atol=0.01) + + def test_regression_regularization(self): x_basis = Monomial(n_basis=7) x_fd = FDataBasis(x_basis, np.identity(7)) beta_basis = Fourier(n_basis=5) - beta_fd = FDataBasis(beta_basis, [1, 1, 1, 1, 1]) + beta_fd = FDataBasis(beta_basis, [1.0403, 0, 0, 0, 0]) y = [1.0000684777229512, 0.1623672257830915, 0.08521053851548224, @@ -43,35 +163,62 @@ def test_regression_predict_single_explanatory(self): 0.10549625973303875, 0.11384314859153018] - scalar = LinearScalarRegression([beta_basis]) - scalar.fit([x_fd], y) - np.testing.assert_array_almost_equal(scalar.beta_[0].coefficients, - beta_fd.coefficients) - - def test_regression_predict_multiple_explanatory(self): - y = [1, 2, 3, 4, 5, 6, 7] - - x0 = FDataBasis(Constant(domain_range=(0, 1)), np.ones((7, 1))) - x1 = FDataBasis(Monomial(n_basis=7), np.identity(7)) - - beta0 = Constant(domain_range=(0, 1)) - beta1 = BSpline(domain_range=(0, 1), n_basis=5) - - scalar = LinearScalarRegression([beta0, beta1]) - - scalar.fit([x0, x1], y) - - betas = scalar.beta_ - - np.testing.assert_array_almost_equal(betas[0].coefficients.round(4), - np.array([[32.6518]])) - - np.testing.assert_array_almost_equal(betas[1].coefficients.round(4), - np.array([[-28.6443, - 80.3996, - -188.587, - 236.5832, - -481.3449]])) + y_pred_compare = [0.890341, + 0.370162, + 0.196773, + 0.110079, + 0.058063, + 0.023385, + -0.001384] + + scalar = LinearRegression( + coef_basis=[beta_basis], + regularization=TikhonovRegularization( + LinearDifferentialOperator(2))) + scalar.fit(x_fd, y) + np.testing.assert_allclose(scalar.coef_[0].coefficients, + beta_fd.coefficients, atol=1e-3) + np.testing.assert_allclose(scalar.intercept_, + -0.15, atol=1e-4) + + y_pred = scalar.predict(x_fd) + np.testing.assert_allclose(y_pred, y_pred_compare, atol=1e-4) + + x_basis = Monomial(n_basis=3) + x_fd = FDataBasis(x_basis, [[1, 0, 0], + [0, 1, 0], + [0, 0, 1], + [2, 0, 1]]) + + beta_fd = FDataBasis(x_basis, [3, 2, 1]) + y = [1 + 13 / 3, 1 + 29 / 12, 1 + 17 / 10, 1 + 311 / 30] + + # Non regularized + scalar = LinearRegression() + scalar.fit(x_fd, y) + np.testing.assert_allclose(scalar.coef_[0].coefficients, + beta_fd.coefficients) + np.testing.assert_allclose(scalar.intercept_, + 1) + + y_pred = scalar.predict(x_fd) + np.testing.assert_allclose(y_pred, y) + + # Regularized + beta_fd_reg = FDataBasis(x_basis, [2.812, 3.043, 0]) + y_reg = [5.333, 3.419, 2.697, 11.366] + + scalar_reg = LinearRegression( + regularization=TikhonovRegularization( + LinearDifferentialOperator(2))) + scalar_reg.fit(x_fd, y) + np.testing.assert_allclose(scalar_reg.coef_[0].coefficients, + beta_fd_reg.coefficients, atol=0.001) + np.testing.assert_allclose(scalar_reg.intercept_, + 0.998, atol=0.001) + + y_pred = scalar_reg.predict(x_fd) + np.testing.assert_allclose(y_pred, y_reg, atol=0.001) def test_error_X_not_FData(self): """Tests that at least one of the explanatory variables @@ -80,9 +227,10 @@ def test_error_X_not_FData(self): x_fd = np.identity(7) y = np.zeros(7) - scalar = LinearScalarRegression([Fourier(n_basis=5)]) + scalar = LinearRegression(coef_basis=[Fourier(n_basis=5)]) - np.testing.assert_raises(ValueError, scalar.fit, [x_fd], y) + with np.testing.assert_warns(UserWarning): + scalar.fit([x_fd], y) def test_error_y_is_FData(self): """Tests that none of the explained variables is an FData object @@ -90,9 
+238,10 @@ def test_error_y_is_FData(self): x_fd = FDataBasis(Monomial(n_basis=7), np.identity(7)) y = list(FDataBasis(Monomial(n_basis=7), np.identity(7))) - scalar = LinearScalarRegression([Fourier(n_basis=5)]) + scalar = LinearRegression(coef_basis=[Fourier(n_basis=5)]) - np.testing.assert_raises(ValueError, scalar.fit, [x_fd], y) + with np.testing.assert_raises(ValueError): + scalar.fit([x_fd], y) def test_error_X_beta_len_distinct(self): """ Test that the number of beta bases and explanatory variables @@ -102,11 +251,13 @@ def test_error_X_beta_len_distinct(self): y = [1 for _ in range(7)] beta = Fourier(n_basis=5) - scalar = LinearScalarRegression([beta]) - np.testing.assert_raises(ValueError, scalar.fit, [x_fd, x_fd], y) + scalar = LinearRegression(coef_basis=[beta]) + with np.testing.assert_raises(ValueError): + scalar.fit([x_fd, x_fd], y) - scalar = LinearScalarRegression([beta, beta]) - np.testing.assert_raises(ValueError, scalar.fit, [x_fd], y) + scalar = LinearRegression(coef_basis=[beta, beta]) + with np.testing.assert_raises(ValueError): + scalar.fit([x_fd], y) def test_error_y_X_samples_different(self): """ Test that the number of response samples and explanatory samples @@ -116,15 +267,17 @@ def test_error_y_X_samples_different(self): y = [1 for _ in range(8)] beta = Fourier(n_basis=5) - scalar = LinearScalarRegression([beta]) - np.testing.assert_raises(ValueError, scalar.fit, [x_fd], y) + scalar = LinearRegression(coef_basis=[beta]) + with np.testing.assert_raises(ValueError): + scalar.fit([x_fd], y) x_fd = FDataBasis(Monomial(n_basis=8), np.identity(8)) y = [1 for _ in range(7)] beta = Fourier(n_basis=5) - scalar = LinearScalarRegression([beta]) - np.testing.assert_raises(ValueError, scalar.fit, [x_fd], y) + scalar = LinearRegression(coef_basis=[beta]) + with np.testing.assert_raises(ValueError): + scalar.fit([x_fd], y) def test_error_beta_not_basis(self): """ Test that all beta are Basis objects. """ @@ -133,8 +286,9 @@ def test_error_beta_not_basis(self): y = [1 for _ in range(7)] beta = FDataBasis(Monomial(n_basis=7), np.identity(7)) - scalar = LinearScalarRegression([beta]) - np.testing.assert_raises(ValueError, scalar.fit, [x_fd], y) + scalar = LinearRegression(coef_basis=[beta]) + with np.testing.assert_raises(TypeError): + scalar.fit([x_fd], y) def test_error_weights_lenght(self): """ Test that the number of weights is equal to the @@ -145,8 +299,9 @@ def test_error_weights_lenght(self): weights = [1 for _ in range(8)] beta = Monomial(n_basis=7) - scalar = LinearScalarRegression([beta]) - np.testing.assert_raises(ValueError, scalar.fit, [x_fd], y, weights) + scalar = LinearRegression(coef_basis=[beta]) + with np.testing.assert_raises(ValueError): + scalar.fit([x_fd], y, weights) def test_error_weights_negative(self): """ Test that none of the weights are negative. 
""" @@ -156,8 +311,9 @@ def test_error_weights_negative(self): weights = [-1 for _ in range(7)] beta = Monomial(n_basis=7) - scalar = LinearScalarRegression([beta]) - np.testing.assert_raises(ValueError, scalar.fit, [x_fd], y, weights) + scalar = LinearRegression(coef_basis=[beta]) + with np.testing.assert_raises(ValueError): + scalar.fit([x_fd], y, weights) if __name__ == '__main__': diff --git a/tests/test_regularization.py b/tests/test_regularization.py new file mode 100644 index 000000000..3daadcb33 --- /dev/null +++ b/tests/test_regularization.py @@ -0,0 +1,243 @@ +import skfda +from skfda.misc.operators import LinearDifferentialOperator, gramian_matrix +from skfda.misc.operators._linear_differential_operator import ( + _monomial_evaluate_constant_linear_diff_op) +from skfda.misc.operators._operators import gramian_matrix_numerical +from skfda.misc.regularization import TikhonovRegularization, L2Regularization +from skfda.ml.regression.linear import LinearRegression +from skfda.representation.basis import Constant, Monomial, BSpline, Fourier +import unittest +import warnings + +from sklearn.datasets import make_regression +from sklearn.linear_model import Ridge +from sklearn.model_selection._split import train_test_split + +import numpy as np + + +class TestLinearDifferentialOperatorRegularization(unittest.TestCase): + + # def setUp(self): could be defined for set up before any test + + def _test_penalty(self, basis, linear_diff_op, atol=0, result=None): + + operator = LinearDifferentialOperator(linear_diff_op) + + penalty = gramian_matrix(operator, basis) + numerical_penalty = gramian_matrix_numerical(operator, basis) + + np.testing.assert_allclose( + penalty, + numerical_penalty, + atol=atol + ) + + if result is not None: + np.testing.assert_allclose( + penalty, + result, + atol=atol + ) + + def test_constant_penalty(self): + basis = Constant(domain_range=(0, 3)) + + res = np.array([[12]]) + + self._test_penalty(basis, linear_diff_op=[2, 3, 4], result=res) + + def test_monomial_linear_diff_op(self): + n_basis = 5 + + basis = Monomial(n_basis=n_basis) + + linear_diff_op = [3] + res = np.array([[0., 0., 0., 0., 3.], + [0., 0., 0., 3., 0.], + [0., 0., 3., 0., 0.], + [0., 3., 0., 0., 0.], + [3., 0., 0., 0., 0.]]) + + np.testing.assert_allclose( + _monomial_evaluate_constant_linear_diff_op(basis, linear_diff_op), + res + ) + + linear_diff_op = [3, 2] + res = np.array([[0., 0., 0., 0., 3.], + [0., 0., 0., 3., 2.], + [0., 0., 3., 4., 0.], + [0., 3., 6., 0., 0.], + [3., 8., 0., 0., 0.]]) + + np.testing.assert_allclose( + _monomial_evaluate_constant_linear_diff_op(basis, linear_diff_op), + res + ) + + linear_diff_op = [3, 0, 5] + res = np.array([[0., 0., 0., 0., 3.], + [0., 0., 0., 3., 0.], + [0., 0., 3., 0., 10.], + [0., 3., 0., 30., 0.], + [3., 0., 60., 0., 0.]]) + + np.testing.assert_allclose( + _monomial_evaluate_constant_linear_diff_op(basis, linear_diff_op), + res + ) + + def test_monomial_penalty(self): + basis = Monomial(n_basis=5, domain_range=(0, 3)) + + # Theorethical result + res = np.array([[0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0.], + [0., 0., 12., 54., 216.], + [0., 0., 54., 324., 1458.], + [0., 0., 216., 1458., 6998.4]]) + + self._test_penalty(basis, linear_diff_op=2, result=res) + + basis = Monomial(n_basis=8, domain_range=(1, 5)) + + self._test_penalty(basis, linear_diff_op=[1, 2, 3]) + self._test_penalty(basis, linear_diff_op=7) + self._test_penalty(basis, linear_diff_op=0) + self._test_penalty(basis, linear_diff_op=1) + self._test_penalty(basis, linear_diff_op=27) + + 
def test_fourier_penalty(self): + basis = Fourier(n_basis=5) + + res = np.array([[0., 0., 0., 0., 0.], + [0., 1558.55, 0., 0., 0.], + [0., 0., 1558.55, 0., 0.], + [0., 0., 0., 24936.73, 0.], + [0., 0., 0., 0., 24936.73]]) + + # Those comparisons require atol as there are zeros involved + self._test_penalty(basis, linear_diff_op=2, atol=0.01, result=res) + + basis = Fourier(n_basis=9, domain_range=(1, 5)) + self._test_penalty(basis, linear_diff_op=[1, 2, 3], atol=1e-7) + self._test_penalty(basis, linear_diff_op=[2, 3, 0.1, 1], atol=1e-7) + self._test_penalty(basis, linear_diff_op=0, atol=1e-7) + self._test_penalty(basis, linear_diff_op=1, atol=1e-7) + self._test_penalty(basis, linear_diff_op=3, atol=1e-7) + + def test_bspline_penalty(self): + basis = BSpline(n_basis=5) + + res = np.array([[96., -132., 24., 12., 0.], + [-132., 192., -48., -24., 12.], + [24., -48., 48., -48., 24.], + [12., -24., -48., 192., -132.], + [0., 12., 24., -132., 96.]]) + + self._test_penalty(basis, linear_diff_op=2, result=res) + + basis = BSpline(n_basis=9, domain_range=(1, 5)) + self._test_penalty(basis, linear_diff_op=[1, 2, 3]) + self._test_penalty(basis, linear_diff_op=[2, 3, 0.1, 1]) + self._test_penalty(basis, linear_diff_op=0) + self._test_penalty(basis, linear_diff_op=1) + self._test_penalty(basis, linear_diff_op=3) + self._test_penalty(basis, linear_diff_op=4) + + basis = BSpline(n_basis=16, order=8) + self._test_penalty(basis, linear_diff_op=0, atol=1e-7) + + def test_bspline_penalty_special_case(self): + basis = BSpline(n_basis=5) + + res = np.array([[1152., -2016., 1152., -288., 0.], + [-2016., 3600., -2304., 1008., -288.], + [1152., -2304., 2304., -2304., 1152.], + [-288., 1008., -2304., 3600., -2016.], + [0., -288., 1152., -2016., 1152.]]) + + operator = LinearDifferentialOperator(basis.order - 1) + penalty = gramian_matrix(operator, basis) + numerical_penalty = gramian_matrix_numerical(operator, basis) + + np.testing.assert_allclose( + penalty, + res + ) + + np.testing.assert_allclose( + numerical_penalty, + res + ) + + +class TestEndpointsDifferenceRegularization(unittest.TestCase): + + def test_basis_conversion(self): + + data_matrix = np.linspace([0, 1, 2, 3], [1, 2, 3, 4], 100) + + fd = skfda.FDataGrid(data_matrix.T) + + smoother = skfda.preprocessing.smoothing.BasisSmoother( + basis=skfda.representation.basis.BSpline( + n_basis=10, domain_range=fd.domain_range), + regularization=TikhonovRegularization( + lambda x: x(1)[:, 0] - x(0)[:, 0]), + smoothing_parameter=10000) + + fd_basis = smoother.fit_transform(fd) + + np.testing.assert_allclose( + fd_basis(0), + fd_basis(1), + atol=0.001 + ) + + +class TestL2Regularization(unittest.TestCase): + + def test_multivariate(self): + + def ignore_scalar_warning(): + warnings.filterwarnings( + "ignore", category=UserWarning, + message="All the covariates are scalar.") + + X, y = make_regression(n_samples=20, n_features=10, + random_state=1, bias=3.5) + + X_train, X_test, y_train, _ = train_test_split( + X, y, random_state=2) + + for regularization_parameter in [0, 1, 10, 100]: + + with self.subTest( + regularization_parameter=regularization_parameter): + + sklearn_l2 = Ridge(alpha=regularization_parameter) + skfda_l2 = LinearRegression( + regularization=L2Regularization( + regularization_parameter=regularization_parameter), + ) + + sklearn_l2.fit(X_train, y_train) + with warnings.catch_warnings(): + ignore_scalar_warning() + skfda_l2.fit(X_train, y_train) + + sklearn_y_pred = sklearn_l2.predict(X_test) + with warnings.catch_warnings(): + 
ignore_scalar_warning() + skfda_y_pred = skfda_l2.predict(X_test) + + np.testing.assert_allclose( + sklearn_l2.coef_, skfda_l2.coef_[0]) + + np.testing.assert_allclose( + sklearn_l2.intercept_, skfda_l2.intercept_) + + np.testing.assert_allclose( + sklearn_y_pred, skfda_y_pred) diff --git a/tests/test_smoothing.py b/tests/test_smoothing.py index 097929afb..076ca5ed9 100644 --- a/tests/test_smoothing.py +++ b/tests/test_smoothing.py @@ -1,15 +1,17 @@ +import skfda +from skfda._utils import _check_estimator +from skfda.misc.operators import LinearDifferentialOperator +from skfda.misc.regularization import TikhonovRegularization +from skfda.representation.basis import BSpline, Monomial +from skfda.representation.grid import FDataGrid import unittest import sklearn import numpy as np -import skfda -from skfda._utils import _check_estimator import skfda.preprocessing.smoothing as smoothing import skfda.preprocessing.smoothing.kernel_smoothers as kernel_smoothers import skfda.preprocessing.smoothing.validation as validation -from skfda.representation.basis import BSpline, Monomial -from skfda.representation.grid import FDataGrid class TestSklearnEstimators(unittest.TestCase): @@ -77,10 +79,13 @@ def test_cholesky(self): x = np.sin(2 * np.pi * t) + np.cos(2 * np.pi * t) basis = BSpline((0, 1), n_basis=5) fd = FDataGrid(data_matrix=x, sample_points=t) - smoother = smoothing.BasisSmoother(basis=basis, - smoothing_parameter=10, - penalty=2, method='cholesky', - return_basis=True) + smoother = smoothing.BasisSmoother( + basis=basis, + smoothing_parameter=10, + regularization=TikhonovRegularization( + LinearDifferentialOperator(2)), + method='cholesky', + return_basis=True) fd_basis = smoother.fit_transform(fd) np.testing.assert_array_almost_equal( fd_basis.coefficients.round(2), @@ -92,10 +97,13 @@ def test_qr(self): x = np.sin(2 * np.pi * t) + np.cos(2 * np.pi * t) basis = BSpline((0, 1), n_basis=5) fd = FDataGrid(data_matrix=x, sample_points=t) - smoother = smoothing.BasisSmoother(basis=basis, - smoothing_parameter=10, - penalty=2, method='qr', - return_basis=True) + smoother = smoothing.BasisSmoother( + basis=basis, + smoothing_parameter=10, + regularization=TikhonovRegularization( + LinearDifferentialOperator(2)), + method='qr', + return_basis=True) fd_basis = smoother.fit_transform(fd) np.testing.assert_array_almost_equal( fd_basis.coefficients.round(2), @@ -109,12 +117,58 @@ def test_monomial_smoothing(self): x = np.sin(2 * np.pi * t) + np.cos(2 * np.pi * t) basis = Monomial(n_basis=4) fd = FDataGrid(data_matrix=x, sample_points=t) - smoother = smoothing.BasisSmoother(basis=basis, - smoothing_parameter=1, - penalty=2, - return_basis=True) + smoother = smoothing.BasisSmoother( + basis=basis, + smoothing_parameter=1, + regularization=TikhonovRegularization( + LinearDifferentialOperator(2)), + return_basis=True) fd_basis = smoother.fit_transform(fd) # These results where extracted from the R package fda np.testing.assert_array_almost_equal( fd_basis.coefficients.round(2), np.array([[0.61, -0.88, 0.06, 0.02]])) + + def test_vector_valued_smoothing(self): + X, _ = skfda.datasets.fetch_weather(return_X_y=True) + + basis_dim = skfda.representation.basis.Fourier( + n_basis=7, domain_range=X.domain_range) + basis = skfda.representation.basis.VectorValued( + [basis_dim] * 2 + ) + + for method in smoothing.BasisSmoother.SolverMethod: + with self.subTest(method=method): + + basis_smoother = smoothing.BasisSmoother( + basis, + regularization=TikhonovRegularization( + LinearDifferentialOperator(2)), + 
return_basis=True, + smoothing_parameter=1, + method=method) + + basis_smoother_dim = smoothing.BasisSmoother( + basis_dim, + regularization=TikhonovRegularization( + LinearDifferentialOperator(2)), + return_basis=True, + smoothing_parameter=1, + method=method) + + X_basis = basis_smoother.fit_transform(X) + + self.assertEqual(X_basis.dim_codomain, 2) + + self.assertEqual(X_basis.coordinates[0].basis, basis_dim) + np.testing.assert_allclose( + X_basis.coordinates[0].coefficients, + basis_smoother_dim.fit_transform( + X.coordinates[0]).coefficients) + + self.assertEqual(X_basis.coordinates[1].basis, basis_dim) + np.testing.assert_allclose( + X_basis.coordinates[1].coefficients, + basis_smoother_dim.fit_transform( + X.coordinates[1]).coefficients)
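# An end-to-end sketch of the smoothing setup exercised in this file, under
# the same assumptions as the tests above: noisy grid data is projected onto
# a B-spline basis while a Tikhonov penalty on the second derivative keeps
# the fitted curve smooth.
import numpy as np
import skfda
from skfda.misc.operators import LinearDifferentialOperator
from skfda.misc.regularization import TikhonovRegularization
from skfda.preprocessing.smoothing import BasisSmoother
from skfda.representation.basis import BSpline

t = np.linspace(0, 1, 100)
x = np.sin(2 * np.pi * t) + np.cos(2 * np.pi * t)
fd = skfda.FDataGrid(data_matrix=x, sample_points=t)

smoother = BasisSmoother(
    basis=BSpline((0, 1), n_basis=5),
    smoothing_parameter=10,
    regularization=TikhonovRegularization(LinearDifferentialOperator(2)),
    return_basis=True)
fd_basis = smoother.fit_transform(fd)  # FDataBasis with 5 coefficients
# A larger smoothing_parameter weights the roughness penalty more heavily,
# trading fidelity to the samples for smoothness of the estimate.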