Merge branch 'develop' into feature/irregular_operations

GAA-UAM · Mar 7, 2024 · a875bd7 · a875bd7
2 parents cc7d6be + adb80fe
commit a875bd7
Show file tree

Hide file tree

Showing 29 changed files with 402 additions and 428 deletions.
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -11,7 +11,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: ['3.9', '3.10']
+        python-version: ['3.10', '3.11']
 
     steps:
     - uses: actions/checkout@v2

diff --git a/docs/_static/switcher.json b/docs/_static/switcher.json
@@ -5,7 +5,7 @@
         "url": "https://fda.readthedocs.io/en/latest/"
     },
     {
-    	"name": "0.9 (stable)",
+    	"name": "0.9.1 (stable)",
         "version": "stable",
         "url": "https://fda.readthedocs.io/en/stable/",
         "preferred": true

diff --git a/pyproject.toml b/pyproject.toml
@@ -2,7 +2,7 @@
 name = "scikit-fda"
 description = "Functional Data Analysis Python package."
 readme = "README.rst"
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 license = {file = "LICENSE.txt"}
 keywords = [
 	"functional data",
@@ -20,7 +20,9 @@ classifiers = [
     "Natural Language :: English",
     "Operating System :: OS Independent",
     "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
     "Topic :: Scientific/Engineering :: Mathematics",
     "Topic :: Software Development :: Libraries :: Python Modules",
     "Typing :: Typed",
@@ -38,9 +40,9 @@ dependencies = [
 	"numpy>=1.16",
 	"pandas>=1.0",
 	"rdata",
-	"scikit-datasets[cran]>=0.1.24",
+	"scikit-datasets[cran]>=0.2.2",
 	"scikit-learn>=0.20",
-	"scipy>=1.3.0",
+	"scipy>=1.6.0",
 	"typing-extensions",
 ]
 
@@ -62,7 +64,6 @@ test = [
   "pytest",
   "pytest-env",
   "pytest-subtests",
-  "scipy<1.11.0",
 ]
 
 [project.urls]

diff --git a/readthedocs.yml b/readthedocs.yml
@@ -8,7 +8,7 @@ version: 2
 build:
   os: ubuntu-22.04
   tools:
-    python: "3.9"
+    python: "3.11"
 
 # Build documentation in the docs/ directory with Sphinx
 sphinx:

diff --git a/skfda/__init__.py b/skfda/__init__.py
@@ -32,4 +32,4 @@
         concatenate as concatenate,
     )
 
-__version__ = "0.9.1.dev1"
+__version__ = "0.9.2.dev0"
diff --git a/skfda/_utils/_neighbors_base.py b/skfda/_utils/_neighbors_base.py
@@ -227,30 +227,29 @@ def kneighbors(
                 Indices of the nearest points in the population matrix.
 
         Examples:
-            Firstly, we will create a toy dataset.
+            Firstly, we will create a toy dataset.
 
-            >>> from skfda.datasets import make_sinusoidal_process
-            >>> fd1 = make_sinusoidal_process(phase_std=.25, random_state=0)
-            >>> fd2 = make_sinusoidal_process(phase_mean=1.8, error_std=0.,
-            ...                               phase_std=.25, random_state=0)
-            >>> fd = fd1.concatenate(fd2)
+            >>> from skfda.datasets import make_gaussian_process
+            >>> X = make_gaussian_process(
+            ...    n_samples=30,
+            ...    random_state=0,
+            ... )
 
             We will fit a Nearest Neighbors estimator
 
             >>> from skfda.ml.clustering import NearestNeighbors
-            >>> neigh = NearestNeighbors()
-            >>> neigh.fit(fd)
+            >>> neigh = NearestNeighbors(n_neighbors=3)
+            >>> neigh.fit(X)
             NearestNeighbors(...)
 
             Now we can query the k-nearest neighbors.
 
-            >>> distances, index = neigh.kneighbors(fd[:2])
-            >>> index # Index of k-neighbors of samples 0 and 1
-            array([[ 0,  7,  6, 11,  2],...)
+            >>> distances, index = neigh.kneighbors(X)
+            >>> index
+            array([[ 0, 8, 1], ...)
 
-            >>> distances.round(2) # Distances to k-neighbors
-            array([[ 0.  ,  0.28,  0.29,  0.29,  0.3 ],
-                   [ 0.  ,  0.27,  0.28,  0.29,  0.3 ]])
+            >>> distances.round(2)
+            array([[ 0.  ,  0.41,  0.58], ...])
 
         Notes:
             This method wraps the corresponding sklearn routine in the
@@ -295,30 +294,28 @@ def kneighbors_graph(
             A[i, j] is assigned the weight of edge that connects i to j.
 
         Examples:
-            Firstly, we will create a toy dataset.
+            Firstly, we will create a toy dataset.
 
-            >>> from skfda.datasets import make_sinusoidal_process
-            >>> fd1 = make_sinusoidal_process(phase_std=.25, random_state=0)
-            >>> fd2 = make_sinusoidal_process(phase_mean=1.8, error_std=0.,
-            ...                               phase_std=.25, random_state=0)
-            >>> fd = fd1.concatenate(fd2)
+            >>> from skfda.datasets import make_gaussian_process
+            >>> X = make_gaussian_process(
+            ...    n_samples=30,
+            ...    random_state=0,
+            ... )
 
             We will fit a Nearest Neighbors estimator.
 
             >>> from skfda.ml.clustering import NearestNeighbors
-            >>> neigh = NearestNeighbors()
-            >>> neigh.fit(fd)
+            >>> neigh = NearestNeighbors(n_neighbors=3)
+            >>> neigh.fit(X)
             NearestNeighbors(...)
 
             Now we can obtain the graph of k-neighbors of a sample.
 
-            >>> graph = neigh.kneighbors_graph(fd[0])
+            >>> graph = neigh.kneighbors_graph(X[0])
             >>> print(graph)
-              (0, 0)	1.0
-              (0, 7)	1.0
-              (0, 6)	1.0
-              (0, 11)	1.0
-              (0, 2)	1.0
+                (0, 0)    1.0
+                (0, 8)    1.0
+                (0, 1)    1.0
 
         Notes:
             This method wraps the corresponding sklearn routine in the
@@ -392,29 +389,29 @@ def radius_neighbors(
                 within a ball of size ``radius`` around the query points.
 
         Examples:
-            Firstly, we will create a toy dataset.
+            Firstly, we will create a toy dataset.
 
-            >>> from skfda.datasets import make_sinusoidal_process
-            >>> fd1 = make_sinusoidal_process(phase_std=.25, random_state=0)
-            >>> fd2 = make_sinusoidal_process(phase_mean=1.8, error_std=0.,
-            ...                               phase_std=.25, random_state=0)
-            >>> fd = fd1.concatenate(fd2)
+            >>> from skfda.datasets import make_gaussian_process
+            >>> X = make_gaussian_process(
+            ...    n_samples=30,
+            ...    random_state=0,
+            ... )
 
             We will fit a Nearest Neighbors estimator.
 
             >>> from skfda.ml.clustering import NearestNeighbors
-            >>> neigh = NearestNeighbors(radius=.3)
-            >>> neigh.fit(fd)
-            NearestNeighbors(...radius=0.3...)
+            >>> neigh = NearestNeighbors(radius=0.7)
+            >>> neigh.fit(X)
+            NearestNeighbors(...)
 
-            Now we can query the neighbors in the radius.
+            Now we can query the neighbors in a given radius.
 
-            >>> distances, index = neigh.radius_neighbors(fd[:2])
-            >>> index[0] # Neighbors of sample 0
-            array([ 0,  2,  6,  7, 11]...)
+            >>> distances, index = neigh.radius_neighbors(X)
+            >>> index[0]
+            array([ 0,  1,  8, 18]...)
 
-            >>> distances[0].round(2) # Distances to neighbors of the sample 0
-            array([ 0.  ,  0.3 ,  0.29,  0.28,  0.29])
+            >>> distances[0].round(2)
+            array([ 0.  ,  0.58,  0.41,  0.68])
 
 
         See also:

diff --git a/skfda/datasets/_real_datasets.py b/skfda/datasets/_real_datasets.py
@@ -5,27 +5,17 @@
 
 import numpy as np
 import pandas as pd
+import rdata
 from pandas import DataFrame, Series
+from skdatasets.repositories import cran, ucr
 from sklearn.utils import Bunch
 from typing_extensions import Literal
 
-import rdata
-
 from ..representation import FDataGrid
 from ..representation.irregular import FDataIrregular
 from ..typing._numpy import NDArrayFloat, NDArrayInt
 
 
-def _get_skdatasets_repositories() -> Any:
-    import skdatasets
-
-    repositories = getattr(skdatasets, "repositories", None)
-    if repositories is None:
-        repositories = skdatasets
-
-    return repositories
-
-
 def fdata_constructor(
     obj: Any,
     attrs: Mapping[str | bytes, Any],
@@ -117,16 +107,14 @@ def fetch_cran(
         types.
 
     """
-    repositories = _get_skdatasets_repositories()
-
     if converter is None:
         converter = rdata.conversion.SimpleConverter({
             **rdata.conversion.DEFAULT_CLASS_MAP,
             "fdata": fdata_constructor,
             "functional": functional_constructor,
         })
 
-    return repositories.cran.fetch_dataset(
+    return cran.fetch_dataset(
         name,
         package_name,
         converter=converter,
@@ -200,9 +188,7 @@ def fetch_ucr(
         .. footbibliography::
 
     """
-    repositories = _get_skdatasets_repositories()
-
-    dataset = repositories.ucr.fetch(name, **kwargs)
+    dataset = ucr.fetch(name, **kwargs)
 
     dataset['data'] = _ucr_to_fdatagrid(
         name=dataset['name'],

diff --git a/skfda/datasets/_samples_generators.py b/skfda/datasets/_samples_generators.py
@@ -481,7 +481,7 @@ def make_random_warping(
     np.square(v, out=v)
 
     # Creation of FDataGrid in the corresponding domain
-    data_matrix = scipy.integrate.cumtrapz(
+    data_matrix = scipy.integrate.cumulative_trapezoid(
         v,
         dx=1 / n_features,
         initial=0,

diff --git a/skfda/exploratory/depth/_depth.py b/skfda/exploratory/depth/_depth.py
@@ -43,10 +43,10 @@ class IntegratedDepth(Depth[FDataGrid]):
         ...                [-1, -1, -0.5, 1, 1, 0.5],
         ...                [-0.5, -0.5, -0.5, -1, -1, -1]]
         >>> grid_points = [0, 2, 4, 6, 8, 10]
-        >>> fd = skfda.FDataGrid(data_matrix, grid_points)
+        >>> X = skfda.FDataGrid(data_matrix, grid_points)
         >>> depth = skfda.exploratory.depth.IntegratedDepth()
-        >>> depth(fd)
-        array([ 0.5  ,  0.75 ,  0.925,  0.875])
+        >>> depth(X).round(1)
+        array([ 0.5,  0.8,  0.9,  0.9])
 
     References:
         Fraiman, R., & Muniz, G. (2001). Trimmed means for functional
@@ -121,11 +121,11 @@ class ModifiedBandDepth(IntegratedDepth):
         ...                [-1, -1, -0.5, 1, 1, 0.5],
         ...                [-0.5, -0.5, -0.5, -1, -1, -1]]
         >>> grid_points = [0, 2, 4, 6, 8, 10]
-        >>> fd = skfda.FDataGrid(data_matrix, grid_points)
+        >>> X = skfda.FDataGrid(data_matrix, grid_points)
         >>> depth = skfda.exploratory.depth.ModifiedBandDepth()
-        >>> values = depth(fd)
-        >>> values.round(2)
-        array([ 0.5 ,  0.83,  0.73,  0.67])
+        >>> values = depth(X)
+        >>> values.round(1)
+        array([ 0.5,  0.8,  0.7,  0.7])
 
     References:
         López-Pintado, S., & Romo, J. (2009). On the Concept of
@@ -228,10 +228,10 @@ class DistanceBasedDepth(Depth[FDataGrid], BaseEstimator):
         ...                [-1, -1, -0.5, 1, 1, 0.5],
         ...                [-0.5, -0.5, -0.5, -1, -1, -1]]
         >>> grid_points = [0, 2, 4, 6, 8, 10]
-        >>> fd = skfda.FDataGrid(data_matrix, grid_points)
+        >>> X = skfda.FDataGrid(data_matrix, grid_points)
         >>> depth = DistanceBasedDepth(MahalanobisDistance(2))
-        >>> depth(fd)
-        array([ 0.41897777,  0.8058132 ,  0.31097392,  0.31723619])
+        >>> depth(X).round(1)
+        array([ 0.4,  0.8,  0.3,  0.3])
 
     References:
         .. footbibliography::

diff --git a/skfda/exploratory/outliers/_directional_outlyingness.py b/skfda/exploratory/outliers/_directional_outlyingness.py
@@ -107,50 +107,27 @@ def directional_outlyingness_stats(  # noqa: WPS218
 
     Example:
 
-        >>> data_matrix = [[1, 1, 2, 3, 2.5, 2],
-        ...                [0.5, 0.5, 1, 2, 1.5, 1],
-        ...                [-1, -1, -0.5, 1, 1, 0.5],
-        ...                [-0.5, -0.5, -0.5, -1, -1, -1]]
-        >>> grid_points = [0, 2, 4, 6, 8, 10]
-        >>> fd = FDataGrid(data_matrix, grid_points)
-        >>> stats = directional_outlyingness_stats(fd)
-        >>> stats.directional_outlyingness
-        array([[[ 1.33333333],
-                [ 1.33333333],
-                [ 2.33333333],
-                [ 1.5       ],
-                [ 1.66666667],
-                [ 1.66666667]],
-               [[ 0.        ],
-                [ 0.        ],
-                [ 0.        ],
-                [ 0.        ],
-                [ 0.        ],
-                [ 0.        ]],
-               [[-1.33333333],
-                [-1.33333333],
-                [-1.        ],
-                [-0.5       ],
-                [-0.33333333],
-                [-0.33333333]],
-               [[-0.66666667],
-                [-0.66666667],
-                [-1.        ],
-                [-2.5       ],
-                [-3.        ],
-                [-2.33333333]]])
-
-    >>> stats.functional_directional_outlyingness
-    array([ 6.58864198,  6.4608642 ,  6.63753086,  7.40481481])
-
-    >>> stats.mean_directional_outlyingness
-    array([[ 1.66666667],
-           [ 0.        ],
-           [-0.8       ],
-           [-1.74444444]])
-
-    >>> stats.variation_directional_outlyingness
-    array([ 0.12777778,  0.        ,  0.17666667,  0.94395062])
+        >>> import skfda
+        >>> X = skfda.datasets.make_gaussian_process(
+        ...     n_samples=4,
+        ...     n_features=1000,
+        ...     random_state=1,
+        ... )
+        >>> stats = directional_outlyingness_stats(X)
+        >>> stats.directional_outlyingness.shape
+        (4, 1000, 1)
+
+    >>> stats.functional_directional_outlyingness.round()
+    array([ 11.,   8.,   6.,   9.])
+
+    >>> stats.mean_directional_outlyingness.round(1)
+    array([[-2. ],
+           [ 1.4],
+           [-0.1],
+           [ 0.1]])
+
+    >>> stats.variation_directional_outlyingness.round()
+    array([ 5.,  2.,  0.,  3.])
 
     References:
         Dai, Wenlin, and Genton, Marc G. "Directional outlyingness for