Skip to content

Commit

Permalink
Merge branch 'develop' into feature/irregular_operations
Browse files Browse the repository at this point in the history
  • Loading branch information
vnmabus authored Mar 7, 2024
2 parents cc7d6be + adb80fe commit a875bd7
Show file tree
Hide file tree
Showing 29 changed files with 402 additions and 428 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ['3.9', '3.10']
python-version: ['3.10', '3.11']

steps:
- uses: actions/checkout@v2
Expand Down
2 changes: 1 addition & 1 deletion docs/_static/switcher.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"url": "https://fda.readthedocs.io/en/latest/"
},
{
"name": "0.9 (stable)",
"name": "0.9.1 (stable)",
"version": "stable",
"url": "https://fda.readthedocs.io/en/stable/",
"preferred": true
Expand Down
11 changes: 6 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name = "scikit-fda"
description = "Functional Data Analysis Python package."
readme = "README.rst"
requires-python = ">=3.9"
requires-python = ">=3.10"
license = {file = "LICENSE.txt"}
keywords = [
"functional data",
Expand All @@ -20,7 +20,9 @@ classifiers = [
"Natural Language :: English",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Topic :: Scientific/Engineering :: Mathematics",
"Topic :: Software Development :: Libraries :: Python Modules",
"Typing :: Typed",
Expand All @@ -38,9 +40,9 @@ dependencies = [
"numpy>=1.16",
"pandas>=1.0",
"rdata",
"scikit-datasets[cran]>=0.1.24",
"scikit-datasets[cran]>=0.2.2",
"scikit-learn>=0.20",
"scipy>=1.3.0",
"scipy>=1.6.0",
"typing-extensions",
]

Expand All @@ -62,7 +64,6 @@ test = [
"pytest",
"pytest-env",
"pytest-subtests",
"scipy<1.11.0",
]

[project.urls]
Expand Down
2 changes: 1 addition & 1 deletion readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ version: 2
build:
os: ubuntu-22.04
tools:
python: "3.9"
python: "3.11"

# Build documentation in the docs/ directory with Sphinx
sphinx:
Expand Down
2 changes: 1 addition & 1 deletion skfda/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,4 @@
concatenate as concatenate,
)

__version__ = "0.9.1.dev1"
__version__ = "0.9.2.dev0"
83 changes: 40 additions & 43 deletions skfda/_utils/_neighbors_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,30 +227,29 @@ def kneighbors(
Indices of the nearest points in the population matrix.
Examples:
Firstly, we will create a toy dataset.
Firstly, we will create a toy dataset.
>>> from skfda.datasets import make_sinusoidal_process
>>> fd1 = make_sinusoidal_process(phase_std=.25, random_state=0)
>>> fd2 = make_sinusoidal_process(phase_mean=1.8, error_std=0.,
... phase_std=.25, random_state=0)
>>> fd = fd1.concatenate(fd2)
>>> from skfda.datasets import make_gaussian_process
>>> X = make_gaussian_process(
... n_samples=30,
... random_state=0,
... )
We will fit a Nearest Neighbors estimator.
>>> from skfda.ml.clustering import NearestNeighbors
>>> neigh = NearestNeighbors()
>>> neigh.fit(fd)
>>> neigh = NearestNeighbors(n_neighbors=3)
>>> neigh.fit(X)
NearestNeighbors(...)
Now we can query the k-nearest neighbors.
>>> distances, index = neigh.kneighbors(fd[:2])
>>> index # Index of k-neighbors of samples 0 and 1
array([[ 0, 7, 6, 11, 2],...)
>>> distances, index = neigh.kneighbors(X)
>>> index
array([[ 0, 8, 1], ...)
>>> distances.round(2) # Distances to k-neighbors
array([[ 0. , 0.28, 0.29, 0.29, 0.3 ],
[ 0. , 0.27, 0.28, 0.29, 0.3 ]])
>>> distances.round(2)
array([[ 0. , 0.41, 0.58], ...])
Notes:
This method wraps the corresponding sklearn routine in the
Expand Down Expand Up @@ -295,30 +294,28 @@ def kneighbors_graph(
A[i, j] is assigned the weight of edge that connects i to j.
Examples:
Firstly, we will create a toy dataset.
Firstly, we will create a toy dataset.
>>> from skfda.datasets import make_sinusoidal_process
>>> fd1 = make_sinusoidal_process(phase_std=.25, random_state=0)
>>> fd2 = make_sinusoidal_process(phase_mean=1.8, error_std=0.,
... phase_std=.25, random_state=0)
>>> fd = fd1.concatenate(fd2)
>>> from skfda.datasets import make_gaussian_process
>>> X = make_gaussian_process(
... n_samples=30,
... random_state=0,
... )
We will fit a Nearest Neighbors estimator.
>>> from skfda.ml.clustering import NearestNeighbors
>>> neigh = NearestNeighbors()
>>> neigh.fit(fd)
>>> neigh = NearestNeighbors(n_neighbors=3)
>>> neigh.fit(X)
NearestNeighbors(...)
Now we can obtain the graph of k-neighbors of a sample.
>>> graph = neigh.kneighbors_graph(fd[0])
>>> graph = neigh.kneighbors_graph(X[0])
>>> print(graph)
(0, 0) 1.0
(0, 7) 1.0
(0, 6) 1.0
(0, 11) 1.0
(0, 2) 1.0
(0, 0) 1.0
(0, 8) 1.0
(0, 1) 1.0
Notes:
This method wraps the corresponding sklearn routine in the
Expand Down Expand Up @@ -392,29 +389,29 @@ def radius_neighbors(
within a ball of size ``radius`` around the query points.
Examples:
Firstly, we will create a toy dataset.
Firstly, we will create a toy dataset.
>>> from skfda.datasets import make_sinusoidal_process
>>> fd1 = make_sinusoidal_process(phase_std=.25, random_state=0)
>>> fd2 = make_sinusoidal_process(phase_mean=1.8, error_std=0.,
... phase_std=.25, random_state=0)
>>> fd = fd1.concatenate(fd2)
>>> from skfda.datasets import make_gaussian_process
>>> X = make_gaussian_process(
... n_samples=30,
... random_state=0,
... )
We will fit a Nearest Neighbors estimator.
>>> from skfda.ml.clustering import NearestNeighbors
>>> neigh = NearestNeighbors(radius=.3)
>>> neigh.fit(fd)
NearestNeighbors(...radius=0.3...)
>>> neigh = NearestNeighbors(radius=0.7)
>>> neigh.fit(X)
NearestNeighbors(...)
Now we can query the neighbors in the radius.
Now we can query the neighbors in a given radius.
>>> distances, index = neigh.radius_neighbors(fd[:2])
>>> index[0] # Neighbors of sample 0
array([ 0, 2, 6, 7, 11]...)
>>> distances, index = neigh.radius_neighbors(X)
>>> index[0]
array([ 0, 1, 8, 18]...)
>>> distances[0].round(2) # Distances to neighbors of the sample 0
array([ 0. , 0.3 , 0.29, 0.28, 0.29])
>>> distances[0].round(2)
array([ 0. , 0.58, 0.41, 0.68])
See also:
Expand Down
22 changes: 4 additions & 18 deletions skfda/datasets/_real_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,17 @@

import numpy as np
import pandas as pd
import rdata
from pandas import DataFrame, Series
from skdatasets.repositories import cran, ucr
from sklearn.utils import Bunch
from typing_extensions import Literal

import rdata

from ..representation import FDataGrid
from ..representation.irregular import FDataIrregular
from ..typing._numpy import NDArrayFloat, NDArrayInt


def _get_skdatasets_repositories() -> Any:
    """
    Return the object that exposes the scikit-datasets repositories.

    ``skdatasets`` is imported lazily, inside the function body. If the
    package has a ``repositories`` attribute whose value is not ``None``,
    that attribute is returned; otherwise the ``skdatasets`` package
    itself is returned.

    Returns:
        ``skdatasets.repositories`` when it is available, else the
        ``skdatasets`` package.

    """
    import skdatasets

    # Fall back to the top-level package when the attribute is missing
    # or explicitly set to None.
    namespace = getattr(skdatasets, "repositories", None)
    return skdatasets if namespace is None else namespace


def fdata_constructor(
obj: Any,
attrs: Mapping[str | bytes, Any],
Expand Down Expand Up @@ -117,16 +107,14 @@ def fetch_cran(
types.
"""
repositories = _get_skdatasets_repositories()

if converter is None:
converter = rdata.conversion.SimpleConverter({
**rdata.conversion.DEFAULT_CLASS_MAP,
"fdata": fdata_constructor,
"functional": functional_constructor,
})

return repositories.cran.fetch_dataset(
return cran.fetch_dataset(
name,
package_name,
converter=converter,
Expand Down Expand Up @@ -200,9 +188,7 @@ def fetch_ucr(
.. footbibliography::
"""
repositories = _get_skdatasets_repositories()

dataset = repositories.ucr.fetch(name, **kwargs)
dataset = ucr.fetch(name, **kwargs)

dataset['data'] = _ucr_to_fdatagrid(
name=dataset['name'],
Expand Down
2 changes: 1 addition & 1 deletion skfda/datasets/_samples_generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,7 @@ def make_random_warping(
np.square(v, out=v)

# Creation of FDataGrid in the corresponding domain
data_matrix = scipy.integrate.cumtrapz(
data_matrix = scipy.integrate.cumulative_trapezoid(
v,
dx=1 / n_features,
initial=0,
Expand Down
20 changes: 10 additions & 10 deletions skfda/exploratory/depth/_depth.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ class IntegratedDepth(Depth[FDataGrid]):
... [-1, -1, -0.5, 1, 1, 0.5],
... [-0.5, -0.5, -0.5, -1, -1, -1]]
>>> grid_points = [0, 2, 4, 6, 8, 10]
>>> fd = skfda.FDataGrid(data_matrix, grid_points)
>>> X = skfda.FDataGrid(data_matrix, grid_points)
>>> depth = skfda.exploratory.depth.IntegratedDepth()
>>> depth(fd)
array([ 0.5 , 0.75 , 0.925, 0.875])
>>> depth(X).round(1)
array([ 0.5, 0.8, 0.9, 0.9])
References:
Fraiman, R., & Muniz, G. (2001). Trimmed means for functional
Expand Down Expand Up @@ -121,11 +121,11 @@ class ModifiedBandDepth(IntegratedDepth):
... [-1, -1, -0.5, 1, 1, 0.5],
... [-0.5, -0.5, -0.5, -1, -1, -1]]
>>> grid_points = [0, 2, 4, 6, 8, 10]
>>> fd = skfda.FDataGrid(data_matrix, grid_points)
>>> X = skfda.FDataGrid(data_matrix, grid_points)
>>> depth = skfda.exploratory.depth.ModifiedBandDepth()
>>> values = depth(fd)
>>> values.round(2)
array([ 0.5 , 0.83, 0.73, 0.67])
>>> values = depth(X)
>>> values.round(1)
array([ 0.5, 0.8, 0.7, 0.7])
References:
López-Pintado, S., & Romo, J. (2009). On the Concept of
Expand Down Expand Up @@ -228,10 +228,10 @@ class DistanceBasedDepth(Depth[FDataGrid], BaseEstimator):
... [-1, -1, -0.5, 1, 1, 0.5],
... [-0.5, -0.5, -0.5, -1, -1, -1]]
>>> grid_points = [0, 2, 4, 6, 8, 10]
>>> fd = skfda.FDataGrid(data_matrix, grid_points)
>>> X = skfda.FDataGrid(data_matrix, grid_points)
>>> depth = DistanceBasedDepth(MahalanobisDistance(2))
>>> depth(fd)
array([ 0.41897777, 0.8058132 , 0.31097392, 0.31723619])
>>> depth(X).round(1)
array([ 0.4, 0.8, 0.3, 0.3])
References:
.. footbibliography::
Expand Down
65 changes: 21 additions & 44 deletions skfda/exploratory/outliers/_directional_outlyingness.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,50 +107,27 @@ def directional_outlyingness_stats( # noqa: WPS218
Example:
>>> data_matrix = [[1, 1, 2, 3, 2.5, 2],
... [0.5, 0.5, 1, 2, 1.5, 1],
... [-1, -1, -0.5, 1, 1, 0.5],
... [-0.5, -0.5, -0.5, -1, -1, -1]]
>>> grid_points = [0, 2, 4, 6, 8, 10]
>>> fd = FDataGrid(data_matrix, grid_points)
>>> stats = directional_outlyingness_stats(fd)
>>> stats.directional_outlyingness
array([[[ 1.33333333],
[ 1.33333333],
[ 2.33333333],
[ 1.5 ],
[ 1.66666667],
[ 1.66666667]],
[[ 0. ],
[ 0. ],
[ 0. ],
[ 0. ],
[ 0. ],
[ 0. ]],
[[-1.33333333],
[-1.33333333],
[-1. ],
[-0.5 ],
[-0.33333333],
[-0.33333333]],
[[-0.66666667],
[-0.66666667],
[-1. ],
[-2.5 ],
[-3. ],
[-2.33333333]]])
>>> stats.functional_directional_outlyingness
array([ 6.58864198, 6.4608642 , 6.63753086, 7.40481481])
>>> stats.mean_directional_outlyingness
array([[ 1.66666667],
[ 0. ],
[-0.8 ],
[-1.74444444]])
>>> stats.variation_directional_outlyingness
array([ 0.12777778, 0. , 0.17666667, 0.94395062])
>>> import skfda
>>> X = skfda.datasets.make_gaussian_process(
... n_samples=4,
... n_features=1000,
... random_state=1,
... )
>>> stats = directional_outlyingness_stats(X)
>>> stats.directional_outlyingness.shape
(4, 1000, 1)
>>> stats.functional_directional_outlyingness.round()
array([ 11., 8., 6., 9.])
>>> stats.mean_directional_outlyingness.round(1)
array([[-2. ],
[ 1.4],
[-0.1],
[ 0.1]])
>>> stats.variation_directional_outlyingness.round()
array([ 5., 2., 0., 3.])
References:
Dai, Wenlin, and Genton, Marc G. "Directional outlyingness for
Expand Down
Loading

0 comments on commit a875bd7

Please sign in to comment.