From a3ab23bd8d26f5a7c90c651ba30fd394d3247eff Mon Sep 17 00:00:00 2001 From: cajchristian <114787994+cajchristian@users.noreply.github.com> Date: Thu, 6 Feb 2025 14:35:17 -0600 Subject: [PATCH 01/13] Fixed `_ndim_coords_from_arrays` import --- src/skmatter/sample_selection/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/skmatter/sample_selection/_base.py b/src/skmatter/sample_selection/_base.py index f5531d897..67d5f0472 100644 --- a/src/skmatter/sample_selection/_base.py +++ b/src/skmatter/sample_selection/_base.py @@ -4,7 +4,7 @@ import numpy as np from scipy.interpolate import LinearNDInterpolator, interp1d -from scipy.interpolate.interpnd import _ndim_coords_from_arrays +from scipy.interpolate._interpnd import _ndim_coords_from_arrays from scipy.spatial import ConvexHull from sklearn.utils.validation import check_array, check_is_fitted, check_X_y From 941a77f0d32eaaa917c5499ab1155f377b50e7ce Mon Sep 17 00:00:00 2001 From: cajchristian <114787994+cajchristian@users.noreply.github.com> Date: Fri, 7 Feb 2025 14:50:20 -0600 Subject: [PATCH 02/13] Adding `validate_data` calls and tags for CUR, FPS, PCovCUR, and PCovFPS --- src/skmatter/_selection.py | 62 +++++++++++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 15 deletions(-) diff --git a/src/skmatter/_selection.py b/src/skmatter/_selection.py index 6869a2235..c0fa189d9 100644 --- a/src/skmatter/_selection.py +++ b/src/skmatter/_selection.py @@ -83,8 +83,13 @@ from scipy.sparse.linalg import eigsh from sklearn.base import BaseEstimator, MetaEstimatorMixin from sklearn.feature_selection._base import SelectorMixin -from sklearn.utils import check_array, check_random_state, check_X_y, safe_mask -from sklearn.utils.validation import FLOAT_DTYPES, as_float_array, check_is_fitted +from sklearn.utils import check_random_state, safe_mask +from sklearn.utils.validation import ( + FLOAT_DTYPES, + as_float_array, + check_is_fitted, + validate_data, +) from .utils import ( X_orthogonalizer, @@ -157,11 +162,6 @@ def __init__( self.n_to_select = n_to_select self.score_threshold = score_threshold self.score_threshold_type = score_threshold_type - if self.score_threshold_type not in ["relative", "absolute"]: - raise ValueError( - "invalid score_threshold_type, expected one of 'relative' or 'absolute'" - ) - self.full = full self.progress_bar = progress_bar self.random_state = random_state @@ -184,6 +184,11 @@ def fit(self, X, y=None, warm_start=False): ------- self : object """ + if self.score_threshold_type not in ["relative", "absolute"]: + raise ValueError( + "invalid score_threshold_type, expected one of 'relative' or 'absolute'" + ) + if self.selection_type == "feature": self._axis = 1 elif self.selection_type == "sample": @@ -205,7 +210,7 @@ def fit(self, X, y=None, warm_start=False): if hasattr(self, "mixing") or y is not None: X, y = self._validate_data(X, y, **params) - X, y = check_X_y(X, y, multi_output=True) + X, y = validate_data(self, X, y, multi_output=True) if len(y.shape) == 1: # force y to have multi_output 2D format even when it's 1D, since @@ -214,7 +219,7 @@ def fit(self, X, y=None, warm_start=False): y = y.reshape((len(y), 1)) else: - X = check_array(X, **params) + X = validate_data(self, X, **params) if self.full and self.score_threshold is not None: raise ValueError( @@ -308,7 +313,7 @@ def transform(self, X, y=None): mask = self.get_support() - X = check_array(X) + X = validate_data(self, X, reset=False) if len(X.shape) == 1: if self._axis == 0: @@ -486,6 +491,11 @@ def 
_more_tags(self): "requires_y": False, } + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.target_tags.required = False + return tags + class _CUR(GreedySelector): """Transformer that performs Greedy Selection by choosing features @@ -560,6 +570,9 @@ def score(self, X, y=None): score : numpy.ndarray of (n_to_select_from_) :math:`\pi` importance for the given samples or features """ + + X, y = validate_data(self, X, y, reset=False) + return self.pi_ def _init_greedy_search(self, X, y, n_to_select): @@ -734,6 +747,9 @@ def score(self, X, y=None): score : numpy.ndarray of (n_to_select_from_) :math:`\pi` importance for the given samples or features """ + + X, y = validate_data(self, X, y, reset=False) + return self.pi_ def _init_greedy_search(self, X, y, n_to_select): @@ -927,6 +943,9 @@ def score(self, X, y=None): ------- hausdorff : Hausdorff distances """ + + X, y = validate_data(self, X, y, reset=False) + return self.hausdorff_ def get_distance(self): @@ -1048,11 +1067,6 @@ def __init__( full=False, random_state=0, ): - if mixing == 1.0: - raise ValueError( - "Mixing = 1.0 corresponds to traditional FPS." - "Please use the FPS class." - ) self.mixing = mixing self.initialize = initialize @@ -1067,6 +1081,16 @@ def __init__( random_state=random_state, ) + def fit(self, X, y=None, warm_start=False): + + if self.mixing == 1.0: + raise ValueError( + "Mixing = 1.0 corresponds to traditional FPS." + "Please use the FPS class." + ) + + return super().fit(X, y) + def score(self, X, y=None): """Returns the Hausdorff distances of all samples to previous selections. @@ -1083,6 +1107,9 @@ def score(self, X, y=None): ------- hausdorff : Hausdorff distances """ + + X, y = validate_data(self, X, y, reset=False) + return self.hausdorff_ def get_distance(self): @@ -1159,3 +1186,8 @@ def _more_tags(self): return { "requires_y": True, } + + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.target_tags.required = True + return tags From 42d11fb6810d7a506610cd59281368842eb72808 Mon Sep 17 00:00:00 2001 From: cajchristian <114787994+cajchristian@users.noreply.github.com> Date: Fri, 7 Feb 2025 15:07:42 -0600 Subject: [PATCH 03/13] Adding `validate_data` calls and updated tags to Ridge2FoldCV --- src/skmatter/linear_model/_ridge.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/skmatter/linear_model/_ridge.py b/src/skmatter/linear_model/_ridge.py index 6e4fcf1f3..57e5a21a8 100644 --- a/src/skmatter/linear_model/_ridge.py +++ b/src/skmatter/linear_model/_ridge.py @@ -1,13 +1,12 @@ import numpy as np from joblib import Parallel, delayed -from sklearn.base import BaseEstimator, MultiOutputMixin, RegressorMixin +from sklearn.base import RegressorMixin, MultiOutputMixin, BaseEstimator from sklearn.metrics import check_scoring from sklearn.model_selection import KFold, check_cv -from sklearn.utils import check_array -from sklearn.utils.validation import check_is_fitted +from sklearn.utils.validation import check_is_fitted, validate_data -class Ridge2FoldCV(BaseEstimator, MultiOutputMixin, RegressorMixin): +class Ridge2FoldCV(RegressorMixin, MultiOutputMixin, BaseEstimator): r"""Ridge regression with an efficient 2-fold cross-validation method using the SVD solver. @@ -20,7 +19,7 @@ class Ridge2FoldCV(BaseEstimator, MultiOutputMixin, RegressorMixin): while the alpha value is determined with a 2-fold cross-validation from a list of alpha values. 
It is more efficient version than doing 2-fold cross-validation naively The algorithmic trick is to reuse the matrices obtained by SVD for each - regularization paramater :param alpha: The 2-fold CV can be broken donw to + regularization paramater :param alpha: The 2-fold CV can be broken down to .. math:: @@ -136,6 +135,11 @@ def __init__( self.shuffle = shuffle self.n_jobs = n_jobs + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + tags.target_tags.single_output = False + return tags + def _more_tags(self): return {"multioutput_only": True} @@ -195,7 +199,7 @@ def predict(self, X): Training data, where n_samples is the number of samples and n_features is the number of features. """ - X = check_array(X) + X = validate_data(self, X, reset=False) check_is_fitted(self, ["coef_"]) From 29c5a9dc33154dce962e8432c9e6d1eea826f586 Mon Sep 17 00:00:00 2001 From: cajchristian <114787994+cajchristian@users.noreply.github.com> Date: Fri, 7 Feb 2025 15:59:08 -0600 Subject: [PATCH 04/13] Added `validate_data` calls to PCovR --- src/skmatter/decomposition/_pcovr.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/skmatter/decomposition/_pcovr.py b/src/skmatter/decomposition/_pcovr.py index ddaf3bebd..5adfa3f3a 100644 --- a/src/skmatter/decomposition/_pcovr.py +++ b/src/skmatter/decomposition/_pcovr.py @@ -10,10 +10,10 @@ from sklearn.decomposition._pca import _infer_dimension from sklearn.linear_model import LinearRegression, Ridge, RidgeCV from sklearn.linear_model._base import LinearModel -from sklearn.utils import check_array, check_random_state +from sklearn.utils import check_random_state from sklearn.utils._arpack import _init_arpack_v0 from sklearn.utils.extmath import randomized_svd, stable_cumsum, svd_flip -from sklearn.utils.validation import check_is_fitted, check_X_y +from sklearn.utils.validation import check_is_fitted, validate_data from ..utils import check_lr_fit, pcovr_covariance, pcovr_kernel @@ -221,7 +221,8 @@ def fit(self, X, Y, W=None): Regression weights, optional when regressor=`precomputed`. 
If not passed, it is assumed that `W = np.linalg.lstsq(X, Y, self.tol)[0]` """ - X, Y = check_X_y(X, Y, y_numeric=True, multi_output=True) + + X, Y = validate_data(self, X, Y, y_numeric=True, multi_output=True) # saved for inverse transformations from the latent space, # should be zero in the case that the features have been properly centered @@ -582,10 +583,10 @@ def predict(self, X=None, T=None): raise ValueError("Either X or T must be supplied.") if X is not None: - X = check_array(X) + X = validate_data(self, X, reset=False) return X @ self.pxy_ else: - T = check_array(T) + T = validate_data(self, T, reset=False) return T @ self.pty_ def transform(self, X=None): @@ -604,7 +605,7 @@ def transform(self, X=None): return super().transform(X) - def score(self, X, Y, T=None): + def score(self, X, y, T=None): r"""Return the (negative) total reconstruction error for X and Y, defined as: @@ -635,13 +636,16 @@ def score(self, X, Y, T=None): Negative sum of the loss in reconstructing X from the latent-space projection T and the loss in predicting Y from the latent-space projection T """ + + X, y = validate_data(self, X, y, reset=False) + if T is None: T = self.transform(X) - x = self.inverse_transform(T) - y = self.predict(T=T) + Xrec = self.inverse_transform(T) + ypred = self.predict(T=T) return -( - np.linalg.norm(X - x) ** 2.0 / np.linalg.norm(X) ** 2.0 - + np.linalg.norm(Y - y) ** 2.0 / np.linalg.norm(Y) ** 2.0 + np.linalg.norm(X - Xrec) ** 2.0 / np.linalg.norm(X) ** 2.0 + + np.linalg.norm(y - ypred) ** 2.0 / np.linalg.norm(y) ** 2.0 ) From d38d375c0c42f45750caeeae2518a1a1e2f56996 Mon Sep 17 00:00:00 2001 From: cajchristian <114787994+cajchristian@users.noreply.github.com> Date: Fri, 7 Feb 2025 16:16:06 -0600 Subject: [PATCH 05/13] Added `validate_data` calls to KPCovR --- src/skmatter/decomposition/_kernel_pcovr.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/skmatter/decomposition/_kernel_pcovr.py b/src/skmatter/decomposition/_kernel_pcovr.py index 84a9439e1..65fe39a3e 100644 --- a/src/skmatter/decomposition/_kernel_pcovr.py +++ b/src/skmatter/decomposition/_kernel_pcovr.py @@ -9,10 +9,10 @@ from sklearn.kernel_ridge import KernelRidge from sklearn.linear_model._base import LinearModel from sklearn.metrics.pairwise import pairwise_kernels -from sklearn.utils import check_array, check_random_state +from sklearn.utils import check_random_state from sklearn.utils._arpack import _init_arpack_v0 from sklearn.utils.extmath import randomized_svd, stable_cumsum, svd_flip -from sklearn.utils.validation import check_is_fitted, check_X_y +from sklearn.utils.validation import check_is_fitted, validate_data from ..preprocessing import KernelNormalizer from ..utils import check_krr_fit, pcovr_kernel @@ -270,7 +270,7 @@ def fit(self, X, Y, W=None): ): raise ValueError("Regressor must be an instance of `KernelRidge`") - X, Y = check_X_y(X, Y, y_numeric=True, multi_output=True) + X, Y = validate_data(self, X, Y, y_numeric=True, multi_output=True) self.X_fit_ = X.copy() if self.n_components is None: @@ -387,7 +387,7 @@ def predict(self, X=None): """Predicts the property values""" check_is_fitted(self, ["pky_", "pty_"]) - X = check_array(X) + X = validate_data(self, X, reset=False) K = self._get_kernel(X, self.X_fit_) if self.center: K = self.centerer_.transform(K) @@ -408,7 +408,7 @@ def transform(self, X): """ check_is_fitted(self, ["pkt_", "X_fit_"]) - X = check_array(X) + X = validate_data(self, X, reset=False) K = self._get_kernel(X, self.X_fit_) if 
self.center: @@ -440,7 +440,7 @@ def inverse_transform(self, T): """ return T @ self.ptx_ - def score(self, X, Y): + def score(self, X, y): r"""Computes the (negative) loss values for KernelPCovR on the given predictor and response variables. The loss in :math:`\mathbf{K}`, as explained in [Helfrecht2020]_ does not correspond to a traditional Gram loss @@ -474,7 +474,7 @@ def score(self, X, Y): """ check_is_fitted(self, ["pkt_", "X_fit_"]) - X = check_array(X) + X, y = validate_data(self, X, y, reset=False) K_NN = self._get_kernel(self.X_fit_, self.X_fit_) K_VN = self._get_kernel(X, self.X_fit_) @@ -485,8 +485,8 @@ def score(self, X, Y): K_VN = self.centerer_.transform(K_VN) K_VV = self.centerer_.transform(K_VV) - y = K_VN @ self.pky_ - Lkrr = np.linalg.norm(Y - y) ** 2 / np.linalg.norm(Y) ** 2 + ypred = K_VN @ self.pky_ + Lkrr = np.linalg.norm(y - ypred) ** 2 / np.linalg.norm(y) ** 2 t_n = K_NN @ self.pkt_ t_v = K_VN @ self.pkt_ From 9ae1cc501b998ad1a6cf1d3d5b4167b583326a04 Mon Sep 17 00:00:00 2001 From: cajchristian <114787994+cajchristian@users.noreply.github.com> Date: Fri, 7 Feb 2025 16:32:08 -0600 Subject: [PATCH 06/13] Fixing linting --- src/skmatter/_selection.py | 4 ---- src/skmatter/decomposition/_pcovr.py | 2 -- src/skmatter/linear_model/_ridge.py | 2 +- 3 files changed, 1 insertion(+), 7 deletions(-) diff --git a/src/skmatter/_selection.py b/src/skmatter/_selection.py index c0fa189d9..50e55cff3 100644 --- a/src/skmatter/_selection.py +++ b/src/skmatter/_selection.py @@ -570,7 +570,6 @@ def score(self, X, y=None): score : numpy.ndarray of (n_to_select_from_) :math:`\pi` importance for the given samples or features """ - X, y = validate_data(self, X, y, reset=False) return self.pi_ @@ -747,7 +746,6 @@ def score(self, X, y=None): score : numpy.ndarray of (n_to_select_from_) :math:`\pi` importance for the given samples or features """ - X, y = validate_data(self, X, y, reset=False) return self.pi_ @@ -943,7 +941,6 @@ def score(self, X, y=None): ------- hausdorff : Hausdorff distances """ - X, y = validate_data(self, X, y, reset=False) return self.hausdorff_ @@ -1107,7 +1104,6 @@ def score(self, X, y=None): ------- hausdorff : Hausdorff distances """ - X, y = validate_data(self, X, y, reset=False) return self.hausdorff_ diff --git a/src/skmatter/decomposition/_pcovr.py b/src/skmatter/decomposition/_pcovr.py index 5adfa3f3a..8cdd24680 100644 --- a/src/skmatter/decomposition/_pcovr.py +++ b/src/skmatter/decomposition/_pcovr.py @@ -221,7 +221,6 @@ def fit(self, X, Y, W=None): Regression weights, optional when regressor=`precomputed`. 
If not passed, it is assumed that `W = np.linalg.lstsq(X, Y, self.tol)[0]` """ - X, Y = validate_data(self, X, Y, y_numeric=True, multi_output=True) # saved for inverse transformations from the latent space, @@ -636,7 +635,6 @@ def score(self, X, y, T=None): Negative sum of the loss in reconstructing X from the latent-space projection T and the loss in predicting Y from the latent-space projection T """ - X, y = validate_data(self, X, y, reset=False) if T is None: diff --git a/src/skmatter/linear_model/_ridge.py b/src/skmatter/linear_model/_ridge.py index 57e5a21a8..9dd5e1678 100644 --- a/src/skmatter/linear_model/_ridge.py +++ b/src/skmatter/linear_model/_ridge.py @@ -1,6 +1,6 @@ import numpy as np from joblib import Parallel, delayed -from sklearn.base import RegressorMixin, MultiOutputMixin, BaseEstimator +from sklearn.base import BaseEstimator, MultiOutputMixin, RegressorMixin from sklearn.metrics import check_scoring from sklearn.model_selection import KFold, check_cv from sklearn.utils.validation import check_is_fitted, validate_data From ad9e59c916136a0f6f8a7a4a872db2ea4cc1c500 Mon Sep 17 00:00:00 2001 From: Philip Loche Date: Tue, 28 Jan 2025 16:52:28 +0100 Subject: [PATCH 07/13] Fix rendering issues and depencies --- CHANGELOG | 1 + pyproject.toml | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 46008bb47..7d9c0e581 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -13,6 +13,7 @@ The rules for CHANGELOG file: 0.3.0 (XXXX/XX/XX) ------------------ +- Fixed moved function import from scipy and bump scipy dependency to 1.15.0 (#236) - Fix rendering issues for `SparseKDE` and `QuickShift` (#236) - Updating ``FPS`` to allow a numpy array of ints as an initialize parameter (#145) - Supported Python versions are now ranging from 3.9 - 3.12. 
diff --git a/pyproject.toml b/pyproject.toml index e05c76026..4c31a977e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,8 +38,8 @@ classifiers = [ "Topic :: Scientific/Engineering", ] dependencies = [ - "scikit-learn < 1.6.0", - "scipy < 1.15.0", + "scikit-learn >= 1.1.0", + "scipy >= 1.15.0", # explicit here since need a newer version as scikit-learn ] dynamic = ["version"] From 7f80d783108c7b6af036660dfa6f6a249bd190d5 Mon Sep 17 00:00:00 2001 From: Philip Loche Date: Thu, 13 Feb 2025 12:06:13 +0100 Subject: [PATCH 08/13] update pyproject.toml --- pyproject.toml | 2 +- src/skmatter/sample_selection/_base.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4c31a977e..db7056b63 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ classifiers = [ "Topic :: Scientific/Engineering", ] dependencies = [ - "scikit-learn >= 1.1.0", + "scikit-learn >= 1.6.0", "scipy >= 1.15.0", # explicit here since need a newer version as scikit-learn ] dynamic = ["version"] diff --git a/src/skmatter/sample_selection/_base.py b/src/skmatter/sample_selection/_base.py index 67d5f0472..0abdca1fa 100644 --- a/src/skmatter/sample_selection/_base.py +++ b/src/skmatter/sample_selection/_base.py @@ -509,7 +509,7 @@ class DirectionalConvexHull: selected_idx_ : numpy.ndarray Indices of datapoints that form the vertices of the convex hull - interpolator_high_dim_ : scipy.interpolate.interpnd.LinearNDInterpolator + interpolator_high_dim_ : scipy.interpolate._interpnd.LinearNDInterpolator Interpolator for the features in the high- dimensional space From feb10b0bf25ae8586c3b08f862131fdc8c936c19 Mon Sep 17 00:00:00 2001 From: Philip Loche Date: Thu, 13 Feb 2025 14:05:20 +0100 Subject: [PATCH 09/13] fix some tests --- .github/workflows/build.yml | 2 +- .github/workflows/docs.yml | 2 +- .github/workflows/lint.yml | 2 +- .github/workflows/tests.yml | 2 +- .gitignore | 1 + .readthedocs.yaml | 2 +- pyproject.toml | 5 +- src/skmatter/_selection.py | 22 ++-- src/skmatter/decomposition/_kernel_pcovr.py | 2 +- src/skmatter/decomposition/_pcovr.py | 4 +- src/skmatter/utils/_pcovr_utils.py | 109 +++++++++++------- ...e.tsf-492-wpa-0-247.epfl.ch.11311.XpjwIfdx | Bin 53248 -> 0 bytes tests/test_feature_pcov_fps.py | 5 +- tests/test_greedy_selector.py | 8 +- tests/test_kernel_pcovr.py | 4 +- tests/test_pcovr.py | 9 +- tests/test_sample_pcov_fps.py | 5 +- 17 files changed, 105 insertions(+), 79 deletions(-) delete mode 100644 tests/.coverage.tsf-492-wpa-0-247.epfl.ch.11311.XpjwIfdx diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 058303f65..61e753ea9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -15,7 +15,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: "3.12" + python-version: "3.13" - name: install tests dependencies run: python -m pip install tox diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 75b82499a..1fa0d4420 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -16,7 +16,7 @@ jobs: - name: setup Python uses: actions/setup-python@v5 with: - python-version: "3.12" + python-version: "3.13" - name: install tests dependencies run: python -m pip install tox diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 1108289fd..5ae3af316 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -14,7 +14,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - 
python-version: "3.12" + python-version: "3.13" - name: install tests dependencies run: python -m pip install tox diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index d9ebe3475..fe3609798 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -12,7 +12,7 @@ jobs: strategy: matrix: os: [ubuntu-22.04, macos-14, windows-2022] - python-version: ["3.9", "3.12"] + python-version: ["3.0", "3.13"] steps: - uses: actions/checkout@v4 diff --git a/.gitignore b/.gitignore index 226933088..1f74b6f04 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +*.coverage* *.pyc *.ipynb_checkpoints* __pycache__ diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 71882236f..b4c827c42 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -9,7 +9,7 @@ version: 2 build: os: ubuntu-22.04 tools: - python: "3.12" + python: "3.13" # Build documentation in the docs/ directory with Sphinx sphinx: diff --git a/pyproject.toml b/pyproject.toml index db7056b63..374d06293 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ authors = [ {name = "Michele Ceriotti"} ] readme = "README.rst" -requires-python = ">=3.9" +requires-python = ">=3.10" license = {text = "BSD-3-Clause"} classifiers = [ "Development Status :: 4 - Beta", @@ -75,9 +75,6 @@ include = [ [tool.coverage.xml] output = 'tests/coverage.xml' -[tool.pytest.ini_options] -testpaths = "tests" - [tool.isort] skip = "__init__.py" profile = "black" diff --git a/src/skmatter/_selection.py b/src/skmatter/_selection.py index 50e55cff3..224c020a7 100644 --- a/src/skmatter/_selection.py +++ b/src/skmatter/_selection.py @@ -561,8 +561,7 @@ def score(self, X, y=None): Parameters ---------- - X : numpy.ndarray of shape [n_samples, n_features] - The input samples. + X : ignored y : ignored Returns @@ -570,8 +569,7 @@ def score(self, X, y=None): score : numpy.ndarray of (n_to_select_from_) :math:`\pi` importance for the given samples or features """ - X, y = validate_data(self, X, y, reset=False) - + validate_data(self, X, y, reset=False) # present for API consistency return self.pi_ def _init_greedy_search(self, X, y, n_to_select): @@ -746,8 +744,7 @@ def score(self, X, y=None): score : numpy.ndarray of (n_to_select_from_) :math:`\pi` importance for the given samples or features """ - X, y = validate_data(self, X, y, reset=False) - + validate_data(self, X, y, reset=False) # present for API consistency return self.pi_ def _init_greedy_search(self, X, y, n_to_select): @@ -941,8 +938,7 @@ def score(self, X, y=None): ------- hausdorff : Hausdorff distances """ - X, y = validate_data(self, X, y, reset=False) - + validate_data(self, X, y, reset=False) return self.hausdorff_ def get_distance(self): @@ -1079,15 +1075,16 @@ def __init__( ) def fit(self, X, y=None, warm_start=False): - if self.mixing == 1.0: raise ValueError( - "Mixing = 1.0 corresponds to traditional FPS." - "Please use the FPS class." + "Mixing = 1.0 corresponds to traditional FPS. Please use the FPS class." ) return super().fit(X, y) + # docstring is inherited and set from the base class + fit.__doc__ = GreedySelector.fit.__doc__ + def score(self, X, y=None): """Returns the Hausdorff distances of all samples to previous selections. 
@@ -1104,8 +1101,7 @@ def score(self, X, y=None): ------- hausdorff : Hausdorff distances """ - X, y = validate_data(self, X, y, reset=False) - + validate_data(self, X, y, reset=False) return self.hausdorff_ def get_distance(self): diff --git a/src/skmatter/decomposition/_kernel_pcovr.py b/src/skmatter/decomposition/_kernel_pcovr.py index 65fe39a3e..825a0cf92 100644 --- a/src/skmatter/decomposition/_kernel_pcovr.py +++ b/src/skmatter/decomposition/_kernel_pcovr.py @@ -474,7 +474,7 @@ def score(self, X, y): """ check_is_fitted(self, ["pkt_", "X_fit_"]) - X, y = validate_data(self, X, y, reset=False) + X = validate_data(self, X, reset=False) K_NN = self._get_kernel(self.X_fit_, self.X_fit_) K_VN = self._get_kernel(X, self.X_fit_) diff --git a/src/skmatter/decomposition/_pcovr.py b/src/skmatter/decomposition/_pcovr.py index 8cdd24680..bc094a720 100644 --- a/src/skmatter/decomposition/_pcovr.py +++ b/src/skmatter/decomposition/_pcovr.py @@ -10,7 +10,7 @@ from sklearn.decomposition._pca import _infer_dimension from sklearn.linear_model import LinearRegression, Ridge, RidgeCV from sklearn.linear_model._base import LinearModel -from sklearn.utils import check_random_state +from sklearn.utils import check_array, check_random_state from sklearn.utils._arpack import _init_arpack_v0 from sklearn.utils.extmath import randomized_svd, stable_cumsum, svd_flip from sklearn.utils.validation import check_is_fitted, validate_data @@ -585,7 +585,7 @@ def predict(self, X=None, T=None): X = validate_data(self, X, reset=False) return X @ self.pxy_ else: - T = validate_data(self, T, reset=False) + T = check_array(T) return T @ self.pty_ def transform(self, X=None): diff --git a/src/skmatter/utils/_pcovr_utils.py b/src/skmatter/utils/_pcovr_utils.py index 15286e341..8852a6386 100644 --- a/src/skmatter/utils/_pcovr_utils.py +++ b/src/skmatter/utils/_pcovr_utils.py @@ -9,18 +9,30 @@ def check_lr_fit(regressor, X, y): - r""" + """ Checks that a (linear) regressor is fitted, and if not, - fits it with the provided data - - :param regressor: sklearn-style regressor - :type regressor: object - :param X: feature matrix with which to fit the regressor - if it is not already fitted - :type X: array - :param y: target values with which to fit the regressor - if it is not already fitted - :type y: array + fits it with the provided data. + + Parameters + ---------- + regressor : object + sklearn-style regressor + X : array-like + Feature matrix with which to fit the regressor if it is not already fitted + y : array-like + Target values with which to fit the regressor if it is not already fitted + + Returns + ------- + fitted_regressor : object + The fitted regressor. If input regressor was already fitted and compatible with + the data, returns a deep copy. Otherwise returns a newly fitted regressor. + + Raises + ------ + ValueError + If the fitted regressor's coefficients dimensions are incompatible with the + target space. """ try: check_is_fitted(regressor) @@ -32,18 +44,18 @@ def check_lr_fit(regressor, X, y): # Check compatibility with y if fitted_regressor.coef_.ndim != y.ndim: raise ValueError( - "The regressor coefficients have a dimension incompatible " - "with the supplied target space. " - "The coefficients have dimension %d and the targets " - "have dimension %d" % (fitted_regressor.coef_.ndim, y.ndim) + "The regressor coefficients have a dimension incompatible with the " + "supplied target space. 
The coefficients have dimension " + f"{fitted_regressor.coef_.ndim} and the targets have dimension " + f"{y.ndim}" ) elif y.ndim == 2: if fitted_regressor.coef_.shape[0] != y.shape[1]: raise ValueError( - "The regressor coefficients have a shape incompatible " - "with the supplied target space. " - "The coefficients have shape %r and the targets " - "have shape %r" % (fitted_regressor.coef_.shape, y.shape) + "The regressor coefficients have a shape incompatible with the " + "supplied target space. The coefficients have shape " + f"{fitted_regressor.coef_.shape} and the targets have shape " + f"{y.shape}" ) except NotFittedError: @@ -54,20 +66,37 @@ def check_lr_fit(regressor, X, y): def check_krr_fit(regressor, K, X, y): - r""" + """ Checks that a (kernel ridge) regressor is fitted, and if not, - fits it with the provided data - - :param regressor: sklearn-style regressor - :type regressor: object - :param K: kernel matrix with which to fit the regressor - if it is not already fitted - :type K: array - :param X: feature matrix with which to check the regressor - :type X: array - :param y: target values with which to fit the regressor - if it is not already fitted - :type y: array + fits it with the provided data. + + Parameters + ---------- + regressor : object + sklearn-style regressor + K : array-like + Kernel matrix with which to fit the regressor if it is not already fitted + X : array-like + Feature matrix with which to check the regressor + y : array-like + Target values with which to fit the regressor if it is not already fitted + + Returns + ------- + fitted_regressor : object + The fitted regressor. If input regressor was already fitted and compatible with + the data, returns a deep copy. Otherwise returns a newly fitted regressor. + + Raises + ------ + ValueError + If the fitted regressor's coefficients dimensions are incompatible with the + target space. + + Notes + ----- + For unfitted regressors, sets the kernel to "precomputed" before fitting with the + provided kernel matrix K to avoid recomputation. """ try: check_is_fitted(regressor) @@ -79,18 +108,18 @@ def check_krr_fit(regressor, K, X, y): # Check compatibility with y if fitted_regressor.dual_coef_.ndim != y.ndim: raise ValueError( - "The regressor coefficients have a dimension incompatible " - "with the supplied target space. " - "The coefficients have dimension %d and the targets " - "have dimension %d" % (fitted_regressor.dual_coef_.ndim, y.ndim) + "The regressor coefficients have a dimension incompatible with the " + "supplied target space. The coefficients have dimension " + f"{fitted_regressor.dual_coef_.ndim} and the targets have dimension " + f"{y.ndim}" ) elif y.ndim == 2: if fitted_regressor.dual_coef_.shape[1] != y.shape[1]: raise ValueError( - "The regressor coefficients have a shape incompatible " - "with the supplied target space. " - "The coefficients have shape %r and the targets " - "have shape %r" % (fitted_regressor.dual_coef_.shape, y.shape) + "The regressor coefficients have a shape incompatible with the " + "supplied target space. 
The coefficients have shape " + f"{fitted_regressor.dual_coef_.shape} and the targets have shape " + f"{y.shape}" ) except NotFittedError: diff --git a/tests/.coverage.tsf-492-wpa-0-247.epfl.ch.11311.XpjwIfdx b/tests/.coverage.tsf-492-wpa-0-247.epfl.ch.11311.XpjwIfdx deleted file mode 100644 index 5d637632bf921d7fd0c0dfe157ca23af76774c48..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 53248 zcmeI)O>Y}T7zglOyYv4m3ax6_rXM;(~-bUXPOn@2<1E z&dUKIr$~{I_y&mY#5dsB6E{v=c%GddubsqIZ&l5I6+8PfGduH}XJ(vi+Hamc@e(Bl zVdTq1yl<^rwrxEU!m_L~z1HYuHivfZm;?H5&+ISSEnDr&M{Bijt!n8vtM>ic)7oD3 z_qF#{|E&7uUswNdWZ9()*dPD_2teTfEiidjb?RGN_T^8B>w!J;EFQV{Fj8r5B-cSiI=zFeAyx>SA$I7H0$Ewc@9i5}4juW=i z{Jhuo`jjhCM^wZp@_ZRxh#%EOD@~ByEm7x*K0$>j?4s_YYm;qoAP)-1-W6==i2uo8MwzPZV5SUp&L>?&lBC+ zDD)pBp%{8W+RPyK5-$vdI#=#EQTx;-z6r#t1jpqZXt5ITH)aoq4InGX^kMZ=*82kDUE+(LH_Mf2>K zd0;SP!~;!)SwgDm8;v%uEY(5uhGB0$*3yWH6%7!Y3rw=XIB$S@+yC+ zF7{64kYH_j(V^67?X>uCgUxjh( z(Xz_RR{XI!D;mc#=2<9ih+Y^Dl??RSKsg@AbWbl9X&r8^N}uLs#1~w~(P<;ClUZDN znm?pLXT}%p_`VGK*?Psxur8J6ejc2BA3cdsM$w=_8PgEGR;DR5yUH_`DZf1;6E4-4 zeLdW{Q)Ws$J61P})2q=JGEs@=E5S+9%A|pHCS&17iZ0Sx+H4eG6$ac}-44I=PsuC% zQEswhJN5hb?a4SBjC!8w#3$X z_R^}oto`#=HV~DD00bZa0SG_<0uX=z1Rwwb2tZ&eP%YlG&94CpmEv|e`z=8F5B%4s z$*=z{}fB*y_009U<00Izz00c4tJpV^1KmY;|fB*y_009U<00Izz00fp_ z0MGxIzmL&E2tWV=5P$##AOHafKmY;|fB>HVBL*M<0SG_<0uX=z1Rwwb2tWV=%P)ZE z|I6RUXdwh3009U<00Izz00bZa0SG_<&;Jnv5P$##AOHafKmY;|fB*y_0De diff --git a/tests/test_feature_pcov_fps.py b/tests/test_feature_pcov_fps.py index 321cc78ee..e6910f9a1 100644 --- a/tests/test_feature_pcov_fps.py +++ b/tests/test_feature_pcov_fps.py @@ -24,11 +24,12 @@ def test_restart(self): def test_no_mixing_1(self): """Check that the model throws an error when mixing = 1.0.""" + selector = PCovFPS(n_to_select=1, mixing=1.0) with self.assertRaises(ValueError) as cm: - _ = PCovFPS(n_to_select=1, mixing=1.0) + selector.fit(self.X, y=self.y) self.assertEqual( str(cm.exception), - "Mixing = 1.0 corresponds to traditional FPS." "Please use the FPS class.", + "Mixing = 1.0 corresponds to traditional FPS. Please use the FPS class.", ) diff --git a/tests/test_greedy_selector.py b/tests/test_greedy_selector.py index 0bfe6de99..f85dbeb34 100644 --- a/tests/test_greedy_selector.py +++ b/tests/test_greedy_selector.py @@ -73,7 +73,7 @@ def test_bad_transform(self): _ = selector.transform(self.X[:, :3]) self.assertEqual( str(cm.exception), - "X has a different shape than during fitting. 
Reshape your data.", + "X has 3 features, but GreedyTester is expecting 10 features as input.", ) def test_no_nfeatures(self): @@ -124,8 +124,8 @@ def test_size_input(self): selector_feature.fit(X) self.assertEqual( str(cm.exception), - f"Found array with 1 feature(s) (shape={X.shape})" - " while a minimum of 2 is required.", + f"Found array with 1 feature(s) (shape={X.shape}) while a minimum of 2 is " + "required by GreedyTester.", ) X = X.reshape(1, -1) @@ -135,7 +135,7 @@ def test_size_input(self): self.assertEqual( str(cm.exception), f"Found array with 1 sample(s) (shape={X.shape}) while a minimum of 2 is " - "required.", + "required by GreedyTester.", ) diff --git a/tests/test_kernel_pcovr.py b/tests/test_kernel_pcovr.py index e4bbda52e..80adf584e 100644 --- a/tests/test_kernel_pcovr.py +++ b/tests/test_kernel_pcovr.py @@ -182,6 +182,8 @@ def test_centerer(self): self.assertTrue(hasattr(kpcovr, "centerer_")) _ = kpcovr.predict(self.X) _ = kpcovr.transform(self.X) + + print(self.Y.shape) _ = kpcovr.score(self.X, self.Y) def test_prefit_regressor(self): @@ -255,7 +257,7 @@ def test_incompatible_coef_shape(self): # Dimension mismatch with self.assertRaises(ValueError) as cm: - kpcovr.fit(self.X, self.Y[:, 0]) + kpcovr.fit(self.X, np.zeros(self.Y.shape + (2,))) self.assertTrue( str(cm.exception), "The regressor coefficients have a dimension incompatible " diff --git a/tests/test_pcovr.py b/tests/test_pcovr.py index e589978d2..2059eed44 100644 --- a/tests/test_pcovr.py +++ b/tests/test_pcovr.py @@ -491,13 +491,12 @@ def test_incompatible_coef_shape(self): # Dimension mismatch with self.assertRaises(ValueError) as cm: - pcovr.fit(self.X, self.Y.squeeze()) + pcovr.fit(self.X, np.zeros((self.Y.shape[0], 2))) self.assertEqual( str(cm.exception), - "The regressor coefficients have a dimension incompatible " - "with the supplied target space. " - "The coefficients have dimension %d and the targets " - "have dimension %d" % (regressor.coef_.ndim, self.Y.squeeze().ndim), + "The regressor coefficients have a dimension incompatible with the " + "supplied target space. The coefficients have dimension 1 and the targets " + "have dimension 2", ) # Shape mismatch (number of targets) diff --git a/tests/test_sample_pcov_fps.py b/tests/test_sample_pcov_fps.py index 7679abb0a..b6ed08662 100644 --- a/tests/test_sample_pcov_fps.py +++ b/tests/test_sample_pcov_fps.py @@ -24,11 +24,12 @@ def test_restart(self): def test_no_mixing_1(self): """Check that the model throws an error when mixing = 1.0.""" + selector = PCovFPS(n_to_select=1, mixing=1.0) with self.assertRaises(ValueError) as cm: - _ = PCovFPS(n_to_select=1, mixing=1.0) + selector.fit(self.X, y=self.y) self.assertEqual( str(cm.exception), - "Mixing = 1.0 corresponds to traditional FPS." "Please use the FPS class.", + "Mixing = 1.0 corresponds to traditional FPS. 
Please use the FPS class.", ) From 790eee3680717eb1032f6af5473ac36875431157 Mon Sep 17 00:00:00 2001 From: Philip Loche Date: Thu, 13 Feb 2025 15:24:44 +0100 Subject: [PATCH 10/13] update runners --- .github/workflows/tests.yml | 4 ++-- .readthedocs.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index fe3609798..f8f716183 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -11,8 +11,8 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-22.04, macos-14, windows-2022] - python-version: ["3.0", "3.13"] + os: [ubuntu-latest, macos-latest, windows-latest] + python-version: ["3.10", "3.13"] steps: - uses: actions/checkout@v4 diff --git a/.readthedocs.yaml b/.readthedocs.yaml index b4c827c42..bee4dd725 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -7,7 +7,7 @@ version: 2 # Set the version of Python and other tools we need build: - os: ubuntu-22.04 + os: ubuntu-lts-latest tools: python: "3.13" From 53ad7b8307945fe5edd3c68cef4303129eb2ac0f Mon Sep 17 00:00:00 2001 From: Philip Loche Date: Thu, 13 Feb 2025 16:33:16 +0100 Subject: [PATCH 11/13] fix docs --- examples/pcovr/PCovR-WHODataset.py | 6 ++++-- examples/pcovr/PCovR_Regressors.py | 3 +-- examples/pcovr/PCovR_Scaling.py | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/examples/pcovr/PCovR-WHODataset.py b/examples/pcovr/PCovR-WHODataset.py index 4a1ecf865..a86482fa1 100644 --- a/examples/pcovr/PCovR-WHODataset.py +++ b/examples/pcovr/PCovR-WHODataset.py @@ -109,7 +109,7 @@ r_pcovr = Ridge(alpha=1e-4, fit_intercept=False, random_state=0).fit( T_train_pcovr, y_train ) -yp_pcovr = r_pcovr.predict(T_test_pcovr) +yp_pcovr = r_pcovr.predict(T_test_pcovr).reshape(-1,1) plt.scatter(y_scaler.inverse_transform(y_test), y_scaler.inverse_transform(yp_pcovr)) r_pcovr.score(T_test_pcovr, y_test) @@ -128,7 +128,7 @@ T_pca = pca.transform(X) r_pca = Ridge(alpha=1e-4, fit_intercept=False, random_state=0).fit(T_train_pca, y_train) -yp_pca = r_pca.predict(T_test_pca) +yp_pca = r_pca.predict(T_test_pca).reshape(-1,1) plt.scatter(y_scaler.inverse_transform(y_test), y_scaler.inverse_transform(yp_pca)) r_pca.score(T_test_pca, y_test) @@ -312,3 +312,5 @@ def add_subplot(ax, axy, T, yp, let=""): "Linear and Kernel PCovR for Predicting Life Expectancy", y=0.925, fontsize=10 ) plt.show() + +# %% diff --git a/examples/pcovr/PCovR_Regressors.py b/examples/pcovr/PCovR_Regressors.py index 777009d56..72a122e61 100644 --- a/examples/pcovr/PCovR_Regressors.py +++ b/examples/pcovr/PCovR_Regressors.py @@ -24,13 +24,12 @@ mixing = 0.5 X, y = load_diabetes(return_X_y=True) -y = y.reshape(X.shape[0], -1) X_scaler = StandardScaler() X_scaled = X_scaler.fit_transform(X) y_scaler = StandardScaler() -y_scaled = y_scaler.fit_transform(y) +y_scaled = y_scaler.fit_transform(y.reshape(-1, 1)).ravel() # %% diff --git a/examples/pcovr/PCovR_Scaling.py b/examples/pcovr/PCovR_Scaling.py index 11858ead8..8eb75471e 100644 --- a/examples/pcovr/PCovR_Scaling.py +++ b/examples/pcovr/PCovR_Scaling.py @@ -78,7 +78,7 @@ ax1_Y.set_title("Regression\nWithout Scaling") ax2_Y.scatter( - Yp_scaled, y, c=np.abs(y.flatten() - Yp_scaled.flatten()), cmap="bone_r", ec="k" + Yp_scaled, y, c=np.abs(y.ravel() - Yp_scaled.ravel()), cmap="bone_r", ec="k" ) ax2_Y.plot(ax2_Y.get_xlim(), ax2_Y.get_xlim(), "r--") ax2_Y.set_xlabel("True Y, unscaled") From 5101e66c5aa9c960edba42a625f35dc1979f1689 Mon Sep 17 00:00:00 2001 From: Philip Loche Date: Thu, 13 Feb 2025 
16:37:36 +0100 Subject: [PATCH 12/13] lint plus warnings as errors --- examples/pcovr/PCovR-WHODataset.py | 4 ++-- pyproject.toml | 5 +++++ tox.ini | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/examples/pcovr/PCovR-WHODataset.py b/examples/pcovr/PCovR-WHODataset.py index a86482fa1..39146f2a8 100644 --- a/examples/pcovr/PCovR-WHODataset.py +++ b/examples/pcovr/PCovR-WHODataset.py @@ -109,7 +109,7 @@ r_pcovr = Ridge(alpha=1e-4, fit_intercept=False, random_state=0).fit( T_train_pcovr, y_train ) -yp_pcovr = r_pcovr.predict(T_test_pcovr).reshape(-1,1) +yp_pcovr = r_pcovr.predict(T_test_pcovr).reshape(-1, 1) plt.scatter(y_scaler.inverse_transform(y_test), y_scaler.inverse_transform(yp_pcovr)) r_pcovr.score(T_test_pcovr, y_test) @@ -128,7 +128,7 @@ T_pca = pca.transform(X) r_pca = Ridge(alpha=1e-4, fit_intercept=False, random_state=0).fit(T_train_pca, y_train) -yp_pca = r_pca.predict(T_test_pca).reshape(-1,1) +yp_pca = r_pca.predict(T_test_pca).reshape(-1, 1) plt.scatter(y_scaler.inverse_transform(y_test), y_scaler.inverse_transform(yp_pca)) r_pca.score(T_test_pca, y_test) diff --git a/pyproject.toml b/pyproject.toml index 374d06293..eec3f58c2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,6 +84,11 @@ include_trailing_comma = true lines_after_imports = 2 known_first_party = "skmatter" +[tool.pytest.ini_options] +testpaths = ["tests"] +addopts = ["--cov"] +filterwarnings = ["error"] + [tool.ruff] exclude = ["docs/src/examples/"] lint.ignore = [ diff --git a/tox.ini b/tox.ini index 2c5c64764..d1eda0fa1 100644 --- a/tox.ini +++ b/tox.ini @@ -42,7 +42,7 @@ deps = commands = # Run unit tests - pytest --cov {posargs} + pytest {posargs} # Run documentation tests pytest --doctest-modules --pyargs skmatter {posargs} From f8c2cc8e3e0a2bca72cb51b9808fa674d61e27f3 Mon Sep 17 00:00:00 2001 From: Philip Loche Date: Thu, 13 Feb 2025 16:39:53 +0100 Subject: [PATCH 13/13] update infra --- tox.ini | 1 - 1 file changed, 1 deletion(-) diff --git a/tox.ini b/tox.ini index d1eda0fa1..c1bcac3a6 100644 --- a/tox.ini +++ b/tox.ini @@ -96,5 +96,4 @@ deps = # The documentation runs "examples" to produce outputs via sphinx-gallery. extras = examples commands = - sphinx-build {posargs:-E} -W -b doctest docs/src docs/build/doctest sphinx-build {posargs:-E} -W -b html docs/src docs/build/html
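Note on the recurring pattern in this series: patches 02-05 all apply the same scikit-learn >= 1.6 migration — `check_array`/`check_X_y` calls are replaced by `validate_data(self, ...)`, which records `n_features_in_` during `fit` and re-checks it with `reset=False` in `predict`/`transform`/`score`, and `__sklearn_tags__` is added alongside the older `_more_tags` dicts. The following is a minimal, self-contained sketch of that pattern; the `MeanRegressor` toy is hypothetical and not part of skmatter.

import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.utils.validation import check_is_fitted, validate_data


class MeanRegressor(RegressorMixin, BaseEstimator):
    # Mixins are listed before BaseEstimator, as in the Ridge2FoldCV change,
    # so tag resolution follows the scikit-learn convention.
    def fit(self, X, y):
        X, y = validate_data(self, X, y)         # validates inputs, sets n_features_in_
        self.mean_ = float(np.mean(y))
        return self

    def predict(self, X):
        check_is_fitted(self, ["mean_"])
        X = validate_data(self, X, reset=False)  # re-checks the fitted feature count
        return np.full(X.shape[0], self.mean_)

    def __sklearn_tags__(self):
        tags = super().__sklearn_tags__()        # replaces the _more_tags dict in >= 1.6
        tags.target_tags.required = True
        return tags

The `reset=False` check is also what produces the new error messages asserted in the updated tests, e.g. "X has 3 features, but GreedyTester is expecting 10 features as input."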