scikit-learn-contrib · PicoCentauri · Feb 6, 2025 · Feb 7, 2025 · Feb 7, 2025 · Feb 7, 2025
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -15,7 +15,7 @@ jobs:
     - name: Set up Python
       uses: actions/setup-python@v5
       with:
-        python-version: "3.12"
+        python-version: "3.13"
 
     - name: install tests dependencies
       run: python -m pip install tox

diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
@@ -16,7 +16,7 @@ jobs:
       - name: setup Python
         uses: actions/setup-python@v5
         with:
-          python-version: "3.12"
+          python-version: "3.13"
 
       - name: install tests dependencies
         run: python -m pip install tox

diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
@@ -14,7 +14,7 @@ jobs:
     - name: Set up Python
       uses: actions/setup-python@v5
       with:
-        python-version: "3.12"
+        python-version: "3.13"
 
     - name: install tests dependencies
       run: python -m pip install tox

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -11,8 +11,8 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-22.04, macos-14, windows-2022]
-        python-version: ["3.9", "3.12"]
+        os: [ubuntu-latest, macos-latest, windows-latest]
+        python-version: ["3.10", "3.13"]
 
     steps:
     - uses: actions/checkout@v4

diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
+*.coverage*
 *.pyc
 *.ipynb_checkpoints*
 __pycache__

diff --git a/.readthedocs.yaml b/.readthedocs.yaml
@@ -7,9 +7,9 @@ version: 2
 
 # Set the version of Python and other tools we need
 build:
-  os: ubuntu-22.04
+  os: ubuntu-lts-latest
   tools:
-    python: "3.12"
+    python: "3.13"
 
 # Build documentation in the docs/ directory with Sphinx
 sphinx:

diff --git a/CHANGELOG b/CHANGELOG
@@ -13,6 +13,7 @@ The rules for CHANGELOG file:
 
 0.3.0 (XXXX/XX/XX)
 ------------------
+- Fix import of a function that moved within scipy and bump the scipy dependency to 1.15.0 (#236)
 - Fix rendering issues for `SparseKDE` and `QuickShift` (#236)
 - Updating ``FPS`` to allow a numpy array of ints as an initialize parameter (#145)
-- Supported Python versions are now ranging from 3.9 - 3.12.
+- Supported Python versions are now ranging from 3.10 - 3.13.

diff --git a/examples/pcovr/PCovR-WHODataset.py b/examples/pcovr/PCovR-WHODataset.py
@@ -109,7 +109,7 @@
 r_pcovr = Ridge(alpha=1e-4, fit_intercept=False, random_state=0).fit(
     T_train_pcovr, y_train
 )
-yp_pcovr = r_pcovr.predict(T_test_pcovr)
+yp_pcovr = r_pcovr.predict(T_test_pcovr).reshape(-1, 1)
 
 plt.scatter(y_scaler.inverse_transform(y_test), y_scaler.inverse_transform(yp_pcovr))
 r_pcovr.score(T_test_pcovr, y_test)
@@ -128,7 +128,7 @@
 T_pca = pca.transform(X)
 
 r_pca = Ridge(alpha=1e-4, fit_intercept=False, random_state=0).fit(T_train_pca, y_train)
-yp_pca = r_pca.predict(T_test_pca)
+yp_pca = r_pca.predict(T_test_pca).reshape(-1, 1)
 
 plt.scatter(y_scaler.inverse_transform(y_test), y_scaler.inverse_transform(yp_pca))
 r_pca.score(T_test_pca, y_test)
@@ -312,3 +312,5 @@ def add_subplot(ax, axy, T, yp, let=""):
     "Linear and Kernel PCovR for Predicting Life Expectancy", y=0.925, fontsize=10
 )
 plt.show()
+
+# %%
diff --git a/examples/pcovr/PCovR_Regressors.py b/examples/pcovr/PCovR_Regressors.py
@@ -24,13 +24,12 @@
 mixing = 0.5
 
 X, y = load_diabetes(return_X_y=True)
-y = y.reshape(X.shape[0], -1)
 
 X_scaler = StandardScaler()
 X_scaled = X_scaler.fit_transform(X)
 
 y_scaler = StandardScaler()
-y_scaled = y_scaler.fit_transform(y)
+y_scaled = y_scaler.fit_transform(y.reshape(-1, 1)).ravel()
 
 
 # %%

diff --git a/examples/pcovr/PCovR_Scaling.py b/examples/pcovr/PCovR_Scaling.py
@@ -78,7 +78,7 @@
 ax1_Y.set_title("Regression\nWithout Scaling")
 
 ax2_Y.scatter(
-    Yp_scaled, y, c=np.abs(y.flatten() - Yp_scaled.flatten()), cmap="bone_r", ec="k"
+    Yp_scaled, y, c=np.abs(y.ravel() - Yp_scaled.ravel()), cmap="bone_r", ec="k"
 )
 ax2_Y.plot(ax2_Y.get_xlim(), ax2_Y.get_xlim(), "r--")
 ax2_Y.set_xlabel("True Y, unscaled")

diff --git a/pyproject.toml b/pyproject.toml
@@ -19,7 +19,7 @@ authors = [
     {name = "Michele Ceriotti"}
 ]
 readme = "README.rst"
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 license = {text = "BSD-3-Clause"}
 classifiers = [
     "Development Status :: 4 - Beta",
@@ -38,8 +38,8 @@ classifiers = [
     "Topic :: Scientific/Engineering",
 ]
 dependencies = [
-    "scikit-learn < 1.6.0",
-    "scipy < 1.15.0",
+    "scikit-learn >= 1.6.0",
+    "scipy >= 1.15.0",  # explicit: we need a newer scipy than scikit-learn requires
 ]
 dynamic = ["version"]
 
@@ -75,9 +75,6 @@ include = [
 [tool.coverage.xml]
 output = 'tests/coverage.xml'
 
-[tool.pytest.ini_options]
-testpaths = "tests"
-
 [tool.isort]
 skip = "__init__.py"
 profile = "black"
@@ -87,6 +84,11 @@ include_trailing_comma = true
 lines_after_imports = 2
 known_first_party = "skmatter"
 
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+addopts = ["--cov"]
+filterwarnings = ["error"]
+
 [tool.ruff]
 exclude = ["docs/src/examples/"]
 lint.ignore = [

diff --git a/src/skmatter/_selection.py b/src/skmatter/_selection.py
@@ -83,8 +83,13 @@
 from scipy.sparse.linalg import eigsh
 from sklearn.base import BaseEstimator, MetaEstimatorMixin
 from sklearn.feature_selection._base import SelectorMixin
-from sklearn.utils import check_array, check_random_state, check_X_y, safe_mask
-from sklearn.utils.validation import FLOAT_DTYPES, as_float_array, check_is_fitted
+from sklearn.utils import check_random_state, safe_mask
+from sklearn.utils.validation import (
+    FLOAT_DTYPES,
+    as_float_array,
+    check_is_fitted,
+    validate_data,
+)
 
 from .utils import (
     X_orthogonalizer,
@@ -157,11 +162,6 @@ def __init__(
         self.n_to_select = n_to_select
         self.score_threshold = score_threshold
         self.score_threshold_type = score_threshold_type
-        if self.score_threshold_type not in ["relative", "absolute"]:
-            raise ValueError(
-                "invalid score_threshold_type, expected one of 'relative' or 'absolute'"
-            )
-
         self.full = full
         self.progress_bar = progress_bar
         self.random_state = random_state
@@ -184,6 +184,11 @@ def fit(self, X, y=None, warm_start=False):
         -------
         self : object
         """
+        if self.score_threshold_type not in ["relative", "absolute"]:
+            raise ValueError(
+                "invalid score_threshold_type, expected one of 'relative' or 'absolute'"
+            )
+
         if self.selection_type == "feature":
             self._axis = 1
         elif self.selection_type == "sample":
@@ -205,7 +210,7 @@ def fit(self, X, y=None, warm_start=False):
 
         if hasattr(self, "mixing") or y is not None:
             X, y = self._validate_data(X, y, **params)
-            X, y = check_X_y(X, y, multi_output=True)
+            X, y = validate_data(self, X, y, multi_output=True)
 
             if len(y.shape) == 1:
                 # force y to have multi_output 2D format even when it's 1D, since
@@ -214,7 +219,7 @@ def fit(self, X, y=None, warm_start=False):
                 y = y.reshape((len(y), 1))
 
         else:
-            X = check_array(X, **params)
+            X = validate_data(self, X, **params)
 
         if self.full and self.score_threshold is not None:
             raise ValueError(
@@ -308,7 +313,7 @@ def transform(self, X, y=None):
 
         mask = self.get_support()
 
-        X = check_array(X)
+        X = validate_data(self, X, reset=False)
 
         if len(X.shape) == 1:
             if self._axis == 0:
@@ -486,6 +491,11 @@ def _more_tags(self):
             "requires_y": False,
         }
 
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.target_tags.required = False  # mirrors "requires_y" in _more_tags
+        return tags
+
 
 class _CUR(GreedySelector):
     """Transformer that performs Greedy Selection by choosing features
@@ -551,15 +561,15 @@ def score(self, X, y=None):
 
         Parameters
         ----------
-        X : numpy.ndarray of shape [n_samples, n_features]
-            The input samples.
+        X : ignored
         y : ignored
 
         Returns
         -------
         score : numpy.ndarray of (n_to_select_from_)
             :math:`\pi` importance for the given samples or features
         """
+        validate_data(self, X, y, reset=False)  # present for API consistency
         return self.pi_
 
     def _init_greedy_search(self, X, y, n_to_select):
@@ -734,6 +744,7 @@ def score(self, X, y=None):
         score : numpy.ndarray of (n_to_select_from_)
             :math:`\pi` importance for the given samples or features
         """
+        validate_data(self, X, y, reset=False)  # present for API consistency
         return self.pi_
 
     def _init_greedy_search(self, X, y, n_to_select):
@@ -927,6 +938,7 @@ def score(self, X, y=None):
         -------
         hausdorff : Hausdorff distances
         """
+        validate_data(self, X, y, reset=False)
         return self.hausdorff_
 
     def get_distance(self):
@@ -1048,11 +1060,6 @@ def __init__(
         full=False,
         random_state=0,
     ):
-        if mixing == 1.0:
-            raise ValueError(
-                "Mixing = 1.0 corresponds to traditional FPS."
-                "Please use the FPS class."
-            )
 
         self.mixing = mixing
         self.initialize = initialize
@@ -1067,6 +1074,17 @@ def __init__(
             random_state=random_state,
         )
 
+    def fit(self, X, y=None, warm_start=False):
+        if self.mixing == 1.0:  # checked at fit time rather than in __init__
+            raise ValueError(
+                "Mixing = 1.0 corresponds to traditional FPS. Please use the FPS class."
+            )
+
+        return super().fit(X, y, warm_start=warm_start)
+
+    # docstring is inherited and set from the base class
+    fit.__doc__ = GreedySelector.fit.__doc__
+
     def score(self, X, y=None):
         """Returns the Hausdorff distances of all samples to previous selections.
 
@@ -1083,6 +1101,7 @@ def score(self, X, y=None):
         -------
         hausdorff : Hausdorff distances
         """
+        validate_data(self, X, y, reset=False)
         return self.hausdorff_
 
     def get_distance(self):
@@ -1159,3 +1178,8 @@ def _more_tags(self):
         return {
             "requires_y": True,
         }
+
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.target_tags.required = True  # mirrors "requires_y" in _more_tags
+        return tags
diff --git a/src/skmatter/decomposition/_kernel_pcovr.py b/src/skmatter/decomposition/_kernel_pcovr.py
@@ -9,10 +9,10 @@
 from sklearn.kernel_ridge import KernelRidge
 from sklearn.linear_model._base import LinearModel
 from sklearn.metrics.pairwise import pairwise_kernels
-from sklearn.utils import check_array, check_random_state
+from sklearn.utils import check_random_state
 from sklearn.utils._arpack import _init_arpack_v0
 from sklearn.utils.extmath import randomized_svd, stable_cumsum, svd_flip
-from sklearn.utils.validation import check_is_fitted, check_X_y
+from sklearn.utils.validation import check_is_fitted, validate_data
 
 from ..preprocessing import KernelNormalizer
 from ..utils import check_krr_fit, pcovr_kernel
@@ -270,7 +270,7 @@ def fit(self, X, Y, W=None):
         ):
             raise ValueError("Regressor must be an instance of `KernelRidge`")
 
-        X, Y = check_X_y(X, Y, y_numeric=True, multi_output=True)
+        X, Y = validate_data(self, X, Y, y_numeric=True, multi_output=True)
         self.X_fit_ = X.copy()
 
         if self.n_components is None:
@@ -387,7 +387,7 @@ def predict(self, X=None):
         """Predicts the property values"""
         check_is_fitted(self, ["pky_", "pty_"])
 
-        X = check_array(X)
+        X = validate_data(self, X, reset=False)
         K = self._get_kernel(X, self.X_fit_)
         if self.center:
             K = self.centerer_.transform(K)
@@ -408,7 +408,7 @@ def transform(self, X):
         """
         check_is_fitted(self, ["pkt_", "X_fit_"])
 
-        X = check_array(X)
+        X = validate_data(self, X, reset=False)
         K = self._get_kernel(X, self.X_fit_)
 
         if self.center:
@@ -440,7 +440,7 @@ def inverse_transform(self, T):
         """
         return T @ self.ptx_
 
-    def score(self, X, Y):
+    def score(self, X, y):
         r"""Computes the (negative) loss values for KernelPCovR on the given predictor
         and response variables. The loss in :math:`\mathbf{K}`, as explained in
         [Helfrecht2020]_ does not correspond to a traditional Gram loss
@@ -474,7 +474,7 @@ def score(self, X, Y):
         """
         check_is_fitted(self, ["pkt_", "X_fit_"])
 
-        X = check_array(X)
+        X = validate_data(self, X, reset=False)
 
         K_NN = self._get_kernel(self.X_fit_, self.X_fit_)
         K_VN = self._get_kernel(X, self.X_fit_)
@@ -485,8 +485,8 @@ def score(self, X, Y):
             K_VN = self.centerer_.transform(K_VN)
             K_VV = self.centerer_.transform(K_VV)
 
-        y = K_VN @ self.pky_
-        Lkrr = np.linalg.norm(Y - y) ** 2 / np.linalg.norm(Y) ** 2
+        ypred = K_VN @ self.pky_
+        Lkrr = np.linalg.norm(y - ypred) ** 2 / np.linalg.norm(y) ** 2
 
         t_n = K_NN @ self.pkt_
         t_v = K_VN @ self.pkt_