scikit-learn-contrib · cajchristian · Feb 14, 2025 · Feb 14, 2025 · Feb 14, 2025 · PicoCentauri
diff --git a/src/skmatter/_selection.py b/src/skmatter/_selection.py
@@ -209,8 +209,7 @@ def fit(self, X, y=None, warm_start=False):
         params = dict(ensure_min_samples=2, ensure_min_features=2, dtype=FLOAT_DTYPES)
 
         if hasattr(self, "mixing") or y is not None:
-            X, y = self._validate_data(X, y, **params)
-            X, y = validate_data(self, X, y, multi_output=True)
+            X, y = validate_data(self, X, y, multi_output=True, **params)
 
             if len(y.shape) == 1:
                 # force y to have multi_output 2D format even when it's 1D, since
@@ -569,7 +568,10 @@ def score(self, X, y=None):
         score : numpy.ndarray of (n_to_select_from_)
             :math:`\pi` importance for the given samples or features
         """
-        validate_data(self, X, y, reset=False)  # present for API consistency
+        if y is not None:
+            validate_data(self, X, y.ravel(), reset=False)
+        else:
+            validate_data(self, X, reset=False)  # present for API consistency
         return self.pi_
 
     def _init_greedy_search(self, X, y, n_to_select):
@@ -744,7 +746,10 @@ def score(self, X, y=None):
         score : numpy.ndarray of (n_to_select_from_)
             :math:`\pi` importance for the given samples or features
         """
-        validate_data(self, X, y, reset=False)  # present for API consistency
+        if y is not None:
+            validate_data(self, X, y.ravel(), reset=False)
+        else:
+            validate_data(self, X, reset=False)  # present for API consistency
         return self.pi_
 
     def _init_greedy_search(self, X, y, n_to_select):
@@ -938,7 +943,10 @@ def score(self, X, y=None):
         -------
         hausdorff : Hausdorff distances
         """
-        validate_data(self, X, y, reset=False)
+        if y is not None:
+            validate_data(self, X, y.ravel(), reset=False)
+        else:
+            validate_data(self, X, reset=False)
         return self.hausdorff_
 
     def get_distance(self):
@@ -1101,7 +1109,11 @@ def score(self, X, y=None):
         -------
         hausdorff : Hausdorff distances
         """
-        validate_data(self, X, y, reset=False)
+        if y is not None:
+            validate_data(self, X, y.ravel(), reset=False)
+        else:
+            validate_data(self, X, reset=False)
+
         return self.hausdorff_
 
     def get_distance(self):

diff --git a/src/skmatter/decomposition/_kernel_pcovr.py b/src/skmatter/decomposition/_kernel_pcovr.py
@@ -12,7 +12,7 @@
 from sklearn.utils import check_random_state
 from sklearn.utils._arpack import _init_arpack_v0
 from sklearn.utils.extmath import randomized_svd, stable_cumsum, svd_flip
-from sklearn.utils.validation import check_is_fitted, validate_data
+from sklearn.utils.validation import check_is_fitted, validate_data, _check_n_features
 
 from ..preprocessing import KernelNormalizer
 from ..utils import check_krr_fit, pcovr_kernel
@@ -347,7 +347,7 @@ def fit(self, X, Y, W=None):
             except NotFittedError:
                 self.regressor_.set_params(**regressor.get_params())
                 self.regressor_.X_fit_ = self.X_fit_
-                self.regressor_._check_n_features(self.X_fit_, reset=True)
+                _check_n_features(self.regressor_, self.X_fit_, reset=True)
         else:
             Yhat = Y.copy()
             if W is None:

diff --git a/src/skmatter/linear_model/_base.py b/src/skmatter/linear_model/_base.py
@@ -1,12 +1,12 @@
 import numpy as np
 from scipy.linalg import orthogonal_procrustes
-from sklearn.base import MultiOutputMixin, RegressorMixin
+from sklearn.base import MultiOutputMixin, RegressorMixin, BaseEstimator
 from sklearn.linear_model import LinearRegression
 from sklearn.utils import check_array, check_X_y
 from sklearn.utils.validation import check_is_fitted
 
 
-class OrthogonalRegression(MultiOutputMixin, RegressorMixin):
+class OrthogonalRegression(MultiOutputMixin, RegressorMixin, BaseEstimator):
     r"""Orthogonal regression by solving the Procrustes problem
 
     Linear regression with the additional constraint that the weight matrix

diff --git a/src/skmatter/linear_model/_ridge.py b/src/skmatter/linear_model/_ridge.py
@@ -170,7 +170,7 @@ def fit(self, X, y):
                 "[0,1)"
             )
 
-        X, y = self._validate_data(X, y, y_numeric=True, multi_output=True)
+        X, y = validate_data(self, X, y, y_numeric=True, multi_output=True)
         self.n_samples_in_, self.n_features_in_ = X.shape
 
         # check_scoring uses estimators scoring function if the scorer is None, this is

diff --git a/src/skmatter/preprocessing/_data.py b/src/skmatter/preprocessing/_data.py
@@ -1,7 +1,12 @@
 import numpy as np
 from sklearn.base import BaseEstimator, TransformerMixin
 from sklearn.preprocessing._data import KernelCenterer
-from sklearn.utils.validation import FLOAT_DTYPES, _check_sample_weight, check_is_fitted
+from sklearn.utils.validation import (
+    FLOAT_DTYPES,
+    _check_sample_weight,
+    check_is_fitted,
+    validate_data,
+)
 
 
 class StandardFlexibleScaler(TransformerMixin, BaseEstimator):
@@ -128,7 +133,8 @@ def fit(self, X, y=None, sample_weight=None):
         self : object
             Fitted scaler.
         """
-        X = self._validate_data(
+        X = validate_data(
+            self,
             X,
             copy=self.copy,
             estimator=self,
@@ -181,7 +187,8 @@ def transform(self, X, y=None, copy=None):
             Transformed array.
         """
         copy = copy if copy is not None else self.copy
-        X = self._validate_data(
+        X = validate_data(
+            self,
             X,
             reset=False,
             copy=copy,
@@ -298,7 +305,7 @@ def fit(self, K, y=None, sample_weight=None):
         self : object
             Fitted transformer.
         """
-        K = self._validate_data(K, copy=True, dtype=FLOAT_DTYPES, reset=False)
+        K = validate_data(self, K, copy=True, dtype=FLOAT_DTYPES, reset=False)
 
         if sample_weight is not None:
             self.sample_weight_ = _check_sample_weight(sample_weight, K, dtype=K.dtype)
@@ -350,7 +357,7 @@ def transform(self, K, copy=True):
             Transformed array
         """
         check_is_fitted(self)
-        K = self._validate_data(K, copy=copy, dtype=FLOAT_DTYPES, reset=False)
+        K = validate_data(self, K, copy=copy, dtype=FLOAT_DTYPES, reset=False)
 
         if self.with_center:
             K_pred_cols = np.average(K, weights=self.sample_weight_, axis=1)[
@@ -391,7 +398,7 @@ def fit_transform(self, K, y=None, sample_weight=None, copy=True, **fit_params):
         return self.transform(K, copy)
 
 
-class SparseKernelCenterer(TransformerMixin):
+class SparseKernelCenterer(TransformerMixin, BaseEstimator):
     r"""Kernel centering method for sparse kernels, similar to
     :class:`KernelFlexibleCenterer`.
 

diff --git a/src/skmatter/utils/_pcovr_utils.py b/src/skmatter/utils/_pcovr_utils.py
@@ -5,7 +5,7 @@
 from sklearn.exceptions import NotFittedError
 from sklearn.metrics.pairwise import pairwise_kernels
 from sklearn.utils.extmath import randomized_svd
-from sklearn.utils.validation import check_is_fitted
+from sklearn.utils.validation import check_is_fitted, validate_data
 
 
 def check_lr_fit(regressor, X, y):
@@ -39,7 +39,7 @@ def check_lr_fit(regressor, X, y):
         fitted_regressor = deepcopy(regressor)
 
         # Check compatibility with X
-        fitted_regressor._validate_data(X, y, reset=False, multi_output=True)
+        validate_data(fitted_regressor, X, y, reset=False, multi_output=True)
 
         # Check compatibility with y
         if fitted_regressor.coef_.ndim != y.ndim:
@@ -103,7 +103,7 @@ def check_krr_fit(regressor, K, X, y):
         fitted_regressor = deepcopy(regressor)
 
         # Check compatibility with K
-        fitted_regressor._validate_data(X, y, reset=False, multi_output=True)
+        validate_data(fitted_regressor, X, y, reset=False, multi_output=True)
 
         # Check compatibility with y
         if fitted_regressor.dual_coef_.ndim != y.ndim: