Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates to fix sklearn branch #241

Draft
wants to merge 3 commits into
base: fix-sklearn
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 18 additions & 6 deletions src/skmatter/_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,7 @@ def fit(self, X, y=None, warm_start=False):
params = dict(ensure_min_samples=2, ensure_min_features=2, dtype=FLOAT_DTYPES)

if hasattr(self, "mixing") or y is not None:
X, y = self._validate_data(X, y, **params)
X, y = validate_data(self, X, y, multi_output=True)
X, y = validate_data(self, X, y, multi_output=True, **params)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we also call `.ravel()` on `y` here, or instead pass `multi_output=True` in the other `validate_data` calls, so that all call sites handle `y` consistently?


if len(y.shape) == 1:
# force y to have multi_output 2D format even when it's 1D, since
Expand Down Expand Up @@ -569,7 +568,10 @@ def score(self, X, y=None):
score : numpy.ndarray of (n_to_select_from_)
:math:`\pi` importance for the given samples or features
"""
validate_data(self, X, y, reset=False) # present for API consistency
if y is not None:
validate_data(self, X, y.ravel(), reset=False)
else:
validate_data(self, X, reset=False) # present for API consistency
return self.pi_

def _init_greedy_search(self, X, y, n_to_select):
Expand Down Expand Up @@ -744,7 +746,10 @@ def score(self, X, y=None):
score : numpy.ndarray of (n_to_select_from_)
:math:`\pi` importance for the given samples or features
"""
validate_data(self, X, y, reset=False) # present for API consistency
if y is not None:
validate_data(self, X, y.ravel(), reset=False)
else:
validate_data(self, X, reset=False) # present for API consistency
return self.pi_

def _init_greedy_search(self, X, y, n_to_select):
Expand Down Expand Up @@ -938,7 +943,10 @@ def score(self, X, y=None):
-------
hausdorff : Hausdorff distances
"""
validate_data(self, X, y, reset=False)
if y is not None:
validate_data(self, X, y.ravel(), reset=False)
else:
validate_data(self, X, reset=False)
return self.hausdorff_

def get_distance(self):
Expand Down Expand Up @@ -1101,7 +1109,11 @@ def score(self, X, y=None):
-------
hausdorff : Hausdorff distances
"""
validate_data(self, X, y, reset=False)
if y is not None:
validate_data(self, X, y.ravel(), reset=False)
else:
validate_data(self, X, reset=False)

return self.hausdorff_

def get_distance(self):
Expand Down
4 changes: 2 additions & 2 deletions src/skmatter/decomposition/_kernel_pcovr.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from sklearn.utils import check_random_state
from sklearn.utils._arpack import _init_arpack_v0
from sklearn.utils.extmath import randomized_svd, stable_cumsum, svd_flip
from sklearn.utils.validation import check_is_fitted, validate_data
from sklearn.utils.validation import check_is_fitted, validate_data, _check_n_features

from ..preprocessing import KernelNormalizer
from ..utils import check_krr_fit, pcovr_kernel
Expand Down Expand Up @@ -347,7 +347,7 @@ def fit(self, X, Y, W=None):
except NotFittedError:
self.regressor_.set_params(**regressor.get_params())
self.regressor_.X_fit_ = self.X_fit_
self.regressor_._check_n_features(self.X_fit_, reset=True)
_check_n_features(self.regressor_, self.X_fit_, reset=True)
else:
Yhat = Y.copy()
if W is None:
Expand Down
4 changes: 2 additions & 2 deletions src/skmatter/linear_model/_base.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import numpy as np
from scipy.linalg import orthogonal_procrustes
from sklearn.base import MultiOutputMixin, RegressorMixin
from sklearn.base import MultiOutputMixin, RegressorMixin, BaseEstimator
from sklearn.linear_model import LinearRegression
from sklearn.utils import check_array, check_X_y
from sklearn.utils.validation import check_is_fitted


class OrthogonalRegression(MultiOutputMixin, RegressorMixin):
class OrthogonalRegression(MultiOutputMixin, RegressorMixin, BaseEstimator):
r"""Orthogonal regression by solving the Procrustes problem

Linear regression with the additional constraint that the weight matrix
Expand Down
2 changes: 1 addition & 1 deletion src/skmatter/linear_model/_ridge.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def fit(self, X, y):
"[0,1)"
)

X, y = self._validate_data(X, y, y_numeric=True, multi_output=True)
X, y = validate_data(self, X, y, y_numeric=True, multi_output=True)
self.n_samples_in_, self.n_features_in_ = X.shape

# check_scoring uses estimators scoring function if the scorer is None, this is
Expand Down
19 changes: 13 additions & 6 deletions src/skmatter/preprocessing/_data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing._data import KernelCenterer
from sklearn.utils.validation import FLOAT_DTYPES, _check_sample_weight, check_is_fitted
from sklearn.utils.validation import (
FLOAT_DTYPES,
_check_sample_weight,
check_is_fitted,
validate_data,
)


class StandardFlexibleScaler(TransformerMixin, BaseEstimator):
Expand Down Expand Up @@ -128,7 +133,8 @@ def fit(self, X, y=None, sample_weight=None):
self : object
Fitted scaler.
"""
X = self._validate_data(
X = validate_data(
self,
X,
copy=self.copy,
estimator=self,
Expand Down Expand Up @@ -181,7 +187,8 @@ def transform(self, X, y=None, copy=None):
Transformed array.
"""
copy = copy if copy is not None else self.copy
X = self._validate_data(
X = validate_data(
self,
X,
reset=False,
copy=copy,
Expand Down Expand Up @@ -298,7 +305,7 @@ def fit(self, K, y=None, sample_weight=None):
self : object
Fitted transformer.
"""
K = self._validate_data(K, copy=True, dtype=FLOAT_DTYPES, reset=False)
K = validate_data(self, K, copy=True, dtype=FLOAT_DTYPES, reset=False)

if sample_weight is not None:
self.sample_weight_ = _check_sample_weight(sample_weight, K, dtype=K.dtype)
Expand Down Expand Up @@ -350,7 +357,7 @@ def transform(self, K, copy=True):
Transformed array
"""
check_is_fitted(self)
K = self._validate_data(K, copy=copy, dtype=FLOAT_DTYPES, reset=False)
K = validate_data(self, K, copy=copy, dtype=FLOAT_DTYPES, reset=False)

if self.with_center:
K_pred_cols = np.average(K, weights=self.sample_weight_, axis=1)[
Expand Down Expand Up @@ -391,7 +398,7 @@ def fit_transform(self, K, y=None, sample_weight=None, copy=True, **fit_params):
return self.transform(K, copy)


class SparseKernelCenterer(TransformerMixin):
class SparseKernelCenterer(TransformerMixin, BaseEstimator):
r"""Kernel centering method for sparse kernels, similar to
:class:`KernelFlexibleCenterer`.

Expand Down
6 changes: 3 additions & 3 deletions src/skmatter/utils/_pcovr_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from sklearn.exceptions import NotFittedError
from sklearn.metrics.pairwise import pairwise_kernels
from sklearn.utils.extmath import randomized_svd
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.validation import check_is_fitted, validate_data


def check_lr_fit(regressor, X, y):
Expand Down Expand Up @@ -39,7 +39,7 @@ def check_lr_fit(regressor, X, y):
fitted_regressor = deepcopy(regressor)

# Check compatibility with X
fitted_regressor._validate_data(X, y, reset=False, multi_output=True)
validate_data(fitted_regressor, X, y, reset=False, multi_output=True)

# Check compatibility with y
if fitted_regressor.coef_.ndim != y.ndim:
Expand Down Expand Up @@ -103,7 +103,7 @@ def check_krr_fit(regressor, K, X, y):
fitted_regressor = deepcopy(regressor)

# Check compatibility with K
fitted_regressor._validate_data(X, y, reset=False, multi_output=True)
validate_data(fitted_regressor, X, y, reset=False, multi_output=True)

# Check compatibility with y
if fitted_regressor.dual_coef_.ndim != y.ndim:
Expand Down
Loading