Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix sklearn #239

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.12"
python-version: "3.13"

- name: install tests dependencies
run: python -m pip install tox
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
- name: setup Python
uses: actions/setup-python@v5
with:
python-version: "3.12"
python-version: "3.13"

- name: install tests dependencies
run: python -m pip install tox
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.12"
python-version: "3.13"

- name: install tests dependencies
run: python -m pip install tox
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-22.04, macos-14, windows-2022]
python-version: ["3.9", "3.12"]
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ["3.10", "3.13"]

steps:
- uses: actions/checkout@v4
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
*.coverage*
*.pyc
*.ipynb_checkpoints*
__pycache__
Expand Down
4 changes: 2 additions & 2 deletions .readthedocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ version: 2

# Set the version of Python and other tools we need
build:
os: ubuntu-22.04
os: ubuntu-lts-latest
tools:
python: "3.12"
python: "3.13"

# Build documentation in the docs/ directory with Sphinx
sphinx:
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ The rules for CHANGELOG file:

0.3.0 (XXXX/XX/XX)
------------------
- Fix moved function import from scipy and bump scipy dependency to 1.15.0 (#236)
- Fix rendering issues for `SparseKDE` and `QuickShift` (#236)
- Update ``FPS`` to allow a numpy array of ints as the ``initialize`` parameter (#145)
- Supported Python versions now range from 3.10 to 3.13.
Expand Down
6 changes: 4 additions & 2 deletions examples/pcovr/PCovR-WHODataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@
r_pcovr = Ridge(alpha=1e-4, fit_intercept=False, random_state=0).fit(
T_train_pcovr, y_train
)
yp_pcovr = r_pcovr.predict(T_test_pcovr)
yp_pcovr = r_pcovr.predict(T_test_pcovr).reshape(-1, 1)

plt.scatter(y_scaler.inverse_transform(y_test), y_scaler.inverse_transform(yp_pcovr))
r_pcovr.score(T_test_pcovr, y_test)
Expand All @@ -128,7 +128,7 @@
T_pca = pca.transform(X)

r_pca = Ridge(alpha=1e-4, fit_intercept=False, random_state=0).fit(T_train_pca, y_train)
yp_pca = r_pca.predict(T_test_pca)
yp_pca = r_pca.predict(T_test_pca).reshape(-1, 1)

plt.scatter(y_scaler.inverse_transform(y_test), y_scaler.inverse_transform(yp_pca))
r_pca.score(T_test_pca, y_test)
Expand Down Expand Up @@ -312,3 +312,5 @@ def add_subplot(ax, axy, T, yp, let=""):
"Linear and Kernel PCovR for Predicting Life Expectancy", y=0.925, fontsize=10
)
plt.show()

# %%
3 changes: 1 addition & 2 deletions examples/pcovr/PCovR_Regressors.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,12 @@
mixing = 0.5

X, y = load_diabetes(return_X_y=True)
y = y.reshape(X.shape[0], -1)

X_scaler = StandardScaler()
X_scaled = X_scaler.fit_transform(X)

y_scaler = StandardScaler()
y_scaled = y_scaler.fit_transform(y)
y_scaled = y_scaler.fit_transform(y.reshape(-1, 1)).ravel()


# %%
Expand Down
2 changes: 1 addition & 1 deletion examples/pcovr/PCovR_Scaling.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@
ax1_Y.set_title("Regression\nWithout Scaling")

ax2_Y.scatter(
Yp_scaled, y, c=np.abs(y.flatten() - Yp_scaled.flatten()), cmap="bone_r", ec="k"
Yp_scaled, y, c=np.abs(y.ravel() - Yp_scaled.ravel()), cmap="bone_r", ec="k"
)
ax2_Y.plot(ax2_Y.get_xlim(), ax2_Y.get_xlim(), "r--")
ax2_Y.set_xlabel("True Y, unscaled")
Expand Down
14 changes: 8 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ authors = [
{name = "Michele Ceriotti"}
]
readme = "README.rst"
requires-python = ">=3.9"
requires-python = ">=3.10"
license = {text = "BSD-3-Clause"}
classifiers = [
"Development Status :: 4 - Beta",
Expand All @@ -38,8 +38,8 @@ classifiers = [
"Topic :: Scientific/Engineering",
]
dependencies = [
"scikit-learn < 1.6.0",
"scipy < 1.15.0",
"scikit-learn >= 1.6.0",
"scipy >= 1.15.0", # pinned explicitly because we need a newer version than scikit-learn requires
]
dynamic = ["version"]

Expand Down Expand Up @@ -75,9 +75,6 @@ include = [
[tool.coverage.xml]
output = 'tests/coverage.xml'

[tool.pytest.ini_options]
testpaths = "tests"

[tool.isort]
skip = "__init__.py"
profile = "black"
Expand All @@ -87,6 +84,11 @@ include_trailing_comma = true
lines_after_imports = 2
known_first_party = "skmatter"

[tool.pytest.ini_options]
testpaths = ["tests"]
addopts = ["--cov"]
filterwarnings = ["error"]

[tool.ruff]
exclude = ["docs/src/examples/"]
lint.ignore = [
Expand Down
58 changes: 41 additions & 17 deletions src/skmatter/_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,13 @@
from scipy.sparse.linalg import eigsh
from sklearn.base import BaseEstimator, MetaEstimatorMixin
from sklearn.feature_selection._base import SelectorMixin
from sklearn.utils import check_array, check_random_state, check_X_y, safe_mask
from sklearn.utils.validation import FLOAT_DTYPES, as_float_array, check_is_fitted
from sklearn.utils import check_random_state, safe_mask
from sklearn.utils.validation import (
FLOAT_DTYPES,
as_float_array,
check_is_fitted,
validate_data,
)

from .utils import (
X_orthogonalizer,
Expand Down Expand Up @@ -157,11 +162,6 @@ def __init__(
self.n_to_select = n_to_select
self.score_threshold = score_threshold
self.score_threshold_type = score_threshold_type
if self.score_threshold_type not in ["relative", "absolute"]:
raise ValueError(
"invalid score_threshold_type, expected one of 'relative' or 'absolute'"
)

self.full = full
self.progress_bar = progress_bar
self.random_state = random_state
Expand All @@ -184,6 +184,11 @@ def fit(self, X, y=None, warm_start=False):
-------
self : object
"""
if self.score_threshold_type not in ["relative", "absolute"]:
raise ValueError(
"invalid score_threshold_type, expected one of 'relative' or 'absolute'"
)

if self.selection_type == "feature":
self._axis = 1
elif self.selection_type == "sample":
Expand All @@ -205,7 +210,7 @@ def fit(self, X, y=None, warm_start=False):

if hasattr(self, "mixing") or y is not None:
X, y = self._validate_data(X, y, **params)
X, y = check_X_y(X, y, multi_output=True)
X, y = validate_data(self, X, y, multi_output=True)

if len(y.shape) == 1:
# force y to have multi_output 2D format even when it's 1D, since
Expand All @@ -214,7 +219,7 @@ def fit(self, X, y=None, warm_start=False):
y = y.reshape((len(y), 1))

else:
X = check_array(X, **params)
X = validate_data(self, X, **params)

if self.full and self.score_threshold is not None:
raise ValueError(
Expand Down Expand Up @@ -308,7 +313,7 @@ def transform(self, X, y=None):

mask = self.get_support()

X = check_array(X)
X = validate_data(self, X, reset=False)

if len(X.shape) == 1:
if self._axis == 0:
Expand Down Expand Up @@ -486,6 +491,11 @@ def _more_tags(self):
"requires_y": False,
}

def __sklearn_tags__(self):
    """Return the inherited estimator tags with the target marked as not required."""
    inherited = super().__sklearn_tags__()
    # Override only the target requirement; every other tag is left as the
    # parent class reported it.
    inherited.target_tags.required = False
    return inherited


class _CUR(GreedySelector):
"""Transformer that performs Greedy Selection by choosing features
Expand Down Expand Up @@ -551,15 +561,15 @@ def score(self, X, y=None):

Parameters
----------
X : numpy.ndarray of shape [n_samples, n_features]
The input samples.
X : ignored
y : ignored

Returns
-------
score : numpy.ndarray of (n_to_select_from_)
:math:`\pi` importance for the given samples or features
"""
validate_data(self, X, y, reset=False) # present for API consistency
return self.pi_

def _init_greedy_search(self, X, y, n_to_select):
Expand Down Expand Up @@ -734,6 +744,7 @@ def score(self, X, y=None):
score : numpy.ndarray of (n_to_select_from_)
:math:`\pi` importance for the given samples or features
"""
validate_data(self, X, y, reset=False) # present for API consistency
return self.pi_

def _init_greedy_search(self, X, y, n_to_select):
Expand Down Expand Up @@ -927,6 +938,7 @@ def score(self, X, y=None):
-------
hausdorff : Hausdorff distances
"""
validate_data(self, X, y, reset=False)
return self.hausdorff_

def get_distance(self):
Expand Down Expand Up @@ -1048,11 +1060,6 @@ def __init__(
full=False,
random_state=0,
):
if mixing == 1.0:
raise ValueError(
"Mixing = 1.0 corresponds to traditional FPS."
"Please use the FPS class."
)

self.mixing = mixing
self.initialize = initialize
Expand All @@ -1067,6 +1074,17 @@ def __init__(
random_state=random_state,
)

def fit(self, X, y=None, warm_start=False):
    # Validated at fit time (not in ``__init__``) so the check sees the value
    # of ``self.mixing`` that is actually used for fitting.
    if self.mixing == 1.0:
        raise ValueError(
            "Mixing = 1.0 corresponds to traditional FPS. Please use the FPS class."
        )

    # Forward ``warm_start`` to the base implementation: it is part of this
    # method's signature and would otherwise be silently ignored.
    return super().fit(X, y, warm_start=warm_start)

# docstring is inherited and set from the base class
fit.__doc__ = GreedySelector.fit.__doc__

def score(self, X, y=None):
"""Returns the Hausdorff distances of all samples to previous selections.

Expand All @@ -1083,6 +1101,7 @@ def score(self, X, y=None):
-------
hausdorff : Hausdorff distances
"""
validate_data(self, X, y, reset=False)
return self.hausdorff_

def get_distance(self):
Expand Down Expand Up @@ -1159,3 +1178,8 @@ def _more_tags(self):
return {
"requires_y": True,
}

def __sklearn_tags__(self):
    """Return the inherited estimator tags with the target marked as required."""
    inherited = super().__sklearn_tags__()
    # Override only the target requirement; every other tag is left as the
    # parent class reported it.
    inherited.target_tags.required = True
    return inherited
18 changes: 9 additions & 9 deletions src/skmatter/decomposition/_kernel_pcovr.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model._base import LinearModel
from sklearn.metrics.pairwise import pairwise_kernels
from sklearn.utils import check_array, check_random_state
from sklearn.utils import check_random_state
from sklearn.utils._arpack import _init_arpack_v0
from sklearn.utils.extmath import randomized_svd, stable_cumsum, svd_flip
from sklearn.utils.validation import check_is_fitted, check_X_y
from sklearn.utils.validation import check_is_fitted, validate_data

from ..preprocessing import KernelNormalizer
from ..utils import check_krr_fit, pcovr_kernel
Expand Down Expand Up @@ -270,7 +270,7 @@ def fit(self, X, Y, W=None):
):
raise ValueError("Regressor must be an instance of `KernelRidge`")

X, Y = check_X_y(X, Y, y_numeric=True, multi_output=True)
X, Y = validate_data(self, X, Y, y_numeric=True, multi_output=True)
self.X_fit_ = X.copy()

if self.n_components is None:
Expand Down Expand Up @@ -387,7 +387,7 @@ def predict(self, X=None):
"""Predicts the property values"""
check_is_fitted(self, ["pky_", "pty_"])

X = check_array(X)
X = validate_data(self, X, reset=False)
K = self._get_kernel(X, self.X_fit_)
if self.center:
K = self.centerer_.transform(K)
Expand All @@ -408,7 +408,7 @@ def transform(self, X):
"""
check_is_fitted(self, ["pkt_", "X_fit_"])

X = check_array(X)
X = validate_data(self, X, reset=False)
K = self._get_kernel(X, self.X_fit_)

if self.center:
Expand Down Expand Up @@ -440,7 +440,7 @@ def inverse_transform(self, T):
"""
return T @ self.ptx_

def score(self, X, Y):
def score(self, X, y):
r"""Computes the (negative) loss values for KernelPCovR on the given predictor
and response variables. The loss in :math:`\mathbf{K}`, as explained in
[Helfrecht2020]_ does not correspond to a traditional Gram loss
Expand Down Expand Up @@ -474,7 +474,7 @@ def score(self, X, Y):
"""
check_is_fitted(self, ["pkt_", "X_fit_"])

X = check_array(X)
X = validate_data(self, X, reset=False)

K_NN = self._get_kernel(self.X_fit_, self.X_fit_)
K_VN = self._get_kernel(X, self.X_fit_)
Expand All @@ -485,8 +485,8 @@ def score(self, X, Y):
K_VN = self.centerer_.transform(K_VN)
K_VV = self.centerer_.transform(K_VV)

y = K_VN @ self.pky_
Lkrr = np.linalg.norm(Y - y) ** 2 / np.linalg.norm(Y) ** 2
ypred = K_VN @ self.pky_
Lkrr = np.linalg.norm(y - ypred) ** 2 / np.linalg.norm(y) ** 2

t_n = K_NN @ self.pkt_
t_v = K_VN @ self.pkt_
Expand Down
Loading
Loading