diff --git a/.github/workflows/cronjob_unit_tests.yml b/.github/workflows/cronjob_unit_tests.yml
index 11fb4f67..c9652be9 100644
--- a/.github/workflows/cronjob_unit_tests.yml
+++ b/.github/workflows/cronjob_unit_tests.yml
@@ -24,7 +24,7 @@ jobs:
           - build: windows
             os: windows-latest
             SKIP_LIGHTGBM: False
-        python-version: [3.8, 3.9, "3.10", "3.11"]
+        python-version: [3.8, 3.9, "3.10", "3.11", "3.12"]
 
     steps:
       - uses: actions/checkout@master
diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
index 080c8236..65bf2c51 100644
--- a/.github/workflows/unit_tests.yml
+++ b/.github/workflows/unit_tests.yml
@@ -23,7 +23,7 @@ jobs:
           - build: windows
             os: windows-latest
             SKIP_LIGHTGBM: False
-        python-version: [3.8, 3.9, "3.10", "3.11"]
+        python-version: [3.8, 3.9, "3.10", "3.11", "3.12"]
 
     steps:
       - uses: actions/checkout@master
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 34371413..68a7a35d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -35,7 +35,7 @@ repos:
     hooks:
       - id: ruff-check
        name: 'Ruff: Check for errors, styling issues and complexity, and fixes issues if possible (including import order)'
-       entry: ruff
+       entry: ruff check
        language: system
        args: [ --fix, --no-cache ]
      - id: ruff-format
diff --git a/docs/discussion/contributing.md b/docs/discussion/contributing.md
deleted file mode 100644
index e079654f..00000000
--- a/docs/discussion/contributing.md
+++ /dev/null
@@ -1 +0,0 @@
---8<-- "CONTRIBUTING.md"
\ No newline at end of file
diff --git a/docs/discussion/vision.md b/docs/discussion/vision.md
deleted file mode 100644
index f0fc236e..00000000
--- a/docs/discussion/vision.md
+++ /dev/null
@@ -1 +0,0 @@
---8<-- "VISION.md"
\ No newline at end of file
diff --git a/probatus/utils/shap_helpers.py b/probatus/utils/shap_helpers.py
index 9a8c57b3..88585224 100644
--- a/probatus/utils/shap_helpers.py
+++ b/probatus/utils/shap_helpers.py
@@ -23,7 +23,7 @@
 import numpy as np
 import pandas as pd
 from shap import Explainer
-from shap.explainers._tree import Tree
+from shap.explainers import TreeExplainer
 from shap.utils import sample
 from sklearn.pipeline import Pipeline
 
@@ -59,10 +59,10 @@ def shap_calc(
             - 51 - 100 - shows other warnings and prints
             - above 100 - presents all prints and all warnings (including SHAP warnings).
-        approximate (boolean): 
+        approximate (boolean):
            if True uses shap approximations - less accurate, but very fast. It applies to tree-based explainers only.
-        check_additivity (boolean): 
+        check_additivity (boolean):
            if False SHAP will disable the additivity check for tree-based models.
        **shap_kwargs: kwargs of the shap.Explainer
@@ -104,9 +104,13 @@ def shap_calc(
     explainer = Explainer(model, masker=mask, **shap_kwargs)
 
     # For tree-explainers allow for using check_additivity and approximate arguments
-    if isinstance(explainer, Tree):
-        # Calculate Shap values
+    if isinstance(explainer, TreeExplainer):
         shap_values = explainer.shap_values(X, check_additivity=check_additivity, approximate=approximate)
+
+        # From SHAP version 0.43+ https://github.com/shap/shap/pull/3121 required to
+        # get the second dimension of calculated Shap values.
+        if not isinstance(shap_values, list) and len(shap_values.shape) == 3:
+            shap_values = shap_values[:, :, 1]
     else:
         # Calculate Shap values
         shap_values = explainer.shap_values(X)
diff --git a/pyproject.toml b/pyproject.toml
index 7c4cee05..51eda8bd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "probatus"
-version = "3.0.0"
+version = "3.0.1"
 requires-python= ">=3.8"
 description = "Validation of binary classifiers and data used to develop them"
 readme = { file = "README.md", content-type = "text/markdown" }
@@ -20,6 +20,7 @@ classifiers = [
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
     "Topic :: Scientific/Engineering",
     "Topic :: Scientific/Engineering :: Artificial Intelligence",
     "License :: OSI Approved :: MIT License",
@@ -32,7 +33,8 @@ dependencies = [
     "scipy>=1.4.0",
     "joblib>=0.13.2",
     "tqdm>=4.41.0",
-    "shap>=0.41.0,<0.43.0",
+    "shap==0.43.0 ; python_version == '3.8'",
+    "shap>=0.43.0 ; python_version != '3.8'",
     "numpy>=1.23.2",
     "numba>=0.57.0",
 ]
diff --git a/tests/feature_elimination/test_feature_elimination.py b/tests/feature_elimination/test_feature_elimination.py
index 16636bd4..a304feaf 100644
--- a/tests/feature_elimination/test_feature_elimination.py
+++ b/tests/feature_elimination/test_feature_elimination.py
@@ -5,7 +5,6 @@
 from sklearn.datasets import make_classification
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import get_scorer
 from sklearn.model_selection import RandomizedSearchCV, StratifiedGroupKFold, StratifiedKFold
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import StandardScaler
@@ -314,7 +313,7 @@ def test_get_feature_shap_values_per_fold(X, y):
     Test with ShapRFECV with features per fold.
""" clf = DecisionTreeClassifier(max_depth=1) - shap_elimination = ShapRFECV(clf) + shap_elimination = ShapRFECV(clf, scoring="roc_auc") ( shap_values, train_score, @@ -325,7 +324,6 @@ def test_get_feature_shap_values_per_fold(X, y): clf, train_index=[2, 3, 4, 5, 6, 7], val_index=[0, 1], - scorer=get_scorer("roc_auc"), ) assert test_score == 1 assert train_score > 0.9 @@ -545,7 +543,7 @@ def test_get_feature_shap_values_per_fold_early_stopping_lightGBM(complex_data): X, y = complex_data y = preprocess_labels(y, y_name="y", index=X.index) - shap_elimination = EarlyStoppingShapRFECV(clf, early_stopping_rounds=5) + shap_elimination = EarlyStoppingShapRFECV(clf, early_stopping_rounds=5, scoring="roc_auc") ( shap_values, train_score, @@ -556,7 +554,6 @@ def test_get_feature_shap_values_per_fold_early_stopping_lightGBM(complex_data): clf, train_index=list(range(5, 50)), val_index=[0, 1, 2, 3, 4], - scorer=get_scorer("roc_auc"), ) assert test_score > 0.6 assert train_score > 0.6 @@ -573,7 +570,7 @@ def test_get_feature_shap_values_per_fold_early_stopping_CatBoost(complex_data, X["f1_categorical"] = X["f1_categorical"].astype(str).astype("category") y = preprocess_labels(y, y_name="y", index=X.index) - shap_elimination = EarlyStoppingShapRFECV(clf, early_stopping_rounds=5) + shap_elimination = EarlyStoppingShapRFECV(clf, early_stopping_rounds=5, scoring="roc_auc") ( shap_values, train_score, @@ -584,7 +581,6 @@ def test_get_feature_shap_values_per_fold_early_stopping_CatBoost(complex_data, clf, train_index=list(range(5, 50)), val_index=[0, 1, 2, 3, 4], - scorer=get_scorer("roc_auc"), ) assert test_score > 0 assert train_score > 0.6 @@ -603,7 +599,7 @@ def test_get_feature_shap_values_per_fold_early_stopping_XGBoost(complex_data): X["f1_categorical"] = X["f1_categorical"].astype(float) y = preprocess_labels(y, y_name="y", index=X.index) - shap_elimination = EarlyStoppingShapRFECV(clf, early_stopping_rounds=5) + shap_elimination = EarlyStoppingShapRFECV(clf, early_stopping_rounds=5, scoring="roc_auc") ( shap_values, train_score, @@ -614,7 +610,6 @@ def test_get_feature_shap_values_per_fold_early_stopping_XGBoost(complex_data): clf, train_index=list(range(5, 50)), val_index=[0, 1, 2, 3, 4], - scorer=get_scorer("roc_auc"), ) assert test_score > 0 assert train_score > 0.6