From 1df080d3b9e9ac75e52f4b0dba0e02cd27fa4c69 Mon Sep 17 00:00:00 2001
From: Reinier Koops
Date: Thu, 28 Mar 2024 09:55:19 +0100
Subject: [PATCH] clean up docstrings and remove np.random.seed() calls -> replaced by seed set in explainer

---
 .../feature_elimination.py                  |  8 --
 .../sample_similarity/resemblance_model.py  |  4 -
 probatus/utils/shap_helpers.py              |  4 +-
 tests/conftest.py                           | 48 ------------
 .../test_feature_elimination.py             | 78 -------------------
 tests/interpret/test_model_interpret.py     | 21 -----
 tests/interpret/test_shap_dependence.py     | 36 ---------
 .../test_resemblance_model.py               | 27 -------
 tests/utils/test_base_class.py              |  6 --
 tests/utils/test_utils_array_funcs.py       | 30 -------
 10 files changed, 2 insertions(+), 260 deletions(-)

diff --git a/probatus/feature_elimination/feature_elimination.py b/probatus/feature_elimination/feature_elimination.py
index 2fe3046..8994262 100644
--- a/probatus/feature_elimination/feature_elimination.py
+++ b/probatus/feature_elimination/feature_elimination.py
@@ -332,10 +332,6 @@ def fit(
         Returns:
             (ShapRFECV): Fitted object.
         """
-        # Set seed for results reproducibility
-        if self.random_state is not None:
-            np.random.seed(self.random_state)
-
         # Initialise len_columns_to_keep based on columns_to_keep content validation
         len_columns_to_keep = 0
         if columns_to_keep:
@@ -398,10 +394,6 @@ def fit(
             # Current dataset
             current_X = self.X[remaining_removeable_features]

-            # Set seed for results reproducibility
-            if self.random_state is not None:
-                np.random.seed(self.random_state)
-
             # Optimize parameters
             if self.search_model:
                 current_search_model = clone(self.model).fit(current_X, self.y)
diff --git a/probatus/sample_similarity/resemblance_model.py b/probatus/sample_similarity/resemblance_model.py
index 3e57475..8244094 100644
--- a/probatus/sample_similarity/resemblance_model.py
+++ b/probatus/sample_similarity/resemblance_model.py
@@ -108,10 +108,6 @@ def fit(self, X1, X2, column_names=None, class_names=None):
             (BaseResemblanceModel): Fitted object

         """
-        # Set seed for results reproducibility
-        if self.random_state is not None:
-            np.random.seed(self.random_state)
-
         # Set class names
         self.class_names = class_names
         if self.class_names is None:
diff --git a/probatus/utils/shap_helpers.py b/probatus/utils/shap_helpers.py
index 5e64d6b..3d4148e 100644
--- a/probatus/utils/shap_helpers.py
+++ b/probatus/utils/shap_helpers.py
@@ -73,7 +73,7 @@ def shap_calc(
         # https://github.com/slundberg/shap/issues/480
         if shap_kwargs.get("feature_perturbation") == "tree_path_dependent" or X.select_dtypes("category").shape[1] > 0:
             # Calculate Shap values.
-            explainer = Explainer(model, **shap_kwargs)
+            explainer = Explainer(model, seed=random_state, **shap_kwargs)
         else:
             # Create the background data,required for non tree based models.
             # A single datapoint can passed as mask
@@ -83,7 +83,7 @@
             else:
                 pass
             mask = sample(X, sample_size, random_state=random_state)
-            explainer = Explainer(model, masker=mask, **shap_kwargs)
+            explainer = Explainer(model, seed=random_state, masker=mask, **shap_kwargs)

         # For tree-explainers allow for using check_additivity and approximate arguments
         if isinstance(explainer, TreeExplainer):
diff --git a/tests/conftest.py b/tests/conftest.py
index ab3501f..c9784a2 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -14,9 +14,6 @@

 @pytest.fixture(scope="function")
 def random_state():
-    """
-    Fixture to automatically provide a random state.
- """ RANDOM_STATE = 0 return RANDOM_STATE @@ -24,9 +21,6 @@ def random_state(): @pytest.fixture(scope="function") def random_state_42(): - """ - Fixture to automatically provide a random state. - """ RANDOM_STATE = 42 return RANDOM_STATE @@ -34,9 +28,6 @@ def random_state_42(): @pytest.fixture(scope="function") def random_state_1234(): - """ - Fixture to automatically provide a random state. - """ RANDOM_STATE = 1234 return RANDOM_STATE @@ -44,9 +35,6 @@ def random_state_1234(): @pytest.fixture(scope="function") def random_state_1(): - """ - Fixture to automatically provide a random state. - """ RANDOM_STATE = 1 return RANDOM_STATE @@ -54,18 +42,11 @@ def random_state_1(): @pytest.fixture(scope="function") def mock_model(): - """ - Fixture. - """ return Mock() @pytest.fixture(scope="function") def complex_data(random_state): - """ - Fixture. - """ - feature_names = ["f1_categorical", "f2_missing", "f3_static", "f4", "f5"] # Prepare two samples @@ -93,9 +74,6 @@ def complex_data_with_categorical(complex_data): @pytest.fixture(scope="function") def complex_data_split(complex_data, random_state_42): - """ - Fixture. - """ X, y = complex_data X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_state_42) return X_train, X_test, y_train, y_test @@ -112,16 +90,12 @@ def complex_data_split_with_categorical(complex_data_split): @pytest.fixture(scope="function") def complex_lightgbm(random_state_42): - """This fixture allows to reuse the import of the LGBMClassifier class across different tests.""" model = LGBMClassifier(max_depth=5, num_leaves=11, class_weight="balanced", random_state=random_state_42) return model @pytest.fixture(scope="function") def complex_fitted_lightgbm(complex_data_split_with_categorical, complex_lightgbm): - """ - Fixture. - """ X_train, _, y_train, _ = complex_data_split_with_categorical return complex_lightgbm.fit(X_train, y_train) @@ -129,21 +103,18 @@ def complex_fitted_lightgbm(complex_data_split_with_categorical, complex_lightgb @pytest.fixture(scope="function") def catboost_classifier(random_state): - """This fixture allows to reuse the import of the CatboostClassifier class across different tests.""" model = CatBoostClassifier(random_seed=random_state) return model @pytest.fixture(scope="function") def decision_tree_classifier(random_state): - """This fixture allows to reuse the import of the DecisionTreeClassifier class across different tests.""" model = DecisionTreeClassifier(max_depth=1, random_state=random_state) return model @pytest.fixture(scope="function") def randomized_search_decision_tree_classifier(decision_tree_classifier, random_state): - """This fixture allows to reuse the import of the DecisionTreeClassifier in combination with a new CV class across different tests.""" param_grid = {"criterion": ["gini"], "min_samples_split": [1, 2]} cv = RandomizedSearchCV(decision_tree_classifier, param_grid, cv=2, n_iter=2, random_state=random_state) return cv @@ -151,54 +122,35 @@ def randomized_search_decision_tree_classifier(decision_tree_classifier, random_ @pytest.fixture(scope="function") def logistic_regression(random_state): - """This fixture allows to reuse the import of the DecisionTreeClassifier class across different tests.""" model = LogisticRegression(random_state=random_state) return model @pytest.fixture(scope="function") def X_train(): - """ - Fixture. 
- """ return pd.DataFrame({"col_1": [1, 1, 1, 1], "col_2": [0, 0, 0, 0], "col_3": [1, 0, 1, 0]}, index=[1, 2, 3, 4]) @pytest.fixture(scope="function") def y_train(): - """ - Fixture. - """ return pd.Series([1, 0, 1, 0], index=[1, 2, 3, 4]) @pytest.fixture(scope="function") def X_test(): - """ - Fixture. - """ return pd.DataFrame({"col_1": [1, 1, 1, 1], "col_2": [0, 0, 0, 0], "col_3": [1, 0, 1, 0]}, index=[5, 6, 7, 8]) @pytest.fixture(scope="function") def y_test(): - """ - Fixture. - """ return pd.Series([0, 0, 1, 0], index=[5, 6, 7, 8]) @pytest.fixture(scope="function") def fitted_logistic_regression(X_train, y_train, logistic_regression): - """ - Fixture. - """ return logistic_regression.fit(X_train, y_train) @pytest.fixture(scope="function") def fitted_tree(X_train, y_train, decision_tree_classifier): - """ - Fixture. - """ return decision_tree_classifier.fit(X_train, y_train) diff --git a/tests/feature_elimination/test_feature_elimination.py b/tests/feature_elimination/test_feature_elimination.py index fe8f3d8..d6e6549 100644 --- a/tests/feature_elimination/test_feature_elimination.py +++ b/tests/feature_elimination/test_feature_elimination.py @@ -16,9 +16,6 @@ @pytest.fixture(scope="function") def X(): - """ - Fixture for X. - """ return pd.DataFrame( { "col_1": [1, 1, 1, 1, 1, 1, 1, 0], @@ -31,46 +28,32 @@ def X(): @pytest.fixture(scope="function") def y(): - """ - Fixture for y. - """ return pd.Series([1, 0, 1, 0, 1, 0, 1, 0], index=[1, 2, 3, 4, 5, 6, 7, 8]) @pytest.fixture(scope="function") def sample_weight(): - """ - Fixture for sample_weight. - """ return pd.Series([1, 1, 1, 1, 1, 1, 1, 1], index=[1, 2, 3, 4, 5, 6, 7, 8]) @pytest.fixture(scope="function") def groups(): - """ - Fixture for groups. - """ return pd.Series(["grp1", "grp1", "grp1", "grp1", "grp2", "grp2", "grp2", "grp2"], index=[1, 2, 3, 4, 5, 6, 7, 8]) @pytest.fixture(scope="function") def XGBoost_classifier(random_state): - """This fixture allows to reuse the import of the XGBClassifier class across different tests.""" model = XGBClassifier(n_estimators=200, max_depth=3, random_state=random_state) return model @pytest.fixture(scope="function") def XGBoost_regressor(random_state): - """This fixture allows to reuse the import of the XGBRegressor class across different tests.""" model = XGBRegressor(n_estimators=200, max_depth=3, random_state=random_state) return model def test_shap_rfe_regressor(XGBoost_regressor, random_state): - """ - Test with a Regressor. - """ diabetes = load_diabetes() X = pd.DataFrame(diabetes.data, columns=diabetes.feature_names) y = diabetes.target @@ -85,9 +68,6 @@ def test_shap_rfe_regressor(XGBoost_regressor, random_state): def test_shap_rfe_randomized_search(X, y, randomized_search_decision_tree_classifier, random_state): - """ - Test with RandomizedSearchCV. - """ search = randomized_search_decision_tree_classifier shap_elimination = ShapRFECV(search, step=0.8, cv=2, scoring="roc_auc", n_jobs=4, random_state=random_state) report = shap_elimination.fit_compute(X, y) @@ -113,9 +93,6 @@ def test_shap_rfe_multi_class(X, y, decision_tree_classifier, random_state): def test_shap_rfe(X, y, sample_weight, decision_tree_classifier, random_state): - """ - Test with ShapRFECV. 
- """ shap_elimination = ShapRFECV( decision_tree_classifier, random_state=random_state, @@ -131,9 +108,6 @@ def test_shap_rfe(X, y, sample_weight, decision_tree_classifier, random_state): def test_shap_rfe_group_cv(X, y, groups, sample_weight, decision_tree_classifier, random_state): - """ - Test ShapRFECV with StratifiedGroupKFold. - """ cv = StratifiedGroupKFold(n_splits=2, shuffle=True, random_state=random_state) shap_elimination = ShapRFECV( decision_tree_classifier, @@ -152,9 +126,6 @@ def test_shap_rfe_group_cv(X, y, groups, sample_weight, decision_tree_classifier def test_shap_pipeline_error(X, y, decision_tree_classifier, random_state): - """ - Test with ShapRFECV for pipelines. - """ model = Pipeline( [ ("scaler", StandardScaler()), @@ -174,9 +145,6 @@ def test_shap_pipeline_error(X, y, decision_tree_classifier, random_state): def test_shap_rfe_linear_model(X, y, random_state): - """ - Test ShapRFECV with linear model. - """ model = LogisticRegression(C=1, random_state=random_state) shap_elimination = ShapRFECV(model, random_state=random_state, step=1, cv=2, scoring="roc_auc", n_jobs=4) report = shap_elimination.fit_compute(X, y) @@ -186,9 +154,6 @@ def test_shap_rfe_linear_model(X, y, random_state): def test_shap_rfe_svm(X, y, random_state): - """ - Test with ShapRFECV with SVM. - """ model = SVC(C=1, kernel="linear", probability=True, random_state=random_state) shap_elimination = ShapRFECV(model, random_state=random_state, step=1, cv=2, scoring="roc_auc", n_jobs=4) shap_elimination = shap_elimination.fit(X, y) @@ -199,9 +164,6 @@ def test_shap_rfe_svm(X, y, random_state): def test_shap_rfe_cols_to_keep(X, y, decision_tree_classifier, random_state): - """ - Test for shap_rfe_cv with features to keep parameter. - """ shap_elimination = ShapRFECV( decision_tree_classifier, random_state=random_state, @@ -219,9 +181,6 @@ def test_shap_rfe_cols_to_keep(X, y, decision_tree_classifier, random_state): def test_shap_rfe_randomized_search_cols_to_keep(X, y, randomized_search_decision_tree_classifier, random_state): - """ - Test with ShapRFECV with column to keep param. - """ search = randomized_search_decision_tree_classifier shap_elimination = ShapRFECV(search, step=0.8, cv=2, scoring="roc_auc", n_jobs=4, random_state=random_state) report = shap_elimination.fit_compute(X, y, columns_to_keep=["col_2", "col_3"]) @@ -232,9 +191,6 @@ def test_shap_rfe_randomized_search_cols_to_keep(X, y, randomized_search_decisio def test_calculate_number_of_features_to_remove(): - """ - Test with ShapRFECV with n features to remove. - """ assert 3 == ShapRFECV._calculate_number_of_features_to_remove( current_num_of_features=10, num_features_to_remove=3, min_num_features_to_keep=5 ) @@ -250,9 +206,6 @@ def test_calculate_number_of_features_to_remove(): def test_shap_automatic_num_feature_selection(decision_tree_classifier, random_state): - """ - Test automatic num feature selection methods - """ X = pd.DataFrame( { "col_1": [1, 0, 1, 0, 1, 0, 1, 0], @@ -284,9 +237,6 @@ def test_shap_automatic_num_feature_selection(decision_tree_classifier, random_s def test_get_feature_shap_values_per_fold(X, y, decision_tree_classifier, random_state): - """ - Test with ShapRFECV with features per fold. 
- """ shap_elimination = ShapRFECV(decision_tree_classifier, scoring="roc_auc", random_state=random_state) ( shap_values, @@ -369,9 +319,6 @@ def test_shap_rfe_same_features_are_kept_after_each_run(random_state_1234): def test_shap_rfe_penalty_factor(X, y, decision_tree_classifier, random_state): - """ - Test ShapRFECV with shap_variance_penalty_factor - """ shap_elimination = ShapRFECV( decision_tree_classifier, random_state=random_state, @@ -389,9 +336,6 @@ def test_shap_rfe_penalty_factor(X, y, decision_tree_classifier, random_state): def test_complex_dataset(complex_data, complex_lightgbm, random_state_1): - """ - Test on complex dataset. - """ X, y = complex_data param_grid = { @@ -410,9 +354,6 @@ def test_complex_dataset(complex_data, complex_lightgbm, random_state_1): def test_shap_rfe_early_stopping_lightGBM(complex_data, random_state): - """ - Test EarlyStoppingShapRFECV with a LGBMClassifier. - """ model = LGBMClassifier(n_estimators=200, max_depth=3, random_state=random_state) X, y = complex_data @@ -433,9 +374,6 @@ def test_shap_rfe_early_stopping_lightGBM(complex_data, random_state): def test_shap_rfe_early_stopping_XGBoost(XGBoost_classifier, complex_data, random_state): - """ - Test EarlyStoppingShapRFECV with a LGBMClassifier. - """ X, y = complex_data X["f1_categorical"] = X["f1_categorical"].astype(float) @@ -457,9 +395,6 @@ def test_shap_rfe_early_stopping_XGBoost(XGBoost_classifier, complex_data, rando # def test_shap_rfe_early_stopping_CatBoost(complex_data_with_categorical, catboost_classifier, random_state): - """ - Test EarlyStoppingShapRFECV with a CatBoostClassifier. - """ X, y = complex_data_with_categorical shap_elimination = EarlyStoppingShapRFECV( @@ -479,9 +414,6 @@ def test_shap_rfe_early_stopping_CatBoost(complex_data_with_categorical, catboos def test_shap_rfe_randomized_search_early_stopping_lightGBM(complex_data, random_state): - """ - Test EarlyStoppingShapRFECV with RandomizedSearchCV and a LGBMClassifier on complex dataset. - """ model = LGBMClassifier(n_estimators=200, random_state=random_state) X, y = complex_data @@ -509,9 +441,6 @@ def test_shap_rfe_randomized_search_early_stopping_lightGBM(complex_data, random def test_get_feature_shap_values_per_fold_early_stopping_lightGBM(complex_data, random_state): - """ - Test with ShapRFECV with features per fold. - """ model = LGBMClassifier(n_estimators=200, max_depth=3, random_state=random_state) X, y = complex_data y = preprocess_labels(y, y_name="y", index=X.index) @@ -538,9 +467,6 @@ def test_get_feature_shap_values_per_fold_early_stopping_lightGBM(complex_data, def test_get_feature_shap_values_per_fold_early_stopping_CatBoost( complex_data_with_categorical, catboost_classifier, random_state ): - """ - Test with ShapRFECV with features per fold. - """ X, y = complex_data_with_categorical y = preprocess_labels(y, y_name="y", index=X.index) @@ -564,9 +490,6 @@ def test_get_feature_shap_values_per_fold_early_stopping_CatBoost( def test_get_feature_shap_values_per_fold_early_stopping_XGBoost(XGBoost_classifier, complex_data, random_state): - """ - Test with ShapRFECV with features per fold. 
- """ X, y = complex_data y = preprocess_labels(y, y_name="y", index=X.index) @@ -590,7 +513,6 @@ def test_get_feature_shap_values_per_fold_early_stopping_XGBoost(XGBoost_classif def test_EarlyStoppingShapRFECV_no_categorical(complex_data, random_state): - """Test EarlyStoppingShapRFECV when no categorical features are present.""" model = LGBMClassifier(n_estimators=50, max_depth=3, num_leaves=3, random_state=random_state) shap_elimination = EarlyStoppingShapRFECV( diff --git a/tests/interpret/test_model_interpret.py b/tests/interpret/test_model_interpret.py index 7cb352f..55a0e37 100644 --- a/tests/interpret/test_model_interpret.py +++ b/tests/interpret/test_model_interpret.py @@ -7,9 +7,6 @@ @pytest.fixture(scope="function") def expected_feature_importance(): - """ - Fixture. - """ return pd.DataFrame( { "mean_abs_shap_value_test": [0.5, 0.0, 0.0], @@ -23,9 +20,6 @@ def expected_feature_importance(): @pytest.fixture(scope="function") def expected_feature_importance_lin_models(): - """ - Test. - """ return pd.DataFrame( { "mean_abs_shap_value_test": [0.4, 0.0, 0.0], @@ -38,9 +32,6 @@ def expected_feature_importance_lin_models(): def test_shap_interpret(fitted_tree, X_train, y_train, X_test, y_test, expected_feature_importance, random_state): - """ - Test. - """ class_names = ["neg", "pos"] shap_interpret = ShapModelInterpreter(fitted_tree, random_state=random_state) @@ -82,9 +73,6 @@ def test_shap_interpret(fitted_tree, X_train, y_train, X_test, y_test, expected_ def test_shap_interpret_lin_models( fitted_logistic_regression, X_train, y_train, X_test, y_test, expected_feature_importance_lin_models, random_state ): - """ - Test. - """ class_names = ["neg", "pos"] shap_interpret = ShapModelInterpreter(fitted_logistic_regression, random_state=random_state) @@ -127,9 +115,6 @@ def test_shap_interpret_lin_models( def test_shap_interpret_fit_compute_lin_models( fitted_logistic_regression, X_train, y_train, X_test, y_test, expected_feature_importance_lin_models, random_state ): - """ - Test. - """ class_names = ["neg", "pos"] shap_interpret = ShapModelInterpreter(fitted_logistic_regression, random_state=random_state) @@ -151,9 +136,6 @@ def test_shap_interpret_fit_compute_lin_models( def test_shap_interpret_fit_compute( fitted_tree, X_train, y_train, X_test, y_test, expected_feature_importance, random_state ): - """ - Test. - """ class_names = ["neg", "pos"] shap_interpret = ShapModelInterpreter(fitted_tree, random_state=random_state) @@ -171,9 +153,6 @@ def test_shap_interpret_fit_compute( def test_shap_interpret_complex_data(complex_data_split_with_categorical, complex_fitted_lightgbm, random_state): - """ - Test lightgbm. - """ class_names = ["neg", "pos"] X_train, X_test, y_train, y_test = complex_data_split_with_categorical diff --git a/tests/interpret/test_shap_dependence.py b/tests/interpret/test_shap_dependence.py index f147e2c..181f77a 100644 --- a/tests/interpret/test_shap_dependence.py +++ b/tests/interpret/test_shap_dependence.py @@ -15,9 +15,6 @@ @pytest.fixture(scope="function") def X_y(): - """ - Fixture. - """ return ( pd.DataFrame( [ @@ -44,9 +41,6 @@ def X_y(): @pytest.fixture(scope="function") def expected_shap_vals(): - """ - Fixture. - """ return pd.DataFrame( [ [0.176667, 0.005833, 0.284167], @@ -70,9 +64,6 @@ def expected_shap_vals(): @pytest.fixture(scope="function") def model(X_y, random_state): - """ - Fixture. 
- """ X, y = X_y model = RandomForestClassifier(random_state=random_state, n_estimators=10, max_depth=5) @@ -83,9 +74,6 @@ def model(X_y, random_state): @pytest.fixture(scope="function") def expected_feat_importances(): - """ - Test. - """ return pd.DataFrame( { "Feature Name": {0: 2, 1: 1, 2: 0}, @@ -96,17 +84,11 @@ def expected_feat_importances(): def test_not_fitted(model, random_state): - """ - Test. - """ plotter = DependencePlotter(model, random_state) assert plotter.fitted is False def test_fit_complex(complex_data_split, complex_fitted_lightgbm, random_state): - """ - Test. - """ _, X_test, _, y_test = complex_data_split plotter = DependencePlotter(complex_fitted_lightgbm, random_state=random_state) @@ -122,9 +104,6 @@ def test_fit_complex(complex_data_split, complex_fitted_lightgbm, random_state): def test_get_X_y_shap_with_q_cut_normal(X_y, model, random_state): - """ - Test. - """ X, y = X_y plotter = DependencePlotter(model, random_state).fit(X, y) @@ -156,44 +135,29 @@ def test_get_X_y_shap_with_q_cut_normal(X_y, model, random_state): def test_get_X_y_shap_with_q_cut_unfitted(model, random_state): - """ - Test. - """ plotter = DependencePlotter(model, random_state) with pytest.raises(NotFittedError): plotter._get_X_y_shap_with_q_cut(0) def test_get_X_y_shap_with_q_cut_input(X_y, model, random_state): - """ - Test. - """ plotter = DependencePlotter(model, random_state).fit(X_y[0], X_y[1]) with pytest.raises(ValueError): plotter._get_X_y_shap_with_q_cut("not a feature") def test_plot_normal(X_y, model, random_state): - """ - Test. - """ plotter = DependencePlotter(model, random_state).fit(X_y[0], X_y[1]) _ = plotter.plot(feature=0) def test_plot_class_names(X_y, model, random_state): - """ - Test. - """ plotter = DependencePlotter(model, random_state).fit(X_y[0], X_y[1], class_names=["a", "b"]) _ = plotter.plot(feature=0) assert plotter.class_names == ["a", "b"] def test_plot_input(X_y, model, random_state): - """ - Test. - """ plotter = DependencePlotter(model, random_state).fit(X_y[0], X_y[1]) with pytest.raises(ValueError): plotter.plot(feature="not a feature") diff --git a/tests/sample_similarity/test_resemblance_model.py b/tests/sample_similarity/test_resemblance_model.py index ad75111..48a37d9 100644 --- a/tests/sample_similarity/test_resemblance_model.py +++ b/tests/sample_similarity/test_resemblance_model.py @@ -14,24 +14,15 @@ @pytest.fixture(scope="function") def X1(): - """ - Fixture. - """ return pd.DataFrame({"col_1": [1, 1, 1, 1], "col_2": [0, 0, 0, 0], "col_3": [0, 0, 0, 0]}, index=[1, 2, 3, 4]) @pytest.fixture(scope="function") def X2(): - """ - Fixture. - """ return pd.DataFrame({"col_1": [0, 0, 0, 0], "col_2": [0, 0, 0, 0], "col_3": [0, 0, 0, 0]}, index=[1, 2, 3, 4]) def test_base_class(X1, X2, decision_tree_classifier, random_state): - """ - Test. - """ rm = BaseResemblanceModel(decision_tree_classifier, test_prc=0.5, n_jobs=1, random_state=random_state) actual_report, train_score, test_score = rm.fit_compute(X1, X2, return_scores=True) @@ -61,9 +52,6 @@ def test_base_class(X1, X2, decision_tree_classifier, random_state): def test_base_class_lin_models(X1, X2, logistic_regression, random_state): - """ - Test. - """ # Test class BaseResemblanceModel for linear models. rm = BaseResemblanceModel(logistic_regression, test_prc=0.5, n_jobs=1, random_state=random_state) @@ -94,9 +82,6 @@ def test_base_class_lin_models(X1, X2, logistic_regression, random_state): def test_shap_resemblance_class(X1, X2, decision_tree_classifier, random_state): - """ - Test. 
- """ rm = SHAPImportanceResemblance(decision_tree_classifier, test_prc=0.5, n_jobs=1, random_state=random_state) actual_report, train_score, test_score = rm.fit_compute(X1, X2, return_scores=True) @@ -125,9 +110,6 @@ def test_shap_resemblance_class(X1, X2, decision_tree_classifier, random_state): def test_shap_resemblance_class_lin_models(X1, X2, logistic_regression, random_state): - """ - Test. - """ # Test SHAP Resemblance Model for linear models. rm = SHAPImportanceResemblance(logistic_regression, test_prc=0.5, n_jobs=1, random_state=random_state) @@ -159,9 +141,6 @@ def test_shap_resemblance_class_lin_models(X1, X2, logistic_regression, random_s def test_shap_resemblance_class2(complex_data_with_categorical, complex_lightgbm, random_state): - """ - Test. - """ X1, _ = complex_data_with_categorical X2 = X1.copy() X2["f4"] = X2["f4"] + 100 @@ -198,9 +177,6 @@ def test_shap_resemblance_class2(complex_data_with_categorical, complex_lightgbm def test_permutation_resemblance_class(X1, X2, decision_tree_classifier, random_state): - """ - Test. - """ rm = PermutationImportanceResemblance( decision_tree_classifier, test_prc=0.5, n_jobs=1, random_state=random_state, iterations=20 ) @@ -230,9 +206,6 @@ def test_permutation_resemblance_class(X1, X2, decision_tree_classifier, random_ def test_base_class_same_data(X1, decision_tree_classifier, random_state): - """ - Test. - """ rm = BaseResemblanceModel(decision_tree_classifier, test_prc=0.5, n_jobs=1, random_state=random_state) actual_report, train_score, test_score = rm.fit_compute(X1, X1, return_scores=True) diff --git a/tests/utils/test_base_class.py b/tests/utils/test_base_class.py index 892dc5a..6539385 100644 --- a/tests/utils/test_base_class.py +++ b/tests/utils/test_base_class.py @@ -4,9 +4,6 @@ def test_fitted_exception(fitted_tree, X_train, y_train, X_test, y_test, random_state): - """ - Test if fitted works.. - """ class_names = ["neg", "pos"] shap_interpret = ShapModelInterpreter(fitted_tree, random_state=random_state) @@ -24,9 +21,6 @@ def test_fitted_exception(fitted_tree, X_train, y_train, X_test, y_test, random_ @pytest.mark.xfail def test_fitted_exception_is_raised(fitted_tree, random_state): - """ - Test if fitted works fails when not fitted. - """ shap_interpret = ShapModelInterpreter(fitted_tree, random_state=random_state) shap_interpret._check_if_fitted diff --git a/tests/utils/test_utils_array_funcs.py b/tests/utils/test_utils_array_funcs.py index b9ca28d..769fbe6 100644 --- a/tests/utils/test_utils_array_funcs.py +++ b/tests/utils/test_utils_array_funcs.py @@ -7,51 +7,33 @@ @pytest.fixture(scope="function") def expected_df_2d(): - """ - Fixture. - """ return pd.DataFrame({0: [1, 2], 1: [2, 3], 2: [3, 4]}) @pytest.fixture(scope="function") def expected_df(): - """ - Fixture. - """ return pd.DataFrame({0: [1, 2, 3]}) def test_assure_pandas_df_list(expected_df): - """ - Test. - """ x = [1, 2, 3] x_df = assure_pandas_df(x) pd.testing.assert_frame_equal(x_df, expected_df) def test_assure_pandas_df_list_of_lists(expected_df_2d): - """ - Test. - """ x = [[1, 2, 3], [2, 3, 4]] x_df = assure_pandas_df(x) pd.testing.assert_frame_equal(x_df, expected_df_2d) def test_assure_pandas_df_series(expected_df): - """ - Test. - """ x = pd.Series([1, 2, 3]) x_df = assure_pandas_df(x) pd.testing.assert_frame_equal(x_df, expected_df) def test_assure_pandas_df_array(expected_df, expected_df_2d): - """ - Test. 
- """ x = np.array([[1, 2, 3], [2, 3, 4]], dtype="int64") x_df = assure_pandas_df(x) pd.testing.assert_frame_equal(x_df, expected_df_2d) @@ -62,18 +44,12 @@ def test_assure_pandas_df_array(expected_df, expected_df_2d): def test_assure_pandas_df_df(expected_df_2d): - """ - Test. - """ x = pd.DataFrame([[1, 2, 3], [2, 3, 4]]) x_df = assure_pandas_df(x) pd.testing.assert_frame_equal(x_df, expected_df_2d) def test_assure_pandas_df_types(): - """ - Test. - """ with pytest.raises(TypeError): assure_pandas_df("Test") with pytest.raises(TypeError): @@ -81,9 +57,6 @@ def test_assure_pandas_df_types(): def test_preprocess_labels(): - """ - Test. - """ y1 = pd.Series([1, 0, 1, 0, 1]) index_1 = np.array([5, 4, 3, 2, 1]) @@ -105,9 +78,6 @@ def test_preprocess_labels(): def test_preprocess_data(): - """ - Test. - """ X1 = pd.DataFrame({"cat": ["a", "b", "c"], "missing": [1, np.nan, 2], "num_1": [1, 2, 3]}) target_column_names_X1 = ["1", "2", "3"]