apply suggestions

probabl-ai · Mar 4, 2025 · eccc1e7
1 parent c25836b
commit eccc1e7
Show file tree

Hide file tree

Showing 2 changed files with 28 additions and 42 deletions.
diff --git a/skore/src/skore/sklearn/_estimator/feature_importance_accessor.py b/skore/src/skore/sklearn/_estimator/feature_importance_accessor.py
@@ -112,11 +112,13 @@ def coefficients(self) -> pd.DataFrame:
 
     @available_if(_check_has_feature_importances())
     def mean_decrease_impurity(self):
-        """Retrieve the mean decrease impurity of a forest model.
+        """Retrieve the mean decrease impurity (MDI) of a tree-based model.
 
         This method is available for estimators that expose a `feature_importances_`
         attribute. See for example the
         `sklearn.ensemble.GradientBoostingClassifier documentation <https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingClassifier.html#sklearn.ensemble.GradientBoostingClassifier.feature_importances_>`_.
+        In particular, note that the MDI is computed at fit time, i.e. using the
+        training data.
 
         Examples
         --------
@@ -126,7 +128,7 @@ def mean_decrease_impurity(self):
         >>> from skore import EstimatorReport
         >>> X, y = make_classification(n_features=5, random_state=42)
         >>> X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
-        >>> forest = RandomForestClassifier(random_state=0)
+        >>> forest = RandomForestClassifier(n_estimators=5, random_state=0)
         >>> report = EstimatorReport(
         ...     forest,
         ...     X_train=X_train,

diff --git a/skore/tests/unit/sklearn/estimator/feature_importance/test_mean_decrease_impurity.py b/skore/tests/unit/sklearn/estimator/feature_importance/test_mean_decrease_impurity.py
@@ -10,39 +10,17 @@
 from skore import EstimatorReport
 
 
-def test(classification_data):
-    X, y = classification_data
-    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
-
-    forest = RandomForestClassifier(random_state=0)
-    report = EstimatorReport(
-        forest, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test
-    )
-
-    result = report.feature_importance.mean_decrease_impurity()
-
-    assert result.shape == (5, 1)
-    assert result.index.tolist() == [
-        "Feature #0",
-        "Feature #1",
-        "Feature #2",
-        "Feature #3",
-        "Feature #4",
-    ]
-    assert result.columns.tolist() == ["Mean decrease impurity"]
-
-
 @pytest.mark.parametrize(
     "data, estimator, expected_shape",
     [
         (
             make_classification(n_features=5, random_state=42),
-            RandomForestClassifier(random_state=0),
+            RandomForestClassifier(n_estimators=2, random_state=0),
             (5, 1),
         ),
         (
             make_classification(n_features=5, random_state=42),
-            RandomForestClassifier(random_state=0),
+            RandomForestClassifier(n_estimators=2, random_state=0),
             (5, 1),
         ),
         (
@@ -53,7 +31,7 @@ def test(classification_data):
                 n_informative=3,
                 random_state=42,
             ),
-            RandomForestClassifier(random_state=0),
+            RandomForestClassifier(n_estimators=2, random_state=0),
             (5, 1),
         ),
         (
@@ -64,17 +42,21 @@ def test(classification_data):
                 n_informative=3,
                 random_state=42,
             ),
-            make_pipeline(StandardScaler(), RandomForestClassifier(random_state=0)),
+            make_pipeline(
+                StandardScaler(), RandomForestClassifier(n_estimators=2, random_state=0)
+            ),
             (5, 1),
         ),
         (
             make_classification(n_features=5, random_state=42),
-            make_pipeline(StandardScaler(), RandomForestClassifier(random_state=0)),
+            make_pipeline(
+                StandardScaler(), RandomForestClassifier(n_estimators=2, random_state=0)
+            ),
             (5, 1),
         ),
         (
             make_regression(n_features=5, n_targets=3, random_state=42),
-            RandomForestRegressor(random_state=0),
+            RandomForestRegressor(n_estimators=2, random_state=0),
             (5, 1),
         ),
     ],
@@ -101,8 +83,10 @@ def test_numpy_arrays(data, estimator, expected_shape):
 @pytest.mark.parametrize(
     "estimator",
     [
-        RandomForestClassifier(random_state=0),
-        make_pipeline(StandardScaler(), RandomForestClassifier(random_state=0)),
+        RandomForestClassifier(n_estimators=2, random_state=0),
+        make_pipeline(
+            StandardScaler(), RandomForestClassifier(n_estimators=2, random_state=0)
+        ),
     ],
 )
 def test_pandas_dataframe(estimator):
@@ -134,15 +118,15 @@ def _make_estimator_param(estimator):
     [
         _make_estimator_param(estimator)
         for estimator in [
-            sklearn.ensemble.AdaBoostClassifier(),
-            sklearn.ensemble.AdaBoostRegressor(),
-            sklearn.ensemble.ExtraTreesClassifier(),
-            sklearn.ensemble.ExtraTreesRegressor(),
-            sklearn.ensemble.GradientBoostingClassifier(),
-            sklearn.ensemble.GradientBoostingRegressor(),
-            sklearn.ensemble.RandomForestClassifier(),
-            sklearn.ensemble.RandomForestRegressor(),
-            sklearn.ensemble.RandomTreesEmbedding(),
+            sklearn.ensemble.AdaBoostClassifier(n_estimators=2),
+            sklearn.ensemble.AdaBoostRegressor(n_estimators=2),
+            sklearn.ensemble.ExtraTreesClassifier(n_estimators=2),
+            sklearn.ensemble.ExtraTreesRegressor(n_estimators=2),
+            sklearn.ensemble.GradientBoostingClassifier(n_estimators=2),
+            sklearn.ensemble.GradientBoostingRegressor(n_estimators=2),
+            sklearn.ensemble.RandomForestClassifier(n_estimators=2),
+            sklearn.ensemble.RandomForestRegressor(n_estimators=2),
+            sklearn.ensemble.RandomTreesEmbedding(n_estimators=2),
             sklearn.tree.DecisionTreeClassifier(),
             sklearn.tree.DecisionTreeRegressor(),
             sklearn.tree.ExtraTreeClassifier(),
@@ -188,7 +172,7 @@ def test_pipeline_with_transformer(regression_data):
 
     model = make_pipeline(
         PolynomialFeatures(degree=2, interaction_only=True),
-        RandomForestRegressor(random_state=0),
+        RandomForestRegressor(n_estimators=2, random_state=0),
     )
 
     report = EstimatorReport(