feat: Add ComparisonReport to compare instances of EstimatorReport (probabl-ai#1286)

- [x] Rename to `ComparisonReport`
- [x] Rebase on top of probabl-ai#1239 and adapt
- [x] Raise if `report.metrics.accuracy(data_source="train")` is called with at least one EstimatorReport that does not have training data (see the sketch below this list)
- [x] Test
- [x] Docstrings
    - [x] MetricsAccessor
- [x] Move index column "#0" in front of each metric
- [x] Pass report names in comparator
- [ ] ~Update plots legend~ (see probabl-ai#1309)
  - The actual `RocCurveDisplay` needs a full refactor, split by use case: estimator report, cross-validation report, and finally comparison report. Each of these use cases covers two scenarios, binary and multi-class classification. Otherwise, it will be unmaintainable.
- [ ] ~Investigate missing metrics in `report_metrics`~ **(deferred to
future PR)**
  - The logic is split between `report_metrics` and `available_if`; it should be merged (ideally everything in `available_if`?)
- [ ] ~Refactor to make `CrossValidationReport` depend on it~
**(deferred to future PR)**
- [x] ~Change EstimatorReport `repr`?~ Issue
probabl-ai#1293

Closes probabl-ai#1245 
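
A minimal sketch of the guard described in the checklist; names are illustrative (`report_without_train` stands for an `EstimatorReport` built without `X_train`/`y_train`):

```python
from skore import ComparisonReport

# One of the two reports was built without training data:
comparison = ComparisonReport(reports=[report_with_train, report_without_train])

# Per the checklist item above, requesting train metrics raises an error:
comparison.metrics.accuracy(data_source="train")  # raises
```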

Co-authored-by: Auguste <[email protected]>
Co-authored-by: Sylvain Combettes <[email protected]>
3 people authored and glemaitre committed Feb 20, 2025
1 parent 8711460 commit 30ed335
Showing 20 changed files with 1,986 additions and 41 deletions.
8 changes: 5 additions & 3 deletions README.md
@@ -33,7 +33,8 @@ skore is a Python open-source library designed to help data scientists apply rec
- `train_test_split` supercharged with methodological guidance: the API is the same as scikit-learn's, but skore displays warnings when applicable. For example, it warns you against shuffling time series data or when you have class imbalance.
- **Evaluate**: automated insightful reports.
- `EstimatorReport`: feed your scikit-learn compatible estimator and dataset, and it generates recommended metrics and plots to help you analyze your estimator. All these are computed and generated for you in 1 line of code. Under the hood, we use efficient caching to make the computations blazing fast.
- `CrossValidationReport`: Get a skore estimator report for each fold of your cross-validation.
- `CrossValidationReport`: get a skore estimator report for each fold of your cross-validation.
- `ComparisonReport`: benchmark your skore estimator reports.
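
A minimal sketch of the new entry point, assuming `report_a` and `report_b` are two `EstimatorReport` instances built on the same test set:

```python
from skore import ComparisonReport

# Benchmark the two estimator reports against each other:
comparison = ComparisonReport(reports=[report_a, report_b])
comparison.metrics.report_metrics()
```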

## What's next?

@@ -91,7 +92,7 @@ You can find information on the latest version [here](https://anaconda.org/conda
```python
# Display the ROC curve that was generated for you:
roc_plot = cv_report.metrics.roc()
roc_plot
roc_plot.plot()
```

1. Store your results for safe-keeping.
@@ -109,7 +110,8 @@ You can find information on the latest version [here](https://anaconda.org/conda

```python
# Get your results
df_get = my_project.put("df_cv_report_metrics")
df_get = my_project.get("df_cv_report_metrics")
df_get
```

Learn more in our [documentation](https://skore.probabl.ai).
68 changes: 57 additions & 11 deletions examples/getting_started/plot_skore_getting_started.py
@@ -17,6 +17,8 @@
# * :class:`skore.CrossValidationReport`: get an insightful report on your
# cross-validation results
#
# * :class:`skore.ComparisonReport`: benchmark your skore estimator reports
#
# * :func:`skore.train_test_split`: get diagnostics when splitting your data
#
# #. Track your ML/DS results using skore's :class:`~skore.Project`
@@ -50,33 +52,33 @@
X, y = make_classification(n_classes=2, n_samples=100_000, n_informative=4)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = LogisticRegression(random_state=0)
log_reg = LogisticRegression(random_state=0)

est_report = EstimatorReport(
clf, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test
log_reg_report = EstimatorReport(
log_reg, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test
)

# %%
# Now, we can display the help tree to see all the insights that are available to us
# (skore detected that we are doing binary classification):

# %%
est_report.help()
log_reg_report.help()

# %%
# We can get the report metrics that were computed for us:

# %%
df_est_report_metrics = est_report.metrics.report_metrics()
df_est_report_metrics
df_log_reg_report_metrics = log_reg_report.metrics.report_metrics()
df_log_reg_report_metrics

# %%
# We can also plot the ROC curve that was generated for us:

# %%
import matplotlib.pyplot as plt

roc_plot = est_report.metrics.roc()
roc_plot = log_reg_report.metrics.roc()
roc_plot.plot()
plt.tight_layout()

@@ -97,7 +99,7 @@
# %%
from skore import CrossValidationReport

cv_report = CrossValidationReport(clf, X, y, cv_splitter=5)
cv_report = CrossValidationReport(log_reg, X, y, cv_splitter=5)

# %%
# We display the cross-validation report helper:
@@ -125,16 +127,60 @@
# for example the first fold:

# %%
est_report_fold = cv_report.estimator_reports_[0]
df_report_metrics_fold = est_report_fold.metrics.report_metrics()
df_report_metrics_fold
log_reg_report_fold = cv_report.estimator_reports_[0]
df_log_reg_report_fold_metrics = log_reg_report_fold.metrics.report_metrics()
df_log_reg_report_fold_metrics

# %%
# .. seealso::
#
# For more information about the motivation and usage of
# :class:`skore.CrossValidationReport`, see :ref:`example_use_case_employee_salaries`.

# %%
# Comparing estimator reports
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# :class:`skore.ComparisonReport` enables users to compare several estimator reports
# (corresponding to several estimators) on the same test set, as in a benchmark of
# estimators.
#
# Apart from the previous ``log_reg_report``, let us define another estimator report:

# %%
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(max_depth=2, random_state=0)
rf_report = EstimatorReport(
rf, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test
)

# %%
# Now, let us compare these two estimator reports, which were applied to the exact
# same test set:

# %%
from skore import ComparisonReport

comparator = ComparisonReport(reports=[log_reg_report, rf_report])

# %%
# As with the :class:`~skore.EstimatorReport` and the
# :class:`~skore.CrossValidationReport`, we have a helper:

# %%
comparator.help()

# %%
# Let us display the result of our benchmark:

# %%
benchmark_metrics = comparator.metrics.report_metrics()
benchmark_metrics

# %%
# The benchmark gathers the metrics of both estimator reports in a single table.

# %%
# Train-test split with skore
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^
5 changes: 5 additions & 0 deletions examples/use_cases/plot_employee_salaries.py
@@ -298,6 +298,11 @@ def periodic_spline_transformer(period, n_splines=None, degree=3):
)
results

# %%
# .. note::
# We could have also used the :class:`skore.ComparisonReport` to compare estimator
# reports.
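#
#   A minimal sketch, with ``report_1`` and ``report_2`` as illustrative names for
#   two estimator reports built on the same test set::
#
#       from skore import ComparisonReport
#
#       ComparisonReport(reports=[report_1, report_2]).metrics.report_metrics()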

# %%
#
# Finally, we can even get the individual :class:`~skore.EstimatorReport` for each fold
2 changes: 2 additions & 0 deletions skore/src/skore/__init__.py
@@ -8,6 +8,7 @@
from skore._config import config_context, get_config, set_config
from skore.project import Project, open
from skore.sklearn import (
ComparisonReport,
CrossValidationReport,
EstimatorReport,
PrecisionRecallCurveDisplay,
@@ -20,6 +21,7 @@

__all__ = [
"CrossValidationReport",
"ComparisonReport",
"EstimatorReport",
"PrecisionRecallCurveDisplay",
"PredictionErrorDisplay",
2 changes: 2 additions & 0 deletions skore/src/skore/sklearn/__init__.py
@@ -1,5 +1,6 @@
"""Enhance `sklearn` functions."""

from skore.sklearn._comparison import ComparisonReport
from skore.sklearn._cross_validation import CrossValidationReport
from skore.sklearn._estimator import EstimatorReport
from skore.sklearn._plot import (
@@ -13,6 +14,7 @@
"train_test_split",
"CrossValidationReport",
"EstimatorReport",
"ComparisonReport",
"RocCurveDisplay",
"PrecisionRecallCurveDisplay",
"PredictionErrorDisplay",
2 changes: 1 addition & 1 deletion skore/src/skore/sklearn/_base.py
@@ -124,7 +124,7 @@ def _get_attributes_for_help(self):

def _create_help_tree(self):
"""Create a rich Tree with the available tools and accessor methods."""
tree = Tree("report")
tree = Tree(self.__class__.__name__)
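# (the root is labelled with the concrete report class name, e.g. "ComparisonReport")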

# Add accessor methods first
for accessor_attr, config in self._ACCESSOR_CONFIG.items():
7 changes: 7 additions & 0 deletions skore/src/skore/sklearn/_comparison/__init__.py
@@ -0,0 +1,7 @@
from skore.externals._pandas_accessors import _register_accessor
from skore.sklearn._comparison.metrics_accessor import _MetricsAccessor
from skore.sklearn._comparison.report import ComparisonReport

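# Pandas-style accessor registration: attach `_MetricsAccessor` to
# `ComparisonReport` so that `report.metrics` exposes the metrics methods.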
_register_accessor("metrics", ComparisonReport)(_MetricsAccessor)

__all__ = ["ComparisonReport"]
