From 357d8e1752abce3f99cbcf500cd68c31abc26568 Mon Sep 17 00:00:00 2001 From: Martin Hirzel Date: Fri, 12 Aug 2022 16:30:24 -0400 Subject: [PATCH] =?UTF-8?q?In=2006=5Fmultiobj,=20trying=20to=20exit=20more?= =?UTF-8?q?=20cleanly,=20and=20using=20symmetric=20disp=E2=80=A6=20(#1164)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * In 06_multiobj, trying to exit more cleanly, and using symmetric disparate impact. In 05_bias, minor wording tweaks. Signed-off-by: Martin Hirzel * Trying sys.exit(0) to exit 06_multobj "successfully" during tests. Signed-off-by: Martin Hirzel * Using if-statements to skip instead of exiting. Signed-off-by: Martin Hirzel Signed-off-by: Martin Hirzel --- .github/workflows/build.yml | 8 +- examples/kdd22/05_bias.ipynb | 554 ++++++----- examples/kdd22/06_multobj.ipynb | 1641 ++++++++++++++++++------------- 3 files changed, 1251 insertions(+), 952 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 23671539f..305965451 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -320,7 +320,7 @@ jobs: python-version: 3.8 setup-target: '.[tutorial,test]' test-case: 'test/test_notebooks.py' - nbexcludes: '06_multobj.ipynb' + # nbexcludes: '06_multobj.ipynb' steps: - uses: actions/checkout@v2 @@ -377,17 +377,17 @@ jobs: python-version: 3.7 setup-target: '.[tutorial,test]' test-case: 'test/test_notebooks.py' - nbexcludes: '06_multobj.ipynb' + # nbexcludes: '06_multobj.ipynb' - dir: 'examples/kdd22' python-version: 3.8 setup-target: '.[tutorial,test]' test-case: 'test/test_notebooks.py' - nbexcludes: '06_multobj.ipynb' + # nbexcludes: '06_multobj.ipynb' - dir: 'examples/kdd22' python-version: 3.9 setup-target: '.[tutorial,test]' test-case: 'test/test_notebooks.py' - nbexcludes: '06_multobj.ipynb' + # nbexcludes: '06_multobj.ipynb' steps: - uses: actions/checkout@v2 diff --git a/examples/kdd22/05_bias.ipynb b/examples/kdd22/05_bias.ipynb index 
e5aed167c..8d9f73bdf 100644 --- a/examples/kdd22/05_bias.ipynb +++ b/examples/kdd22/05_bias.ipynb @@ -20,7 +20,7 @@ "distributed with Lale, and shows how to use them either with manual\n", "machine learning or with AutoML.\n", "\n", - "This notebook has following sections:\n", + "This notebook has the following sections:\n", "\n", "- [5.1 Datasets and Fairness Information](#5.1-Dataset-and-Fairness-Information)\n", "- [5.2 Metrics](#5.2-Metrics)\n", @@ -834,8 +834,8 @@ "privileged group.\n", "This is unfair; for instance, it falls short of the 80% rule in US Law.\n", "We can train a model that has less bias than the dataset.\n", - "For instance, a `DummyClassifier` ignores its input and always returns\n", - "the majority label." + "For instance, a [DummyClassifier](https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.dummy_classifier.html#lale.lib.sklearn.dummy_classifier.DummyClassifier)\n", + "ignores its input and always returns the majority label." ] }, { @@ -866,7 +866,7 @@ "The `DummyClassifier` is trivially perfectly fair: irrespective of\n", "whether or not an individual is a member of the privileged group, the\n", "predicted label is always `'good'`.\n", - "On the other hand it is poor at predicting the ground truth labels.\n", + "On the other hand, it is poor at predicting the ground truth labels.\n", "To better evaluate `DummyClassifier` as well as other models, we\n", "create a few more metrics directly from scikit-learn." 
] @@ -966,8 +966,8 @@ " \n", " \n", " 0\n", - " 0.01\n", - " 0.06\n", + " 0.00\n", + " 0.02\n", " 1.00\n", " 0.50\n", " 0.00\n", @@ -977,7 +977,7 @@ " \n", " 1\n", " 0.00\n", - " 0.06\n", + " 0.02\n", " 1.00\n", " 0.50\n", " 0.00\n", @@ -987,7 +987,7 @@ " \n", " 2\n", " 0.00\n", - " 0.04\n", + " 0.02\n", " 1.00\n", " 0.50\n", " 0.00\n", @@ -997,7 +997,7 @@ " \n", " mean\n", " 0.00\n", - " 0.05\n", + " 0.02\n", " 1.00\n", " 0.50\n", " 0.00\n", @@ -1007,7 +1007,7 @@ " \n", " std\n", " 0.00\n", - " 0.01\n", + " 0.00\n", " 0.00\n", " 0.00\n", " 0.00\n", @@ -1020,11 +1020,11 @@ ], "text/plain": [ " fit_time score_time test_disparate impact test_balanced accuracy \\\n", - "0 0.01 0.06 1.00 0.50 \n", - "1 0.00 0.06 1.00 0.50 \n", - "2 0.00 0.04 1.00 0.50 \n", - "mean 0.00 0.05 1.00 0.50 \n", - "std 0.00 0.01 0.00 0.00 \n", + "0 0.00 0.02 1.00 0.50 \n", + "1 0.00 0.02 1.00 0.50 \n", + "2 0.00 0.02 1.00 0.50 \n", + "mean 0.00 0.02 1.00 0.50 \n", + "std 0.00 0.00 0.00 0.00 \n", "\n", " test_recall (unfavorable) test_recall (favorable) test_accuracy \n", "0 0.00 1.00 0.70 \n", @@ -1068,7 +1068,7 @@ "\n", "How well does a simple pipeline perform on this dataset if it does not\n", "use any algorithmic bias mitigators?\n", - "First, let's create a preprocessing pipeline to ensure all columns are\n", + "First, let's create a preprocessing sub-pipeline to ensure all columns are\n", "numeric." 
] }, @@ -1083,72 +1083,79 @@ "\n", "\n", - "\n", "\n", - "\n", - "\n", + "\n", + "\n", "cluster:(root)\n", "\n", - "\n", + "\n", "\n", "\n", "\n", - "project_0\n", + "\n", + "project_0\n", "\n", - "\n", - "Project\n", + "\n", + "Project\n", "\n", "\n", "\n", "\n", - "concat_features\n", + "\n", + "concat_features\n", "\n", - "\n", - "Concat-\n", - "Features\n", + "\n", + "Concat-\n", + "Features\n", "\n", "\n", "\n", "\n", - "project_0->concat_features\n", - "\n", - "\n", + "\n", + "project_0->concat_features\n", + "\n", + "\n", "\n", "\n", - "project_1\n", + "\n", + "project_1\n", "\n", - "\n", - "Project\n", + "\n", + "Project\n", "\n", "\n", "\n", "\n", - "one_hot_encoder\n", + "\n", + "one_hot_encoder\n", "\n", - "\n", - "One-\n", - "Hot-\n", - "Encoder\n", + "\n", + "One-\n", + "Hot-\n", + "Encoder\n", "\n", "\n", "\n", "\n", - "project_1->one_hot_encoder\n", - "\n", - "\n", + "\n", + "project_1->one_hot_encoder\n", + "\n", + "\n", "\n", "\n", - "one_hot_encoder->concat_features\n", - "\n", - "\n", + "\n", + "one_hot_encoder->concat_features\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1215,39 +1222,39 @@ " \n", "
\n", " 0\n", - " 0.60\n", - " 0.13\n", - " 0.85\n", - " 0.69\n", - " 0.46\n", - " 0.92\n", + " 0.34\n", + " 0.07\n", + " 0.81\n", + " 0.68\n", + " 0.44\n", + " 0.93\n", " 0.78\n", "
\n", "
\n", " 1\n", - " 0.46\n", - " 0.19\n", - " 0.82\n", - " 0.67\n", - " 0.38\n", - " 0.95\n", - " 0.78\n", + " 0.24\n", + " 0.08\n", + " 0.68\n", + " 0.65\n", + " 0.36\n", + " 0.94\n", + " 0.77\n", "
\n", "
\n", " 2\n", - " 0.48\n", + " 0.32\n", " 0.13\n", - " 0.79\n", - " 0.64\n", - " 0.37\n", - " 0.91\n", - " 0.75\n", + " 0.77\n", + " 0.66\n", + " 0.40\n", + " 0.93\n", + " 0.77\n", "
\n", "
\n", " mean\n", - " 0.51\n", - " 0.15\n", - " 0.82\n", + " 0.30\n", + " 0.09\n", + " 0.75\n", " 0.67\n", " 0.40\n", " 0.93\n", @@ -1255,13 +1262,13 @@ "
\n", "
\n", " std\n", - " 0.08\n", - " 0.04\n", - " 0.03\n", - " 0.02\n", " 0.05\n", + " 0.03\n", + " 0.06\n", " 0.02\n", - " 0.02\n", + " 0.04\n", + " 0.01\n", + " 0.01\n", "
\n", "
\n", "\n", @@ -1269,18 +1276,18 @@ ], "text/plain": [ " fit_time score_time test_disparate impact test_balanced accuracy \\\n", - "0 0.60 0.13 0.85 0.69 \n", - "1 0.46 0.19 0.82 0.67 \n", - "2 0.48 0.13 0.79 0.64 \n", - "mean 0.51 0.15 0.82 0.67 \n", - "std 0.08 0.04 0.03 0.02 \n", + "0 0.34 0.07 0.81 0.68 \n", + "1 0.24 0.08 0.68 0.65 \n", + "2 0.32 0.13 0.77 0.66 \n", + "mean 0.30 0.09 0.75 0.67 \n", + "std 0.05 0.03 0.06 0.02 \n", "\n", " test_recall (unfavorable) test_recall (favorable) test_accuracy \n", - "0 0.46 0.92 0.78 \n", - "1 0.38 0.95 0.78 \n", - "2 0.37 0.91 0.75 \n", + "0 0.44 0.93 0.78 \n", + "1 0.36 0.94 0.77 \n", + "2 0.40 0.93 0.77 \n", "mean 0.40 0.93 0.77 \n", - "std 0.05 0.02 0.02 " + "std 0.04 0.01 0.01 " ] }, "execution_count": 14, @@ -1310,8 +1317,8 @@ "\n", "Lale provides several [bias mitigators](https://lale.readthedocs.io/en/latest/modules/lale.lib.aif360.html#pre-estimator-mitigation-operators) from AIF360.\n", "This notebook demonstrates three representative ones.\n", - "The first one, `GerryFairClassifier`, is an in-estimator mitigator,\n", - "meaning that it has its own built-in estimator.\n", + "The first one, [GerryFairClassifier](https://lale.readthedocs.io/en/latest/modules/lale.lib.aif360.gerry_fair_classifier.html#lale.lib.aif360.gerry_fair_classifier.GerryFairClassifier),\n", + "is an in-estimator mitigator, meaning that it has its own built-in estimator.\n", "We configure it with the previously defined `fairness_info` and\n", "`prefix`, which is a data preparation sub-pipeline." ] @@ -1354,8 +1361,8 @@ " \n", "
\n", " 0\n", - " 1.31\n", - " 0.39\n", + " 1.18\n", + " 0.32\n", " 0.76\n", " 0.74\n", " 0.71\n", @@ -1364,7 +1371,7 @@ "
\n", "
\n", " 1\n", - " 0.81\n", + " 2.00\n", " 0.33\n", " 0.63\n", " 0.65\n", @@ -1374,8 +1381,8 @@ "
\n", "
\n", " 2\n", - " 0.81\n", - " 0.31\n", + " 0.95\n", + " 0.44\n", " 0.64\n", " 0.70\n", " 0.52\n", @@ -1384,8 +1391,8 @@ "
\n", "
\n", " mean\n", - " 0.97\n", - " 0.34\n", + " 1.38\n", + " 0.36\n", " 0.68\n", " 0.70\n", " 0.54\n", @@ -1394,8 +1401,8 @@ "
\n", "
\n", " std\n", - " 0.29\n", - " 0.04\n", + " 0.55\n", + " 0.07\n", " 0.07\n", " 0.04\n", " 0.16\n", @@ -1408,11 +1415,11 @@ ], "text/plain": [ " fit_time score_time test_disparate impact test_balanced accuracy \\\n", - "0 1.31 0.39 0.76 0.74 \n", - "1 0.81 0.33 0.63 0.65 \n", - "2 0.81 0.31 0.64 0.70 \n", - "mean 0.97 0.34 0.68 0.70 \n", - "std 0.29 0.04 0.07 0.04 \n", + "0 1.18 0.32 0.76 0.74 \n", + "1 2.00 0.33 0.63 0.65 \n", + "2 0.95 0.44 0.64 0.70 \n", + "mean 1.38 0.36 0.68 0.70 \n", + "std 0.55 0.07 0.07 0.04 \n", "\n", " test_recall (unfavorable) test_recall (favorable) test_accuracy \n", "0 0.71 0.76 0.75 \n", @@ -1437,9 +1444,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The second example, `DisparateImpactRemover`, is a pre-estimator\n", - "mitigator, meaning that it transforms the data to make it more fair\n", - "before it is piped to an estimator." + "The second example, [DisparateImpactRemover](https://lale.readthedocs.io/en/latest/modules/lale.lib.aif360.disparate_impact_remover.html#lale.lib.aif360.disparate_impact_remover.DisparateImpactRemover),\n", + "is a pre-estimator mitigator, meaning that it transforms the data to\n", + "make it more fair before it is piped to an estimator." ] }, { @@ -1480,53 +1487,53 @@ " \n", "
\n", " 0\n", - " 0.88\n", - " 0.46\n", - " 0.94\n", - " 0.66\n", + " 0.59\n", + " 0.28\n", + " 0.99\n", + " 0.67\n", " 0.41\n", - " 0.91\n", - " 0.76\n", + " 0.92\n", + " 0.77\n", "
\n", "
\n", " 1\n", - " 0.89\n", - " 0.40\n", - " 0.93\n", - " 0.67\n", - " 0.37\n", - " 0.97\n", - " 0.79\n", + " 0.49\n", + " 0.24\n", + " 0.92\n", + " 0.64\n", + " 0.35\n", + " 0.94\n", + " 0.76\n", "
\n", "
\n", " 2\n", - " 0.83\n", - " 0.45\n", - " 0.91\n", + " 0.53\n", + " 0.23\n", + " 1.02\n", " 0.65\n", - " 0.38\n", - " 0.93\n", - " 0.76\n", + " 0.37\n", + " 0.94\n", + " 0.77\n", "
\n", "
\n", " mean\n", - " 0.87\n", - " 0.44\n", - " 0.93\n", - " 0.66\n", - " 0.39\n", + " 0.54\n", + " 0.25\n", + " 0.97\n", + " 0.65\n", + " 0.38\n", " 0.93\n", - " 0.77\n", + " 0.76\n", "
\n", "
\n", " std\n", + " 0.05\n", " 0.03\n", - " 0.03\n", - " 0.02\n", + " 0.05\n", " 0.01\n", - " 0.02\n", " 0.03\n", " 0.01\n", + " 0.00\n", "
\n", "
\n", "\n", @@ -1534,18 +1541,18 @@ ], "text/plain": [ " fit_time score_time test_disparate impact test_balanced accuracy \\\n", - "0 0.88 0.46 0.94 0.66 \n", - "1 0.89 0.40 0.93 0.67 \n", - "2 0.83 0.45 0.91 0.65 \n", - "mean 0.87 0.44 0.93 0.66 \n", - "std 0.03 0.03 0.02 0.01 \n", + "0 0.59 0.28 0.99 0.67 \n", + "1 0.49 0.24 0.92 0.64 \n", + "2 0.53 0.23 1.02 0.65 \n", + "mean 0.54 0.25 0.97 0.65 \n", + "std 0.05 0.03 0.05 0.01 \n", "\n", " test_recall (unfavorable) test_recall (favorable) test_accuracy \n", - "0 0.41 0.91 0.76 \n", - "1 0.37 0.97 0.79 \n", - "2 0.38 0.93 0.76 \n", - "mean 0.39 0.93 0.77 \n", - "std 0.02 0.03 0.01 " + "0 0.41 0.92 0.77 \n", + "1 0.35 0.94 0.76 \n", + "2 0.37 0.94 0.77 \n", + "mean 0.38 0.93 0.76 \n", + "std 0.03 0.01 0.00 " ] }, "execution_count": 16, @@ -1563,8 +1570,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The third example, `CalibratedEqOddsPostprocessing`, is a\n", - "post-estimator mitigator, meaning that it transforms the predictions\n", + "The third example, [CalibratedEqOddsPostprocessing](https://lale.readthedocs.io/en/latest/modules/lale.lib.aif360.calibrated_eq_odds_postprocessing.html#lale.lib.aif360.calibrated_eq_odds_postprocessing.CalibratedEqOddsPostprocessing),\n", + "is a post-estimator mitigator, meaning that it transforms the predictions\n", "after they are returned from an estimator." ] }, @@ -1606,53 +1613,53 @@ " \n", "
\n", " 0\n", - " 0.83\n", - " 0.45\n", - " 0.89\n", - " 0.60\n", - " 0.52\n", - " 0.68\n", + " 0.54\n", + " 0.21\n", + " 0.91\n", " 0.63\n", + " 0.58\n", + " 0.67\n", + " 0.64\n", "
\n", "
\n", " 1\n", - " 0.72\n", - " 0.36\n", - " 0.91\n", + " 0.46\n", + " 0.29\n", + " 0.88\n", " 0.62\n", " 0.56\n", - " 0.67\n", - " 0.64\n", + " 0.68\n", + " 0.65\n", "
\n", "
\n", " 2\n", - " 0.96\n", - " 0.30\n", - " 0.88\n", - " 0.61\n", - " 0.55\n", - " 0.67\n", - " 0.64\n", + " 0.39\n", + " 0.21\n", + " 0.84\n", + " 0.60\n", + " 0.52\n", + " 0.68\n", + " 0.63\n", "
\n", "
\n", " mean\n", - " 0.84\n", - " 0.37\n", - " 0.89\n", - " 0.61\n", - " 0.54\n", + " 0.46\n", + " 0.24\n", + " 0.88\n", + " 0.62\n", + " 0.55\n", " 0.68\n", " 0.64\n", "
\n", "
\n", " std\n", - " 0.12\n", - " 0.07\n", + " 0.08\n", + " 0.05\n", + " 0.04\n", + " 0.01\n", + " 0.03\n", " 0.01\n", " 0.01\n", - " 0.02\n", - " 0.00\n", - " 0.00\n", "
\n", "
\n", "\n", @@ -1660,18 +1667,18 @@ ], "text/plain": [ " fit_time score_time test_disparate impact test_balanced accuracy \\\n", - "0 0.83 0.45 0.89 0.60 \n", - "1 0.72 0.36 0.91 0.62 \n", - "2 0.96 0.30 0.88 0.61 \n", - "mean 0.84 0.37 0.89 0.61 \n", - "std 0.12 0.07 0.01 0.01 \n", + "0 0.54 0.21 0.91 0.63 \n", + "1 0.46 0.29 0.88 0.62 \n", + "2 0.39 0.21 0.84 0.60 \n", + "mean 0.46 0.24 0.88 0.62 \n", + "std 0.08 0.05 0.04 0.01 \n", "\n", " test_recall (unfavorable) test_recall (favorable) test_accuracy \n", - "0 0.52 0.68 0.63 \n", - "1 0.56 0.67 0.64 \n", - "2 0.55 0.67 0.64 \n", - "mean 0.54 0.68 0.64 \n", - "std 0.02 0.00 0.00 " + "0 0.58 0.67 0.64 \n", + "1 0.56 0.68 0.65 \n", + "2 0.52 0.68 0.63 \n", + "mean 0.55 0.68 0.64 \n", + "std 0.03 0.01 0.01 " ] }, "execution_count": 17, @@ -1736,7 +1743,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "100%|████████| 10/10 [00:41<00:00, 4.19s/trial, best loss: -0.7634963346619189]\n" + "100%|████████| 10/10 [00:28<00:00, 2.88s/trial, best loss: -0.7595250541065992]\n" ] } ], @@ -1768,97 +1775,108 @@ "\n", "\n", - "\n", "\n", - "\n", + "\n", "\n", "cluster:(root)\n", - "\n", - "cluster:(root)\n", - "\n", - "\n", - "PostMit\n", + "\n", + "\n", + "cluster:(root)\n", + "\n", + "\n", + "PostMit\n", "\n", "\n", "\n", - "cluster:pipeline\n", + "\n", + "cluster:pipeline\n", "\n", - "\n", + "\n", "\n", "\n", "\n", "\n", - "project\n", + "\n", + "project\n", "\n", - "\n", - "Project\n", + "\n", + "Project\n", "\n", "\n", "\n", "\n", - "concat_features\n", + "\n", + "concat_features\n", "\n", - "\n", - "Concat-\n", - "Features\n", + "\n", + "Concat-\n", + "Features\n", "\n", "\n", "\n", "\n", - "project->concat_features\n", - "\n", - "\n", + "\n", + "project->concat_features\n", + "\n", + "\n", "\n", "\n", - "project_0\n", + "\n", + "project_0\n", "\n", - "\n", - "Project\n", + "\n", + "Project\n", "\n", "\n", "\n", "\n", - "one_hot_encoder\n", + "\n", + "one_hot_encoder\n", "\n", - "\n", - "One-\n", - "Hot-\n", - 
"Encoder\n", + "\n", + "One-\n", + "Hot-\n", + "Encoder\n", "\n", "\n", "\n", "\n", - "project_0->one_hot_encoder\n", - "\n", - "\n", + "\n", + "project_0->one_hot_encoder\n", + "\n", + "\n", "\n", "\n", - "one_hot_encoder->concat_features\n", - "\n", - "\n", + "\n", + "one_hot_encoder->concat_features\n", + "\n", + "\n", "\n", "\n", - "random_forest_classifier\n", - "\n", - "\n", - "Random-\n", - "Forest-\n", - "Classifier\n", + "\n", + "random_forest_classifier\n", + "\n", + "\n", + "Random-\n", + "Forest-\n", + "Classifier\n", "\n", "\n", "\n", "\n", - "concat_features->random_forest_classifier\n", - "\n", - "\n", + "\n", + "concat_features->random_forest_classifier\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1897,53 +1915,53 @@ " \n", "
\n", " 0\n", - " 0.99\n", - " 0.45\n", - " 0.96\n", - " 0.59\n", - " 0.50\n", - " 0.68\n", - " 0.63\n", + " 0.30\n", + " 0.18\n", + " 1.00\n", + " 0.51\n", + " 0.31\n", + " 0.72\n", + " 0.60\n", "
\n", "
\n", " 1\n", - " 0.80\n", - " 0.42\n", - " 1.02\n", - " 0.55\n", - " 0.40\n", - " 0.70\n", - " 0.61\n", + " 0.38\n", + " 0.17\n", + " 1.00\n", + " 0.52\n", + " 0.32\n", + " 0.73\n", + " 0.60\n", "
\n", "
\n", " 2\n", - " 0.71\n", - " 0.37\n", - " 1.07\n", - " 0.59\n", - " 0.48\n", - " 0.69\n", - " 0.63\n", + " 0.27\n", + " 0.16\n", + " 1.00\n", + " 0.52\n", + " 0.32\n", + " 0.72\n", + " 0.60\n", "
\n", "
\n", " mean\n", - " 0.84\n", - " 0.41\n", - " 1.02\n", - " 0.58\n", - " 0.46\n", - " 0.69\n", - " 0.62\n", + " 0.32\n", + " 0.17\n", + " 1.00\n", + " 0.52\n", + " 0.32\n", + " 0.72\n", + " 0.60\n", "
\n", "
\n", " std\n", - " 0.14\n", - " 0.04\n", - " 0.05\n", - " 0.02\n", - " 0.05\n", + " 0.06\n", " 0.01\n", + " 0.00\n", + " 0.00\n", " 0.01\n", + " 0.00\n", + " 0.00\n", "
\n", "
\n", "\n", @@ -1951,18 +1969,18 @@ ], "text/plain": [ " fit_time score_time test_disparate impact test_balanced accuracy \\\n", - "0 0.99 0.45 0.96 0.59 \n", - "1 0.80 0.42 1.02 0.55 \n", - "2 0.71 0.37 1.07 0.59 \n", - "mean 0.84 0.41 1.02 0.58 \n", - "std 0.14 0.04 0.05 0.02 \n", + "0 0.30 0.18 1.00 0.51 \n", + "1 0.38 0.17 1.00 0.52 \n", + "2 0.27 0.16 1.00 0.52 \n", + "mean 0.32 0.17 1.00 0.52 \n", + "std 0.06 0.01 0.00 0.00 \n", "\n", " test_recall (unfavorable) test_recall (favorable) test_accuracy \n", - "0 0.50 0.68 0.63 \n", - "1 0.40 0.70 0.61 \n", - "2 0.48 0.69 0.63 \n", - "mean 0.46 0.69 0.62 \n", - "std 0.05 0.01 0.01 " + "0 0.31 0.72 0.60 \n", + "1 0.32 0.73 0.60 \n", + "2 0.32 0.72 0.60 \n", + "mean 0.32 0.72 0.60 \n", + "std 0.01 0.00 0.00 " ] }, "execution_count": 20, @@ -2025,7 +2043,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.7.13" } }, "nbformat": 4, diff --git a/examples/kdd22/06_multobj.ipynb b/examples/kdd22/06_multobj.ipynb index 48ac1e708..2b95cf054 100644 --- a/examples/kdd22/06_multobj.ipynb +++ b/examples/kdd22/06_multobj.ipynb @@ -8,7 +8,11 @@ "\n", "# 6. Multi-objective AutoML with Lale\n", "\n", - "In this notebook, we will present how we can perform a multi-objective refinement of a pipeline found by AutoML utilizing a multi-objective optimizer. First, we will find a pipeline via single-objective AutoML. 
Then we will optimize the final estimator in the pipeline with a multi-objective optimizer to get a Pareto-front of pipelines.\n", + "This notebook presents how we can perform a multi-objective refinement of a pipeline found by AutoML utilizing a multi-objective optimizer.\n", + "First, we will find a pipeline via single-objective AutoML.\n", + "Then we will optimize the final estimator in the pipeline with a multi-objective optimizer to get a Pareto-front of pipelines.\n", + "\n", + "This notebook has the following sections:\n", "\n", "- [6.1 Pre-requisites](#6.1-Pre-requisites)\n", "- [6.2 Dataset](#6.1-Dataset)\n", @@ -31,7 +35,12 @@ "pip install git+https://github.com/IBM/lale-gpl.git@master\n", "\n", "```\n", - "- `Platypus-opt==1.0.4`: This package provides an implementation of the multi-objective **NSGA2** algorithm." + "- `Platypus-opt==1.0.4`: This package provides an implementation of the multi-objective **NSGA2** algorithm.\n", + "\n", + "```\n", + "pip install Platypus-opt==1.0.4\n", + "\n", + "```" ] }, { @@ -79,7 +88,7 @@ "text/html": [ "\n", - "
\n", + "
\n", " \n", " \n", " \n", @@ -108,202 +117,202 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", 
+ " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
 
359bad<030.000000existing paidfurniture/equipment2406.000000<1004<=X<74.000000female div/dep/marnone4.000000real estate23.000000nonerent1.000000skilled1.000000noneyes359bad<030.000000existing paidfurniture/equipment2406.000000<1004<=X<74.000000female div/dep/marnone4.000000real estate23.000000nonerent1.000000skilled1.000000noneyes
707bad0<=X<20012.000000no credits/all paidfurniture/equipment2969.000000<100<14.000000female div/dep/marnone3.000000life insurance25.000000nonerent2.000000skilled1.000000noneyes707bad0<=X<20012.000000no credits/all paidfurniture/equipment2969.000000<100<14.000000female div/dep/marnone3.000000life insurance25.000000nonerent2.000000skilled1.000000noneyes
763badno checking21.000000critical/other existing creditnew car12680.000000no known savings>=74.000000male singlenone4.000000no known property30.000000nonefor free1.000000high qualif/self emp/mgmt1.000000yesyes763badno checking21.000000critical/other existing creditnew car12680.000000no known savings>=74.000000male singlenone4.000000no known property30.000000nonefor free1.000000high qualif/self emp/mgmt1.000000yesyes
835bad<012.000000no credits/all paidnew car1082.000000<1001<=X<44.000000male singlenone4.000000car48.000000bankown2.000000skilled1.000000noneyes835bad<012.000000no credits/all paidnew car1082.000000<1001<=X<44.000000male singlenone4.000000car48.000000bankown2.000000skilled1.000000noneyes
192bad0<=X<20027.000000existing paidbusiness3915.000000<1001<=X<44.000000male singlenone2.000000car36.000000noneown1.000000skilled2.000000yesyes192bad0<=X<20027.000000existing paidbusiness3915.000000<1001<=X<44.000000male singlenone2.000000car36.000000noneown1.000000skilled2.000000yesyes
629goodno checking9.000000existing paideducation3832.000000no known savings>=71.000000male singlenone4.000000real estate64.000000noneown1.000000unskilled resident1.000000noneyes629goodno checking9.000000existing paideducation3832.000000no known savings>=71.000000male singlenone4.000000real estate64.000000noneown1.000000unskilled resident1.000000noneyes
559bad0<=X<20018.000000critical/other existing creditfurniture/equipment1928.000000<100<12.000000male singlenone2.000000real estate31.000000noneown2.000000unskilled resident1.000000noneyes559bad0<=X<20018.000000critical/other existing creditfurniture/equipment1928.000000<100<12.000000male singlenone2.000000real estate31.000000noneown2.000000unskilled resident1.000000noneyes
684good0<=X<20036.000000delayed previouslybusiness9857.000000100<=X<5004<=X<71.000000male singlenone3.000000life insurance31.000000noneown2.000000unskilled resident2.000000yesyes684good0<=X<20036.000000delayed previouslybusiness9857.000000100<=X<5004<=X<71.000000male singlenone3.000000life insurance31.000000noneown2.000000unskilled resident2.000000yesyes
\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -325,7 +334,7 @@ "source": [ "## 6.3 Single-objective AutoML\n", "\n", - "In this subsection, we will create a small pipeline search space and auto-configure the pipeline by optimizing for the balanced accuracy." + "In this subsection, we will create a small pipeline search space and auto-configure the pipeline by optimizing for the [balanced accuracy](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.balanced_accuracy_score.html)." ] }, { @@ -355,30 +364,30 @@ "\n", "\n", - "\n", "\n", - "\n", + "\n", "\n", "cluster:(root)\n", "\n", - "\n", + "\n", "\n", "\n", "\n", "cluster:choice_0\n", "\n", - "\n", - "Choice\n", + "\n", + "Choice\n", "\n", "\n", "\n", "\n", "cluster:choice_1\n", "\n", - "\n", - "Choice\n", + "\n", + "Choice\n", "\n", "\n", "\n", @@ -386,8 +395,8 @@ "\n", "project_0\n", "\n", - "\n", - "Project\n", + "\n", + "Project\n", "\n", "\n", "\n", @@ -395,56 +404,56 @@ "\n", "simple_imputer\n", "\n", - "\n", - "Simple-\n", - "Imputer\n", + "\n", + "Simple-\n", + "Imputer\n", "\n", "\n", "\n", "\n", "\n", "project_0->simple_imputer\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "norm\n", "\n", - "\n", - "Norm\n", + "\n", + "Norm\n", "\n", "\n", "\n", "\n", "\n", "simple_imputer->norm\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "concat_features\n", "\n", - "\n", - "Concat-\n", - "Features\n", + "\n", + "Concat-\n", + "Features\n", "\n", "\n", "\n", "\n", "\n", "norm->concat_features\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "no_op\n", "\n", - "\n", - "No-\n", - "Op\n", + "\n", + "No-\n", + "Op\n", "\n", "\n", "\n", @@ -452,8 +461,8 @@ "\n", "project_1\n", "\n", - "\n", - "Project\n", + "\n", + "Project\n", "\n", "\n", "\n", @@ -461,46 +470,46 @@ "\n", "one_hot\n", "\n", - "\n", - "One-\n", - "Hot\n", + "\n", + "One-\n", + "Hot\n", "\n", "\n", "\n", "\n", "\n", "project_1->one_hot\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", 
"one_hot->concat_features\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "rf\n", "\n", - "\n", - "RF\n", + "\n", + "RF\n", "\n", "\n", "\n", "\n", "\n", "concat_features->rf\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "xg_boost\n", "\n", - "\n", - "XG-\n", - "Boost\n", + "\n", + "XG-\n", + "Boost\n", "\n", "\n", "\n", @@ -508,7 +517,7 @@ "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -519,10 +528,13 @@ "project_nums = Project(columns={'type': 'number'})\n", "project_cats = Project(columns={'type': 'string'})\n", "planned_pipeline = (\n", - " (project_nums >> SimpleImputer >> (Norm | NoOp) & project_cats >> OneHot(handle_unknown='ignore'))\n", + " (\n", + " (project_nums >> SimpleImputer >> (Norm | NoOp))\n", + " & (project_cats >> OneHot(handle_unknown='ignore'))\n", + " )\n", " >> ConcatFeatures\n", - " >> (RF | XGBoost(objective='binary:hinge')))\n", - "\n", + " >> (RF | XGBoost(objective='binary:hinge'))\n", + ")\n", "planned_pipeline.visualize()" ] }, @@ -535,7 +547,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:05<00:00, 1.02s/trial, best loss: -0.6405579399141631]\n", + "100%|██████████| 5/5 [00:06<00:00, 1.23s/trial, best loss: -0.6405579399141631]\n", "balanced accuracy 68.8%\n" ] } @@ -566,23 +578,23 @@ "\n", "\n", - "\n", "\n", - "\n", - "\n", + "\n", + "\n", "cluster:(root)\n", "\n", - "\n", + "\n", "\n", "\n", "\n", "\n", "project_0\n", "\n", - "\n", - "Project\n", + "\n", + "Project\n", "\n", "\n", "\n", @@ -590,56 +602,56 @@ "\n", "simple_imputer\n", "\n", - "\n", - "Simple-\n", - "Imputer\n", + "\n", + "Simple-\n", + "Imputer\n", "\n", "\n", "\n", "\n", "\n", "project_0->simple_imputer\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "no_op\n", "\n", - "\n", - "No-\n", - "Op\n", + "\n", + "No-\n", + "Op\n", "\n", "\n", "\n", "\n", "\n", "simple_imputer->no_op\n", - "\n", - 
"\n", + "\n", + "\n", "\n", "\n", "\n", "concat_features\n", "\n", - "\n", - "Concat-\n", - "Features\n", + "\n", + "Concat-\n", + "Features\n", "\n", "\n", "\n", "\n", "\n", "no_op->concat_features\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "project_1\n", "\n", - "\n", - "Project\n", + "\n", + "Project\n", "\n", "\n", "\n", @@ -647,45 +659,45 @@ "\n", "one_hot\n", "\n", - "\n", - "One-\n", - "Hot\n", + "\n", + "One-\n", + "Hot\n", "\n", "\n", "\n", "\n", "\n", "project_1->one_hot\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "one_hot->concat_features\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "xg_boost\n", "\n", - "\n", - "XG-\n", - "Boost\n", + "\n", + "XG-\n", + "Boost\n", "\n", "\n", "\n", "\n", "\n", "concat_features->xg_boost\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -702,9 +714,8 @@ "source": [ "## 6.4 Specifying Objectives for Multi-objective AutoML\n", "\n", - "While we obtaine a pipeline by optimizing for balanced accuracy, there might be an application where we wish to simultaneously optimize for balanced accuracy and false positive rate, and select a pipeline from the set of pipelines with the best tradeoff between balanced accuracy and false positive rate.\n", - "\n", - "Here we define a `sklearn` scorer for false positive rate and evaluate the multiple objectives or scores (balanced accuracy and false positive rate) of the pipeline found above." 
+ "While we obtained a pipeline by optimizing for balanced accuracy, there might be an application where we wish to simultaneously optimize for balanced accuracy and [symmetric disparate impact](https://lale.readthedocs.io/en/latest/modules/lale.lib.aif360.util.html#lale.lib.aif360.util.symmetric_disparate_impact), and select a pipeline from the set of pipelines with the best tradeoff between those two.\n", + "Here we define a scikit-learn scorer for symmetric disparate impact and evaluate the multiple objectives or scores (balanced accuracy and symmetric disparate impact) of the pipeline found above." ] }, { @@ -712,35 +723,42 @@ "execution_count": 7, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:No module named 'tensorflow': AdversarialDebiasing will be unavailable. To install, run:\n", + "pip install 'aif360[AdversarialDebiasing]'\n", + "WARNING:root:No module named 'numba': LFR will be unavailable. To install, run:\n", + "pip install 'aif360[LFR]'\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "Metric values using single obj best pipeline 0.688 0.479\n" + "Metric values using single-objective best pipeline: 0.688, 0.848\n" ] } ], "source": [ "from sklearn.metrics import make_scorer, get_scorer\n", + "from lale.lib.aif360 import symmetric_disparate_impact\n", "\n", - "# Define sklearn scorer for computing False Positive Rate (FPR)\n", - "def compute_fpr(y_true, y_pred):\n", - " from sklearn.metrics import confusion_matrix\n", - " tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()\n", - " fpr = round(fp / (fp + tn), 4)\n", - " return fpr\n", - "\n", - "fpr_scorer = make_scorer(compute_fpr, greater_is_better=False)\n", + "# Only use age, because personal status changes after one-hot encoding\n", + "di_scorer = symmetric_disparate_impact(\n", + " favorable_labels=['good'],\n", + " protected_attributes=[{'feature': 'age', 'reference_group': [[26, 1000]]}]\n", + ")\n", "\n", - 
"# Specify scoring params passed to Multi-objective Optimizer\n", - "scoring = ['balanced_accuracy', fpr_scorer] \n", - "best_score = [1, 0]\n", + "# Specify scoring arguments passed to Multi-objective Optimizer\n", + "scoring = ['balanced_accuracy', di_scorer] \n", + "best_score = [1, 1]\n", "\n", "# Let's also get scorers to compute the metrics value on optimizer's outputs\n", - "scorer = [get_scorer(scorer) for scorer in scoring]\n", - "print('Metric values using single obj best pipeline %.3f %.3f' \\\n", - " %(scorer[0](auto_trained, test_X, test_y), \\\n", - " -scorer[1](auto_trained, test_X, test_y)))" + "scorers = [get_scorer(scorer) for scorer in scoring]\n", + "print(\"Metric values using single-objective best pipeline: \" +\n", + " \", \".join(f\"{s(auto_trained, test_X, test_y):.3f}\" for s in scorers))" ] }, { @@ -760,11 +778,14 @@ "metadata": {}, "outputs": [], "source": [ + "from lale.lib.lale import OptimizeLast\n", "try:\n", " from lalegpl.lib.lale import NSGA2\n", + " lalegpl_installed = True\n", "except ImportError:\n", - " raise SystemExit('lale-gpl not installed, stopping execution here!')\n", - "from lale.lib.lale import OptimizeLast" + " lalegpl_installed = False\n", + " import sys\n", + " print(\"lale-gpl not installed, the remaining notebook will be skipped\", file=sys.stderr)" ] }, { @@ -782,21 +803,25 @@ "metadata": {}, "outputs": [], "source": [ - "# Parameters used when creating instance of Multi-objective optimizer \n", - "optimizer_args = {\n", - " 'scoring' : scoring,\n", - " 'best_score': best_score,\n", - " 'cv' : 2,\n", - " 'max_evals' : 40,\n", - " 'population_size' : 15,\n", - "}\n", - "\n", - "# Create operator for performing MOO on final estimator of best pipeline from single obj optimizer \n", - "optFinalEstimator = OptimizeLast(\n", - " estimator = auto_trained,\n", - " last_optimizer=NSGA2,\n", - " optimizer_args = optimizer_args\n", - ")" + "if lalegpl_installed:\n", + " optFinalEstimator = OptimizeLast(\n", + " 
estimator=auto_trained,\n", + " last_optimizer=NSGA2,\n", + " optimizer_args={\n", + " 'scoring' : scoring,\n", + " 'best_score': best_score,\n", + " 'cv' : 2,\n", + " 'max_evals' : 40,\n", + " 'population_size' : 15,\n", + " }\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `OptimizeLast` operator has a `fit` method that executes the multi-objective AutoML." ] }, { @@ -804,41 +829,140 @@ "execution_count": 10, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 1min 35s, sys: 173 ms, total: 1min 35s\n", + "Wall time: 1min 35s\n" + ] + } + ], + "source": [ + "%%time\n", + "if lalegpl_installed:\n", + " opt_trained = optFinalEstimator.fit(train_X, train_y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that the above optimization does not return a single pipeline but rather a set of Pareto-optimal pipelines that span the tradeoff between the multiple objectives (balanced accuracy and symmetric disparate impact in this case).\n", + "Calling `predict` on the `opt_trained` object uses the first pipeline found on the Pareto-front to generate the predictions. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "balanced accuracy 50.0%\n" + ] + } + ], + "source": [ + "if lalegpl_installed:\n", + " pred_y = opt_trained.predict(test_X)\n", + " acc = sklearn.metrics.balanced_accuracy_score(test_y, pred_y)\n", + " print(f'balanced accuracy {acc:.1%}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also access specific pipelines from the Pareto front with the `get_pipeline` function, where we can specify a string name; by default `get_pipeline` returns the first pipeline.\n", + "Here we evaluate the multiple objectives for the first pipeline from the Pareto-front" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Metric values using the first multi-objective pareto pipeline: 0.500, 1.000\n" + ] + } + ], + "source": [ + "if lalegpl_installed:\n", + " pareto_pipeline = opt_trained.get_pipeline()\n", + " print(\"Metric values using the first multi-objective pareto pipeline: \" +\n", + " \", \".join(f\"{s(pareto_pipeline, test_X, test_y):.3f}\" for s in scorers))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6.6 Pareto-front of Multi-objective AutoML\n", + "\n", + "We can also view the complete Pareto-front of pipelines found with the multi-objective AutoML with the `summary` function." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "if lalegpl_installed:\n", + " opt_trained.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here we visualize the different pipelines and their hyperparameters on the Pareto-front." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Pareto optimal pipeline p0:\n" + ] + }, { "data": { "image/svg+xml": [ "\n", "\n", - "\n", "\n", - "\n", - "\n", - "cluster:(root)\n", - "\n", - "\n", + "\n", + "\n", "cluster:(root)\n", - "\n", - "\n", - "OptimizeLast\n", - "\n", - "\n", - "\n", - "\n", - "cluster:pipeline\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", - "\n", - "\n", + "\n", "\n", - "project\n", - "\n", - "\n", - "Project\n", + "project_0\n", + "\n", + "\n", + "Project\n", "\n", "\n", "\n", @@ -846,56 +970,56 @@ "\n", "simple_imputer\n", "\n", - "\n", - "Simple-\n", - "Imputer\n", + "\n", + "Simple-\n", + "Imputer\n", "\n", "\n", "\n", - "\n", + "\n", "\n", - "project->simple_imputer\n", - "\n", - "\n", + "project_0->simple_imputer\n", + "\n", + "\n", "\n", "\n", "\n", "no_op\n", "\n", - "\n", - "No-\n", - "Op\n", + "\n", + "No-\n", + "Op\n", "\n", "\n", "\n", "\n", "\n", "simple_imputer->no_op\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "concat_features\n", "\n", - "\n", - "Concat-\n", - "Features\n", + "\n", + "Concat-\n", + "Features\n", "\n", "\n", "\n", "\n", "\n", "no_op->concat_features\n", - "\n", - "\n", + "\n", + "\n", "\n", - "\n", + "\n", "\n", - "project_0\n", - "\n", - "\n", - "Project\n", + "project_1\n", + "\n", + "\n", + "Project\n", "\n", "\n", "\n", @@ -903,249 +1027,403 @@ "\n", "one_hot\n", "\n", - "\n", - "One-\n", - "Hot\n", + "\n", + "One-\n", + "Hot\n", "\n", "\n", "\n", - "\n", + "\n", "\n", - "project_0->one_hot\n", - "\n", - "\n", + "project_1->one_hot\n", + "\n", + "\n", "\n", "\n", "\n", "one_hot->concat_features\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "xg_boost\n", - "\n", - "\n", - "XG-\n", - "Boost\n", + "\n", + "\n", + "XG-\n", + "Boost\n", "\n", "\n", "\n", "\n", "\n", "concat_features->xg_boost\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "nsga2\n", - 
"\n", - "\n", - "NSGA2\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, "output_type": "display_data" - } - ], - "source": [ - "optFinalEstimator.visualize()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The `OptimizeLast` operator has the `fit` operator that executes the multi-objective AutoML." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 1min 11s, sys: 149 ms, total: 1min 12s\n", - "Wall time: 1min 12s\n" - ] - } - ], - "source": [ - "%%time\n", - "opt_trained = optFinalEstimator.fit(train_X, train_y)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that the above optimization does not return a single pipeline but rather a set of Pareto-optimal pipelines that span the tradeoff between the multiple objectives (balanced accuracy and false positive rate in this case).\n", - "\n", - "We can still call `predict` on the `opt_trained` object and that uses the first pipeline found on the Pareto-front to generate the predictions. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ + "data": { + "text/markdown": [ + "```python\n", + "pipeline = XGBoost(\n", + " objective=\"binary:hinge\",\n", + " gamma=0.859189857095001,\n", + " learning_rate=0.10974219876135785,\n", + " max_depth=4,\n", + " min_child_weight=9,\n", + " n_estimators=435,\n", + " reg_alpha=0.35061976504895453,\n", + " reg_lambda=0.2720394745576489,\n", + " subsample=0.03310406188598723,\n", + ")\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stdout", "output_type": "stream", "text": [ - "balanced accuracy 67.3%\n" + "Pareto optimal pipeline p1:\n" ] - } - ], - "source": [ - "# Currently predict uses first pipeline in pareto-front\n", - "pred_y = opt_trained.predict(test_X)\n", - "acc = sklearn.metrics.balanced_accuracy_score(test_y, pred_y)\n", - "print(f'balanced accuracy {acc:.1%}')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can also access specific pipelines from the Pareto front with the `get_pipeline` function where we can specify a string name; by default `get_pipeline` returns the first pipeline.\n", - "\n", - "Here we evaluate the multiple objectives for the first pipeline from the Pareto-front" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "cluster:(root)\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "project_0\n", + "\n", + "\n", + "Project\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "simple_imputer\n", + "\n", + "\n", + "Simple-\n", + "Imputer\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "project_0->simple_imputer\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "no_op\n", + "\n", + "\n", + "No-\n", + "Op\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "simple_imputer->no_op\n", + "\n", + "\n", + "\n", + "\n", + "\n", + 
"concat_features\n", + "\n", + "\n", + "Concat-\n", + "Features\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "no_op->concat_features\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "project_1\n", + "\n", + "\n", + "Project\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "one_hot\n", + "\n", + "\n", + "One-\n", + "Hot\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "project_1->one_hot\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "one_hot->concat_features\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "xg_boost\n", + "\n", + "\n", + "XG-\n", + "Boost\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "concat_features->xg_boost\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "```python\n", + "pipeline = XGBoost(\n", + " objective=\"binary:hinge\",\n", + " gamma=0.2530292257269834,\n", + " learning_rate=0.7562590526098265,\n", + " max_depth=6,\n", + " min_child_weight=14,\n", + " n_estimators=538,\n", + " reg_alpha=0.9097217358808588,\n", + " reg_lambda=0.6764854766703405,\n", + " subsample=0.44405647669435916,\n", + ")\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stdout", "output_type": "stream", "text": [ - "scorer values 0.673 0.448\n" + "Pareto optimal pipeline p2:\n" ] - } - ], - "source": [ - "# get_pipeline() currently returns first pipeline by default\n", - "pareto_pipeline = opt_trained.get_pipeline()\n", - "\n", - "print('scorer values %.3f %.3f' \\\n", - " %(scorer[0](pareto_pipeline, test_X, test_y), \\\n", - " -scorer[1](pareto_pipeline, test_X, test_y)))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 6.6 Pareto-front of Multi-objective AutoML\n", - "\n", - "We can also view the complete Pareto-front of pipelines found with the multi-objective AutoML with the `summary` function." 
- ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ + }, { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idloss1loss2
name
p000.3565280.5049
p110.3721700.4804
\n", - "
" + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "cluster:(root)\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "project_0\n", + "\n", + "\n", + "Project\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "simple_imputer\n", + "\n", + "\n", + "Simple-\n", + "Imputer\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "project_0->simple_imputer\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "no_op\n", + "\n", + "\n", + "No-\n", + "Op\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "simple_imputer->no_op\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "concat_features\n", + "\n", + "\n", + "Concat-\n", + "Features\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "no_op->concat_features\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "project_1\n", + "\n", + "\n", + "Project\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "one_hot\n", + "\n", + "\n", + "One-\n", + "Hot\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "project_1->one_hot\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "one_hot->concat_features\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "xg_boost\n", + "\n", + "\n", + "XG-\n", + "Boost\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "concat_features->xg_boost\n", + "\n", + "\n", + "\n", + "\n", + "\n" ], "text/plain": [ - " id loss1 loss2\n", - "name \n", - "p0 0 0.356528 0.5049\n", - "p1 1 0.372170 0.4804" + "" ] }, - "execution_count": 14, "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Call summary() routine to list the pareto-optimal solutions with corresponding loss values\n", - "opt_trained.summary()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here we visualize the different pipelines and their hyperparameters on the Pareto-front." 
- ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "scrolled": false - }, - "outputs": [ + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "```python\n", + "pipeline = XGBoost(\n", + " objective=\"binary:hinge\",\n", + " gamma=0.13466037305156175,\n", + " learning_rate=0.45227009222474834,\n", + " max_depth=2,\n", + " min_child_weight=19,\n", + " n_estimators=841,\n", + " reg_alpha=0.09716104404748971,\n", + " reg_lambda=0.5899713576903844,\n", + " subsample=0.45730129547152193,\n", + ")\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stdout", "output_type": "stream", "text": [ - "Pareto optimal pipeline p0:\n" + "Pareto optimal pipeline p3:\n" ] }, { @@ -1154,23 +1432,23 @@ "\n", "\n", - "\n", "\n", - "\n", - "\n", + "\n", + "\n", "cluster:(root)\n", "\n", - "\n", + "\n", "\n", "\n", "\n", "\n", "project_0\n", "\n", - "\n", - "Project\n", + "\n", + "Project\n", "\n", "\n", "\n", @@ -1178,56 +1456,56 @@ "\n", "simple_imputer\n", "\n", - "\n", - "Simple-\n", - "Imputer\n", + "\n", + "Simple-\n", + "Imputer\n", "\n", "\n", "\n", "\n", "\n", "project_0->simple_imputer\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "no_op\n", "\n", - "\n", - "No-\n", - "Op\n", + "\n", + "No-\n", + "Op\n", "\n", "\n", "\n", "\n", "\n", "simple_imputer->no_op\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "concat_features\n", "\n", - "\n", - "Concat-\n", - "Features\n", + "\n", + "Concat-\n", + "Features\n", "\n", "\n", "\n", "\n", "\n", "no_op->concat_features\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "project_1\n", "\n", - "\n", - "Project\n", + "\n", + "Project\n", "\n", "\n", "\n", @@ -1235,45 +1513,45 @@ "\n", "one_hot\n", "\n", - "\n", - "One-\n", - "Hot\n", + "\n", + "One-\n", + "Hot\n", "\n", "\n", "\n", "\n", "\n", "project_1->one_hot\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "one_hot->concat_features\n", - "\n", - "\n", + 
"\n", + "\n", "\n", "\n", "\n", "xg_boost\n", - "\n", - "\n", - "XG-\n", - "Boost\n", + "\n", + "\n", + "XG-\n", + "Boost\n", "\n", "\n", "\n", "\n", "\n", "concat_features->xg_boost\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1285,14 +1563,14 @@ "```python\n", "pipeline = XGBoost(\n", " objective=\"binary:hinge\",\n", - " gamma=0.18817063046716287,\n", - " learning_rate=0.1326670086631808,\n", - " max_depth=3,\n", - " min_child_weight=4,\n", - " n_estimators=721,\n", - " reg_alpha=0.2846348240483074,\n", - " reg_lambda=0.8623403429578169,\n", - " subsample=0.40876592698273684,\n", + " gamma=0.7628866972001157,\n", + " learning_rate=0.11289406635035952,\n", + " max_depth=2,\n", + " min_child_weight=13,\n", + " n_estimators=793,\n", + " reg_alpha=0.8916440115567629,\n", + " reg_lambda=0.24964146334134232,\n", + " subsample=0.23733262975737876,\n", ")\n", "```" ], @@ -1307,7 +1585,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Pareto optimal pipeline p1:\n" + "Pareto optimal pipeline p4:\n" ] }, { @@ -1316,23 +1594,23 @@ "\n", "\n", - "\n", "\n", - "\n", - "\n", + "\n", + "\n", "cluster:(root)\n", "\n", - "\n", + "\n", "\n", "\n", "\n", "\n", "project_0\n", "\n", - "\n", - "Project\n", + "\n", + "Project\n", "\n", "\n", "\n", @@ -1340,56 +1618,56 @@ "\n", "simple_imputer\n", "\n", - "\n", - "Simple-\n", - "Imputer\n", + "\n", + "Simple-\n", + "Imputer\n", "\n", "\n", "\n", "\n", "\n", "project_0->simple_imputer\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "no_op\n", "\n", - "\n", - "No-\n", - "Op\n", + "\n", + "No-\n", + "Op\n", "\n", "\n", "\n", "\n", "\n", "simple_imputer->no_op\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "concat_features\n", "\n", - "\n", - "Concat-\n", - "Features\n", + "\n", + "Concat-\n", + "Features\n", "\n", "\n", "\n", "\n", "\n", "no_op->concat_features\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "project_1\n", "\n", - "\n", - 
"Project\n", + "\n", + "Project\n", "\n", "\n", "\n", @@ -1397,45 +1675,45 @@ "\n", "one_hot\n", "\n", - "\n", - "One-\n", - "Hot\n", + "\n", + "One-\n", + "Hot\n", "\n", "\n", "\n", "\n", "\n", "project_1->one_hot\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "one_hot->concat_features\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n", "xg_boost\n", - "\n", - "\n", - "XG-\n", - "Boost\n", + "\n", + "\n", + "XG-\n", + "Boost\n", "\n", "\n", "\n", "\n", "\n", "concat_features->xg_boost\n", - "\n", - "\n", + "\n", + "\n", "\n", "\n", "\n" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1447,14 +1725,14 @@ "```python\n", "pipeline = XGBoost(\n", " objective=\"binary:hinge\",\n", - " gamma=0.21366688493747565,\n", - " learning_rate=0.8910831102827533,\n", - " max_depth=1,\n", - " min_child_weight=6,\n", - " n_estimators=820,\n", - " reg_alpha=0.4051207340038795,\n", - " reg_lambda=0.3234419809813596,\n", - " subsample=0.7517190652925614,\n", + " gamma=0.24444274758583728,\n", + " learning_rate=0.7562590526098265,\n", + " max_depth=3,\n", + " min_child_weight=14,\n", + " n_estimators=533,\n", + " reg_alpha=0.9097217358808588,\n", + " reg_lambda=0.22943888113151426,\n", + " subsample=0.48633961754946003,\n", ")\n", "```" ], @@ -1467,11 +1745,12 @@ } ], "source": [ - "for pname in opt_trained.summary().index:\n", - " print(f'Pareto optimal pipeline {pname}:')\n", - " p = opt_trained.get_pipeline(pname)\n", - " p.visualize()\n", - " p.get_last().pretty_print(ipython_display=True, show_imports=False) " + "if lalegpl_installed:\n", + " for pname in opt_trained.summary().index:\n", + " print(f\"Pareto optimal pipeline {pname}:\")\n", + " p = opt_trained.get_pipeline(pname)\n", + " p.visualize()\n", + " p.get_last().pretty_print(ipython_display=True, show_imports=False)" ] }, { @@ -1488,12 +1767,14 @@ }, { "cell_type": "code", - "execution_count": 16, - "metadata": {}, + "execution_count": 15, + "metadata": { + "scrolled": false + }, "outputs": [ { "data": { - 
"image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -1505,50 +1786,50 @@ } ], "source": [ - "import matplotlib.pyplot as plt\n", - "figs, axs = plt.subplots(1, 2, sharex=True, sharey=True, figsize=(10, 4.5), squeeze=True)\n", + "if lalegpl_installed:\n", + " import matplotlib.pyplot as plt\n", + " figs, axs = plt.subplots(1, 2, sharex=True, sharey=True, figsize=(10, 4.5), squeeze=True)\n", "\n", - "pareto_pipelines = [opt_trained.get_pipeline(pipeline_name=id) for id in opt_trained.summary().index.tolist()]\n", - "df_cv = opt_trained.summary()\n", + " pareto_pipelines = [\n", + " opt_trained.get_pipeline(pipeline_name=id)\n", + " for id in opt_trained.summary().index.tolist()\n", + " ]\n", + " df_cv = opt_trained.summary()\n", "\n", - "cols = ['loss1' , 'loss2']\n", - "label_dict = {'loss1' : '1 - Balanced Accuracy', 'loss2' : 'False Positive Rate'}\n", + " cols = ['loss1' , 'loss2']\n", + " label_dict = {'loss1' : 'Balanced Accuracy', 'loss2' : 'Symmetric Disparate Impact'}\n", "\n", - "# Let's put loss2 on X-axis\n", - "ax = axs[0]\n", - "ax.scatter(df_cv['loss2'], df_cv['loss1'], s=30, marker='x', label='Pareto-Solutions')\n", - "ax.set_xlabel(label_dict['loss2'], fontsize=15)\n", - "ax.set_ylabel(label_dict['loss1'], fontsize=15)\n", - "ax.grid()\n", - "ax.legend()\n", - "ax.set_title(\"Pareto-front based on k-fold CV scores\", fontsize=17)\n", + " # Let's put loss2 on X-axis\n", + " ax = axs[0]\n", + " ax.scatter(df_cv['loss2'] * -1 + 1, df_cv['loss1'] * -1 + 1, s=30, marker='x', label='Pareto-Solutions')\n", + " ax.set_xlabel(label_dict['loss2'], fontsize=15)\n", + " ax.set_ylabel(label_dict['loss1'], fontsize=15)\n", + " ax.grid()\n", + " ax.legend()\n", + " ax.set_title(\"Pareto-front based on k-fold CV scores\", fontsize=17)\n", "\n", - "# Let's evaluate the pareto-optimal pipelines on test data\n", - "pX, pY = [], []\n", - "for pipeline in pareto_pipelines:\n", - " obj1_value = best_score[0] - scorer[0](pipeline, test_X, test_y)\n", - " obj2_value = best_score[1] - scorer[1](pipeline, test_X, 
test_y)\n", - " \n", - " pX.append(obj1_value)\n", - " pY.append(obj2_value)\n", + " # Let's evaluate the pareto-optimal pipelines on test data\n", + " pX, pY = [], []\n", + " for pipeline in pareto_pipelines:\n", + " pX.append(scorers[0](pipeline, test_X, test_y))\n", + " pY.append(scorers[1](pipeline, test_X, test_y))\n", "\n", - "dict = {'loss1' : pX, 'loss2' : pY}\n", - "df = pd.DataFrame(dict) \n", + " dict = {'loss1' : pX, 'loss2' : pY}\n", + " df = pd.DataFrame(dict) \n", " \n", - "# Metric values using single objective best pipeline \n", - "sopt_err = best_score[0] - scorer[0](auto_trained, test_X, test_y)\n", - "sopt_fpr = best_score[1] - scorer[1](auto_trained, test_X, test_y)\n", + " # Metric values using single objective best pipeline \n", + " sopt_err = scorers[0](auto_trained, test_X, test_y)\n", + " sopt_fpr = scorers[1](auto_trained, test_X, test_y)\n", " \n", - "\n", - "ax = axs[1]\n", - "ax.scatter(df['loss2'], df['loss1'], s=30, marker='x', label='Pareto-Solutions')\n", - "ax.scatter(sopt_fpr, sopt_err, s=30, marker='o', label='Hyperopt Solution')\n", - "ax.set_xlabel(label_dict['loss2'], fontsize=15)\n", - "ax.grid()\n", - "ax.legend()\n", - "ax.set_title(\"Pareto-front based on Test data\", fontsize=17)\n", - "plt.tight_layout()\n", - "plt.show()\n" + " ax = axs[1]\n", + " ax.scatter(df['loss2'], df['loss1'], s=30, marker='x', label='Pareto-Solutions')\n", + " ax.scatter(sopt_fpr, sopt_err, s=30, marker='o', label='Hyperopt Solution')\n", + " ax.set_xlabel(label_dict['loss2'], fontsize=15)\n", + " ax.grid()\n", + " ax.legend()\n", + " ax.set_title(\"Pareto-front based on Test data\", fontsize=17)\n", + " plt.tight_layout()\n", + " plt.show()" ] } ], @@ -1568,7 +1849,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.12" + "version": "3.7.13" } }, "nbformat": 4,