diff --git a/Tutorial/amltk_search_space_parser_example.ipynb b/Tutorial/amltk_search_space_parser_example.ipynb new file mode 100644 index 00000000..fe2038df --- /dev/null +++ b/Tutorial/amltk_search_space_parser_example.ipynb @@ -0,0 +1,1897 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "AMLTK (https://github.com/automl/amltk) provides a framework for developing AutoML systems. One component of this framework is its search space definitions.\n", + "\n", + "TPOT2 provides a function, tpot2.utils.tpot2_parser, which converts a search space defined with the AMLTK API into the search space class used by TPOT2. This allows users to define a single search space and use it with both tools, facilitating more direct comparisons. Below are examples of a few search spaces defined in AMLTK and how to use them in TPOT2.\n", + "\n", + "Note: This feature is still experimental, and not all features of the AMLTK API are fully supported in TPOT2 yet. (For example, automated splitting of columns into categorical vs. numeric branches with amltk.pipeline.Split is not currently implemented in the parser.)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
╭─ Split(split_imputation) ───────────────────────────────────────────────────────────────────────────────────────╮\n",
+       " ╭─ Sequential(categories) ───────────────────────────╮ ╭─ Sequential(numerics) ───────────────────────────────╮ \n",
+       "  ╭─ Fixed(ColumnTransformer) ─────────────────────╮   ╭─ Fixed(ColumnTransformer) ───────────────────────╮  \n",
+       "   item ColumnTransformer(transformers=[('passth…     item ColumnTransformer(transformers=[('passthro…   \n",
+       "        'passthrough',                                     'passthrough',                                \n",
+       "                                         <sklear…                                           <sklearn.…   \n",
+       "        object at 0x7d354d946290>)])                       object at 0x7d34edf94fa0>)])                  \n",
+       "  ╰────────────────────────────────────────────────╯   ╰──────────────────────────────────────────────────╯  \n",
+       "       \n",
+       "  ╭─ Fixed(SimpleImputer) ─────────────────────────╮   ╭─ Component(SimpleImputer) ─────────────╮            \n",
+       "   item SimpleImputer(fill_value='missing',           item  class SimpleImputer(...)                     \n",
+       "        strategy='constant')                          space {'strategy': ['mean', 'median']}             \n",
+       "  ╰────────────────────────────────────────────────╯   ╰────────────────────────────────────────╯            \n",
+       "    ╰──────────────────────────────────────────────────────╯ \n",
+       "  ╭─ Fixed(OneHotEncoder) ─────────────────────────╮                                                           \n",
+       "   item OneHotEncoder(drop='first',                                                                          \n",
+       "        sparse_output=False)                                                                                 \n",
+       "  ╰────────────────────────────────────────────────╯                                                           \n",
+       " ╰────────────────────────────────────────────────────╯                                                          \n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[38;2;119;125;167m╭─\u001b[0m\u001b[38;2;119;125;167m \u001b[0m\u001b[1;38;2;119;125;167mSplit\u001b[0m\u001b[38;2;119;125;167m(\u001b[0m\u001b[3;38;2;119;125;167msplit_imputation\u001b[0m\u001b[38;2;119;125;167m) \u001b[0m\u001b[38;2;119;125;167m──────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;119;125;167m─╮\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mcategories\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m──────────────────────────\u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m \u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mnumerics\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m──────────────────────────────\u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mColumnTransformer\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m────────────────────\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mColumnTransformer\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m──────────────────────\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mColumnTransformer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mtransformers\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m[\u001b[0m\u001b[1;39m(\u001b[0m\u001b[32m'passth…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mColumnTransformer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mtransformers\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m[\u001b[0m\u001b[1;39m(\u001b[0m\u001b[32m'passthro…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[32m'passthrough'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[32m'passthrough'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[1;39m<\u001b[0m\u001b[1;95msklear…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m 
\u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[1;39m<\u001b[0m\u001b[1;95msklearn.…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39mobject at \u001b[0m\u001b[1;36m0x7d354d946290\u001b[0m\u001b[1;39m>\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39mobject at \u001b[0m\u001b[1;36m0x7d34edf94fa0\u001b[0m\u001b[1;39m>\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰──────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mSimpleImputer\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m────────────────────────\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mSimpleImputer\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m────────────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mSimpleImputer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mfill_value\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'missing'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=861007;https://www.scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html\u001b\\\u001b[4;39mSimpleImputer\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[33mstrategy\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'constant'\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m 
\u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'strategy'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m[\u001b[0m\u001b[32m'mean'\u001b[0m\u001b[39m, \u001b[0m\u001b[32m'median'\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m}\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╰────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m╰──────────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mOneHotEncoder\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m────────────────────────\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mOneHotEncoder\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mdrop\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'first'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[33msparse_output\u001b[0m\u001b[39m=\u001b[0m\u001b[3;91mFalse\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m╰────────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [], + "text/plain": [ + "Split(name='split_imputation', item=None, nodes=(Sequential(name='categories', item=None, nodes=(Fixed(name='ColumnTransformer', item=ColumnTransformer(transformers=[('passthrough', 'passthrough',\n", + " )]), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Fixed(name='SimpleImputer', item=SimpleImputer(fill_value='missing', strategy='constant'), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Fixed(name='OneHotEncoder', item=OneHotEncoder(drop='first', sparse_output=False), 
nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Sequential(name='numerics', item=None, nodes=(Fixed(name='ColumnTransformer', item=ColumnTransformer(transformers=[('passthrough', 'passthrough',\n", + " )]), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Component(name='SimpleImputer', item=, nodes=(), config=None, space={'strategy': ['mean', 'median']}, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.compose import make_column_selector\n", + "import numpy as np\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.impute import SimpleImputer\n", + "from sklearn.preprocessing import OneHotEncoder\n", + "from sklearn.svm import SVC\n", + "from amltk.pipeline import Choice, Component, Sequential, Split\n", + "import tpot2\n", + "from sklearn.preprocessing import FunctionTransformer\n", + "from sklearn.compose import make_column_transformer\n", + "import tpot2\n", + "import numpy as np\n", + "import sklearn\n", + "import sklearn.datasets\n", + "import pandas as pd\n", + "# create dummy pandas dataset with both categorical and numerical columns\n", + "X, y = sklearn.datasets.make_classification(n_samples=100, n_features=5, n_informative=3, n_classes=2, random_state=42)\n", + "X = pd.DataFrame(X, columns=[f\"num_{i}\" for i in range(5)])\n", + "# add 5 categorical columns\n", + "for i in range(5):\n", + " X[f\"cat_{i}\"] = np.random.choice([\"A\", \"B\", \"C\"], size=100)\n", + "y = y.flatten()\n", + "# train test split\n", + "X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size=0.5)\n", + "\n", + "# TODO: implement support for this condition\n", + "# select_categories = make_column_selector(dtype_include=object)\n", + "# select_numerical = make_column_selector(dtype_include=np.number)\n", + "\n", + "# split_imputation = Split(\n", + "# {\n", + "# \"categories\": [SimpleImputer(strategy=\"constant\", fill_value=\"missing\"), OneHotEncoder(drop=\"first\")],\n", + "# \"numerics\": Component(SimpleImputer, space={\"strategy\": [\"mean\", \"median\"]}),\n", + "# },\n", + "# config={\"categories\": select_categories, \"numerics\": select_numerical}, #not yet supported\n", + "# name=\"feature_preprocessing\",\n", + "# )\n", + "# split_imputation\n", + "\n", + "select_categories = make_column_selector(dtype_include=object)\n", + "select_numerical = make_column_selector(dtype_include=np.number)\n", + "\n", + "cat_selector = make_column_transformer((\"passthrough\", select_categories))\n", + "num_selector = make_column_transformer((\"passthrough\", select_numerical))\n", + "\n", + "\n", + "split_imputation = Split(\n", + " {\n", + " \"categories\": [cat_selector,SimpleImputer(strategy=\"constant\", fill_value=\"missing\"), OneHotEncoder(drop=\"first\", sparse_output=False)],\n", + " \"numerics\": [num_selector, Component(SimpleImputer, space={\"strategy\": [\"mean\", \"median\"]})],\n", + " },\n", + " name=\"split_imputation\",\n", + ")\n", + "split_imputation" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
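The commented-out config= form in the cell above is the AMLTK-native way to route categorical vs. numeric columns to each branch of the Split, but it is not yet supported by the parser; the workaround is to start each branch with an explicit ColumnTransformer selector. A minimal sanity check of that workaround (not part of the original notebook; it assumes cat_selector, num_selector, and X_train from the cell above):

```python
# Each branch-leading ColumnTransformer only passes through its own column type,
# which is what the unsupported config={"categories": ..., "numerics": ...} would do.
print(cat_selector.fit_transform(X_train).shape)  # (50, 5): the 5 categorical columns
print(num_selector.fit_transform(X_train).shape)  # (50, 5): the 5 numeric columns
```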
╭─ Sequential(my_pipeline) ───────────────────────────────────────────────────────────────────────────────────────╮\n",
+       " ╭─ Split(split_imputation) ───────────────────────────────────────────────────────────────────────────────────╮ \n",
+       "  ╭─ Sequential(categories) ─────────────────────────╮ ╭─ Sequential(numerics) ─────────────────────────────╮  \n",
+       "   ╭─ Fixed(ColumnTransformer) ───────────────────╮   ╭─ Fixed(ColumnTransformer) ─────────────────────╮   \n",
+       "    item ColumnTransformer(transformers=[('pass…     item ColumnTransformer(transformers=[('passth…    \n",
+       "         'passthrough',                                   'passthrough',                               \n",
+       "                                          <skle…                                           <sklear…    \n",
+       "         object at 0x7d354d946290>)])                     object at 0x7d34edf94fa0>)])                 \n",
+       "   ╰──────────────────────────────────────────────╯   ╰────────────────────────────────────────────────╯   \n",
+       "         \n",
+       "   ╭─ Fixed(SimpleImputer) ───────────────────────╮   ╭─ Component(SimpleImputer) ─────────────╮           \n",
+       "    item SimpleImputer(fill_value='missing',         item  class SimpleImputer(...)                    \n",
+       "         strategy='constant')                        space {'strategy': ['mean', 'median']}            \n",
+       "   ╰──────────────────────────────────────────────╯   ╰────────────────────────────────────────╯           \n",
+       "     ╰────────────────────────────────────────────────────╯  \n",
+       "   ╭─ Fixed(OneHotEncoder) ───────────────────────╮                                                          \n",
+       "    item OneHotEncoder(drop='first',                                                                       \n",
+       "         sparse_output=False)                                                                              \n",
+       "   ╰──────────────────────────────────────────────╯                                                          \n",
+       "  ╰──────────────────────────────────────────────────╯                                                         \n",
+       " ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ \n",
+       "  \n",
+       " ╭─ Choice(selectors) ─────────────────────────────────────────────────────╮                                     \n",
+       "  ╭─ Component(SelectKBest) ─────╮ ╭─ Component(VarianceThreshold) ─────╮                                      \n",
+       "   item  class SelectKBest(...)   item  class VarianceThreshold(...)                                       \n",
+       "   space {'k': (1, 10)}           space {'threshold': (0.1, 1)}                                            \n",
+       "  ╰──────────────────────────────╯ ╰────────────────────────────────────╯                                      \n",
+       " ╰─────────────────────────────────────────────────────────────────────────╯                                     \n",
+       "  \n",
+       " ╭─ Split(transformers) ─────────────────────────────────────────────────────────────────────────────────╮       \n",
+       "  ╭─ Sequential(passthrough) ─╮ ╭─ Sequential(polynomial) ────────────────╮ ╭─ Sequential(zerocount) ─╮        \n",
+       "   ╭─ Fixed(Passthrough) ─╮    ╭─ Component(PolynomialFeatures) ─────╮   ╭─ Fixed(ZeroCount) ─╮          \n",
+       "    item Passthrough()        item  class PolynomialFeatures(...)     item ZeroCount()             \n",
+       "   ╰──────────────────────╯     space {'degree': [2, 3]}               ╰────────────────────╯          \n",
+       "  ╰───────────────────────────╯  ╰─────────────────────────────────────╯  ╰─────────────────────────╯        \n",
+       "                                ╰─────────────────────────────────────────╯                                    \n",
+       " ╰───────────────────────────────────────────────────────────────────────────────────────────────────────╯       \n",
+       "  \n",
+       " ╭─ Choice(estimator) ─────────────────────────────────────────────────────────────────────────────────────────╮ \n",
+       "  ╭─ Component(RandomForestClassifier) ──────────╮ ╭─ Component(SVC) ────────────────────────────╮             \n",
+       "   item   class RandomForestClassifier(...)       item  class SVC(...)                                     \n",
+       "   config {'max_depth': 3}                        space {'kernel': ['linear', 'rbf', 'poly']}              \n",
+       "   space  {                                      ╰─────────────────────────────────────────────╯             \n",
+       "              'n_estimators': (10, 100),                                                                     \n",
+       "              'criterion': [                                                                                 \n",
+       "                  'gini',                                                                                    \n",
+       "                  'log_loss'                                                                                 \n",
+       "              ]                                                                                              \n",
+       "          }                                                                                                  \n",
+       "  ╰──────────────────────────────────────────────╯                                                             \n",
+       " ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ \n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mmy_pipeline\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m──────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m╭─\u001b[0m\u001b[38;2;119;125;167m \u001b[0m\u001b[1;38;2;119;125;167mSplit\u001b[0m\u001b[38;2;119;125;167m(\u001b[0m\u001b[3;38;2;119;125;167msplit_imputation\u001b[0m\u001b[38;2;119;125;167m) \u001b[0m\u001b[38;2;119;125;167m──────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;119;125;167m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mcategories\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m────────────────────────\u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m \u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mnumerics\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m────────────────────────────\u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mColumnTransformer\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m──────────────────\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mColumnTransformer\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m────────────────────\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mColumnTransformer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mtransformers\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m[\u001b[0m\u001b[1;39m(\u001b[0m\u001b[32m'pass…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mColumnTransformer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mtransformers\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m[\u001b[0m\u001b[1;39m(\u001b[0m\u001b[32m'passth…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m 
\u001b[39m \u001b[0m\u001b[32m'passthrough'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[32m'passthrough'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[1;39m<\u001b[0m\u001b[1;95mskle…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[1;39m<\u001b[0m\u001b[1;95msklear…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39mobject at \u001b[0m\u001b[1;36m0x7d354d946290\u001b[0m\u001b[1;39m>\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39mobject at \u001b[0m\u001b[1;36m0x7d34edf94fa0\u001b[0m\u001b[1;39m>\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰──────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mSimpleImputer\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m──────────────────────\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mSimpleImputer\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m────────────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m 
\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mSimpleImputer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mfill_value\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'missing'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=178888;https://www.scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html\u001b\\\u001b[4;39mSimpleImputer\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[33mstrategy\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'constant'\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'strategy'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m[\u001b[0m\u001b[32m'mean'\u001b[0m\u001b[39m, \u001b[0m\u001b[32m'median'\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m}\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰──────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╰────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m╰────────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mOneHotEncoder\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m──────────────────────\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mOneHotEncoder\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mdrop\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'first'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m 
\u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[33msparse_output\u001b[0m\u001b[39m=\u001b[0m\u001b[3;91mFalse\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰──────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m╰──────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m╭─\u001b[0m\u001b[38;2;255;69;0m \u001b[0m\u001b[1;38;2;255;69;0mChoice\u001b[0m\u001b[38;2;255;69;0m(\u001b[0m\u001b[3;38;2;255;69;0mselectors\u001b[0m\u001b[38;2;255;69;0m) \u001b[0m\u001b[38;2;255;69;0m────────────────────────────────────────────────────\u001b[0m\u001b[38;2;255;69;0m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mSelectKBest\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mVarianceThreshold\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=870666;https://www.scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectKBest.html\u001b\\\u001b[4;39mSelectKBest\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=23174;https://www.scikit-learn.org/stable/modules/generated/sklearn.feature_selection.VarianceThreshold.html\u001b\\\u001b[4;39mVarianceThreshold\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'k'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[39m, 
\u001b[0m\u001b[1;36m10\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m}\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'threshold'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;36m0.1\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m}\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m╰──────────────────────────────╯\u001b[0m \u001b[38;2;230;175;46m╰────────────────────────────────────╯\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m╰─────────────────────────────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m╭─\u001b[0m\u001b[38;2;119;125;167m \u001b[0m\u001b[1;38;2;119;125;167mSplit\u001b[0m\u001b[38;2;119;125;167m(\u001b[0m\u001b[3;38;2;119;125;167mtransformers\u001b[0m\u001b[38;2;119;125;167m) \u001b[0m\u001b[38;2;119;125;167m────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;119;125;167m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mpassthrough\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m \u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mpolynomial\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m───────────────\u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m \u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mzerocount\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mPassthrough\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mPolynomialFeatures\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mZeroCount\u001b[0m\u001b[38;2;86;53;30m) 
\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mPassthrough\u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=605509;https://www.scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html\u001b\\\u001b[4;39mPolynomialFeatures\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mZeroCount\u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰──────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'degree'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m[\u001b[0m\u001b[1;36m2\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m}\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m╰───────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╰─────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m╰─────────────────────────╯\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m╰─────────────────────────────────────────╯\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m╰───────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m╭─\u001b[0m\u001b[38;2;255;69;0m \u001b[0m\u001b[1;38;2;255;69;0mChoice\u001b[0m\u001b[38;2;255;69;0m(\u001b[0m\u001b[3;38;2;255;69;0mestimator\u001b[0m\u001b[38;2;255;69;0m) \u001b[0m\u001b[38;2;255;69;0m────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;255;69;0m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m 
\u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mRandomForestClassifier\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m─────────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mSVC\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m───────────────────────────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=470078;https://www.scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html\u001b\\\u001b[4;39mRandomForestClassifier\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=315827;https://www.scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html\u001b\\\u001b[4;39mSVC\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mconfig\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'max_depth'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;39m}\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'kernel'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m[\u001b[0m\u001b[32m'linear'\u001b[0m\u001b[39m, \u001b[0m\u001b[32m'rbf'\u001b[0m\u001b[39m, \u001b[0m\u001b[32m'poly'\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m}\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace \u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m╰─────────────────────────────────────────────╯\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[32m'n_estimators'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;36m10\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m100\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[32m'criterion'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m[\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + 
"\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[32m'gini'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[32m'log_loss'\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[1;39m]\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39m \u001b[0m\u001b[1;39m}\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m╰──────────────────────────────────────────────╯\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [], + "text/plain": [ + "Sequential(name='my_pipeline', item=None, nodes=(Split(name='split_imputation', item=None, nodes=(Sequential(name='categories', item=None, nodes=(Fixed(name='ColumnTransformer', item=ColumnTransformer(transformers=[('passthrough', 'passthrough',\n", + " )]), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Fixed(name='SimpleImputer', item=SimpleImputer(fill_value='missing', strategy='constant'), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Fixed(name='OneHotEncoder', item=OneHotEncoder(drop='first', sparse_output=False), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Sequential(name='numerics', item=None, nodes=(Fixed(name='ColumnTransformer', item=ColumnTransformer(transformers=[('passthrough', 'passthrough',\n", + " )]), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Component(name='SimpleImputer', item=, nodes=(), config=None, space={'strategy': ['mean', 'median']}, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Choice(name='selectors', item=None, nodes=(Component(name='SelectKBest', item=, nodes=(), config=None, space={'k': (1, 10)}, fidelities=None, config_transform=None, meta=None), Component(name='VarianceThreshold', item=, nodes=(), config=None, space={'threshold': (0.1, 1)}, fidelities=None, config_transform=None, meta=None)), config=None, space=None, 
fidelities=None, config_transform=None, meta=None), Split(name='transformers', item=None, nodes=(Sequential(name='passthrough', item=None, nodes=(Fixed(name='Passthrough', item=Passthrough(), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None),), config=None, space=None, fidelities=None, config_transform=None, meta=None), Sequential(name='polynomial', item=None, nodes=(Component(name='PolynomialFeatures', item=, nodes=(), config=None, space={'degree': [2, 3]}, fidelities=None, config_transform=None, meta=None),), config=None, space=None, fidelities=None, config_transform=None, meta=None), Sequential(name='zerocount', item=None, nodes=(Fixed(name='ZeroCount', item=ZeroCount(), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None),), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Choice(name='estimator', item=None, nodes=(Component(name='RandomForestClassifier', item=, nodes=(), config={'max_depth': 3}, space={'n_estimators': (10, 100), 'criterion': ['gini', 'log_loss']}, fidelities=None, config_transform=None, meta=None), Component(name='SVC', item=, nodes=(), config=None, space={'kernel': ['linear', 'rbf', 'poly']}, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from tpot2.builtin_modules import Passthrough, ZeroCount\n", + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn.decomposition import PCA\n", + "\n", + "from sklearn.feature_selection import VarianceThreshold, SelectKBest\n", + "\n", + "selectors = Choice(\n", + " Component(VarianceThreshold, space={\"threshold\": (0.1,1)}),\n", + " Component(SelectKBest, space={\"k\": (1, 10)}),\n", + " name=\"selectors\",\n", + ")\n", + "\n", + "\n", + "transformers = Split(\n", + " {\n", + " \"passthrough\": Passthrough(),\n", + " \"polynomial\": Component(PolynomialFeatures, space={\"degree\": [2, 3]}),\n", + " \"zerocount\" : ZeroCount(),\n", + " },\n", + " # config={\"categories\": select_categories, \"numerics\": select_numerical},\n", + " name=\"transformers\",\n", + ")\n", + "\n", + "pipeline = (\n", + " Sequential(name=\"my_pipeline\")\n", + " >> split_imputation\n", + " # >> Component(SimpleImputer, space={\"strategy\": [\"mean\", \"median\"]}) # Choose either mean or median\n", + " \n", + " >> selectors\n", + " >> transformers\n", + " >> Choice(\n", + " # Our pipeline can choose between two different estimators\n", + " Component(\n", + " RandomForestClassifier,\n", + " space={\"n_estimators\": (10, 100), \"criterion\": [\"gini\", \"log_loss\"]},\n", + " config={\"max_depth\": 3},\n", + " ),\n", + " Component(SVC, space={\"kernel\": [\"linear\", \"rbf\", \"poly\"]}),\n", + " name=\"estimator\",\n", + " )\n", + ")\n", + "\n", + "# Display the amltk Pipeline\n", + "pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
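To make the semantics of the search space concrete: a Choice node means "pick exactly one of these branches", while a Split node means "apply every branch in parallel and concatenate the results" (a FeatureUnion in scikit-learn terms). The sketch below (not part of the original notebook) writes out by hand one pipeline that this space can produce, using only objects already defined above; it mirrors the structure of the exported pipeline shown in the next output.

```python
from sklearn.base import clone
from sklearn.pipeline import make_pipeline, make_union

# One concrete sample from the search space, spelled out manually:
# Split -> make_union of its branches, Choice -> a single picked option.
manual_candidate = make_pipeline(
    make_union(  # Split(split_imputation)
        make_pipeline(clone(cat_selector),
                      SimpleImputer(strategy="constant", fill_value="missing"),
                      OneHotEncoder(drop="first", sparse_output=False)),
        make_pipeline(clone(num_selector), SimpleImputer(strategy="median")),
    ),
    VarianceThreshold(threshold=0.1),                                      # Choice(selectors)
    make_union(Passthrough(), PolynomialFeatures(degree=2), ZeroCount()),  # Split(transformers)
    RandomForestClassifier(max_depth=3, n_estimators=50),                  # Choice(estimator)
)
manual_candidate.fit(X_train, y_train)
```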
Pipeline(steps=[('featureunion-1',\n",
+       "                 FeatureUnion(transformer_list=[('pipeline-1',\n",
+       "                                                 Pipeline(steps=[('columntransformer',\n",
+       "                                                                  ColumnTransformer(transformers=[('passthrough',\n",
+       "                                                                                                   'passthrough',\n",
+       "                                                                                                   <sklearn.compose._column_transformer.make_column_selector object at 0x7d354d946290>)])),\n",
+       "                                                                 ('simpleimputer',\n",
+       "                                                                  SimpleImputer(fill_value='missing',\n",
+       "                                                                                strategy='constant')),\n",
+       "                                                                 ('onehotencode...\n",
+       "                 VarianceThreshold(threshold=0.6738938110936)),\n",
+       "                ('featureunion-2',\n",
+       "                 FeatureUnion(transformer_list=[('pipeline-1',\n",
+       "                                                 Pipeline(steps=[('passthrough',\n",
+       "                                                                  Passthrough())])),\n",
+       "                                                ('pipeline-2',\n",
+       "                                                 Pipeline(steps=[('polynomialfeatures',\n",
+       "                                                                  PolynomialFeatures(degree=3))])),\n",
+       "                                                ('pipeline-3',\n",
+       "                                                 Pipeline(steps=[('zerocount',\n",
+       "                                                                  ZeroCount())]))])),\n",
+       "                ('randomforestclassifier',\n",
+       "                 RandomForestClassifier(n_estimators=16))])
" + ], + "text/plain": [ + "Pipeline(steps=[('featureunion-1',\n", + " FeatureUnion(transformer_list=[('pipeline-1',\n", + " Pipeline(steps=[('columntransformer',\n", + " ColumnTransformer(transformers=[('passthrough',\n", + " 'passthrough',\n", + " )])),\n", + " ('simpleimputer',\n", + " SimpleImputer(fill_value='missing',\n", + " strategy='constant')),\n", + " ('onehotencode...\n", + " VarianceThreshold(threshold=0.6738938110936)),\n", + " ('featureunion-2',\n", + " FeatureUnion(transformer_list=[('pipeline-1',\n", + " Pipeline(steps=[('passthrough',\n", + " Passthrough())])),\n", + " ('pipeline-2',\n", + " Pipeline(steps=[('polynomialfeatures',\n", + " PolynomialFeatures(degree=3))])),\n", + " ('pipeline-3',\n", + " Pipeline(steps=[('zerocount',\n", + " ZeroCount())]))])),\n", + " ('randomforestclassifier',\n", + " RandomForestClassifier(n_estimators=16))])" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#convert to tpot search space\n", + "tpot_search_space = tpot2.utils.tpot2_parser(pipeline)\n", + "\n", + "# sample a pipeline from the tpot search space\n", + "tpot_search_space.generate().export_pipeline()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generation: 50%|█████ | 1/2 [00:02<00:02, 2.60s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generation: 1\n", + "Best roc_auc_score score: 0.976\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generation: 100%|██████████| 2/2 [00:03<00:00, 1.57s/it]\n", + "2024-09-09 17:25:40,301 - distributed.scheduler - ERROR - Removing worker 'tcp://127.0.0.1:39897' caused the cluster to lose scattered data, which can't be recovered: {'ndarray-3f2f44921e6e9cc40ef07cfcd8ae90fb', 'DataFrame-5551f84174fd651642ff10eb71e30b22'} (stimulus_id='handle-worker-cleanup-1725927940.3010821')\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generation: 2\n", + "Best roc_auc_score score: 0.984\n" + ] + }, + { + "data": { + "text/html": [ + "
TPOTEstimator(classification=True, generations=2, max_eval_time_seconds=300,\n",
+       "              n_jobs=10, population_size=10, scorers=['roc_auc'],\n",
+       "              scorers_weights=[1],\n",
+       "              search_space=<tpot2.search_spaces.pipelines.sequential.SequentialPipeline object at 0x7d34ec1efbb0>,\n",
+       "              verbose=5)
" + ], + "text/plain": [ + "TPOTEstimator(classification=True, generations=2, max_eval_time_seconds=300,\n", + " n_jobs=10, population_size=10, scorers=['roc_auc'],\n", + " scorers_weights=[1],\n", + " search_space=,\n", + " verbose=5)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "\n", + "\n", + "est = tpot2.TPOTEstimator(\n", + " scorers = [\"roc_auc\"],\n", + " scorers_weights = [1],\n", + " classification = True,\n", + " cv = 5,\n", + " search_space = tpot_search_space, #converted search space goes here\n", + " population_size= 10,\n", + " generations = 2,\n", + " max_eval_time_seconds = 60*5,\n", + " verbose = 5,\n", + " n_jobs=10,\n", + ")\n", + "\n", + "est.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Pipeline(steps=[('featureunion-1',\n",
+       "                 FeatureUnion(transformer_list=[('pipeline-1',\n",
+       "                                                 Pipeline(steps=[('columntransformer',\n",
+       "                                                                  ColumnTransformer(transformers=[('passthrough',\n",
+       "                                                                                                   'passthrough',\n",
+       "                                                                                                   <sklearn.compose._column_transformer.make_column_selector object at 0x7d34eb307cd0>)])),\n",
+       "                                                                 ('simpleimputer',\n",
+       "                                                                  SimpleImputer(fill_value='missing',\n",
+       "                                                                                strategy='constant')),\n",
+       "                                                                 ('onehotencode...\n",
+       "                 VarianceThreshold(threshold=0.1557560591318)),\n",
+       "                ('featureunion-2',\n",
+       "                 FeatureUnion(transformer_list=[('pipeline-1',\n",
+       "                                                 Pipeline(steps=[('passthrough',\n",
+       "                                                                  Passthrough())])),\n",
+       "                                                ('pipeline-2',\n",
+       "                                                 Pipeline(steps=[('polynomialfeatures',\n",
+       "                                                                  PolynomialFeatures())])),\n",
+       "                                                ('pipeline-3',\n",
+       "                                                 Pipeline(steps=[('zerocount',\n",
+       "                                                                  ZeroCount())]))])),\n",
+       "                ('randomforestclassifier',\n",
+       "                 RandomForestClassifier(criterion='log_loss',\n",
+       "                                        n_estimators=80))])
" + ], + "text/plain": [ + "Pipeline(steps=[('featureunion-1',\n", + " FeatureUnion(transformer_list=[('pipeline-1',\n", + " Pipeline(steps=[('columntransformer',\n", + " ColumnTransformer(transformers=[('passthrough',\n", + " 'passthrough',\n", + " )])),\n", + " ('simpleimputer',\n", + " SimpleImputer(fill_value='missing',\n", + " strategy='constant')),\n", + " ('onehotencode...\n", + " VarianceThreshold(threshold=0.1557560591318)),\n", + " ('featureunion-2',\n", + " FeatureUnion(transformer_list=[('pipeline-1',\n", + " Pipeline(steps=[('passthrough',\n", + " Passthrough())])),\n", + " ('pipeline-2',\n", + " Pipeline(steps=[('polynomialfeatures',\n", + " PolynomialFeatures())])),\n", + " ('pipeline-3',\n", + " Pipeline(steps=[('zerocount',\n", + " ZeroCount())]))])),\n", + " ('randomforestclassifier',\n", + " RandomForestClassifier(criterion='log_loss',\n", + " n_estimators=80))])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.fitted_pipeline_" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1,\n", + " 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0,\n", + " 1, 0, 0, 0, 0, 0])" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.predict(X_test)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "myenv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/setup.py b/setup.py index 0a404280..8586dbe7 100644 --- a/setup.py +++ b/setup.py @@ -53,7 +53,8 @@ def calculate_version(): extras_require={ 'skrebate': ['skrebate>=0.3.4'], 'mdr': ['scikit-mdr>=0.4.4'], - 'sklearnex' : ['scikit-learn-intelex>=2023.2.1'] + 'sklearnex' : ['scikit-learn-intelex>=2023.2.1'], + 'amltk' : ['amltk>=1.12.1'], }, classifiers=[ 'Intended Audience :: Science/Research', diff --git a/tpot2/__init__.py b/tpot2/__init__.py index 62290884..f7014a29 100644 --- a/tpot2/__init__.py +++ b/tpot2/__init__.py @@ -8,9 +8,9 @@ from .population import Population from . import builtin_modules -from . import utils from . import config from . import search_spaces +from . import utils from . import evolvers from . import objectives from . 
import selectors diff --git a/tpot2/config/classifiers.py b/tpot2/config/classifiers.py index 49b714ac..2fb09e41 100644 --- a/tpot2/config/classifiers.py +++ b/tpot2/config/classifiers.py @@ -535,7 +535,7 @@ def MLPClassifier_hyperparameter_parser(params): def get_GaussianProcessClassifier_ConfigurationSpace(n_features, random_state): space = { 'n_features': n_features, - 'alpha': Float("alpha", bounds=(1e-14, 1.0), log=True), + 'alpha': Float("alpha", bounds=(1e-10, 1.0), log=True), 'thetaL': Float("thetaL", bounds=(1e-10, 1e-3), log=True), 'thetaU': Float("thetaU", bounds=(1.0, 100000), log=True), } diff --git a/tpot2/config/get_configspace.py b/tpot2/config/get_configspace.py index 4a5cc997..46b13b60 100644 --- a/tpot2/config/get_configspace.py +++ b/tpot2/config/get_configspace.py @@ -45,7 +45,8 @@ from sklearn.feature_selection import f_classif, f_regression #TODO create a selectomixin using these? from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis from sklearn.gaussian_process import GaussianProcessRegressor, GaussianProcessClassifier -from sklearn.impute import SimpleImputer +from sklearn.experimental import enable_iterative_imputer +from sklearn.impute import SimpleImputer, IterativeImputer, KNNImputer all_methods = [SGDClassifier, RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, MLPClassifier, DecisionTreeClassifier, XGBClassifier, KNeighborsClassifier, SVC, LogisticRegression, LGBMClassifier, LinearSVC, GaussianNB, BernoulliNB, MultinomialNB, ExtraTreesRegressor, RandomForestRegressor, GradientBoostingRegressor, BaggingRegressor, DecisionTreeRegressor, KNeighborsRegressor, XGBRegressor, ZeroCount, ColumnOneHotEncoder, Binarizer, FastICA, FeatureAgglomeration, MaxAbsScaler, MinMaxScaler, Normalizer, Nystroem, PCA, PolynomialFeatures, RBFSampler, RobustScaler, StandardScaler, SelectFwe, SelectPercentile, VarianceThreshold, SGDRegressor, Ridge, Lasso, ElasticNet, Lars, LassoLars, LassoLarsCV, RidgeCV, SVR, LinearSVR, AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor, BaggingRegressor, ExtraTreesRegressor, DecisionTreeRegressor, KNeighborsRegressor, ElasticNetCV, AdaBoostClassifier,MLPRegressor, @@ -56,7 +57,7 @@ GaussianProcessClassifier, BaggingClassifier,LGBMRegressor, Passthrough,SkipTransformer, PassKBinsDiscretizer, - SimpleImputer, + SimpleImputer, IterativeImputer, KNNImputer ] @@ -124,7 +125,7 @@ "all_transformers" : ["transformers", "scalers"], "arithmatic": ["AddTransformer", "mul_neg_1_Transformer", "MulTransformer", "SafeReciprocalTransformer", "EQTransformer", "NETransformer", "GETransformer", "GTTransformer", "LETransformer", "LTTransformer", "MinTransformer", "MaxTransformer"], - "imputers": ["SimpleImputer"], + "imputers": ["SimpleImputer", "IterativeImputer", "KNNImputer"], "skrebate": ["ReliefF", "SURF", "SURFstar", "MultiSURF"], "genetic_encoders": ["DominantEncoder", "RecessiveEncoder", "HeterosisEncoder", "UnderDominanceEncoder", "OverDominanceEncoder"], @@ -136,8 +137,6 @@ def get_configspace(name, n_classes=3, n_samples=1000, n_features=100, random_state=None): match name: - case "SimpleImputer": - return imputers.simple_imputer_cs #autoqtl_builtins.py case "FeatureEncodingFrequencySelector": @@ -352,6 +351,12 @@ def get_configspace(name, n_classes=3, n_samples=1000, n_features=100, random_st ) #imputers.py + case "SimpleImputer": + return imputers.simple_imputer_cs + case "IterativeImputer": + return imputers.get_IterativeImputer_config_space(n_features=n_features, 
random_state=random_state) + case "KNNImputer": + return imputers.get_KNNImputer_config_space(n_samples=n_samples) #mdr_configs.py case "MDR": @@ -401,12 +406,12 @@ def get_configspace(name, n_classes=3, n_samples=1000, n_features=100, random_st raise ValueError(f"Could not find configspace for {name}") -def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_state=None, return_choice_pipeline=True): +def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_state=None, return_choice_pipeline=True, base_node=EstimatorNode): #if list of names, return a list of EstimatorNodes if isinstance(name, list) or isinstance(name, np.ndarray): - search_spaces = [get_search_space(n, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, return_choice_pipeline=False) for n in name] + search_spaces = [get_search_space(n, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, return_choice_pipeline=False, base_node=base_node) for n in name] #remove Nones search_spaces = [s for s in search_spaces if s is not None] @@ -417,12 +422,12 @@ def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_st if name in GROUPNAMES: name_list = GROUPNAMES[name] - return get_search_space(name_list, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, return_choice_pipeline=return_choice_pipeline) + return get_search_space(name_list, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, return_choice_pipeline=return_choice_pipeline, base_node=base_node) - return get_node(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state) + return get_node(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, base_node=base_node) -def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None): +def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None, base_node=EstimatorNode): #these are wrappers that take in another estimator as a parameter # TODO Add AdaBoostRegressor, AdaBoostClassifier as wrappers? wrap a decision tree with different params? 
@@ -443,38 +448,47 @@ def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None sfm_sp = get_configspace(name="SelectFromModel", n_classes=n_classes, n_samples=n_samples, random_state=random_state) ext = get_node("ExtraTreesRegressor", n_classes=n_classes, n_samples=n_samples, random_state=random_state) return WrapperPipeline(estimator_search_space=ext, method=SelectFromModel, space=sfm_sp) - + # TODO Add IterativeImputer with more estimator methods + ''' + if name == "IterativeImputer_learnedestimators": + iteative_sp = get_configspace(name="IterativeImputer", n_classes=n_classes, n_samples=n_samples, random_state=random_state) + regessor_searchspace = get_search_space(["LinearRegression", ..], n_classes=n_classes, n_samples=n_samples, random_state=random_state) + return WrapperPipeline(estimator_search_space=regressor_searchspace, method=ItartiveImputer, space=iteative_sp) + ''' #these are nodes that have special search spaces which require custom parsing of the hyperparameters + if name == "IterativeImputer": + configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) + return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=imputers.IterativeImputer_hyperparameter_parser) if name == "RobustScaler": configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=transformers.robust_scaler_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=transformers.robust_scaler_hyperparameter_parser) if name == "GradientBoostingClassifier": configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.GradientBoostingClassifier_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.GradientBoostingClassifier_hyperparameter_parser) if name == "HistGradientBoostingClassifier": configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.HistGradientBoostingClassifier_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.HistGradientBoostingClassifier_hyperparameter_parser) if name == "GradientBoostingRegressor": configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.GradientBoostingRegressor_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.GradientBoostingRegressor_hyperparameter_parser) if name == "HistGradientBoostingRegressor": configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.HistGradientBoostingRegressor_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.HistGradientBoostingRegressor_hyperparameter_parser) if name == "MLPClassifier": configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return 
EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.MLPClassifier_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.MLPClassifier_hyperparameter_parser) if name == "MLPRegressor": configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.MLPRegressor_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.MLPRegressor_hyperparameter_parser) if name == "GaussianProcessRegressor": configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.GaussianProcessRegressor_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.GaussianProcessRegressor_hyperparameter_parser) if name == "GaussianProcessClassifier": configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.GaussianProcessClassifier_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.GaussianProcessClassifier_hyperparameter_parser) if name == "FeatureAgglomeration": configspace = get_configspace(name, n_features=n_features) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=transformers.FeatureAgglomeration_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=transformers.FeatureAgglomeration_hyperparameter_parser) configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state) if configspace is None: @@ -482,4 +496,4 @@ def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None warnings.warn(f"Could not find configspace for {name}") return None - return EstimatorNode(STRING_TO_CLASS[name], configspace) \ No newline at end of file + return base_node(STRING_TO_CLASS[name], configspace) \ No newline at end of file diff --git a/tpot2/config/imputers.py b/tpot2/config/imputers.py index 3499c0aa..2c33629f 100644 --- a/tpot2/config/imputers.py +++ b/tpot2/config/imputers.py @@ -1,9 +1,80 @@ +import sklearn +import sklearn.ensemble +import sklearn.linear_model +import sklearn.neighbors from ConfigSpace import ConfigurationSpace from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal +from ConfigSpace import EqualsCondition + simple_imputer_cs = ConfigurationSpace( space = { - 'strategy' : Categorical('strategy', ['mean','median', 'most_frequent', ]), - 'add_indicator' : Categorical('add_indicator', [True, False]), + 'strategy' : Categorical('strategy', + ['mean','median', 'most_frequent', 'constant'] + ), + #'add_indicator' : Categorical('add_indicator', [True, False]), + #Removed add_indicator, it appends a mask next to the rest of the data + # and can cause errors. 
gk + } +) + +def get_IterativeImputer_config_space(n_features, random_state): + space = { 'initial_strategy' : Categorical('initial_strategy', + ['mean', 'median', + 'most_frequent', 'constant']), + 'n_nearest_features' : Integer('n_nearest_features', + bounds=(1, n_features)), + 'imputation_order' : Categorical('imputation_order', + ['ascending', 'descending', + 'roman', 'arabic', 'random']), } -) \ No newline at end of file + + estimator = Categorical('estimator', ['Bayesian', 'RFR', 'Ridge', 'KNN']) + sample_posterior = Categorical('sample_posterior', [True, False]) + sampling_condition = EqualsCondition(sample_posterior, estimator, 'Bayesian') + + if random_state is not None: + #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + cs = ConfigurationSpace(space=space) + cs.add_hyperparameters([estimator, sample_posterior]) + cs.add_conditions([sampling_condition]) + return cs + +def get_KNNImputer_config_space(n_samples): + space = { + 'n_neighbors': Integer('n_neighbors', bounds=(1, max(n_samples,100))), + 'weights': Categorical('weights', ['uniform', 'distance']) + } + + return ConfigurationSpace( + space=space + ) + +def IterativeImputer_hyperparameter_parser(params): + est = params['estimator'] + match est: + case 'Bayesian': + estimator = sklearn.linear_model.BayesianRidge() + case 'RFR': + estimator = sklearn.ensemble.RandomForestRegressor() + case 'Ridge': + estimator = sklearn.linear_model.Ridge() + case 'KNN': + estimator = sklearn.neighbors.KNeighborsRegressor() + + final_params = { + 'estimator' : estimator, + 'initial_strategy' : params['initial_strategy'], + 'n_nearest_features' : params['n_nearest_features'], + 'imputation_order' : params['imputation_order'], + } + + if 'sample_posterior' in params: + final_params['sample_posterior'] = params['sample_posterior'] + + if 'random_state' in params: + final_params['random_state'] = params['random_state'] + + return final_params \ No newline at end of file diff --git a/tpot2/config/regressors.py b/tpot2/config/regressors.py index d1b9343d..ab14a7ea 100644 --- a/tpot2/config/regressors.py +++ b/tpot2/config/regressors.py @@ -354,7 +354,7 @@ def get_ExtraTreesRegressor_ConfigurationSpace(random_state): def get_GaussianProcessRegressor_ConfigurationSpace(n_features, random_state): space = { 'n_features': n_features, - 'alpha': Float("alpha", bounds=(1e-14, 1.0), log=True), + 'alpha': Float("alpha", bounds=(1e-10, 1.0), log=True), 'thetaL': Float("thetaL", bounds=(1e-10, 1e-3), log=True), 'thetaU': Float("thetaU", bounds=(1.0, 100000), log=True), } diff --git a/tpot2/search_spaces/base.py b/tpot2/search_spaces/base.py index 3133057e..6dc2c76a 100644 --- a/tpot2/search_spaces/base.py +++ b/tpot2/search_spaces/base.py @@ -1,18 +1,10 @@ import tpot2 -import numpy as np -import pandas as pd import sklearn -from tpot2 import config -from typing import Generator, List, Tuple, Union -import random from sklearn.base import BaseEstimator import sklearn import networkx as nx from . 
import graph_utils from typing import final -from abc import ABC, abstractmethod - - diff --git a/tpot2/search_spaces/nodes/estimator_node_gradual.py b/tpot2/search_spaces/nodes/estimator_node_gradual.py new file mode 100644 index 00000000..f2e8cf81 --- /dev/null +++ b/tpot2/search_spaces/nodes/estimator_node_gradual.py @@ -0,0 +1,146 @@ +# try https://automl.github.io/ConfigSpace/main/api/hyperparameters.html + +import numpy as np +from tpot2.search_spaces.base import SklearnIndividual, SklearnIndividualGenerator +from ConfigSpace import ConfigurationSpace +from typing import final +import ConfigSpace + + +NONE_SPECIAL_STRING = "" +TRUE_SPECIAL_STRING = "" +FALSE_SPECIAL_STRING = "" + + +def default_hyperparameter_parser(params:dict) -> dict: + return params + + +# NOTE: This is not the default, currently experimental +class EstimatorNodeIndividual_gradual(SklearnIndividual): + """ + Note that ConfigurationSpace does not support None as a parameter. Instead, use the special string "". TPOT will automatically replace instances of this string with the Python None. + + Parameters + ---------- + method : type + The class of the estimator to be used + + space : ConfigurationSpace|dict + The hyperparameter space to be used. If a dict is passed, hyperparameters are fixed and not learned. + + """ + def __init__(self, method: type, + space: ConfigurationSpace|dict, #TODO If a dict is passed, hyperparameters are fixed and not learned. Is this confusing? Should we make a second node type? + hyperparameter_parser: callable = None, + rng=None) -> None: + super().__init__() + self.method = method + self.space = space + + if hyperparameter_parser is None: + self.hyperparameter_parser = default_hyperparameter_parser + else: + self.hyperparameter_parser = hyperparameter_parser + + if isinstance(space, dict): + self.hyperparameters = space + else: + rng = np.random.default_rng(rng) + self.space.seed(rng.integers(0, 2**32)) + self.hyperparameters = dict(self.space.sample_configuration()) + + self.check_hyperparameters_for_None() + + def mutate(self, rng=None): + if isinstance(self.space, dict): + return False + self.hyperparameters = gradual_hyperparameter_update(params=self.hyperparameters, configspace=self.space, rng=rng) + self.check_hyperparameters_for_None() + return True + + def crossover(self, other, rng=None): + if isinstance(self.space, dict): + return False + + rng = np.random.default_rng(rng) + if self.method != other.method: + return False + + #loop through hyperparameters, randomly swap items in self.hyperparameters with items in other.hyperparameters + for hyperparameter in self.space: + if rng.choice([True, False]): + if hyperparameter in other.hyperparameters: + self.hyperparameters[hyperparameter] = other.hyperparameters[hyperparameter] + + self.check_hyperparameters_for_None() + + return True + + def check_hyperparameters_for_None(self): + for key, value in self.hyperparameters.items(): + #if string + if isinstance(value, str): + if value == NONE_SPECIAL_STRING: + self.hyperparameters[key] = None + elif value == TRUE_SPECIAL_STRING: + self.hyperparameters[key] = True + elif value == FALSE_SPECIAL_STRING: + self.hyperparameters[key] = False + + @final #this method should not be overridden, instead override hyperparameter_parser + def export_pipeline(self, **kwargs): + return self.method(**self.hyperparameter_parser(self.hyperparameters)) + + def unique_id(self): + #return a dictionary of the method and the hyperparameters + method_str = self.method.__name__ + params = 
list(self.hyperparameters.keys()) + params = sorted(params) + + id_str = f"{method_str}({', '.join([f'{param}={self.hyperparameters[param]}' for param in params])})" + + return id_str + +def gradual_hyperparameter_update(params:dict, configspace:ConfigurationSpace, rng=None): + rng = np.random.default_rng(rng) + configspace.seed(rng.integers(0, 2**32)) + new_params = dict(configspace.sample_configuration()) + for param in list(new_params.keys()): + #if parameter is float, multiply by normal distribution + if param not in params: + continue + try: + if issubclass(type(configspace[param]), ConfigSpace.hyperparameters.hyperparameter.FloatHyperparameter): + + if configspace[param].log: + new_params[param] = params[param] * rng.lognormal(0, 1) + else: + new_params[param] = params[param] + rng.normal(0, .1)* (configspace[param].upper-configspace[param].lower) + # if check if above or below min and cap + if new_params[param] < configspace[param].lower: + new_params[param] = configspace[param].lower + elif new_params[param] > configspace[param].upper: + new_params[param] = configspace[param].upper + #if parameter is integer, add normal distribution + elif issubclass(type(configspace[param]), ConfigSpace.hyperparameters.hyperparameter.IntegerHyperparameter): + new_params[param] = params[param] * np.random.normal(0, 1) + # if check if above or below min and cap + if new_params[param] < configspace[param].lower: + new_params[param] = configspace[param].lower + elif new_params[param] > configspace[param].upper: + new_params[param] = configspace[param].upper + new_params[param] = int(new_params[param]) + except: + pass + + return new_params + +class EstimatorNode_gradual(SklearnIndividualGenerator): + def __init__(self, method, space, hyperparameter_parser=default_hyperparameter_parser): + self.method = method + self.space = space + self.hyperparameter_parser = hyperparameter_parser + + def generate(self, rng=None): + return EstimatorNodeIndividual_gradual(self.method, self.space, hyperparameter_parser=self.hyperparameter_parser, rng=rng) \ No newline at end of file diff --git a/tpot2/search_spaces/pipelines/choice.py b/tpot2/search_spaces/pipelines/choice.py index 25051aa0..af1a7a4d 100644 --- a/tpot2/search_spaces/pipelines/choice.py +++ b/tpot2/search_spaces/pipelines/choice.py @@ -12,7 +12,7 @@ def __init__(self, search_spaces : List[SklearnIndividualGenerator], rng=None) - super().__init__() self.search_spaces = search_spaces - self.node = np.random.default_rng(rng).choice(self.search_spaces).generate() + self.node = np.random.default_rng(rng).choice(self.search_spaces).generate(rng=rng) def mutate(self, rng=None): @@ -23,7 +23,7 @@ def mutate(self, rng=None): return self._mutate_node(rng) def _mutate_select_new_node(self, rng=None): - self.node = random.choice(self.search_spaces).generate() + self.node = random.choice(self.search_spaces).generate(rng=rng) return True def _mutate_node(self, rng=None): diff --git a/tpot2/selectors/map_elites_selection.py b/tpot2/selectors/map_elites_selection.py index 27ac6156..c3589801 100644 --- a/tpot2/selectors/map_elites_selection.py +++ b/tpot2/selectors/map_elites_selection.py @@ -1,56 +1,63 @@ import numpy as np #TODO make these functions take in a predetermined set of bins rather than calculating a new set each time -def create_nd_matrix(matrix, k): +def create_nd_matrix(matrix, grid_steps=None, bins=None): + + if grid_steps is not None and bins is not None: + raise ValueError("Either grid_steps or bins must be provided but not both") + # Extract scores 
and features - scores = [row[0] for row in matrix] - features = [row[1:] for row in matrix] + scores = matrix[:, 0] + features = matrix[:, 1:] # Determine the min and max of each feature min_vals = np.min(features, axis=0) max_vals = np.max(features, axis=0) # Create bins for each feature - bins = [np.linspace(min_vals[i], max_vals[i], k) for i in range(len(min_vals))] + if bins is None: + bins = [np.linspace(min_vals[i], max_vals[i], grid_steps) for i in range(len(min_vals))] # Initialize n-dimensional matrix with negative infinity - nd_matrix = np.full([k-1]*len(min_vals), {"score": -np.inf, "idx": None}) - + nd_matrix = np.full([len(b)+1 for b in bins], {"score": -np.inf, "idx": None}) # Fill in each cell with the highest score for that cell for idx, (score, feature) in enumerate(zip(scores, features)): - indices = [np.digitize(f, bin)-1 for f, bin in zip(feature, bins)] - - indices = [min(i, k-2) for i in indices] #the last bin is inclusive - + indices = [np.digitize(f, bin) for f, bin in zip(feature, bins)] cur_score = nd_matrix[tuple(indices)]["score"] if score > cur_score: nd_matrix[tuple(indices)] = {"score": score, "idx": idx} - return nd_matrix def manhattan(a, b): return sum(abs(val1-val2) for val1, val2 in zip(a,b)) +def map_elites_survival_selector(scores, k=None, rng=None, grid_steps= 10, bins=None): + + if grid_steps is not None and bins is not None: + raise ValueError("Either grid_steps or bins must be provided but not both") -def map_elites_survival_selector(scores, k, rng=None, grid_steps= 10): rng = np.random.default_rng(rng) scores = np.array(scores) #create grid - matrix = create_nd_matrix(scores, grid_steps) + matrix = create_nd_matrix(scores, grid_steps=grid_steps, bins=bins) matrix = matrix.flatten() indexes = [cell["idx"] for cell in matrix if cell["idx"] is not None] return np.unique(indexes) -def map_elites_parent_selector(scores, k, rng=None, grid_steps= 10, manhattan_distance = 2, n_parents=1,): +def map_elites_parent_selector(scores, k, rng=None, manhattan_distance = 2, n_parents=1, grid_steps= 10, bins=None): + + if grid_steps is not None and bins is not None: + raise ValueError("Either grid_steps or bins must be provided but not both") + rng = np.random.default_rng(rng) scores = np.array(scores) #create grid - matrix = create_nd_matrix(scores, grid_steps) + matrix = create_nd_matrix(scores, grid_steps=grid_steps, bins=bins) #return true if cell is not empty f = np.vectorize(lambda x: x["idx"] is not None) @@ -60,8 +67,6 @@ def map_elites_parent_selector(scores, k, rng=None, grid_steps= 10, manhattan_d idxes = [idx for idx in idx_to_coordinates.keys()] #all the indexes of best score per cell - - distance_matrix = np.zeros((len(idxes), len(idxes))) for i, idx1 in enumerate(idxes): @@ -87,17 +92,37 @@ def map_elites_parent_selector(scores, k, rng=None, grid_steps= 10, manhattan_d candidates = candidates[candidates != dm_idx] manhattan_distance += 1 - if manhattan_distance > grid_steps*scores.shape[1]: + if manhattan_distance > np.max(distance_matrix): break if len(candidates) == 0: - parents.append([idx]) + parents.append([idx, idx]) #if no other parents are found, select the same parent twice. 
weird to crossover with itself though + else: + this_parents = [idx] + for p in range(n_parents-1): + idx2_cords = rng.choice(candidates) + this_parents.append(idxes[idx2_cords]) + parents.append(this_parents) - this_parents = [idx] - for p in range(n_parents-1): - idx2_cords = rng.choice(candidates) - this_parents.append(idxes[idx2_cords]) + return np.array(parents) - parents.append(this_parents) - - return np.array(parents) \ No newline at end of file + +def get_bins_quantiles(arr, k=None, q=None): + bins = [] + + if q is not None and k is not None: + raise ValueError("Only one of k or q can be specified") + + if q is not None: + final_q = q + elif k is not None: + final_q = np.linspace(0, 1, k) + + for i in range(arr.shape[1]): + bins.append(np.quantile(arr[:,i], final_q)) + return bins + +def get_bins(arr, k): + min_vals = np.min(arr, axis=0) + max_vals = np.max(arr, axis=0) + [np.linspace(min_vals[i], max_vals[i], k) for i in range(len(min_vals))] \ No newline at end of file diff --git a/tpot2/utils/__init__.py b/tpot2/utils/__init__.py index e9c795a3..12231446 100644 --- a/tpot2/utils/__init__.py +++ b/tpot2/utils/__init__.py @@ -1,2 +1,11 @@ from . import eval_utils -from .utils import * \ No newline at end of file +from .utils import * + +# If amltk is installed, import the parser +try: + from .amltk_parser import tpot2_parser +except ImportError: + # Handle the case when amltk is not installed + pass + # print("amltk is not installed. Please install it to use tpot2_parser.") + # Optional: raise an exception or provide alternative functionality \ No newline at end of file diff --git a/tpot2/utils/amltk_parser.py b/tpot2/utils/amltk_parser.py new file mode 100644 index 00000000..c147dbd8 --- /dev/null +++ b/tpot2/utils/amltk_parser.py @@ -0,0 +1,72 @@ +from amltk.pipeline import Choice, Component, Sequential, Node, Fixed, Split, Join, Searchable +from tpot2.search_spaces.pipelines import SequentialPipeline, ChoicePipeline, UnionPipeline +from tpot2.search_spaces.nodes import EstimatorNode +from ConfigSpace import ConfigurationSpace + +def component_to_estimatornode(component: Component) -> EstimatorNode: + method = component.item + space_dict = {} + if component.space is not None: + space_dict.update(component.space) + if component.config is not None: + space_dict.update(component.config) + space = ConfigurationSpace(component.space) + + tpot2_sp = EstimatorNode(method=method, space=space) + return tpot2_sp + +def fixed_to_estimatornode(node: Fixed) -> EstimatorNode: + method = node.item + #check if method is a class or an object + if not isinstance(method, type): + method = type(method) + + #if baseestimator, get params + if hasattr(node.item, 'get_params'): + space_dict = node.item.get_params(deep=False) + else: + space_dict = {} + if node.space is not None: + space_dict.update(node.space) + if node.config is not None: + space_dict.update(node.config) + + tpot2_sp = EstimatorNode(method=method, space=space_dict) + return tpot2_sp + +def sequential_to_sequentialpipeline(sequential: Sequential) -> SequentialPipeline: + nodes = [tpot2_parser(node) for node in sequential.nodes] + tpot2_sp = SequentialPipeline(search_spaces=nodes) + return tpot2_sp + +def choice_to_choicepipeline(choice: Choice) -> ChoicePipeline: + nodes = [tpot2_parser(node) for node in choice.nodes] + tpot2_sp = ChoicePipeline(search_spaces=nodes) + return tpot2_sp + + +def split_to_unionpipeline(split: Split) -> UnionPipeline: + nodes = [tpot2_parser(node) for node in split.nodes] + tpot2_sp = 
UnionPipeline(search_spaces=nodes) + return tpot2_sp + +def tpot2_parser( + node: Node, + # *, + # flat: bool = False, + # conditionals: bool = False, + # delim: str = ":", + ): + + if isinstance(node, Component): + return component_to_estimatornode(node) + elif isinstance(node, Sequential): + return sequential_to_sequentialpipeline(node) + elif isinstance(node, Choice): + return choice_to_choicepipeline(node) + elif isinstance(node, Fixed): + return fixed_to_estimatornode(node) + elif isinstance(node, Split): + return split_to_unionpipeline(node) + else: + raise ValueError(f"Node type {type(node)} not supported")
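
As a quick sanity check of the parser added in tpot2/utils/amltk_parser.py, the sketch below builds a small AMLTK pipeline and converts it with tpot2.utils.tpot2_parser, mirroring the tutorial notebook above. It is not part of this diff; the StandardScaler/LogisticRegression/SVC components and their hyperparameter ranges are illustrative assumptions only.

# Illustrative usage sketch, not included in the PR.
from amltk.pipeline import Choice, Component, Sequential
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
import tpot2

# Per the parser: Sequential -> SequentialPipeline, Choice -> ChoicePipeline,
# Component -> EstimatorNode (its space dict becomes a ConfigurationSpace).
amltk_pipeline = (
    Sequential(name="demo_pipeline")
    >> Component(StandardScaler, space={"with_mean": [True, False]})
    >> Choice(
        Component(LogisticRegression, space={"C": (1e-3, 10.0)}),
        Component(SVC, space={"kernel": ["linear", "rbf"]}),
        name="estimator",
    )
)

# Convert to a TPOT2 search space and sample one concrete sklearn pipeline from it.
tpot_space = tpot2.utils.tpot2_parser(amltk_pipeline)
print(tpot_space.generate().export_pipeline())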
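
The imputers.py hunk earlier in this diff adds search spaces for IterativeImputer and KNNImputer. Below is a minimal sketch of how a sampled configuration is expected to flow through IterativeImputer_hyperparameter_parser into a ready-to-fit estimator; the n_features and random_state values are arbitrary assumptions for illustration.

# Illustrative usage sketch, not included in the PR.
from sklearn.experimental import enable_iterative_imputer  # noqa: F401, required before importing IterativeImputer
from sklearn.impute import IterativeImputer
from tpot2.config import imputers

# Build the ConfigurationSpace defined in the diff and draw one configuration.
cs = imputers.get_IterativeImputer_config_space(n_features=20, random_state=42)
params = dict(cs.sample_configuration())

# Map the sampled 'estimator' string ('Bayesian', 'RFR', 'Ridge', or 'KNN')
# to an actual sklearn regressor and keep only valid IterativeImputer kwargs.
final_params = imputers.IterativeImputer_hyperparameter_parser(params)
imputer = IterativeImputer(**final_params)
print(imputer)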