From f1371fba036710c365de0370402d07978236b814 Mon Sep 17 00:00:00 2001 From: gketron Date: Wed, 17 Jul 2024 16:01:00 -0700 Subject: [PATCH 01/14] test --- test.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 test.txt diff --git a/test.txt b/test.txt new file mode 100644 index 00000000..30d74d25 --- /dev/null +++ b/test.txt @@ -0,0 +1 @@ +test \ No newline at end of file From 4f0fbf73e726f03e76909c5596713f5f4f0bb470 Mon Sep 17 00:00:00 2001 From: gketron Date: Thu, 18 Jul 2024 17:31:35 -0700 Subject: [PATCH 02/14] Simple, Iterative, and KNNImputers Added --- test.txt | 1 - tpot2/config/get_configspace.py | 25 +++++++++--- tpot2/config/imputers.py | 71 +++++++++++++++++++++++++++++++-- 3 files changed, 87 insertions(+), 10 deletions(-) delete mode 100644 test.txt diff --git a/test.txt b/test.txt deleted file mode 100644 index 30d74d25..00000000 --- a/test.txt +++ /dev/null @@ -1 +0,0 @@ -test \ No newline at end of file diff --git a/tpot2/config/get_configspace.py b/tpot2/config/get_configspace.py index 4a5cc997..3c3c2505 100644 --- a/tpot2/config/get_configspace.py +++ b/tpot2/config/get_configspace.py @@ -45,7 +45,7 @@ from sklearn.feature_selection import f_classif, f_regression #TODO create a selectomixin using these? from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis from sklearn.gaussian_process import GaussianProcessRegressor, GaussianProcessClassifier -from sklearn.impute import SimpleImputer +from sklearn.impute import SimpleImputer, IterativeImputer, KNNImputer all_methods = [SGDClassifier, RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, MLPClassifier, DecisionTreeClassifier, XGBClassifier, KNeighborsClassifier, SVC, LogisticRegression, LGBMClassifier, LinearSVC, GaussianNB, BernoulliNB, MultinomialNB, ExtraTreesRegressor, RandomForestRegressor, GradientBoostingRegressor, BaggingRegressor, DecisionTreeRegressor, KNeighborsRegressor, XGBRegressor, ZeroCount, ColumnOneHotEncoder, Binarizer, FastICA, FeatureAgglomeration, MaxAbsScaler, MinMaxScaler, Normalizer, Nystroem, PCA, PolynomialFeatures, RBFSampler, RobustScaler, StandardScaler, SelectFwe, SelectPercentile, VarianceThreshold, SGDRegressor, Ridge, Lasso, ElasticNet, Lars, LassoLars, LassoLarsCV, RidgeCV, SVR, LinearSVR, AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor, BaggingRegressor, ExtraTreesRegressor, DecisionTreeRegressor, KNeighborsRegressor, ElasticNetCV, AdaBoostClassifier,MLPRegressor, @@ -56,7 +56,7 @@ GaussianProcessClassifier, BaggingClassifier,LGBMRegressor, Passthrough,SkipTransformer, PassKBinsDiscretizer, - SimpleImputer, + SimpleImputer, IterativeImputer, KNNImputer ] @@ -124,7 +124,7 @@ "all_transformers" : ["transformers", "scalers"], "arithmatic": ["AddTransformer", "mul_neg_1_Transformer", "MulTransformer", "SafeReciprocalTransformer", "EQTransformer", "NETransformer", "GETransformer", "GTTransformer", "LETransformer", "LTTransformer", "MinTransformer", "MaxTransformer"], - "imputers": ["SimpleImputer"], + "imputers": ["SimpleImputer", "IterativeImputer", "KNNImputer"], "skrebate": ["ReliefF", "SURF", "SURFstar", "MultiSURF"], "genetic_encoders": ["DominantEncoder", "RecessiveEncoder", "HeterosisEncoder", "UnderDominanceEncoder", "OverDominanceEncoder"], @@ -136,8 +136,6 @@ def get_configspace(name, n_classes=3, n_samples=1000, n_features=100, random_state=None): match name: - case "SimpleImputer": - return imputers.simple_imputer_cs #autoqtl_builtins.py case "FeatureEncodingFrequencySelector": @@ -352,6 +350,12 @@ def get_configspace(name, n_classes=3, n_samples=1000, n_features=100, random_st ) #imputers.py + case "SimpleImputer": + return imputers.simple_imputer_cs + case "IterativeImputer": + return imputers.get_IterativeImputer_config_space(n_features=n_features, random_state=random_state) + case "KNNImputer": + return imputers.get_KNNImputer_config_space(n_samples=n_samples) #mdr_configs.py case "MDR": @@ -443,8 +447,17 @@ def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None sfm_sp = get_configspace(name="SelectFromModel", n_classes=n_classes, n_samples=n_samples, random_state=random_state) ext = get_node("ExtraTreesRegressor", n_classes=n_classes, n_samples=n_samples, random_state=random_state) return WrapperPipeline(estimator_search_space=ext, method=SelectFromModel, space=sfm_sp) - + # TODO Add IterativeImputer with more estimator methods + ''' + if name == "IterativeImputer_learnedestimators": + iteative_sp = get_configspace(name="IterativeImputer", n_classes=n_classes, n_samples=n_samples, random_state=random_state) + regessor_searchspace = get_search_space(["LinearRegression", ..], n_classes=n_classes, n_samples=n_samples, random_state=random_state) + return WrapperPipeline(estimator_search_space=regressor_searchspace, method=ItartiveImputer, space=iteative_sp) + ''' #these are nodes that have special search spaces which require custom parsing of the hyperparameters + if name == "IterativeImputer": + configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) + return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=imputers.IterativeImputer_hyperparameter_parser) if name == "RobustScaler": configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=transformers.robust_scaler_hyperparameter_parser) diff --git a/tpot2/config/imputers.py b/tpot2/config/imputers.py index 3499c0aa..5ad6d3a5 100644 --- a/tpot2/config/imputers.py +++ b/tpot2/config/imputers.py @@ -1,9 +1,74 @@ +import sklearn.ensemble +import sklearn.linear_model +import sklearn.neighbors from ConfigSpace import ConfigurationSpace from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal +import sklearn simple_imputer_cs = ConfigurationSpace( space = { - 'strategy' : Categorical('strategy', ['mean','median', 'most_frequent', ]), - 'add_indicator' : Categorical('add_indicator', [True, False]), + 'strategy' : Categorical('strategy', + ['mean','median', 'most_frequent', 'constant'] + ), + #'add_indicator' : Categorical('add_indicator', [True, False]), + #Removed add_indicator, it appends a mask next to the rest of the data + # and can cause errors. gk } -) \ No newline at end of file +) + +def get_IterativeImputer_config_space(n_features, random_state): + space = { + 'estimator' : Categorical('estimator', + ['Bayesian', 'RFR', 'Ridge', + 'KNN', 'RandomForest']), + 'sample_posterior' : Categorical('sample_posterior', [True, False]), + 'initial_strategy' : Categorical('initial_strategy', + ['mean', 'median', + 'most_frequent', 'constant']), + 'n_nearest_features' : Integer('n_nearest_features', + bounds=(1, n_features)), + 'imputation_order' : Categorical('imputation_order', + ['ascending', 'descending', + 'roman', 'arabic', 'random']), + } + if random_state is not None: + #This is required because configspace doesn't allow None as a value + space['random_state'] = random_state + + return ConfigurationSpace( + space = space + ) + +def get_KNNImputer_config_space(n_samples): + space = { + 'n_neighbors': Integer('n_neighbors', bounds=(1, max(n_samples,100))), + 'weights': Categorical('weights', ['uniform', 'distance']) + } + return ConfigurationSpace( + space=space + ) + +def IterativeImputer_hyperparameter_parser(params): + est = params['estimator'] + match est: + case 'Bayesian': + estimator = sklearn.linear_model.BayesianRidge() + case 'RFR': + estimator = sklearn.ensemble.RandomForestRegressor() + case 'Ridge': + estimator = sklearn.linear_model.Ridge() + case 'KNN': + estimator = sklearn.neighbors.KNeighborsRegressor() + + final_params = { + 'estimator' : estimator, + 'sample_posterior' : params['sample_posterior'], + 'initial_strategy' : params['initial_strategy'], + 'n_nearest_features' : params['n_nearest_features'], + 'imputation_order' : params['imputation_order'], + } + + if "random_state" in params: + final_params['random_state'] = params['random_state'] + + return final_params \ No newline at end of file From d1b2a319209f193f6158c0caf792d91035346b6c Mon Sep 17 00:00:00 2001 From: gketron Date: Fri, 19 Jul 2024 11:02:59 -0700 Subject: [PATCH 03/14] Debugging --- tpot2/config/get_configspace.py | 1 + tpot2/config/imputers.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tpot2/config/get_configspace.py b/tpot2/config/get_configspace.py index 3c3c2505..d142508c 100644 --- a/tpot2/config/get_configspace.py +++ b/tpot2/config/get_configspace.py @@ -45,6 +45,7 @@ from sklearn.feature_selection import f_classif, f_regression #TODO create a selectomixin using these? from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis from sklearn.gaussian_process import GaussianProcessRegressor, GaussianProcessClassifier +from sklearn.experimental import enable_iterative_imputer from sklearn.impute import SimpleImputer, IterativeImputer, KNNImputer all_methods = [SGDClassifier, RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, MLPClassifier, DecisionTreeClassifier, XGBClassifier, KNeighborsClassifier, SVC, LogisticRegression, LGBMClassifier, LinearSVC, GaussianNB, BernoulliNB, MultinomialNB, ExtraTreesRegressor, RandomForestRegressor, GradientBoostingRegressor, BaggingRegressor, DecisionTreeRegressor, KNeighborsRegressor, XGBRegressor, ZeroCount, ColumnOneHotEncoder, Binarizer, FastICA, FeatureAgglomeration, MaxAbsScaler, MinMaxScaler, Normalizer, Nystroem, PCA, PolynomialFeatures, RBFSampler, RobustScaler, StandardScaler, SelectFwe, SelectPercentile, VarianceThreshold, SGDRegressor, Ridge, Lasso, ElasticNet, Lars, LassoLars, LassoLarsCV, RidgeCV, SVR, LinearSVR, AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor, BaggingRegressor, ExtraTreesRegressor, DecisionTreeRegressor, KNeighborsRegressor, ElasticNetCV, diff --git a/tpot2/config/imputers.py b/tpot2/config/imputers.py index 5ad6d3a5..4c896060 100644 --- a/tpot2/config/imputers.py +++ b/tpot2/config/imputers.py @@ -1,9 +1,10 @@ +import sklearn import sklearn.ensemble import sklearn.linear_model import sklearn.neighbors from ConfigSpace import ConfigurationSpace from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal -import sklearn + simple_imputer_cs = ConfigurationSpace( space = { @@ -20,7 +21,7 @@ def get_IterativeImputer_config_space(n_features, random_state): space = { 'estimator' : Categorical('estimator', ['Bayesian', 'RFR', 'Ridge', - 'KNN', 'RandomForest']), + 'KNN']), 'sample_posterior' : Categorical('sample_posterior', [True, False]), 'initial_strategy' : Categorical('initial_strategy', ['mean', 'median', From 6c2341d534d0d906019bf86caf8ee61b9c0efdf4 Mon Sep 17 00:00:00 2001 From: gketron Date: Thu, 25 Jul 2024 09:25:39 -0700 Subject: [PATCH 04/14] sample_posterior parameter made conditional --- tpot2/config/imputers.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tpot2/config/imputers.py b/tpot2/config/imputers.py index 4c896060..3355f6dc 100644 --- a/tpot2/config/imputers.py +++ b/tpot2/config/imputers.py @@ -54,16 +54,20 @@ def IterativeImputer_hyperparameter_parser(params): match est: case 'Bayesian': estimator = sklearn.linear_model.BayesianRidge() + posterior = params['sample_posterior'] case 'RFR': estimator = sklearn.ensemble.RandomForestRegressor() + posterior = False case 'Ridge': estimator = sklearn.linear_model.Ridge() + posterior = False case 'KNN': estimator = sklearn.neighbors.KNeighborsRegressor() + posterior = False final_params = { 'estimator' : estimator, - 'sample_posterior' : params['sample_posterior'], + 'sample_posterior' : posterior, 'initial_strategy' : params['initial_strategy'], 'n_nearest_features' : params['n_nearest_features'], 'imputation_order' : params['imputation_order'], From 90498c7c1bfa1389b2d4109029347e48dacce8d3 Mon Sep 17 00:00:00 2001 From: gketron Date: Mon, 29 Jul 2024 14:04:54 -0700 Subject: [PATCH 05/14] Conditional Sample Posterior Added for Iterative Imputer --- tpot2/config/imputers.py | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/tpot2/config/imputers.py b/tpot2/config/imputers.py index 3355f6dc..2c33629f 100644 --- a/tpot2/config/imputers.py +++ b/tpot2/config/imputers.py @@ -4,6 +4,7 @@ import sklearn.neighbors from ConfigSpace import ConfigurationSpace from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal +from ConfigSpace import EqualsCondition simple_imputer_cs = ConfigurationSpace( @@ -18,33 +19,35 @@ ) def get_IterativeImputer_config_space(n_features, random_state): - space = { - 'estimator' : Categorical('estimator', - ['Bayesian', 'RFR', 'Ridge', - 'KNN']), - 'sample_posterior' : Categorical('sample_posterior', [True, False]), - 'initial_strategy' : Categorical('initial_strategy', + space = { 'initial_strategy' : Categorical('initial_strategy', ['mean', 'median', 'most_frequent', 'constant']), - 'n_nearest_features' : Integer('n_nearest_features', + 'n_nearest_features' : Integer('n_nearest_features', bounds=(1, n_features)), - 'imputation_order' : Categorical('imputation_order', + 'imputation_order' : Categorical('imputation_order', ['ascending', 'descending', 'roman', 'arabic', 'random']), } + + estimator = Categorical('estimator', ['Bayesian', 'RFR', 'Ridge', 'KNN']) + sample_posterior = Categorical('sample_posterior', [True, False]) + sampling_condition = EqualsCondition(sample_posterior, estimator, 'Bayesian') + if random_state is not None: #This is required because configspace doesn't allow None as a value space['random_state'] = random_state - return ConfigurationSpace( - space = space - ) + cs = ConfigurationSpace(space=space) + cs.add_hyperparameters([estimator, sample_posterior]) + cs.add_conditions([sampling_condition]) + return cs def get_KNNImputer_config_space(n_samples): space = { 'n_neighbors': Integer('n_neighbors', bounds=(1, max(n_samples,100))), 'weights': Categorical('weights', ['uniform', 'distance']) } + return ConfigurationSpace( space=space ) @@ -54,26 +57,24 @@ def IterativeImputer_hyperparameter_parser(params): match est: case 'Bayesian': estimator = sklearn.linear_model.BayesianRidge() - posterior = params['sample_posterior'] case 'RFR': estimator = sklearn.ensemble.RandomForestRegressor() - posterior = False case 'Ridge': estimator = sklearn.linear_model.Ridge() - posterior = False case 'KNN': estimator = sklearn.neighbors.KNeighborsRegressor() - posterior = False - + final_params = { 'estimator' : estimator, - 'sample_posterior' : posterior, 'initial_strategy' : params['initial_strategy'], 'n_nearest_features' : params['n_nearest_features'], 'imputation_order' : params['imputation_order'], } - if "random_state" in params: - final_params['random_state'] = params['random_state'] + if 'sample_posterior' in params: + final_params['sample_posterior'] = params['sample_posterior'] + if 'random_state' in params: + final_params['random_state'] = params['random_state'] + return final_params \ No newline at end of file From 9fbbad5ba7641d1f37ac8854803b5ec189cd9a1f Mon Sep 17 00:00:00 2001 From: perib Date: Mon, 9 Sep 2024 11:54:54 -0700 Subject: [PATCH 06/14] added option to change the base nodetype for hyperparameter mutation/crossover --- tpot2/config/get_configspace.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tpot2/config/get_configspace.py b/tpot2/config/get_configspace.py index 4a5cc997..118d4412 100644 --- a/tpot2/config/get_configspace.py +++ b/tpot2/config/get_configspace.py @@ -401,12 +401,12 @@ def get_configspace(name, n_classes=3, n_samples=1000, n_features=100, random_st raise ValueError(f"Could not find configspace for {name}") -def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_state=None, return_choice_pipeline=True): +def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_state=None, return_choice_pipeline=True, base_node=EstimatorNode): #if list of names, return a list of EstimatorNodes if isinstance(name, list) or isinstance(name, np.ndarray): - search_spaces = [get_search_space(n, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, return_choice_pipeline=False) for n in name] + search_spaces = [get_search_space(n, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, return_choice_pipeline=False, base_node=base_node) for n in name] #remove Nones search_spaces = [s for s in search_spaces if s is not None] @@ -417,12 +417,12 @@ def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_st if name in GROUPNAMES: name_list = GROUPNAMES[name] - return get_search_space(name_list, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, return_choice_pipeline=return_choice_pipeline) + return get_search_space(name_list, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, return_choice_pipeline=return_choice_pipeline, base_node=base_node) - return get_node(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state) + return get_node(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, base_node=base_node) -def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None): +def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None, base_node=EstimatorNode): #these are wrappers that take in another estimator as a parameter # TODO Add AdaBoostRegressor, AdaBoostClassifier as wrappers? wrap a decision tree with different params? @@ -447,34 +447,34 @@ def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None #these are nodes that have special search spaces which require custom parsing of the hyperparameters if name == "RobustScaler": configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=transformers.robust_scaler_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=transformers.robust_scaler_hyperparameter_parser) if name == "GradientBoostingClassifier": configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.GradientBoostingClassifier_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.GradientBoostingClassifier_hyperparameter_parser) if name == "HistGradientBoostingClassifier": configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.HistGradientBoostingClassifier_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.HistGradientBoostingClassifier_hyperparameter_parser) if name == "GradientBoostingRegressor": configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.GradientBoostingRegressor_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.GradientBoostingRegressor_hyperparameter_parser) if name == "HistGradientBoostingRegressor": configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.HistGradientBoostingRegressor_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.HistGradientBoostingRegressor_hyperparameter_parser) if name == "MLPClassifier": configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.MLPClassifier_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.MLPClassifier_hyperparameter_parser) if name == "MLPRegressor": configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.MLPRegressor_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.MLPRegressor_hyperparameter_parser) if name == "GaussianProcessRegressor": configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.GaussianProcessRegressor_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.GaussianProcessRegressor_hyperparameter_parser) if name == "GaussianProcessClassifier": configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.GaussianProcessClassifier_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.GaussianProcessClassifier_hyperparameter_parser) if name == "FeatureAgglomeration": configspace = get_configspace(name, n_features=n_features) - return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=transformers.FeatureAgglomeration_hyperparameter_parser) + return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=transformers.FeatureAgglomeration_hyperparameter_parser) configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state) if configspace is None: @@ -482,4 +482,4 @@ def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None warnings.warn(f"Could not find configspace for {name}") return None - return EstimatorNode(STRING_TO_CLASS[name], configspace) \ No newline at end of file + return base_node(STRING_TO_CLASS[name], configspace) \ No newline at end of file From 190a0f60565f83d401175a8bd2c43b0170557adf Mon Sep 17 00:00:00 2001 From: perib Date: Mon, 9 Sep 2024 12:06:50 -0700 Subject: [PATCH 07/14] cleanup and gradual estimator node example --- tpot2/search_spaces/base.py | 8 - .../nodes/estimator_node_gradual.py | 146 ++++++++++++++++++ 2 files changed, 146 insertions(+), 8 deletions(-) create mode 100644 tpot2/search_spaces/nodes/estimator_node_gradual.py diff --git a/tpot2/search_spaces/base.py b/tpot2/search_spaces/base.py index 3133057e..6dc2c76a 100644 --- a/tpot2/search_spaces/base.py +++ b/tpot2/search_spaces/base.py @@ -1,18 +1,10 @@ import tpot2 -import numpy as np -import pandas as pd import sklearn -from tpot2 import config -from typing import Generator, List, Tuple, Union -import random from sklearn.base import BaseEstimator import sklearn import networkx as nx from . import graph_utils from typing import final -from abc import ABC, abstractmethod - - diff --git a/tpot2/search_spaces/nodes/estimator_node_gradual.py b/tpot2/search_spaces/nodes/estimator_node_gradual.py new file mode 100644 index 00000000..f2e8cf81 --- /dev/null +++ b/tpot2/search_spaces/nodes/estimator_node_gradual.py @@ -0,0 +1,146 @@ +# try https://automl.github.io/ConfigSpace/main/api/hyperparameters.html + +import numpy as np +from tpot2.search_spaces.base import SklearnIndividual, SklearnIndividualGenerator +from ConfigSpace import ConfigurationSpace +from typing import final +import ConfigSpace + + +NONE_SPECIAL_STRING = "" +TRUE_SPECIAL_STRING = "" +FALSE_SPECIAL_STRING = "" + + +def default_hyperparameter_parser(params:dict) -> dict: + return params + + +# NOTE: This is not the default, currently experimental +class EstimatorNodeIndividual_gradual(SklearnIndividual): + """ + Note that ConfigurationSpace does not support None as a parameter. Instead, use the special string "". TPOT will automatically replace instances of this string with the Python None. + + Parameters + ---------- + method : type + The class of the estimator to be used + + space : ConfigurationSpace|dict + The hyperparameter space to be used. If a dict is passed, hyperparameters are fixed and not learned. + + """ + def __init__(self, method: type, + space: ConfigurationSpace|dict, #TODO If a dict is passed, hyperparameters are fixed and not learned. Is this confusing? Should we make a second node type? + hyperparameter_parser: callable = None, + rng=None) -> None: + super().__init__() + self.method = method + self.space = space + + if hyperparameter_parser is None: + self.hyperparameter_parser = default_hyperparameter_parser + else: + self.hyperparameter_parser = hyperparameter_parser + + if isinstance(space, dict): + self.hyperparameters = space + else: + rng = np.random.default_rng(rng) + self.space.seed(rng.integers(0, 2**32)) + self.hyperparameters = dict(self.space.sample_configuration()) + + self.check_hyperparameters_for_None() + + def mutate(self, rng=None): + if isinstance(self.space, dict): + return False + self.hyperparameters = gradual_hyperparameter_update(params=self.hyperparameters, configspace=self.space, rng=rng) + self.check_hyperparameters_for_None() + return True + + def crossover(self, other, rng=None): + if isinstance(self.space, dict): + return False + + rng = np.random.default_rng(rng) + if self.method != other.method: + return False + + #loop through hyperparameters, randomly swap items in self.hyperparameters with items in other.hyperparameters + for hyperparameter in self.space: + if rng.choice([True, False]): + if hyperparameter in other.hyperparameters: + self.hyperparameters[hyperparameter] = other.hyperparameters[hyperparameter] + + self.check_hyperparameters_for_None() + + return True + + def check_hyperparameters_for_None(self): + for key, value in self.hyperparameters.items(): + #if string + if isinstance(value, str): + if value == NONE_SPECIAL_STRING: + self.hyperparameters[key] = None + elif value == TRUE_SPECIAL_STRING: + self.hyperparameters[key] = True + elif value == FALSE_SPECIAL_STRING: + self.hyperparameters[key] = False + + @final #this method should not be overridden, instead override hyperparameter_parser + def export_pipeline(self, **kwargs): + return self.method(**self.hyperparameter_parser(self.hyperparameters)) + + def unique_id(self): + #return a dictionary of the method and the hyperparameters + method_str = self.method.__name__ + params = list(self.hyperparameters.keys()) + params = sorted(params) + + id_str = f"{method_str}({', '.join([f'{param}={self.hyperparameters[param]}' for param in params])})" + + return id_str + +def gradual_hyperparameter_update(params:dict, configspace:ConfigurationSpace, rng=None): + rng = np.random.default_rng(rng) + configspace.seed(rng.integers(0, 2**32)) + new_params = dict(configspace.sample_configuration()) + for param in list(new_params.keys()): + #if parameter is float, multiply by normal distribution + if param not in params: + continue + try: + if issubclass(type(configspace[param]), ConfigSpace.hyperparameters.hyperparameter.FloatHyperparameter): + + if configspace[param].log: + new_params[param] = params[param] * rng.lognormal(0, 1) + else: + new_params[param] = params[param] + rng.normal(0, .1)* (configspace[param].upper-configspace[param].lower) + # if check if above or below min and cap + if new_params[param] < configspace[param].lower: + new_params[param] = configspace[param].lower + elif new_params[param] > configspace[param].upper: + new_params[param] = configspace[param].upper + #if parameter is integer, add normal distribution + elif issubclass(type(configspace[param]), ConfigSpace.hyperparameters.hyperparameter.IntegerHyperparameter): + new_params[param] = params[param] * np.random.normal(0, 1) + # if check if above or below min and cap + if new_params[param] < configspace[param].lower: + new_params[param] = configspace[param].lower + elif new_params[param] > configspace[param].upper: + new_params[param] = configspace[param].upper + new_params[param] = int(new_params[param]) + except: + pass + + return new_params + +class EstimatorNode_gradual(SklearnIndividualGenerator): + def __init__(self, method, space, hyperparameter_parser=default_hyperparameter_parser): + self.method = method + self.space = space + self.hyperparameter_parser = hyperparameter_parser + + def generate(self, rng=None): + return EstimatorNodeIndividual_gradual(self.method, self.space, hyperparameter_parser=self.hyperparameter_parser, rng=rng) \ No newline at end of file From 45aaa56cddebedf37b175b64b89ec0ef099095b0 Mon Sep 17 00:00:00 2001 From: perib Date: Mon, 9 Sep 2024 12:34:22 -0700 Subject: [PATCH 08/14] change param range to prevent error and fix failing test --- tpot2/config/classifiers.py | 2 +- tpot2/config/regressors.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tpot2/config/classifiers.py b/tpot2/config/classifiers.py index 49b714ac..2fb09e41 100644 --- a/tpot2/config/classifiers.py +++ b/tpot2/config/classifiers.py @@ -535,7 +535,7 @@ def MLPClassifier_hyperparameter_parser(params): def get_GaussianProcessClassifier_ConfigurationSpace(n_features, random_state): space = { 'n_features': n_features, - 'alpha': Float("alpha", bounds=(1e-14, 1.0), log=True), + 'alpha': Float("alpha", bounds=(1e-10, 1.0), log=True), 'thetaL': Float("thetaL", bounds=(1e-10, 1e-3), log=True), 'thetaU': Float("thetaU", bounds=(1.0, 100000), log=True), } diff --git a/tpot2/config/regressors.py b/tpot2/config/regressors.py index d1b9343d..ab14a7ea 100644 --- a/tpot2/config/regressors.py +++ b/tpot2/config/regressors.py @@ -354,7 +354,7 @@ def get_ExtraTreesRegressor_ConfigurationSpace(random_state): def get_GaussianProcessRegressor_ConfigurationSpace(n_features, random_state): space = { 'n_features': n_features, - 'alpha': Float("alpha", bounds=(1e-14, 1.0), log=True), + 'alpha': Float("alpha", bounds=(1e-10, 1.0), log=True), 'thetaL': Float("thetaL", bounds=(1e-10, 1e-3), log=True), 'thetaU': Float("thetaU", bounds=(1.0, 100000), log=True), } From 5a12031bfa36e7e3498af1cf1b111612de56ccda Mon Sep 17 00:00:00 2001 From: perib Date: Mon, 9 Sep 2024 17:25:13 -0700 Subject: [PATCH 09/14] added amltk parser --- .../amltk_search_space_parser_example.ipynb | 1945 +++++++++++++++++ setup.py | 3 +- tpot2/__init__.py | 2 +- tpot2/utils/__init__.py | 11 +- tpot2/utils/amltk_parser.py | 72 + 5 files changed, 2030 insertions(+), 3 deletions(-) create mode 100644 Tutorial/amltk_search_space_parser_example.ipynb create mode 100644 tpot2/utils/amltk_parser.py diff --git a/Tutorial/amltk_search_space_parser_example.ipynb b/Tutorial/amltk_search_space_parser_example.ipynb new file mode 100644 index 00000000..9dc62527 --- /dev/null +++ b/Tutorial/amltk_search_space_parser_example.ipynb @@ -0,0 +1,1945 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The AMLTK (https://github.com/automl/amltk) provides a framework for developing AutoML systems. One component of this system is the search space definitions. \n", + "\n", + "TPOT2 provides a function called tpot2.utils.tpot2_parser which can convert a search space defined in the AMLTK API into the search space class used by TPOT2. This allows users to define a single search space to be used by both algorithms, facilitating better comparisons. Below is an example of a few search spaces defined in AMLTK and how to use them in TPOT2.\n", + "\n", + "Note: this feature is still experimental and not all features present in the AMLTK API are fully supported in TPOT2 yet. (For example, automated splitting based on categorical vs numeric with amltk.pipeline.Split is not currently implemented in the parser.)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
╭─ Split(split_imputation) ───────────────────────────────────────────────────────────────────────────────────────╮\n",
+       " ╭─ Sequential(categories) ───────────────────────────╮ ╭─ Sequential(numerics) ───────────────────────────────╮ \n",
+       "  ╭─ Fixed(ColumnTransformer) ─────────────────────╮   ╭─ Fixed(ColumnTransformer) ───────────────────────╮  \n",
+       "   item ColumnTransformer(transformers=[('passth…     item ColumnTransformer(transformers=[('passthro…   \n",
+       "        'passthrough',                                     'passthrough',                                \n",
+       "                                         <sklear…                                           <sklearn.…   \n",
+       "        object at 0x7ab9ec119d20>)])                       object at 0x7ab994db4c40>)])                  \n",
+       "  ╰────────────────────────────────────────────────╯   ╰──────────────────────────────────────────────────╯  \n",
+       "       \n",
+       "  ╭─ Fixed(SimpleImputer) ─────────────────────────╮   ╭─ Component(SimpleImputer) ─────────────╮            \n",
+       "   item SimpleImputer(fill_value='missing',           item  class SimpleImputer(...)                     \n",
+       "        strategy='constant')                          space {'strategy': ['mean', 'median']}             \n",
+       "  ╰────────────────────────────────────────────────╯   ╰────────────────────────────────────────╯            \n",
+       "    ╰──────────────────────────────────────────────────────╯ \n",
+       "  ╭─ Fixed(OneHotEncoder) ─────────────────────────╮                                                           \n",
+       "   item OneHotEncoder(drop='first',                                                                          \n",
+       "        sparse_output=False)                                                                                 \n",
+       "  ╰────────────────────────────────────────────────╯                                                           \n",
+       " ╰────────────────────────────────────────────────────╯                                                          \n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[38;2;119;125;167m╭─\u001b[0m\u001b[38;2;119;125;167m \u001b[0m\u001b[1;38;2;119;125;167mSplit\u001b[0m\u001b[38;2;119;125;167m(\u001b[0m\u001b[3;38;2;119;125;167msplit_imputation\u001b[0m\u001b[38;2;119;125;167m) \u001b[0m\u001b[38;2;119;125;167m──────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;119;125;167m─╮\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mcategories\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m──────────────────────────\u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m \u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mnumerics\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m──────────────────────────────\u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mColumnTransformer\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m────────────────────\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mColumnTransformer\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m──────────────────────\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mColumnTransformer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mtransformers\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m[\u001b[0m\u001b[1;39m(\u001b[0m\u001b[32m'passth…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mColumnTransformer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mtransformers\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m[\u001b[0m\u001b[1;39m(\u001b[0m\u001b[32m'passthro…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[32m'passthrough'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[32m'passthrough'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[1;39m<\u001b[0m\u001b[1;95msklear…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[1;39m<\u001b[0m\u001b[1;95msklearn.…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39mobject at \u001b[0m\u001b[1;36m0x7ab9ec119d20\u001b[0m\u001b[1;39m>\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39mobject at \u001b[0m\u001b[1;36m0x7ab994db4c40\u001b[0m\u001b[1;39m>\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰──────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mSimpleImputer\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m────────────────────────\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mSimpleImputer\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m────────────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mSimpleImputer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mfill_value\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'missing'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=202447;https://www.scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html\u001b\\\u001b[4;39mSimpleImputer\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[33mstrategy\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'constant'\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'strategy'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m[\u001b[0m\u001b[32m'mean'\u001b[0m\u001b[39m, \u001b[0m\u001b[32m'median'\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m}\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╰────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m╰──────────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mOneHotEncoder\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m────────────────────────\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mOneHotEncoder\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mdrop\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'first'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[33msparse_output\u001b[0m\u001b[39m=\u001b[0m\u001b[3;91mFalse\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m╰────────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [], + "text/plain": [ + "Split(name='split_imputation', item=None, nodes=(Sequential(name='categories', item=None, nodes=(Fixed(name='ColumnTransformer', item=ColumnTransformer(transformers=[('passthrough', 'passthrough',\n", + " )]), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Fixed(name='SimpleImputer', item=SimpleImputer(fill_value='missing', strategy='constant'), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Fixed(name='OneHotEncoder', item=OneHotEncoder(drop='first', sparse_output=False), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Sequential(name='numerics', item=None, nodes=(Fixed(name='ColumnTransformer', item=ColumnTransformer(transformers=[('passthrough', 'passthrough',\n", + " )]), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Component(name='SimpleImputer', item=, nodes=(), config=None, space={'strategy': ['mean', 'median']}, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.compose import make_column_selector\n", + "import numpy as np\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.impute import SimpleImputer\n", + "from sklearn.preprocessing import OneHotEncoder\n", + "from sklearn.svm import SVC\n", + "from amltk.pipeline import Choice, Component, Sequential, Split\n", + "import tpot2\n", + "from sklearn.preprocessing import FunctionTransformer\n", + "from sklearn.compose import make_column_transformer\n", + "import tpot2\n", + "import numpy as np\n", + "import sklearn\n", + "import sklearn.datasets\n", + "import pandas as pd\n", + "# create dummy pandas dataset with both categorical and numerical columns\n", + "X, y = sklearn.datasets.make_classification(n_samples=100, n_features=5, n_informative=3, n_classes=2, random_state=42)\n", + "X = pd.DataFrame(X, columns=[f\"num_{i}\" for i in range(5)])\n", + "# add 5 categorical columns\n", + "for i in range(5):\n", + " X[f\"cat_{i}\"] = np.random.choice([\"A\", \"B\", \"C\"], size=100)\n", + "y = y.flatten()\n", + "# train test split\n", + "X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size=0.5)\n", + "\n", + "# TODO: implement support for this condition\n", + "# select_categories = make_column_selector(dtype_include=object)\n", + "# select_numerical = make_column_selector(dtype_include=np.number)\n", + "\n", + "# split_imputation = Split(\n", + "# {\n", + "# \"categories\": [SimpleImputer(strategy=\"constant\", fill_value=\"missing\"), OneHotEncoder(drop=\"first\")],\n", + "# \"numerics\": Component(SimpleImputer, space={\"strategy\": [\"mean\", \"median\"]}),\n", + "# },\n", + "# config={\"categories\": select_categories, \"numerics\": select_numerical}, #not yet supported\n", + "# name=\"feature_preprocessing\",\n", + "# )\n", + "# split_imputation\n", + "\n", + "select_categories = make_column_selector(dtype_include=object)\n", + "select_numerical = make_column_selector(dtype_include=np.number)\n", + "\n", + "cat_selector = make_column_transformer((\"passthrough\", select_categories))\n", + "num_selector = make_column_transformer((\"passthrough\", select_numerical))\n", + "\n", + "\n", + "split_imputation = Split(\n", + " {\n", + " \"categories\": [cat_selector,SimpleImputer(strategy=\"constant\", fill_value=\"missing\"), OneHotEncoder(drop=\"first\", sparse_output=False)],\n", + " \"numerics\": [num_selector, Component(SimpleImputer, space={\"strategy\": [\"mean\", \"median\"]})],\n", + " },\n", + " name=\"split_imputation\",\n", + ")\n", + "split_imputation" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
╭─ Sequential(my_pipeline) ───────────────────────────────────────────────────────────────────────────────────────╮\n",
+       " ╭─ Split(split_imputation) ───────────────────────────────────────────────────────────────────────────────────╮ \n",
+       "  ╭─ Sequential(categories) ─────────────────────────╮ ╭─ Sequential(numerics) ─────────────────────────────╮  \n",
+       "   ╭─ Fixed(ColumnTransformer) ───────────────────╮   ╭─ Fixed(ColumnTransformer) ─────────────────────╮   \n",
+       "    item ColumnTransformer(transformers=[('pass…     item ColumnTransformer(transformers=[('passth…    \n",
+       "         'passthrough',                                   'passthrough',                               \n",
+       "                                          <skle…                                           <sklear…    \n",
+       "         object at 0x7ab9ec119d20>)])                     object at 0x7ab994db4c40>)])                 \n",
+       "   ╰──────────────────────────────────────────────╯   ╰────────────────────────────────────────────────╯   \n",
+       "         \n",
+       "   ╭─ Fixed(SimpleImputer) ───────────────────────╮   ╭─ Component(SimpleImputer) ─────────────╮           \n",
+       "    item SimpleImputer(fill_value='missing',         item  class SimpleImputer(...)                    \n",
+       "         strategy='constant')                        space {'strategy': ['mean', 'median']}            \n",
+       "   ╰──────────────────────────────────────────────╯   ╰────────────────────────────────────────╯           \n",
+       "     ╰────────────────────────────────────────────────────╯  \n",
+       "   ╭─ Fixed(OneHotEncoder) ───────────────────────╮                                                          \n",
+       "    item OneHotEncoder(drop='first',                                                                       \n",
+       "         sparse_output=False)                                                                              \n",
+       "   ╰──────────────────────────────────────────────╯                                                          \n",
+       "  ╰──────────────────────────────────────────────────╯                                                         \n",
+       " ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ \n",
+       "  \n",
+       " ╭─ Choice(selectors) ─────────────────────────────────────────────────────╮                                     \n",
+       "  ╭─ Component(SelectKBest) ─────╮ ╭─ Component(VarianceThreshold) ─────╮                                      \n",
+       "   item  class SelectKBest(...)   item  class VarianceThreshold(...)                                       \n",
+       "   space {'k': (1, 10)}           space {'threshold': (0.1, 1)}                                            \n",
+       "  ╰──────────────────────────────╯ ╰────────────────────────────────────╯                                      \n",
+       " ╰─────────────────────────────────────────────────────────────────────────╯                                     \n",
+       "  \n",
+       " ╭─ Split(transformers) ─────────────────────────────────────────────────────────────────────────────────╮       \n",
+       "  ╭─ Sequential(passthrough) ─╮ ╭─ Sequential(polynomial) ────────────────╮ ╭─ Sequential(zerocount) ─╮        \n",
+       "   ╭─ Fixed(Passthrough) ─╮    ╭─ Component(PolynomialFeatures) ─────╮   ╭─ Fixed(ZeroCount) ─╮          \n",
+       "    item Passthrough()        item  class PolynomialFeatures(...)     item ZeroCount()             \n",
+       "   ╰──────────────────────╯     space {'degree': [2, 3]}               ╰────────────────────╯          \n",
+       "  ╰───────────────────────────╯  ╰─────────────────────────────────────╯  ╰─────────────────────────╯        \n",
+       "                                ╰─────────────────────────────────────────╯                                    \n",
+       " ╰───────────────────────────────────────────────────────────────────────────────────────────────────────╯       \n",
+       "  \n",
+       " ╭─ Choice(estimator) ─────────────────────────────────────────────────────────────────────────────────────────╮ \n",
+       "  ╭─ Component(RandomForestClassifier) ──────────╮ ╭─ Component(SVC) ────────────────────────────╮             \n",
+       "   item   class RandomForestClassifier(...)       item  class SVC(...)                                     \n",
+       "   config {'max_depth': 3}                        space {'kernel': ['linear', 'rbf', 'poly']}              \n",
+       "   space  {                                      ╰─────────────────────────────────────────────╯             \n",
+       "              'n_estimators': (10, 100),                                                                     \n",
+       "              'criterion': [                                                                                 \n",
+       "                  'gini',                                                                                    \n",
+       "                  'log_loss'                                                                                 \n",
+       "              ]                                                                                              \n",
+       "          }                                                                                                  \n",
+       "  ╰──────────────────────────────────────────────╯                                                             \n",
+       " ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ \n",
+       "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mmy_pipeline\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m──────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m╭─\u001b[0m\u001b[38;2;119;125;167m \u001b[0m\u001b[1;38;2;119;125;167mSplit\u001b[0m\u001b[38;2;119;125;167m(\u001b[0m\u001b[3;38;2;119;125;167msplit_imputation\u001b[0m\u001b[38;2;119;125;167m) \u001b[0m\u001b[38;2;119;125;167m──────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;119;125;167m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mcategories\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m────────────────────────\u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m \u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mnumerics\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m────────────────────────────\u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mColumnTransformer\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m──────────────────\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mColumnTransformer\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m────────────────────\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mColumnTransformer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mtransformers\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m[\u001b[0m\u001b[1;39m(\u001b[0m\u001b[32m'pass…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mColumnTransformer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mtransformers\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m[\u001b[0m\u001b[1;39m(\u001b[0m\u001b[32m'passth…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[32m'passthrough'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[32m'passthrough'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[1;39m<\u001b[0m\u001b[1;95mskle…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[1;39m<\u001b[0m\u001b[1;95msklear…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39mobject at \u001b[0m\u001b[1;36m0x7ab9ec119d20\u001b[0m\u001b[1;39m>\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39mobject at \u001b[0m\u001b[1;36m0x7ab994db4c40\u001b[0m\u001b[1;39m>\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰──────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mSimpleImputer\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m──────────────────────\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mSimpleImputer\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m────────────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mSimpleImputer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mfill_value\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'missing'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=27035;https://www.scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html\u001b\\\u001b[4;39mSimpleImputer\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[33mstrategy\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'constant'\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'strategy'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m[\u001b[0m\u001b[32m'mean'\u001b[0m\u001b[39m, \u001b[0m\u001b[32m'median'\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m}\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰──────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╰────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m╰────────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mOneHotEncoder\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m──────────────────────\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mOneHotEncoder\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mdrop\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'first'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[33msparse_output\u001b[0m\u001b[39m=\u001b[0m\u001b[3;91mFalse\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰──────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m╰──────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m╭─\u001b[0m\u001b[38;2;255;69;0m \u001b[0m\u001b[1;38;2;255;69;0mChoice\u001b[0m\u001b[38;2;255;69;0m(\u001b[0m\u001b[3;38;2;255;69;0mselectors\u001b[0m\u001b[38;2;255;69;0m) \u001b[0m\u001b[38;2;255;69;0m────────────────────────────────────────────────────\u001b[0m\u001b[38;2;255;69;0m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mSelectKBest\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mVarianceThreshold\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=924552;https://www.scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectKBest.html\u001b\\\u001b[4;39mSelectKBest\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=534283;https://www.scikit-learn.org/stable/modules/generated/sklearn.feature_selection.VarianceThreshold.html\u001b\\\u001b[4;39mVarianceThreshold\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'k'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m10\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m}\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'threshold'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;36m0.1\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m}\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m╰──────────────────────────────╯\u001b[0m \u001b[38;2;230;175;46m╰────────────────────────────────────╯\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m╰─────────────────────────────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m╭─\u001b[0m\u001b[38;2;119;125;167m \u001b[0m\u001b[1;38;2;119;125;167mSplit\u001b[0m\u001b[38;2;119;125;167m(\u001b[0m\u001b[3;38;2;119;125;167mtransformers\u001b[0m\u001b[38;2;119;125;167m) \u001b[0m\u001b[38;2;119;125;167m────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;119;125;167m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mpassthrough\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m \u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mpolynomial\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m───────────────\u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m \u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mzerocount\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mPassthrough\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mPolynomialFeatures\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mZeroCount\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mPassthrough\u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=160272;https://www.scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html\u001b\\\u001b[4;39mPolynomialFeatures\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mZeroCount\u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰──────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'degree'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m[\u001b[0m\u001b[1;36m2\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m}\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m╰───────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╰─────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m╰─────────────────────────╯\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m╰─────────────────────────────────────────╯\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m╰───────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m╭─\u001b[0m\u001b[38;2;255;69;0m \u001b[0m\u001b[1;38;2;255;69;0mChoice\u001b[0m\u001b[38;2;255;69;0m(\u001b[0m\u001b[3;38;2;255;69;0mestimator\u001b[0m\u001b[38;2;255;69;0m) \u001b[0m\u001b[38;2;255;69;0m────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;255;69;0m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mRandomForestClassifier\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m─────────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mSVC\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m───────────────────────────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=399181;https://www.scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html\u001b\\\u001b[4;39mRandomForestClassifier\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=945901;https://www.scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html\u001b\\\u001b[4;39mSVC\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mconfig\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'max_depth'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;39m}\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'kernel'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m[\u001b[0m\u001b[32m'linear'\u001b[0m\u001b[39m, \u001b[0m\u001b[32m'rbf'\u001b[0m\u001b[39m, \u001b[0m\u001b[32m'poly'\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m}\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace \u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m╰─────────────────────────────────────────────╯\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[32m'n_estimators'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;36m10\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m100\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[32m'criterion'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m[\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[32m'gini'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[32m'log_loss'\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[1;39m]\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39m \u001b[0m\u001b[1;39m}\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m╰──────────────────────────────────────────────╯\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [], + "text/plain": [ + "Sequential(name='my_pipeline', item=None, nodes=(Split(name='split_imputation', item=None, nodes=(Sequential(name='categories', item=None, nodes=(Fixed(name='ColumnTransformer', item=ColumnTransformer(transformers=[('passthrough', 'passthrough',\n", + " )]), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Fixed(name='SimpleImputer', item=SimpleImputer(fill_value='missing', strategy='constant'), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Fixed(name='OneHotEncoder', item=OneHotEncoder(drop='first', sparse_output=False), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Sequential(name='numerics', item=None, nodes=(Fixed(name='ColumnTransformer', item=ColumnTransformer(transformers=[('passthrough', 'passthrough',\n", + " )]), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Component(name='SimpleImputer', item=, nodes=(), config=None, space={'strategy': ['mean', 'median']}, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Choice(name='selectors', item=None, nodes=(Component(name='SelectKBest', item=, nodes=(), config=None, space={'k': (1, 10)}, fidelities=None, config_transform=None, meta=None), Component(name='VarianceThreshold', item=, nodes=(), config=None, space={'threshold': (0.1, 1)}, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Split(name='transformers', item=None, nodes=(Sequential(name='passthrough', item=None, nodes=(Fixed(name='Passthrough', item=Passthrough(), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None),), config=None, space=None, fidelities=None, config_transform=None, meta=None), Sequential(name='polynomial', item=None, nodes=(Component(name='PolynomialFeatures', item=, nodes=(), config=None, space={'degree': [2, 3]}, fidelities=None, config_transform=None, meta=None),), config=None, space=None, fidelities=None, config_transform=None, meta=None), Sequential(name='zerocount', item=None, nodes=(Fixed(name='ZeroCount', item=ZeroCount(), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None),), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Choice(name='estimator', item=None, nodes=(Component(name='RandomForestClassifier', item=, nodes=(), config={'max_depth': 3}, space={'n_estimators': (10, 100), 'criterion': ['gini', 'log_loss']}, fidelities=None, config_transform=None, meta=None), Component(name='SVC', item=, nodes=(), config=None, space={'kernel': ['linear', 'rbf', 'poly']}, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from tpot2.builtin_modules import Passthrough, ZeroCount\n", + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn.decomposition import PCA\n", + "\n", + "from sklearn.feature_selection import VarianceThreshold, SelectKBest\n", + "\n", + "selectors = Choice(\n", + " Component(VarianceThreshold, space={\"threshold\": (0.1,1)}),\n", + " Component(SelectKBest, space={\"k\": (1, 10)}),\n", + " name=\"selectors\",\n", + ")\n", + "\n", + "\n", + "transformers = Split(\n", + " {\n", + " \"passthrough\": Passthrough(),\n", + " \"polynomial\": Component(PolynomialFeatures, space={\"degree\": [2, 3]}),\n", + " \"zerocount\" : ZeroCount(),\n", + " },\n", + " # config={\"categories\": select_categories, \"numerics\": select_numerical},\n", + " name=\"transformers\",\n", + ")\n", + "\n", + "pipeline = (\n", + " Sequential(name=\"my_pipeline\")\n", + " >> split_imputation\n", + " # >> Component(SimpleImputer, space={\"strategy\": [\"mean\", \"median\"]}) # Choose either mean or median\n", + " \n", + " >> selectors\n", + " >> transformers\n", + " >> Choice(\n", + " # Our pipeline can choose between two different estimators\n", + " Component(\n", + " RandomForestClassifier,\n", + " space={\"n_estimators\": (10, 100), \"criterion\": [\"gini\", \"log_loss\"]},\n", + " config={\"max_depth\": 3},\n", + " ),\n", + " Component(SVC, space={\"kernel\": [\"linear\", \"rbf\", \"poly\"]}),\n", + " name=\"estimator\",\n", + " )\n", + ")\n", + "\n", + "# Display the amltk Pipeline\n", + "pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Pipeline(steps=[('featureunion-1',\n",
+       "                 FeatureUnion(transformer_list=[('pipeline-1',\n",
+       "                                                 Pipeline(steps=[('columntransformer',\n",
+       "                                                                  ColumnTransformer(transformers=[('passthrough',\n",
+       "                                                                                                   'passthrough',\n",
+       "                                                                                                   <sklearn.compose._column_transformer.make_column_selector object at 0x7ab9ec119d20>)])),\n",
+       "                                                                 ('simpleimputer',\n",
+       "                                                                  SimpleImputer(fill_value='missing',\n",
+       "                                                                                strategy='constant')),\n",
+       "                                                                 ('onehotencode...\n",
+       "                ('selectkbest', SelectKBest(k=4)),\n",
+       "                ('featureunion-2',\n",
+       "                 FeatureUnion(transformer_list=[('pipeline-1',\n",
+       "                                                 Pipeline(steps=[('passthrough',\n",
+       "                                                                  Passthrough())])),\n",
+       "                                                ('pipeline-2',\n",
+       "                                                 Pipeline(steps=[('polynomialfeatures',\n",
+       "                                                                  PolynomialFeatures())])),\n",
+       "                                                ('pipeline-3',\n",
+       "                                                 Pipeline(steps=[('zerocount',\n",
+       "                                                                  ZeroCount())]))])),\n",
+       "                ('randomforestclassifier',\n",
+       "                 RandomForestClassifier(n_estimators=24))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "Pipeline(steps=[('featureunion-1',\n", + " FeatureUnion(transformer_list=[('pipeline-1',\n", + " Pipeline(steps=[('columntransformer',\n", + " ColumnTransformer(transformers=[('passthrough',\n", + " 'passthrough',\n", + " )])),\n", + " ('simpleimputer',\n", + " SimpleImputer(fill_value='missing',\n", + " strategy='constant')),\n", + " ('onehotencode...\n", + " ('selectkbest', SelectKBest(k=4)),\n", + " ('featureunion-2',\n", + " FeatureUnion(transformer_list=[('pipeline-1',\n", + " Pipeline(steps=[('passthrough',\n", + " Passthrough())])),\n", + " ('pipeline-2',\n", + " Pipeline(steps=[('polynomialfeatures',\n", + " PolynomialFeatures())])),\n", + " ('pipeline-3',\n", + " Pipeline(steps=[('zerocount',\n", + " ZeroCount())]))])),\n", + " ('randomforestclassifier',\n", + " RandomForestClassifier(n_estimators=24))])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#convert to tpot search space\n", + "tpot_search_space = tpot2.utils.tpot2_parser(pipeline)\n", + "\n", + "# sample a pipeline from the tpot search space\n", + "tpot_search_space.generate().export_pipeline()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generation: 50%|█████ | 1/2 [00:03<00:03, 3.26s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generation: 1\n", + "Best roc_auc_score score: 0.9423333333333334\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generation: 100%|██████████| 2/2 [00:03<00:00, 1.86s/it]\n", + "2024-09-09 17:18:39,054 - distributed.worker - ERROR - Failed to communicate with scheduler during heartbeat.\n", + "Traceback (most recent call last):\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/comm/tcp.py\", line 225, in read\n", + " frames_nosplit_nbytes_bin = await stream.read_bytes(fmt_size)\n", + "tornado.iostream.StreamClosedError: Stream is closed\n", + "\n", + "The above exception was the direct cause of the following exception:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/worker.py\", line 1250, in heartbeat\n", + " response = await retry_operation(\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/utils_comm.py\", line 459, in retry_operation\n", + " return await retry(\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/utils_comm.py\", line 438, in retry\n", + " return await coro()\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/core.py\", line 1254, in send_recv_from_rpc\n", + " return await send_recv(comm=comm, op=key, **kwargs)\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/core.py\", line 1013, in send_recv\n", + " response = await comm.read(deserializers=deserializers)\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/comm/tcp.py\", line 236, in read\n", + " convert_stream_closed_error(self, e)\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/comm/tcp.py\", line 142, in convert_stream_closed_error\n", + " raise CommClosedError(f\"in {obj}: {exc}\") from exc\n", + "distributed.comm.core.CommClosedError: in : Stream is closed\n", + "2024-09-09 17:18:39,055 - distributed.worker - ERROR - Failed to communicate with scheduler during heartbeat.\n", + "Traceback (most recent call last):\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/comm/tcp.py\", line 225, in read\n", + " frames_nosplit_nbytes_bin = await stream.read_bytes(fmt_size)\n", + "tornado.iostream.StreamClosedError: Stream is closed\n", + "\n", + "The above exception was the direct cause of the following exception:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/worker.py\", line 1250, in heartbeat\n", + " response = await retry_operation(\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/utils_comm.py\", line 459, in retry_operation\n", + " return await retry(\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/utils_comm.py\", line 438, in retry\n", + " return await coro()\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/core.py\", line 1254, in send_recv_from_rpc\n", + " return await send_recv(comm=comm, op=key, **kwargs)\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/core.py\", line 1013, in send_recv\n", + " response = await comm.read(deserializers=deserializers)\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/comm/tcp.py\", line 236, in read\n", + " convert_stream_closed_error(self, e)\n", + " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/comm/tcp.py\", line 142, in convert_stream_closed_error\n", + " raise CommClosedError(f\"in {obj}: {exc}\") from exc\n", + "distributed.comm.core.CommClosedError: in : Stream is closed\n", + "2024-09-09 17:18:39,062 - distributed.scheduler - ERROR - Removing worker 'tcp://127.0.0.1:39033' caused the cluster to lose scattered data, which can't be recovered: {'ndarray-4295957c2613499053c4412f415dedb8', 'DataFrame-ee6ff64644f78f1c23d469116500dd47'} (stimulus_id='handle-worker-cleanup-1725927519.0628352')\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generation: 2\n", + "Best roc_auc_score score: 0.96\n" + ] + }, + { + "data": { + "text/html": [ + "
TPOTEstimator(classification=True, generations=2, max_eval_time_seconds=300,\n",
+       "              n_jobs=10, population_size=10, scorers=['roc_auc'],\n",
+       "              scorers_weights=[1],\n",
+       "              search_space=<tpot2.search_spaces.pipelines.sequential.SequentialPipeline object at 0x7ab9928d8f40>,\n",
+       "              verbose=5)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "TPOTEstimator(classification=True, generations=2, max_eval_time_seconds=300,\n", + " n_jobs=10, population_size=10, scorers=['roc_auc'],\n", + " scorers_weights=[1],\n", + " search_space=,\n", + " verbose=5)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "\n", + "\n", + "est = tpot2.TPOTEstimator(\n", + " scorers = [\"roc_auc\"],\n", + " scorers_weights = [1],\n", + " classification = True,\n", + " cv = 5,\n", + " search_space = tpot_search_space, #converted search space goes here\n", + " population_size= 10,\n", + " generations = 2,\n", + " max_eval_time_seconds = 60*5,\n", + " verbose = 5,\n", + " n_jobs=10,\n", + ")\n", + "\n", + "est.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Pipeline(steps=[('featureunion-1',\n",
+       "                 FeatureUnion(transformer_list=[('pipeline-1',\n",
+       "                                                 Pipeline(steps=[('columntransformer',\n",
+       "                                                                  ColumnTransformer(transformers=[('passthrough',\n",
+       "                                                                                                   'passthrough',\n",
+       "                                                                                                   <sklearn.compose._column_transformer.make_column_selector object at 0x7ab98dbdb100>)])),\n",
+       "                                                                 ('simpleimputer',\n",
+       "                                                                  SimpleImputer(fill_value='missing',\n",
+       "                                                                                strategy='constant')),\n",
+       "                                                                 ('onehotencode...\n",
+       "                 VarianceThreshold(threshold=0.6396211247532)),\n",
+       "                ('featureunion-2',\n",
+       "                 FeatureUnion(transformer_list=[('pipeline-1',\n",
+       "                                                 Pipeline(steps=[('passthrough',\n",
+       "                                                                  Passthrough())])),\n",
+       "                                                ('pipeline-2',\n",
+       "                                                 Pipeline(steps=[('polynomialfeatures',\n",
+       "                                                                  PolynomialFeatures())])),\n",
+       "                                                ('pipeline-3',\n",
+       "                                                 Pipeline(steps=[('zerocount',\n",
+       "                                                                  ZeroCount())]))])),\n",
+       "                ('randomforestclassifier',\n",
+       "                 RandomForestClassifier(criterion='log_loss',\n",
+       "                                        n_estimators=47))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "Pipeline(steps=[('featureunion-1',\n", + " FeatureUnion(transformer_list=[('pipeline-1',\n", + " Pipeline(steps=[('columntransformer',\n", + " ColumnTransformer(transformers=[('passthrough',\n", + " 'passthrough',\n", + " )])),\n", + " ('simpleimputer',\n", + " SimpleImputer(fill_value='missing',\n", + " strategy='constant')),\n", + " ('onehotencode...\n", + " VarianceThreshold(threshold=0.6396211247532)),\n", + " ('featureunion-2',\n", + " FeatureUnion(transformer_list=[('pipeline-1',\n", + " Pipeline(steps=[('passthrough',\n", + " Passthrough())])),\n", + " ('pipeline-2',\n", + " Pipeline(steps=[('polynomialfeatures',\n", + " PolynomialFeatures())])),\n", + " ('pipeline-3',\n", + " Pipeline(steps=[('zerocount',\n", + " ZeroCount())]))])),\n", + " ('randomforestclassifier',\n", + " RandomForestClassifier(criterion='log_loss',\n", + " n_estimators=47))])" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.fitted_pipeline_" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0,\n", + " 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1,\n", + " 1, 0, 0, 1, 1, 0])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.predict(X_test)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "myenv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/setup.py b/setup.py index 0a404280..8586dbe7 100644 --- a/setup.py +++ b/setup.py @@ -53,7 +53,8 @@ def calculate_version(): extras_require={ 'skrebate': ['skrebate>=0.3.4'], 'mdr': ['scikit-mdr>=0.4.4'], - 'sklearnex' : ['scikit-learn-intelex>=2023.2.1'] + 'sklearnex' : ['scikit-learn-intelex>=2023.2.1'], + 'amltk' : ['amltk>=1.12.1'], }, classifiers=[ 'Intended Audience :: Science/Research', diff --git a/tpot2/__init__.py b/tpot2/__init__.py index 62290884..f7014a29 100644 --- a/tpot2/__init__.py +++ b/tpot2/__init__.py @@ -8,9 +8,9 @@ from .population import Population from . import builtin_modules -from . import utils from . import config from . import search_spaces +from . import utils from . import evolvers from . import objectives from . import selectors diff --git a/tpot2/utils/__init__.py b/tpot2/utils/__init__.py index e9c795a3..12231446 100644 --- a/tpot2/utils/__init__.py +++ b/tpot2/utils/__init__.py @@ -1,2 +1,11 @@ from . import eval_utils -from .utils import * \ No newline at end of file +from .utils import * + +# If amltk is installed, import the parser +try: + from .amltk_parser import tpot2_parser +except ImportError: + # Handle the case when amltk is not installed + pass + # print("amltk is not installed. Please install it to use tpot2_parser.") + # Optional: raise an exception or provide alternative functionality \ No newline at end of file diff --git a/tpot2/utils/amltk_parser.py b/tpot2/utils/amltk_parser.py new file mode 100644 index 00000000..c147dbd8 --- /dev/null +++ b/tpot2/utils/amltk_parser.py @@ -0,0 +1,72 @@ +from amltk.pipeline import Choice, Component, Sequential, Node, Fixed, Split, Join, Searchable +from tpot2.search_spaces.pipelines import SequentialPipeline, ChoicePipeline, UnionPipeline +from tpot2.search_spaces.nodes import EstimatorNode +from ConfigSpace import ConfigurationSpace + +def component_to_estimatornode(component: Component) -> EstimatorNode: + method = component.item + space_dict = {} + if component.space is not None: + space_dict.update(component.space) + if component.config is not None: + space_dict.update(component.config) + space = ConfigurationSpace(component.space) + + tpot2_sp = EstimatorNode(method=method, space=space) + return tpot2_sp + +def fixed_to_estimatornode(node: Fixed) -> EstimatorNode: + method = node.item + #check if method is a class or an object + if not isinstance(method, type): + method = type(method) + + #if baseestimator, get params + if hasattr(node.item, 'get_params'): + space_dict = node.item.get_params(deep=False) + else: + space_dict = {} + if node.space is not None: + space_dict.update(node.space) + if node.config is not None: + space_dict.update(node.config) + + tpot2_sp = EstimatorNode(method=method, space=space_dict) + return tpot2_sp + +def sequential_to_sequentialpipeline(sequential: Sequential) -> SequentialPipeline: + nodes = [tpot2_parser(node) for node in sequential.nodes] + tpot2_sp = SequentialPipeline(search_spaces=nodes) + return tpot2_sp + +def choice_to_choicepipeline(choice: Choice) -> ChoicePipeline: + nodes = [tpot2_parser(node) for node in choice.nodes] + tpot2_sp = ChoicePipeline(search_spaces=nodes) + return tpot2_sp + + +def split_to_unionpipeline(split: Split) -> UnionPipeline: + nodes = [tpot2_parser(node) for node in split.nodes] + tpot2_sp = UnionPipeline(search_spaces=nodes) + return tpot2_sp + +def tpot2_parser( + node: Node, + # *, + # flat: bool = False, + # conditionals: bool = False, + # delim: str = ":", + ): + + if isinstance(node, Component): + return component_to_estimatornode(node) + elif isinstance(node, Sequential): + return sequential_to_sequentialpipeline(node) + elif isinstance(node, Choice): + return choice_to_choicepipeline(node) + elif isinstance(node, Fixed): + return fixed_to_estimatornode(node) + elif isinstance(node, Split): + return split_to_unionpipeline(node) + else: + raise ValueError(f"Node type {type(node)} not supported") From abd4ac6b73f63b0e2ab8ff504355495f750c68e3 Mon Sep 17 00:00:00 2001 From: perib Date: Mon, 9 Sep 2024 17:26:55 -0700 Subject: [PATCH 10/14] rerun --- .../amltk_search_space_parser_example.ipynb | 176 +++++++----------- 1 file changed, 64 insertions(+), 112 deletions(-) diff --git a/Tutorial/amltk_search_space_parser_example.ipynb b/Tutorial/amltk_search_space_parser_example.ipynb index 9dc62527..fe2038df 100644 --- a/Tutorial/amltk_search_space_parser_example.ipynb +++ b/Tutorial/amltk_search_space_parser_example.ipynb @@ -25,7 +25,7 @@ " item ColumnTransformer(transformers=[('passth… item ColumnTransformer(transformers=[('passthro… \n", " 'passthrough', 'passthrough', \n", " <sklear… <sklearn.… \n", - " object at 0x7ab9ec119d20>)]) object at 0x7ab994db4c40>)]) \n", + " object at 0x7d354d946290>)]) object at 0x7d34edf94fa0>)]) \n", " ╰────────────────────────────────────────────────╯ ╰──────────────────────────────────────────────────╯ \n", " \n", " ╭─ Fixed(SimpleImputer) ─────────────────────────╮ ╭─ Component(SimpleImputer) ─────────────╮ \n", @@ -48,11 +48,11 @@ "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mColumnTransformer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mtransformers\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m[\u001b[0m\u001b[1;39m(\u001b[0m\u001b[32m'passth…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mColumnTransformer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mtransformers\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m[\u001b[0m\u001b[1;39m(\u001b[0m\u001b[32m'passthro…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[32m'passthrough'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[32m'passthrough'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[1;39m<\u001b[0m\u001b[1;95msklear…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[1;39m<\u001b[0m\u001b[1;95msklearn.…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", - "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39mobject at \u001b[0m\u001b[1;36m0x7ab9ec119d20\u001b[0m\u001b[1;39m>\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39mobject at \u001b[0m\u001b[1;36m0x7ab994db4c40\u001b[0m\u001b[1;39m>\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39mobject at \u001b[0m\u001b[1;36m0x7d354d946290\u001b[0m\u001b[1;39m>\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39mobject at \u001b[0m\u001b[1;36m0x7d34edf94fa0\u001b[0m\u001b[1;39m>\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰──────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mSimpleImputer\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m────────────────────────\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mSimpleImputer\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m────────────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", - "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mSimpleImputer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mfill_value\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'missing'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=202447;https://www.scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html\u001b\\\u001b[4;39mSimpleImputer\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", + "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mSimpleImputer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mfill_value\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'missing'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=861007;https://www.scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html\u001b\\\u001b[4;39mSimpleImputer\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[33mstrategy\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'constant'\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'strategy'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m[\u001b[0m\u001b[32m'mean'\u001b[0m\u001b[39m, \u001b[0m\u001b[32m'median'\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m}\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╰────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", "\u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m╰──────────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m\n", @@ -72,8 +72,8 @@ "text/html": [], "text/plain": [ "Split(name='split_imputation', item=None, nodes=(Sequential(name='categories', item=None, nodes=(Fixed(name='ColumnTransformer', item=ColumnTransformer(transformers=[('passthrough', 'passthrough',\n", - " )]), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Fixed(name='SimpleImputer', item=SimpleImputer(fill_value='missing', strategy='constant'), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Fixed(name='OneHotEncoder', item=OneHotEncoder(drop='first', sparse_output=False), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Sequential(name='numerics', item=None, nodes=(Fixed(name='ColumnTransformer', item=ColumnTransformer(transformers=[('passthrough', 'passthrough',\n", - " )]), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Component(name='SimpleImputer', item=, nodes=(), config=None, space={'strategy': ['mean', 'median']}, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)" + " )]), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Fixed(name='SimpleImputer', item=SimpleImputer(fill_value='missing', strategy='constant'), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Fixed(name='OneHotEncoder', item=OneHotEncoder(drop='first', sparse_output=False), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Sequential(name='numerics', item=None, nodes=(Fixed(name='ColumnTransformer', item=ColumnTransformer(transformers=[('passthrough', 'passthrough',\n", + " )]), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Component(name='SimpleImputer', item=, nodes=(), config=None, space={'strategy': ['mean', 'median']}, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)" ] }, "execution_count": 1, @@ -153,7 +153,7 @@ " item ColumnTransformer(transformers=[('pass… item ColumnTransformer(transformers=[('passth… \n", " 'passthrough', 'passthrough', \n", " <skle… <sklear… \n", - " object at 0x7ab9ec119d20>)]) object at 0x7ab994db4c40>)]) \n", + " object at 0x7d354d946290>)]) object at 0x7d34edf94fa0>)]) \n", " ╰──────────────────────────────────────────────╯ ╰────────────────────────────────────────────────╯ \n", " \n", " ╭─ Fixed(SimpleImputer) ───────────────────────╮ ╭─ Component(SimpleImputer) ─────────────╮ \n", @@ -208,11 +208,11 @@ "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mColumnTransformer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mtransformers\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m[\u001b[0m\u001b[1;39m(\u001b[0m\u001b[32m'pass…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mColumnTransformer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mtransformers\u001b[0m\u001b[39m=\u001b[0m\u001b[1;39m[\u001b[0m\u001b[1;39m(\u001b[0m\u001b[32m'passth…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[32m'passthrough'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[32m'passthrough'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[1;39m<\u001b[0m\u001b[1;95mskle…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[1;39m<\u001b[0m\u001b[1;95msklear…\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", - "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39mobject at \u001b[0m\u001b[1;36m0x7ab9ec119d20\u001b[0m\u001b[1;39m>\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39mobject at \u001b[0m\u001b[1;36m0x7ab994db4c40\u001b[0m\u001b[1;39m>\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39mobject at \u001b[0m\u001b[1;36m0x7d354d946290\u001b[0m\u001b[1;39m>\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[39mobject at \u001b[0m\u001b[1;36m0x7d34edf94fa0\u001b[0m\u001b[1;39m>\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰──────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mSimpleImputer\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m──────────────────────\u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mSimpleImputer\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m────────────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", - "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mSimpleImputer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mfill_value\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'missing'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=27035;https://www.scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html\u001b\\\u001b[4;39mSimpleImputer\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mSimpleImputer\u001b[0m\u001b[1;39m(\u001b[0m\u001b[33mfill_value\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'missing'\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=178888;https://www.scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html\u001b\\\u001b[4;39mSimpleImputer\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39m \u001b[0m\u001b[33mstrategy\u001b[0m\u001b[39m=\u001b[0m\u001b[32m'constant'\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'strategy'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m[\u001b[0m\u001b[32m'mean'\u001b[0m\u001b[39m, \u001b[0m\u001b[32m'median'\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m}\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰──────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╰────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m╰────────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", @@ -225,7 +225,7 @@ "\u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m╭─\u001b[0m\u001b[38;2;255;69;0m \u001b[0m\u001b[1;38;2;255;69;0mChoice\u001b[0m\u001b[38;2;255;69;0m(\u001b[0m\u001b[3;38;2;255;69;0mselectors\u001b[0m\u001b[38;2;255;69;0m) \u001b[0m\u001b[38;2;255;69;0m────────────────────────────────────────────────────\u001b[0m\u001b[38;2;255;69;0m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mSelectKBest\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mVarianceThreshold\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", - "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=924552;https://www.scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectKBest.html\u001b\\\u001b[4;39mSelectKBest\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=534283;https://www.scikit-learn.org/stable/modules/generated/sklearn.feature_selection.VarianceThreshold.html\u001b\\\u001b[4;39mVarianceThreshold\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=870666;https://www.scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectKBest.html\u001b\\\u001b[4;39mSelectKBest\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=23174;https://www.scikit-learn.org/stable/modules/generated/sklearn.feature_selection.VarianceThreshold.html\u001b\\\u001b[4;39mVarianceThreshold\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'k'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m10\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m}\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'threshold'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;36m0.1\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;39m)\u001b[0m\u001b[1;39m}\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m╰──────────────────────────────╯\u001b[0m \u001b[38;2;230;175;46m╰────────────────────────────────────╯\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m╰─────────────────────────────────────────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", @@ -233,7 +233,7 @@ "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m╭─\u001b[0m\u001b[38;2;119;125;167m \u001b[0m\u001b[1;38;2;119;125;167mSplit\u001b[0m\u001b[38;2;119;125;167m(\u001b[0m\u001b[3;38;2;119;125;167mtransformers\u001b[0m\u001b[38;2;119;125;167m) \u001b[0m\u001b[38;2;119;125;167m────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;119;125;167m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mpassthrough\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m \u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mpolynomial\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m───────────────\u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m \u001b[38;2;126;107;143m╭─\u001b[0m\u001b[38;2;126;107;143m \u001b[0m\u001b[1;38;2;126;107;143mSequential\u001b[0m\u001b[38;2;126;107;143m(\u001b[0m\u001b[3;38;2;126;107;143mzerocount\u001b[0m\u001b[38;2;126;107;143m) \u001b[0m\u001b[38;2;126;107;143m─╮\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mPassthrough\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mPolynomialFeatures\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╭─\u001b[0m\u001b[38;2;86;53;30m \u001b[0m\u001b[1;38;2;86;53;30mFixed\u001b[0m\u001b[38;2;86;53;30m(\u001b[0m\u001b[3;38;2;86;53;30mZeroCount\u001b[0m\u001b[38;2;86;53;30m) \u001b[0m\u001b[38;2;86;53;30m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", - "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mPassthrough\u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=160272;https://www.scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html\u001b\\\u001b[4;39mPolynomialFeatures\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mZeroCount\u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mPassthrough\u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=605509;https://www.scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html\u001b\\\u001b[4;39mPolynomialFeatures\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[39mitem\u001b[0m\u001b[39m \u001b[0m\u001b[1;35mZeroCount\u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;39m)\u001b[0m \u001b[38;2;86;53;30m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰──────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'degree'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m[\u001b[0m\u001b[1;36m2\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m}\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;86;53;30m╰────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m╰───────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;230;175;46m╰─────────────────────────────────────╯\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;126;107;143m╰─────────────────────────╯\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m╰─────────────────────────────────────────╯\u001b[0m \u001b[38;2;119;125;167m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", @@ -241,7 +241,7 @@ "\u001b[38;2;126;107;143m│\u001b[0m \u001b[1m ↓ \u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m╭─\u001b[0m\u001b[38;2;255;69;0m \u001b[0m\u001b[1;38;2;255;69;0mChoice\u001b[0m\u001b[38;2;255;69;0m(\u001b[0m\u001b[3;38;2;255;69;0mestimator\u001b[0m\u001b[38;2;255;69;0m) \u001b[0m\u001b[38;2;255;69;0m────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\u001b[38;2;255;69;0m─╮\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mRandomForestClassifier\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m─────────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;230;175;46m╭─\u001b[0m\u001b[38;2;230;175;46m \u001b[0m\u001b[1;38;2;230;175;46mComponent\u001b[0m\u001b[38;2;230;175;46m(\u001b[0m\u001b[3;38;2;230;175;46mSVC\u001b[0m\u001b[38;2;230;175;46m) \u001b[0m\u001b[38;2;230;175;46m───────────────────────────\u001b[0m\u001b[38;2;230;175;46m─╮\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", - "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=399181;https://www.scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html\u001b\\\u001b[4;39mRandomForestClassifier\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=945901;https://www.scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html\u001b\\\u001b[4;39mSVC\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", + "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=470078;https://www.scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html\u001b\\\u001b[4;39mRandomForestClassifier\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mitem \u001b[0m\u001b[39m \u001b[0m\u001b[3;96mclass \u001b[0m\u001b]8;id=315827;https://www.scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html\u001b\\\u001b[4;39mSVC\u001b[0m\u001b]8;;\u001b\\\u001b[1;39m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mconfig\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'max_depth'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;39m}\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace\u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[32m'kernel'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m[\u001b[0m\u001b[32m'linear'\u001b[0m\u001b[39m, \u001b[0m\u001b[32m'rbf'\u001b[0m\u001b[39m, \u001b[0m\u001b[32m'poly'\u001b[0m\u001b[1;39m]\u001b[0m\u001b[1;39m}\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39mspace \u001b[0m\u001b[39m \u001b[0m\u001b[1;39m{\u001b[0m\u001b[39m \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;230;175;46m╰─────────────────────────────────────────────╯\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", "\u001b[38;2;126;107;143m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[39m \u001b[0m\u001b[39m \u001b[0m\u001b[32m'n_estimators'\u001b[0m\u001b[39m: \u001b[0m\u001b[1;39m(\u001b[0m\u001b[1;36m10\u001b[0m\u001b[39m, \u001b[0m\u001b[1;36m100\u001b[0m\u001b[1;39m)\u001b[0m\u001b[39m, \u001b[0m \u001b[38;2;230;175;46m│\u001b[0m \u001b[38;2;255;69;0m│\u001b[0m \u001b[38;2;126;107;143m│\u001b[0m\n", @@ -263,8 +263,8 @@ "text/html": [], "text/plain": [ "Sequential(name='my_pipeline', item=None, nodes=(Split(name='split_imputation', item=None, nodes=(Sequential(name='categories', item=None, nodes=(Fixed(name='ColumnTransformer', item=ColumnTransformer(transformers=[('passthrough', 'passthrough',\n", - " )]), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Fixed(name='SimpleImputer', item=SimpleImputer(fill_value='missing', strategy='constant'), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Fixed(name='OneHotEncoder', item=OneHotEncoder(drop='first', sparse_output=False), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Sequential(name='numerics', item=None, nodes=(Fixed(name='ColumnTransformer', item=ColumnTransformer(transformers=[('passthrough', 'passthrough',\n", - " )]), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Component(name='SimpleImputer', item=, nodes=(), config=None, space={'strategy': ['mean', 'median']}, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Choice(name='selectors', item=None, nodes=(Component(name='SelectKBest', item=, nodes=(), config=None, space={'k': (1, 10)}, fidelities=None, config_transform=None, meta=None), Component(name='VarianceThreshold', item=, nodes=(), config=None, space={'threshold': (0.1, 1)}, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Split(name='transformers', item=None, nodes=(Sequential(name='passthrough', item=None, nodes=(Fixed(name='Passthrough', item=Passthrough(), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None),), config=None, space=None, fidelities=None, config_transform=None, meta=None), Sequential(name='polynomial', item=None, nodes=(Component(name='PolynomialFeatures', item=, nodes=(), config=None, space={'degree': [2, 3]}, fidelities=None, config_transform=None, meta=None),), config=None, space=None, fidelities=None, config_transform=None, meta=None), Sequential(name='zerocount', item=None, nodes=(Fixed(name='ZeroCount', item=ZeroCount(), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None),), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Choice(name='estimator', item=None, nodes=(Component(name='RandomForestClassifier', item=, nodes=(), config={'max_depth': 3}, space={'n_estimators': (10, 100), 'criterion': ['gini', 'log_loss']}, fidelities=None, config_transform=None, meta=None), Component(name='SVC', item=, nodes=(), config=None, space={'kernel': ['linear', 'rbf', 'poly']}, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)" + " )]), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Fixed(name='SimpleImputer', item=SimpleImputer(fill_value='missing', strategy='constant'), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Fixed(name='OneHotEncoder', item=OneHotEncoder(drop='first', sparse_output=False), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Sequential(name='numerics', item=None, nodes=(Fixed(name='ColumnTransformer', item=ColumnTransformer(transformers=[('passthrough', 'passthrough',\n", + " )]), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None), Component(name='SimpleImputer', item=, nodes=(), config=None, space={'strategy': ['mean', 'median']}, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Choice(name='selectors', item=None, nodes=(Component(name='SelectKBest', item=, nodes=(), config=None, space={'k': (1, 10)}, fidelities=None, config_transform=None, meta=None), Component(name='VarianceThreshold', item=, nodes=(), config=None, space={'threshold': (0.1, 1)}, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Split(name='transformers', item=None, nodes=(Sequential(name='passthrough', item=None, nodes=(Fixed(name='Passthrough', item=Passthrough(), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None),), config=None, space=None, fidelities=None, config_transform=None, meta=None), Sequential(name='polynomial', item=None, nodes=(Component(name='PolynomialFeatures', item=, nodes=(), config=None, space={'degree': [2, 3]}, fidelities=None, config_transform=None, meta=None),), config=None, space=None, fidelities=None, config_transform=None, meta=None), Sequential(name='zerocount', item=None, nodes=(Fixed(name='ZeroCount', item=ZeroCount(), nodes=(), config=None, space=None, fidelities=None, config_transform=None, meta=None),), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None), Choice(name='estimator', item=None, nodes=(Component(name='RandomForestClassifier', item=, nodes=(), config={'max_depth': 3}, space={'n_estimators': (10, 100), 'criterion': ['gini', 'log_loss']}, fidelities=None, config_transform=None, meta=None), Component(name='SVC', item=, nodes=(), config=None, space={'kernel': ['linear', 'rbf', 'poly']}, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)), config=None, space=None, fidelities=None, config_transform=None, meta=None)" ] }, "execution_count": 2, @@ -321,7 +321,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -736,50 +736,50 @@ " Pipeline(steps=[('columntransformer',\n", " ColumnTransformer(transformers=[('passthrough',\n", " 'passthrough',\n", - " <sklearn.compose._column_transformer.make_column_selector object at 0x7ab9ec119d20>)])),\n", + " <sklearn.compose._column_transformer.make_column_selector object at 0x7d354d946290>)])),\n", " ('simpleimputer',\n", " SimpleImputer(fill_value='missing',\n", " strategy='constant')),\n", " ('onehotencode...\n", - " ('selectkbest', SelectKBest(k=4)),\n", + " VarianceThreshold(threshold=0.6738938110936)),\n", " ('featureunion-2',\n", " FeatureUnion(transformer_list=[('pipeline-1',\n", " Pipeline(steps=[('passthrough',\n", " Passthrough())])),\n", " ('pipeline-2',\n", " Pipeline(steps=[('polynomialfeatures',\n", - " PolynomialFeatures())])),\n", + " PolynomialFeatures(degree=3))])),\n", " ('pipeline-3',\n", " Pipeline(steps=[('zerocount',\n", " ZeroCount())]))])),\n", " ('randomforestclassifier',\n", - " RandomForestClassifier(n_estimators=24))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + " Pipeline(steps=[('zerocount', ZeroCount())]))])
Passthrough()
PolynomialFeatures(degree=3)
ZeroCount()
RandomForestClassifier(n_estimators=16)
" ], "text/plain": [ "Pipeline(steps=[('featureunion-1',\n", @@ -809,27 +809,27 @@ " Pipeline(steps=[('columntransformer',\n", " ColumnTransformer(transformers=[('passthrough',\n", " 'passthrough',\n", - " )])),\n", + " )])),\n", " ('simpleimputer',\n", " SimpleImputer(fill_value='missing',\n", " strategy='constant')),\n", " ('onehotencode...\n", - " ('selectkbest', SelectKBest(k=4)),\n", + " VarianceThreshold(threshold=0.6738938110936)),\n", " ('featureunion-2',\n", " FeatureUnion(transformer_list=[('pipeline-1',\n", " Pipeline(steps=[('passthrough',\n", " Passthrough())])),\n", " ('pipeline-2',\n", " Pipeline(steps=[('polynomialfeatures',\n", - " PolynomialFeatures())])),\n", + " PolynomialFeatures(degree=3))])),\n", " ('pipeline-3',\n", " Pipeline(steps=[('zerocount',\n", " ZeroCount())]))])),\n", " ('randomforestclassifier',\n", - " RandomForestClassifier(n_estimators=24))])" + " RandomForestClassifier(n_estimators=16))])" ] }, - "execution_count": 4, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -844,14 +844,14 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Generation: 50%|█████ | 1/2 [00:03<00:03, 3.26s/it]" + "Generation: 50%|█████ | 1/2 [00:02<00:02, 2.60s/it]" ] }, { @@ -859,63 +859,15 @@ "output_type": "stream", "text": [ "Generation: 1\n", - "Best roc_auc_score score: 0.9423333333333334\n" + "Best roc_auc_score score: 0.976\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "Generation: 100%|██████████| 2/2 [00:03<00:00, 1.86s/it]\n", - "2024-09-09 17:18:39,054 - distributed.worker - ERROR - Failed to communicate with scheduler during heartbeat.\n", - "Traceback (most recent call last):\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/comm/tcp.py\", line 225, in read\n", - " frames_nosplit_nbytes_bin = await stream.read_bytes(fmt_size)\n", - "tornado.iostream.StreamClosedError: Stream is closed\n", - "\n", - "The above exception was the direct cause of the following exception:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/worker.py\", line 1250, in heartbeat\n", - " response = await retry_operation(\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/utils_comm.py\", line 459, in retry_operation\n", - " return await retry(\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/utils_comm.py\", line 438, in retry\n", - " return await coro()\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/core.py\", line 1254, in send_recv_from_rpc\n", - " return await send_recv(comm=comm, op=key, **kwargs)\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/core.py\", line 1013, in send_recv\n", - " response = await comm.read(deserializers=deserializers)\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/comm/tcp.py\", line 236, in read\n", - " convert_stream_closed_error(self, e)\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/comm/tcp.py\", line 142, in convert_stream_closed_error\n", - " raise CommClosedError(f\"in {obj}: {exc}\") from exc\n", - "distributed.comm.core.CommClosedError: in : Stream is closed\n", - "2024-09-09 17:18:39,055 - distributed.worker - ERROR - Failed to communicate with scheduler during heartbeat.\n", - "Traceback (most recent call last):\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/comm/tcp.py\", line 225, in read\n", - " frames_nosplit_nbytes_bin = await stream.read_bytes(fmt_size)\n", - "tornado.iostream.StreamClosedError: Stream is closed\n", - "\n", - "The above exception was the direct cause of the following exception:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/worker.py\", line 1250, in heartbeat\n", - " response = await retry_operation(\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/utils_comm.py\", line 459, in retry_operation\n", - " return await retry(\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/utils_comm.py\", line 438, in retry\n", - " return await coro()\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/core.py\", line 1254, in send_recv_from_rpc\n", - " return await send_recv(comm=comm, op=key, **kwargs)\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/core.py\", line 1013, in send_recv\n", - " response = await comm.read(deserializers=deserializers)\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/comm/tcp.py\", line 236, in read\n", - " convert_stream_closed_error(self, e)\n", - " File \"/home/perib/miniconda3/envs/myenv/lib/python3.10/site-packages/distributed/comm/tcp.py\", line 142, in convert_stream_closed_error\n", - " raise CommClosedError(f\"in {obj}: {exc}\") from exc\n", - "distributed.comm.core.CommClosedError: in : Stream is closed\n", - "2024-09-09 17:18:39,062 - distributed.scheduler - ERROR - Removing worker 'tcp://127.0.0.1:39033' caused the cluster to lose scattered data, which can't be recovered: {'ndarray-4295957c2613499053c4412f415dedb8', 'DataFrame-ee6ff64644f78f1c23d469116500dd47'} (stimulus_id='handle-worker-cleanup-1725927519.0628352')\n" + "Generation: 100%|██████████| 2/2 [00:03<00:00, 1.57s/it]\n", + "2024-09-09 17:25:40,301 - distributed.scheduler - ERROR - Removing worker 'tcp://127.0.0.1:39897' caused the cluster to lose scattered data, which can't be recovered: {'ndarray-3f2f44921e6e9cc40ef07cfcd8ae90fb', 'DataFrame-5551f84174fd651642ff10eb71e30b22'} (stimulus_id='handle-worker-cleanup-1725927940.3010821')\n" ] }, { @@ -923,7 +875,7 @@ "output_type": "stream", "text": [ "Generation: 2\n", - "Best roc_auc_score score: 0.96\n" + "Best roc_auc_score score: 0.984\n" ] }, { @@ -1336,22 +1288,22 @@ "
TPOTEstimator(classification=True, generations=2, max_eval_time_seconds=300,\n",
        "              n_jobs=10, population_size=10, scorers=['roc_auc'],\n",
        "              scorers_weights=[1],\n",
-       "              search_space=<tpot2.search_spaces.pipelines.sequential.SequentialPipeline object at 0x7ab9928d8f40>,\n",
+       "              search_space=<tpot2.search_spaces.pipelines.sequential.SequentialPipeline object at 0x7d34ec1efbb0>,\n",
        "              verbose=5)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "TPOTEstimator(classification=True, generations=2, max_eval_time_seconds=300,\n", " n_jobs=10, population_size=10, scorers=['roc_auc'],\n", " scorers_weights=[1],\n", - " search_space=,\n", + " search_space=,\n", " verbose=5)" ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -1378,7 +1330,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -1793,12 +1745,12 @@ " Pipeline(steps=[('columntransformer',\n", " ColumnTransformer(transformers=[('passthrough',\n", " 'passthrough',\n", - " <sklearn.compose._column_transformer.make_column_selector object at 0x7ab98dbdb100>)])),\n", + " <sklearn.compose._column_transformer.make_column_selector object at 0x7d34eb307cd0>)])),\n", " ('simpleimputer',\n", " SimpleImputer(fill_value='missing',\n", " strategy='constant')),\n", " ('onehotencode...\n", - " VarianceThreshold(threshold=0.6396211247532)),\n", + " VarianceThreshold(threshold=0.1557560591318)),\n", " ('featureunion-2',\n", " FeatureUnion(transformer_list=[('pipeline-1',\n", " Pipeline(steps=[('passthrough',\n", @@ -1811,17 +1763,17 @@ " ZeroCount())]))])),\n", " ('randomforestclassifier',\n", " RandomForestClassifier(criterion='log_loss',\n", - " n_estimators=47))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + " Pipeline(steps=[('zerocount', ZeroCount())]))])
Passthrough()
PolynomialFeatures()
ZeroCount()
RandomForestClassifier(criterion='log_loss', n_estimators=80)
" ], "text/plain": [ "Pipeline(steps=[('featureunion-1',\n", @@ -1868,12 +1820,12 @@ " Pipeline(steps=[('columntransformer',\n", " ColumnTransformer(transformers=[('passthrough',\n", " 'passthrough',\n", - " )])),\n", + " )])),\n", " ('simpleimputer',\n", " SimpleImputer(fill_value='missing',\n", " strategy='constant')),\n", " ('onehotencode...\n", - " VarianceThreshold(threshold=0.6396211247532)),\n", + " VarianceThreshold(threshold=0.1557560591318)),\n", " ('featureunion-2',\n", " FeatureUnion(transformer_list=[('pipeline-1',\n", " Pipeline(steps=[('passthrough',\n", @@ -1886,10 +1838,10 @@ " ZeroCount())]))])),\n", " ('randomforestclassifier',\n", " RandomForestClassifier(criterion='log_loss',\n", - " n_estimators=47))])" + " n_estimators=80))])" ] }, - "execution_count": 6, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -1900,18 +1852,18 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0,\n", - " 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1,\n", - " 1, 0, 0, 1, 1, 0])" + "array([1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1,\n", + " 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0,\n", + " 1, 0, 0, 0, 0, 0])" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } From 419c7080f8e048125e6022ec436216a95b1ad8ff Mon Sep 17 00:00:00 2001 From: perib Date: Tue, 10 Sep 2024 10:14:04 -0700 Subject: [PATCH 11/14] fix seletors lists --- tpot2/config/get_configspace.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tpot2/config/get_configspace.py b/tpot2/config/get_configspace.py index 46b13b60..51061b93 100644 --- a/tpot2/config/get_configspace.py +++ b/tpot2/config/get_configspace.py @@ -114,8 +114,8 @@ GROUPNAMES = { "selectors": ["SelectFwe", "SelectPercentile", "VarianceThreshold",], - "selectors_classification": ["SelectFwe", "SelectPercentile", "VarianceThreshold", "RFE_classification", "SelectFromModel_classification"], - "selectors_regression": ["SelectFwe", "SelectPercentile", "VarianceThreshold", "RFE_regression", "SelectFromModel_regression"], + "selectors_classification": ["SelectFwe", "SelectPercentile", "RFE_classification", "SelectFromModel_classification"], + "selectors_regression": ["SelectFwe", "SelectPercentile", "RFE_regression", "SelectFromModel_regression"], "classifiers" : ["LGBMClassifier", "BaggingClassifier", 'AdaBoostClassifier', 'BernoulliNB', 'DecisionTreeClassifier', 'ExtraTreesClassifier', 'GaussianNB', 'HistGradientBoostingClassifier', 'KNeighborsClassifier','LinearDiscriminantAnalysis', 'LogisticRegression', "LinearSVC", "SVC", 'MLPClassifier', 'MultinomialNB', "QuadraticDiscriminantAnalysis", 'RandomForestClassifier', 'SGDClassifier', 'XGBClassifier'], "regressors" : ["LGBMRegressor", 'AdaBoostRegressor', "ARDRegression", 'DecisionTreeRegressor', 'ExtraTreesRegressor', 'HistGradientBoostingRegressor', 'KNeighborsRegressor', 'LinearSVR', "MLPRegressor", 'RandomForestRegressor', 'SGDRegressor', 'SVR', 'XGBRegressor'], From db614d84c3e949afa244e66e45acc0d86b4ba789 Mon Sep 17 00:00:00 2001 From: perib Date: Tue, 10 Sep 2024 10:15:36 -0700 Subject: [PATCH 12/14] undo --- tpot2/config/get_configspace.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tpot2/config/get_configspace.py b/tpot2/config/get_configspace.py index 51061b93..46b13b60 100644 --- a/tpot2/config/get_configspace.py +++ b/tpot2/config/get_configspace.py @@ -114,8 +114,8 @@ GROUPNAMES = { "selectors": ["SelectFwe", "SelectPercentile", "VarianceThreshold",], - "selectors_classification": ["SelectFwe", "SelectPercentile", "RFE_classification", "SelectFromModel_classification"], - "selectors_regression": ["SelectFwe", "SelectPercentile", "RFE_regression", "SelectFromModel_regression"], + "selectors_classification": ["SelectFwe", "SelectPercentile", "VarianceThreshold", "RFE_classification", "SelectFromModel_classification"], + "selectors_regression": ["SelectFwe", "SelectPercentile", "VarianceThreshold", "RFE_regression", "SelectFromModel_regression"], "classifiers" : ["LGBMClassifier", "BaggingClassifier", 'AdaBoostClassifier', 'BernoulliNB', 'DecisionTreeClassifier', 'ExtraTreesClassifier', 'GaussianNB', 'HistGradientBoostingClassifier', 'KNeighborsClassifier','LinearDiscriminantAnalysis', 'LogisticRegression', "LinearSVC", "SVC", 'MLPClassifier', 'MultinomialNB', "QuadraticDiscriminantAnalysis", 'RandomForestClassifier', 'SGDClassifier', 'XGBClassifier'], "regressors" : ["LGBMRegressor", 'AdaBoostRegressor', "ARDRegression", 'DecisionTreeRegressor', 'ExtraTreesRegressor', 'HistGradientBoostingRegressor', 'KNeighborsRegressor', 'LinearSVR', "MLPRegressor", 'RandomForestRegressor', 'SGDRegressor', 'SVR', 'XGBRegressor'], From 923782a9255490880fcb76fabe53dc18c59cfeaa Mon Sep 17 00:00:00 2001 From: perib Date: Wed, 11 Sep 2024 13:25:50 -0700 Subject: [PATCH 13/14] added fixed bins, fixed bugs --- tpot2/selectors/map_elites_selection.py | 77 ++++++++++++++++--------- 1 file changed, 51 insertions(+), 26 deletions(-) diff --git a/tpot2/selectors/map_elites_selection.py b/tpot2/selectors/map_elites_selection.py index 27ac6156..c3589801 100644 --- a/tpot2/selectors/map_elites_selection.py +++ b/tpot2/selectors/map_elites_selection.py @@ -1,56 +1,63 @@ import numpy as np #TODO make these functions take in a predetermined set of bins rather than calculating a new set each time -def create_nd_matrix(matrix, k): +def create_nd_matrix(matrix, grid_steps=None, bins=None): + + if grid_steps is not None and bins is not None: + raise ValueError("Either grid_steps or bins must be provided but not both") + # Extract scores and features - scores = [row[0] for row in matrix] - features = [row[1:] for row in matrix] + scores = matrix[:, 0] + features = matrix[:, 1:] # Determine the min and max of each feature min_vals = np.min(features, axis=0) max_vals = np.max(features, axis=0) # Create bins for each feature - bins = [np.linspace(min_vals[i], max_vals[i], k) for i in range(len(min_vals))] + if bins is None: + bins = [np.linspace(min_vals[i], max_vals[i], grid_steps) for i in range(len(min_vals))] # Initialize n-dimensional matrix with negative infinity - nd_matrix = np.full([k-1]*len(min_vals), {"score": -np.inf, "idx": None}) - + nd_matrix = np.full([len(b)+1 for b in bins], {"score": -np.inf, "idx": None}) # Fill in each cell with the highest score for that cell for idx, (score, feature) in enumerate(zip(scores, features)): - indices = [np.digitize(f, bin)-1 for f, bin in zip(feature, bins)] - - indices = [min(i, k-2) for i in indices] #the last bin is inclusive - + indices = [np.digitize(f, bin) for f, bin in zip(feature, bins)] cur_score = nd_matrix[tuple(indices)]["score"] if score > cur_score: nd_matrix[tuple(indices)] = {"score": score, "idx": idx} - return nd_matrix def manhattan(a, b): return sum(abs(val1-val2) for val1, val2 in zip(a,b)) +def map_elites_survival_selector(scores, k=None, rng=None, grid_steps= 10, bins=None): + + if grid_steps is not None and bins is not None: + raise ValueError("Either grid_steps or bins must be provided but not both") -def map_elites_survival_selector(scores, k, rng=None, grid_steps= 10): rng = np.random.default_rng(rng) scores = np.array(scores) #create grid - matrix = create_nd_matrix(scores, grid_steps) + matrix = create_nd_matrix(scores, grid_steps=grid_steps, bins=bins) matrix = matrix.flatten() indexes = [cell["idx"] for cell in matrix if cell["idx"] is not None] return np.unique(indexes) -def map_elites_parent_selector(scores, k, rng=None, grid_steps= 10, manhattan_distance = 2, n_parents=1,): +def map_elites_parent_selector(scores, k, rng=None, manhattan_distance = 2, n_parents=1, grid_steps= 10, bins=None): + + if grid_steps is not None and bins is not None: + raise ValueError("Either grid_steps or bins must be provided but not both") + rng = np.random.default_rng(rng) scores = np.array(scores) #create grid - matrix = create_nd_matrix(scores, grid_steps) + matrix = create_nd_matrix(scores, grid_steps=grid_steps, bins=bins) #return true if cell is not empty f = np.vectorize(lambda x: x["idx"] is not None) @@ -60,8 +67,6 @@ def map_elites_parent_selector(scores, k, rng=None, grid_steps= 10, manhattan_d idxes = [idx for idx in idx_to_coordinates.keys()] #all the indexes of best score per cell - - distance_matrix = np.zeros((len(idxes), len(idxes))) for i, idx1 in enumerate(idxes): @@ -87,17 +92,37 @@ def map_elites_parent_selector(scores, k, rng=None, grid_steps= 10, manhattan_d candidates = candidates[candidates != dm_idx] manhattan_distance += 1 - if manhattan_distance > grid_steps*scores.shape[1]: + if manhattan_distance > np.max(distance_matrix): break if len(candidates) == 0: - parents.append([idx]) + parents.append([idx, idx]) #if no other parents are found, select the same parent twice. weird to crossover with itself though + else: + this_parents = [idx] + for p in range(n_parents-1): + idx2_cords = rng.choice(candidates) + this_parents.append(idxes[idx2_cords]) + parents.append(this_parents) - this_parents = [idx] - for p in range(n_parents-1): - idx2_cords = rng.choice(candidates) - this_parents.append(idxes[idx2_cords]) + return np.array(parents) - parents.append(this_parents) - - return np.array(parents) \ No newline at end of file + +def get_bins_quantiles(arr, k=None, q=None): + bins = [] + + if q is not None and k is not None: + raise ValueError("Only one of k or q can be specified") + + if q is not None: + final_q = q + elif k is not None: + final_q = np.linspace(0, 1, k) + + for i in range(arr.shape[1]): + bins.append(np.quantile(arr[:,i], final_q)) + return bins + +def get_bins(arr, k): + min_vals = np.min(arr, axis=0) + max_vals = np.max(arr, axis=0) + [np.linspace(min_vals[i], max_vals[i], k) for i in range(len(min_vals))] \ No newline at end of file From bcc890e5f827c0510e0963e3d6f78fce4bf92625 Mon Sep 17 00:00:00 2001 From: perib Date: Thu, 12 Sep 2024 15:17:58 -0700 Subject: [PATCH 14/14] fix reproducibility bug --- tpot2/search_spaces/pipelines/choice.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tpot2/search_spaces/pipelines/choice.py b/tpot2/search_spaces/pipelines/choice.py index 25051aa0..af1a7a4d 100644 --- a/tpot2/search_spaces/pipelines/choice.py +++ b/tpot2/search_spaces/pipelines/choice.py @@ -12,7 +12,7 @@ def __init__(self, search_spaces : List[SklearnIndividualGenerator], rng=None) - super().__init__() self.search_spaces = search_spaces - self.node = np.random.default_rng(rng).choice(self.search_spaces).generate() + self.node = np.random.default_rng(rng).choice(self.search_spaces).generate(rng=rng) def mutate(self, rng=None): @@ -23,7 +23,7 @@ def mutate(self, rng=None): return self._mutate_node(rng) def _mutate_select_new_node(self, rng=None): - self.node = random.choice(self.search_spaces).generate() + self.node = random.choice(self.search_spaces).generate(rng=rng) return True def _mutate_node(self, rng=None):