Skip to content

Commit

Permalink
Merge pull request #142 from gketronDS/main
Browse files Browse the repository at this point in the history
Adding SimpleImputer, IterativeImputer and KNNImputer to the config space.
  • Loading branch information
perib authored Jul 29, 2024
2 parents e19701e + 90498c7 commit ed95419
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 9 deletions.
26 changes: 20 additions & 6 deletions tpot2/config/get_configspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@
from sklearn.feature_selection import f_classif, f_regression #TODO create a selectomixin using these?
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.gaussian_process import GaussianProcessRegressor, GaussianProcessClassifier
from sklearn.impute import SimpleImputer
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import SimpleImputer, IterativeImputer, KNNImputer

all_methods = [SGDClassifier, RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, MLPClassifier, DecisionTreeClassifier, XGBClassifier, KNeighborsClassifier, SVC, LogisticRegression, LGBMClassifier, LinearSVC, GaussianNB, BernoulliNB, MultinomialNB, ExtraTreesRegressor, RandomForestRegressor, GradientBoostingRegressor, BaggingRegressor, DecisionTreeRegressor, KNeighborsRegressor, XGBRegressor, ZeroCount, ColumnOneHotEncoder, Binarizer, FastICA, FeatureAgglomeration, MaxAbsScaler, MinMaxScaler, Normalizer, Nystroem, PCA, PolynomialFeatures, RBFSampler, RobustScaler, StandardScaler, SelectFwe, SelectPercentile, VarianceThreshold, SGDRegressor, Ridge, Lasso, ElasticNet, Lars, LassoLars, LassoLarsCV, RidgeCV, SVR, LinearSVR, AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor, BaggingRegressor, ExtraTreesRegressor, DecisionTreeRegressor, KNeighborsRegressor, ElasticNetCV,
AdaBoostClassifier,MLPRegressor,
Expand All @@ -56,7 +57,7 @@
GaussianProcessClassifier, BaggingClassifier,LGBMRegressor,
Passthrough,SkipTransformer,
PassKBinsDiscretizer,
SimpleImputer,
SimpleImputer, IterativeImputer, KNNImputer
]


Expand Down Expand Up @@ -124,7 +125,7 @@
"all_transformers" : ["transformers", "scalers"],

"arithmatic": ["AddTransformer", "mul_neg_1_Transformer", "MulTransformer", "SafeReciprocalTransformer", "EQTransformer", "NETransformer", "GETransformer", "GTTransformer", "LETransformer", "LTTransformer", "MinTransformer", "MaxTransformer"],
"imputers": ["SimpleImputer"],
"imputers": ["SimpleImputer", "IterativeImputer", "KNNImputer"],
"skrebate": ["ReliefF", "SURF", "SURFstar", "MultiSURF"],
"genetic_encoders": ["DominantEncoder", "RecessiveEncoder", "HeterosisEncoder", "UnderDominanceEncoder", "OverDominanceEncoder"],

Expand All @@ -136,8 +137,6 @@

def get_configspace(name, n_classes=3, n_samples=1000, n_features=100, random_state=None):
match name:
case "SimpleImputer":
return imputers.simple_imputer_cs

#autoqtl_builtins.py
case "FeatureEncodingFrequencySelector":
Expand Down Expand Up @@ -352,6 +351,12 @@ def get_configspace(name, n_classes=3, n_samples=1000, n_features=100, random_st
)

#imputers.py
case "SimpleImputer":
return imputers.simple_imputer_cs
case "IterativeImputer":
return imputers.get_IterativeImputer_config_space(n_features=n_features, random_state=random_state)
case "KNNImputer":
return imputers.get_KNNImputer_config_space(n_samples=n_samples)

#mdr_configs.py
case "MDR":
Expand Down Expand Up @@ -443,8 +448,17 @@ def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None
sfm_sp = get_configspace(name="SelectFromModel", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
ext = get_node("ExtraTreesRegressor", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return WrapperPipeline(estimator_search_space=ext, method=SelectFromModel, space=sfm_sp)

# TODO Add IterativeImputer with more estimator methods
'''
if name == "IterativeImputer_learnedestimators":
iterative_sp = get_configspace(name="IterativeImputer", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
regressor_searchspace = get_search_space(["LinearRegression", ..], n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return WrapperPipeline(estimator_search_space=regressor_searchspace, method=IterativeImputer, space=iterative_sp)
'''
#these are nodes that have special search spaces which require custom parsing of the hyperparameters
if name == "IterativeImputer":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=imputers.IterativeImputer_hyperparameter_parser)
if name == "RobustScaler":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=transformers.robust_scaler_hyperparameter_parser)
Expand Down
77 changes: 74 additions & 3 deletions tpot2/config/imputers.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,80 @@
import sklearn
import sklearn.ensemble
import sklearn.linear_model
import sklearn.neighbors
from ConfigSpace import ConfigurationSpace
from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal
from ConfigSpace import EqualsCondition


# Search space for SimpleImputer: only the fill strategy is tuned.
#
# 'add_indicator' is intentionally NOT part of the space: it appends a
# missing-value mask next to the rest of the data and can cause errors. gk
simple_imputer_cs = ConfigurationSpace(
    space = {
        'strategy' : Categorical(
            'strategy',
            ['mean', 'median', 'most_frequent', 'constant'],
        ),
    }
)

def get_IterativeImputer_config_space(n_features, random_state):
    """Build the hyperparameter search space for ``IterativeImputer``.

    Parameters
    ----------
    n_features
        Upper bound (inclusive) for the ``n_nearest_features`` integer
        hyperparameter; the lower bound is 1.
    random_state
        When not ``None``, stored in the space under ``'random_state'`` as a
        fixed value. When ``None``, the key is left out entirely.

    Returns
    -------
    ConfigurationSpace
        Space containing ``initial_strategy``, ``n_nearest_features``,
        ``imputation_order``, ``estimator`` and ``sample_posterior``
        (plus ``random_state`` when given). ``sample_posterior`` is
        conditional on ``estimator == 'Bayesian'``.
    """
    space = {
        'initial_strategy': Categorical(
            'initial_strategy',
            ['mean', 'median', 'most_frequent', 'constant'],
        ),
        'n_nearest_features': Integer(
            'n_nearest_features',
            bounds=(1, n_features),
        ),
        'imputation_order': Categorical(
            'imputation_order',
            ['ascending', 'descending', 'roman', 'arabic', 'random'],
        ),
    }

    # The estimator is sampled as a short string key here; it is turned into
    # an actual estimator object later by the hyperparameter parser.
    estimator = Categorical('estimator', ['Bayesian', 'RFR', 'Ridge', 'KNN'])
    sample_posterior = Categorical('sample_posterior', [True, False])
    # sample_posterior is only sampled when the 'Bayesian' estimator is chosen.
    sampling_condition = EqualsCondition(sample_posterior, estimator, 'Bayesian')

    if random_state is not None:
        # This is required because configspace doesn't allow None as a value
        space['random_state'] = random_state

    cs = ConfigurationSpace(space=space)
    cs.add_hyperparameters([estimator, sample_posterior])
    cs.add_conditions([sampling_condition])
    return cs

def get_KNNImputer_config_space(n_samples):
    """Build the hyperparameter search space for ``KNNImputer``.

    Parameters
    ----------
    n_samples
        Used to derive the upper bound of ``n_neighbors``.

    Returns
    -------
    ConfigurationSpace
        Space with an integer ``n_neighbors`` and a categorical ``weights``
        (``'uniform'`` or ``'distance'``).
    """
    # NOTE(review): the upper bound is max(n_samples, 100), so it is never
    # below 100 and grows with the dataset. If the intent was to cap the
    # neighbour count, this may have been meant as min(...) -- confirm.
    neighbor_upper = max(n_samples, 100)

    n_neighbors = Integer('n_neighbors', bounds=(1, neighbor_upper))
    weights = Categorical('weights', ['uniform', 'distance'])

    return ConfigurationSpace(
        space={
            'n_neighbors': n_neighbors,
            'weights': weights,
        }
    )

def IterativeImputer_hyperparameter_parser(params):
est = params['estimator']
match est:
case 'Bayesian':
estimator = sklearn.linear_model.BayesianRidge()
case 'RFR':
estimator = sklearn.ensemble.RandomForestRegressor()
case 'Ridge':
estimator = sklearn.linear_model.Ridge()
case 'KNN':
estimator = sklearn.neighbors.KNeighborsRegressor()

final_params = {
'estimator' : estimator,
'initial_strategy' : params['initial_strategy'],
'n_nearest_features' : params['n_nearest_features'],
'imputation_order' : params['imputation_order'],
}

if 'sample_posterior' in params:
final_params['sample_posterior'] = params['sample_posterior']

if 'random_state' in params:
final_params['random_state'] = params['random_state']

return final_params

0 comments on commit ed95419

Please sign in to comment.