Skip to content

Commit

Permalink
Merge pull request #132 from perib/new_search_space_def
Browse files Browse the repository at this point in the history
New search space def
  • Loading branch information
perib authored May 16, 2024
2 parents 8728fff + 0b1aa09 commit 2a3ee23
Show file tree
Hide file tree
Showing 6 changed files with 112 additions and 30 deletions.
19 changes: 11 additions & 8 deletions tpot2/config/classifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def get_LogisticRegression_ConfigurationSpace(n_samples, n_features, random_stat

dual = n_samples<=n_features

dual = TRUE_SPECIAL_STRING if dual else FALSE_SPECIAL_STRING
dual = FALSE_SPECIAL_STRING

space = {"solver":"saga",
"max_iter":1000,
Expand All @@ -21,6 +21,7 @@ def get_LogisticRegression_ConfigurationSpace(n_samples, n_features, random_stat
penalty = Categorical('penalty', ['l1', 'l2',"elasticnet"], default='l2')
C = Float('C', (0.01, 1e5), log=True)
l1_ratio = Float('l1_ratio', (0.0, 1.0))
class_weight = Categorical('class_weight', [NONE_SPECIAL_STRING, 'balanced'])

l1_ratio_condition = EqualsCondition(l1_ratio, penalty, 'elasticnet')

Expand All @@ -29,7 +30,7 @@ def get_LogisticRegression_ConfigurationSpace(n_samples, n_features, random_stat


cs = ConfigurationSpace(space)
cs.add_hyperparameters([penalty, C, l1_ratio])
cs.add_hyperparameters([penalty, C, l1_ratio, class_weight])
cs.add_conditions([l1_ratio_condition])

return cs
Expand Down Expand Up @@ -84,6 +85,7 @@ def get_DecisionTreeClassifier_ConfigurationSpace(n_featues, random_state):
'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 20)),
'max_features': Categorical("max_features", [NONE_SPECIAL_STRING, 'sqrt', 'log2']),
'min_weight_fraction_leaf': 0.0,
'class_weight' : Categorical('class_weight', [NONE_SPECIAL_STRING, 'balanced']),
}


Expand All @@ -94,7 +96,7 @@ def get_DecisionTreeClassifier_ConfigurationSpace(n_featues, random_state):
space = space
)


#TODO Does not support predict_proba
def get_LinearSVC_ConfigurationSpace(random_state):
space = {"dual":"auto"}

Expand All @@ -120,12 +122,13 @@ def get_SVC_ConfigurationSpace(random_state):
'max_iter': 3000,
'probability':TRUE_SPECIAL_STRING}

kernel = Categorical("kernel", ['poly', 'rbf', 'sigmoid'])
kernel = Categorical("kernel", ['poly', 'rbf', 'sigmoid', 'linear'])
C = Float('C', (0.01, 1e5), log=True)
degree = Integer("degree", bounds=(1, 5))
gamma = Float("gamma", bounds=(1e-5, 8), log=True)
shrinking = Categorical("shrinking", [True, False])
coef0 = Float("coef0", bounds=(-1, 1))
class_weight = Categorical('class_weight', [NONE_SPECIAL_STRING, 'balanced'])

degree_condition = EqualsCondition(degree, kernel, 'poly')
gamma_condition = InCondition(gamma, kernel, ['rbf', 'poly'])
Expand All @@ -136,7 +139,7 @@ def get_SVC_ConfigurationSpace(random_state):


cs = ConfigurationSpace(space)
cs.add_hyperparameters([kernel, C, coef0, degree, gamma, shrinking])
cs.add_hyperparameters([kernel, C, coef0, degree, gamma, shrinking, class_weight])
cs.add_conditions([degree_condition, gamma_condition, coef0_condition])

return cs
Expand Down Expand Up @@ -187,12 +190,11 @@ def get_XGBClassifier_ConfigurationSpace(random_state,):
def get_LGBMClassifier_ConfigurationSpace(random_state,):

space = {
'objective': 'binary',
'metric': 'binary_logloss',
'boosting_type': Categorical("boosting_type", ['gbdt', 'dart', 'goss']),
'num_leaves': Integer("num_leaves", bounds=(2, 256)),
'max_depth': Integer("max_depth", bounds=(1, 10)),
'n_estimators': Integer("n_estimators", bounds=(10, 100)),
'class_weight': Categorical("class_weight", [NONE_SPECIAL_STRING, 'balanced']),
'verbose':-1,
'n_jobs': 1,
}
Expand All @@ -213,6 +215,7 @@ def get_ExtraTreesClassifier_ConfigurationSpace(random_state):
'min_samples_split': Integer("min_samples_split", bounds=(2, 20)),
'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 20)),
'bootstrap': Categorical("bootstrap", [True, False]),
'class_weight': Categorical("class_weight", [NONE_SPECIAL_STRING, 'balanced']),
'n_jobs': 1,
}

Expand All @@ -228,7 +231,7 @@ def get_ExtraTreesClassifier_ConfigurationSpace(random_state):
def get_SGDClassifier_ConfigurationSpace(random_state):

space = {
'loss': Categorical("loss", ['squared_hinge', 'modified_huber']), #don't include hinge because we have LinearSVC, don't include log because we have LogisticRegression
'loss': Categorical("loss", ['modified_huber']), #don't include hinge because we have LinearSVC, don't include log because we have LogisticRegression. TODO 'squared_hinge'? doesn't support predict proba
'penalty': 'elasticnet',
'alpha': Float("alpha", bounds=(1e-5, 0.01), log=True),
'l1_ratio': Float("l1_ratio", bounds=(0.0, 1.0)),
Expand Down
4 changes: 2 additions & 2 deletions tpot2/config/get_configspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,8 @@
"selectors": ["SelectFwe", "SelectPercentile", "VarianceThreshold",],
"selectors_classification": ["SelectFwe", "SelectPercentile", "VarianceThreshold", "RFE_classification", "SelectFromModel_classification"],
"selectors_regression": ["SelectFwe", "SelectPercentile", "VarianceThreshold", "RFE_regression", "SelectFromModel_regression"],
"classifiers" : ["LGBMRegressor", "BaggingClassifier", 'AdaBoostClassifier', 'BernoulliNB', 'DecisionTreeClassifier', 'ExtraTreesClassifier', 'GaussianNB', 'HistGradientBoostingClassifier', 'KNeighborsClassifier','LinearDiscriminantAnalysis', 'LogisticRegression', "LinearSVC", "SVC", 'MLPClassifier', 'MultinomialNB', "QuadraticDiscriminantAnalysis", 'RandomForestClassifier', 'SGDClassifier', 'XGBClassifier'],
"regressors" : ['AdaBoostRegressor', "ARDRegression", 'DecisionTreeRegressor', 'ExtraTreesRegressor', 'HistGradientBoostingRegressor', 'KNeighborsRegressor', 'LinearSVR', "MLPRegressor", 'RandomForestRegressor', 'SGDRegressor', 'SVR', 'XGBRegressor'],
"classifiers" : ["LGBMClassifier", "BaggingClassifier", 'AdaBoostClassifier', 'BernoulliNB', 'DecisionTreeClassifier', 'ExtraTreesClassifier', 'GaussianNB', 'HistGradientBoostingClassifier', 'KNeighborsClassifier','LinearDiscriminantAnalysis', 'LogisticRegression', "LinearSVC", "SVC", 'MLPClassifier', 'MultinomialNB', "QuadraticDiscriminantAnalysis", 'RandomForestClassifier', 'SGDClassifier', 'XGBClassifier'],
"regressors" : ["LGBMRegressor", 'AdaBoostRegressor', "ARDRegression", 'DecisionTreeRegressor', 'ExtraTreesRegressor', 'HistGradientBoostingRegressor', 'KNeighborsRegressor', 'LinearSVR', "MLPRegressor", 'RandomForestRegressor', 'SGDRegressor', 'SVR', 'XGBRegressor'],


"transformers": ["Binarizer", "PCA", "ZeroCount", "ColumnOneHotEncoder", "FastICA", "FeatureAgglomeration", "Nystroem", "RBFSampler", "QuantileTransformer", "PowerTransformer"],
Expand Down
56 changes: 51 additions & 5 deletions tpot2/search_spaces/pipelines/dynamic_linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,16 +64,62 @@ def _mutate_step(self, rng=None):


def _crossover(self, other, rng=None):
    """Apply the first crossover operator that succeeds, trying them in random order.

    The candidates are ``_crossover_swap_random_steps`` and
    ``_crossover_inner_step``. Each is called as ``cx_func(other, rng)`` and
    the search stops at the first one that returns a truthy value.

    Parameters
    ----------
    other : object
        The other individual; it is passed through unchanged to the operators.
    rng : None, int or numpy.random.Generator, optional
        Source of randomness. It is normalized with ``np.random.default_rng``,
        so a seed or an existing generator makes the operator order
        reproducible.

    Returns
    -------
    bool
        True if one of the operators reported success, False otherwise.
    """
    # default_rng() hands back an existing Generator unchanged, so the caller's
    # random stream is the one used here and in the operators below.
    rng = np.random.default_rng(rng)

    cx_funcs = [self._crossover_swap_random_steps, self._crossover_inner_step]
    rng.shuffle(cx_funcs)

    return any(cx_func(other, rng) for cx_func in cx_funcs)

def _crossover_swap_random_steps(self, other, rng):
    """Swap a few randomly chosen steps between this pipeline and ``other``.

    The positions are drawn independently for each pipeline, so a step may
    land at a different index than the one it came from. Both pipelines are
    modified in place and keep their lengths.

    Parameters
    ----------
    other : object
        Individual exposing a ``pipeline`` sequence of steps.
    rng : None, int or numpy.random.Generator
        Source of randomness, normalized with ``np.random.default_rng``.

    Returns
    -------
    bool
        False if either pipeline has fewer than two steps (nothing is
        changed), True after the swap otherwise.
    """
    # Reuse the caller's generator so seeded runs are reproducible.
    rng = np.random.default_rng(rng)

    if len(self.pipeline) < 2 or len(other.pipeline) < 2:
        return False

    max_steps = max(min(len(self.pipeline), len(other.pipeline)) // 2, 1)

    # Generator.integers excludes the upper bound and rejects an empty range,
    # hence the explicit single-step case.
    if max_steps == 1:
        n_steps_to_swap = 1
    else:
        n_steps_to_swap = rng.integers(1, max_steps)

    other_indexes_to_take = rng.choice(len(other.pipeline), n_steps_to_swap, replace=False)
    self_indexes_to_replace = rng.choice(len(self.pipeline), n_steps_to_swap, replace=False)

    # Swap element by element; the pipelines are indexed one position at a time.
    for self_idx, other_idx in zip(self_indexes_to_replace, other_indexes_to_take):
        self.pipeline[self_idx], other.pipeline[other_idx] = (
            other.pipeline[other_idx],
            self.pipeline[self_idx],
        )

    return True

def _crossover_swap_step(self, other, rng):
    """Swap one randomly chosen step with the step at the same index in ``other``.

    The index is drawn from ``1 .. len(self.pipeline) - 1``, so the first
    step is never exchanged. Both pipelines are modified in place.

    Parameters
    ----------
    other : object
        Individual exposing a ``pipeline`` sequence of steps.
    rng : None, int or numpy.random.Generator
        Source of randomness, normalized with ``np.random.default_rng``.

    Returns
    -------
    bool
        False if the pipelines differ in length or have fewer than two
        steps, True after the swap otherwise.
    """
    if len(self.pipeline) != len(other.pipeline):
        return False

    if len(self.pipeline) < 2:
        return False

    # Reuse the caller's generator so seeded runs are reproducible.
    rng = np.random.default_rng(rng)
    idx = rng.integers(1, len(self.pipeline))

    self.pipeline[idx], other.pipeline[idx] = other.pipeline[idx], self.pipeline[idx]
    return True

def _crossover_inner_step(self, other, rng):
    """Cross over aligned steps of the two pipelines, each with probability 0.5.

    Steps are paired by position. Only positions present in both pipelines
    are considered, so pipelines of different lengths are handled without
    indexing past the end of the shorter one. For each selected pair,
    ``self_step.crossover(other_step, rng)`` is called.

    Parameters
    ----------
    other : object
        Individual exposing a ``pipeline`` sequence of steps.
    rng : None, int or numpy.random.Generator
        Source of randomness, normalized with ``np.random.default_rng``.

    Returns
    -------
    bool
        True if at least one step-level crossover returned a truthy value.
    """
    # Reuse the caller's generator so seeded runs are reproducible.
    rng = np.random.default_rng(rng)

    crossover_success = False
    # zip() stops at the shorter pipeline, which avoids an IndexError when
    # ``other`` has fewer steps than ``self``.
    for own_step, other_step in zip(self.pipeline, other.pipeline):
        if rng.random() < 0.5 and own_step.crossover(other_step, rng):
            crossover_success = True

    return crossover_success

def export_pipeline(self, **graph_pipeline_args):
return [step.export_pipeline(**graph_pipeline_args) for step in self.pipeline]
Expand Down
11 changes: 9 additions & 2 deletions tpot2/search_spaces/pipelines/dynamicunion.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,19 @@ def _crossover_swap_random_steps(self, other, rng):
max_steps = int(min(len(self.pipeline), len(other.pipeline))/2)
max_steps = max(max_steps, 1)

n_steps_to_swap = rng.integers(1, max_steps)
if max_steps == 1:
n_steps_to_swap = 1
else:
n_steps_to_swap = rng.integers(1, max_steps)

other_indexes_to_take = rng.choice(len(other.pipeline), n_steps_to_swap, replace=False)
self_indexes_to_replace = rng.choice(len(self.pipeline), n_steps_to_swap, replace=False)

self.pipeline[self_indexes_to_replace], other.pipeline[other_indexes_to_take] = other.pipeline[other_indexes_to_take], self.pipeline[self_indexes_to_replace]
# self.pipeline[self_indexes_to_replace], other.pipeline[other_indexes_to_take] = other.pipeline[other_indexes_to_take], self.pipeline[self_indexes_to_replace]

for self_idx, other_idx in zip(self_indexes_to_replace, other_indexes_to_take):
self.pipeline[self_idx], other.pipeline[other_idx] = other.pipeline[other_idx], self.pipeline[self_idx]

return True


Expand Down
32 changes: 23 additions & 9 deletions tpot2/search_spaces/pipelines/sequential.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,6 @@ def _crossover(self, other, rng=None):
#swap a random step in the pipeline with the corresponding step in the other pipeline
if len(self.pipeline) != len(other.pipeline):
return False

if len(self.pipeline) < 2:
return False

rng = np.random.default_rng()
cx_funcs = [self._crossover_swap_random_steps, self._crossover_swap_segment, self._crossover_inner_step]
Expand All @@ -51,8 +48,6 @@ def _crossover_swap_step(self, other, rng):
if len(self.pipeline) != len(other.pipeline):
return False

if len(self.pipeline) < 2:
return False

rng = np.random.default_rng()
idx = rng.integers(1,len(self.pipeline))
Expand All @@ -61,12 +56,29 @@ def _crossover_swap_step(self, other, rng):
return True

def _crossover_swap_random_steps(self, other, rng):
    """Swap a few randomly chosen steps with the steps at the same indexes in ``other``.

    Both pipelines are modified in place. Each chosen index is exchanged
    exactly once; all other positions are left untouched.

    Parameters
    ----------
    other : object
        Individual exposing a ``pipeline`` sequence of steps.
    rng : None, int or numpy.random.Generator
        Source of randomness, normalized with ``np.random.default_rng``.

    Returns
    -------
    bool
        False if the pipelines differ in length or have fewer than two
        steps, True after the swap otherwise.
    """
    if len(self.pipeline) != len(other.pipeline):
        return False

    if len(self.pipeline) < 2:
        return False

    # Reuse the caller's generator so seeded runs are reproducible.
    rng = np.random.default_rng(rng)

    max_steps = max(min(len(self.pipeline), len(other.pipeline)) // 2, 1)

    # Generator.integers excludes the upper bound and rejects an empty range,
    # hence the explicit single-step case.
    if max_steps == 1:
        n_steps_to_swap = 1
    else:
        n_steps_to_swap = rng.integers(1, max_steps)

    indexes_to_swap = rng.choice(len(other.pipeline), n_steps_to_swap, replace=False)

    # Swap one position at a time rather than indexing with a mask or an
    # index array, which a plain list does not support.
    for idx in indexes_to_swap:
        self.pipeline[idx], other.pipeline[idx] = other.pipeline[idx], self.pipeline[idx]

    return True

def _crossover_swap_segment(self, other, rng):
Expand Down Expand Up @@ -105,6 +117,8 @@ def unique_id(self):
l = [step.unique_id() for step in self.pipeline]
l = ["SequentialPipeline"] + l
return TupleIndex(tuple(l))




class SequentialPipeline(SklearnIndividualGenerator):
Expand Down
20 changes: 16 additions & 4 deletions tpot2/search_spaces/pipelines/union.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,23 @@ def _crossover_swap_step(self, other, rng):

def _crossover_swap_random_steps(self, other, rng):
    """Swap a few randomly chosen steps between this pipeline and ``other``.

    The positions are drawn independently for each pipeline, so a step may
    land at a different index than the one it came from. Both pipelines are
    modified in place and keep their lengths.

    Parameters
    ----------
    other : object
        Individual exposing a ``pipeline`` sequence of steps.
    rng : None, int or numpy.random.Generator
        Source of randomness, normalized with ``np.random.default_rng``.

    Returns
    -------
    bool
        False if either pipeline is empty (nothing to swap), True after the
        swap otherwise.
    """
    # Sampling one index from an empty pipeline would raise, so report
    # "no crossover" instead.
    if len(self.pipeline) == 0 or len(other.pipeline) == 0:
        return False

    # Reuse the caller's generator so seeded runs are reproducible.
    rng = np.random.default_rng(rng)

    max_steps = max(min(len(self.pipeline), len(other.pipeline)) // 2, 1)

    # Generator.integers excludes the upper bound and rejects an empty range,
    # hence the explicit single-step case.
    if max_steps == 1:
        n_steps_to_swap = 1
    else:
        n_steps_to_swap = rng.integers(1, max_steps)

    other_indexes_to_take = rng.choice(len(other.pipeline), n_steps_to_swap, replace=False)
    self_indexes_to_replace = rng.choice(len(self.pipeline), n_steps_to_swap, replace=False)

    # Swap one position at a time rather than indexing with a mask or an
    # index array, which a plain list does not support.
    for self_idx, other_idx in zip(self_indexes_to_replace, other_indexes_to_take):
        self.pipeline[self_idx], other.pipeline[other_idx] = (
            other.pipeline[other_idx],
            self.pipeline[self_idx],
        )

    return True

def _crossover_inner_step(self, other, rng):
Expand Down

0 comments on commit 2a3ee23

Please sign in to comment.