diff --git a/tpot2/config/classifiers.py b/tpot2/config/classifiers.py index cad90a90..f7311a39 100644 --- a/tpot2/config/classifiers.py +++ b/tpot2/config/classifiers.py @@ -10,7 +10,7 @@ def get_LogisticRegression_ConfigurationSpace(n_samples, n_features, random_stat dual = n_samples<=n_features - dual = TRUE_SPECIAL_STRING if dual else FALSE_SPECIAL_STRING + dual = FALSE_SPECIAL_STRING space = {"solver":"saga", "max_iter":1000, @@ -21,6 +21,7 @@ def get_LogisticRegression_ConfigurationSpace(n_samples, n_features, random_stat penalty = Categorical('penalty', ['l1', 'l2',"elasticnet"], default='l2') C = Float('C', (0.01, 1e5), log=True) l1_ratio = Float('l1_ratio', (0.0, 1.0)) + class_weight = Categorical('class_weight', [NONE_SPECIAL_STRING, 'balanced']) l1_ratio_condition = EqualsCondition(l1_ratio, penalty, 'elasticnet') @@ -29,7 +30,7 @@ def get_LogisticRegression_ConfigurationSpace(n_samples, n_features, random_stat cs = ConfigurationSpace(space) - cs.add_hyperparameters([penalty, C, l1_ratio]) + cs.add_hyperparameters([penalty, C, l1_ratio, class_weight]) cs.add_conditions([l1_ratio_condition]) return cs @@ -84,6 +85,7 @@ def get_DecisionTreeClassifier_ConfigurationSpace(n_featues, random_state): 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 20)), 'max_features': Categorical("max_features", [NONE_SPECIAL_STRING, 'sqrt', 'log2']), 'min_weight_fraction_leaf': 0.0, + 'class_weight' : Categorical('class_weight', [NONE_SPECIAL_STRING, 'balanced']), } @@ -94,7 +96,7 @@ def get_DecisionTreeClassifier_ConfigurationSpace(n_featues, random_state): space = space ) - +#TODO Does not support predict_proba def get_LinearSVC_ConfigurationSpace(random_state): space = {"dual":"auto"} @@ -120,12 +122,13 @@ def get_SVC_ConfigurationSpace(random_state): 'max_iter': 3000, 'probability':TRUE_SPECIAL_STRING} - kernel = Categorical("kernel", ['poly', 'rbf', 'sigmoid']) + kernel = Categorical("kernel", ['poly', 'rbf', 'sigmoid', 'linear']) C = Float('C', (0.01, 1e5), log=True) degree = Integer("degree", bounds=(1, 5)) gamma = Float("gamma", bounds=(1e-5, 8), log=True) shrinking = Categorical("shrinking", [True, False]) coef0 = Float("coef0", bounds=(-1, 1)) + class_weight = Categorical('class_weight', [NONE_SPECIAL_STRING, 'balanced']) degree_condition = EqualsCondition(degree, kernel, 'poly') gamma_condition = InCondition(gamma, kernel, ['rbf', 'poly']) @@ -136,7 +139,7 @@ def get_SVC_ConfigurationSpace(random_state): cs = ConfigurationSpace(space) - cs.add_hyperparameters([kernel, C, coef0, degree, gamma, shrinking]) + cs.add_hyperparameters([kernel, C, coef0, degree, gamma, shrinking, class_weight]) cs.add_conditions([degree_condition, gamma_condition, coef0_condition]) return cs @@ -187,12 +190,11 @@ def get_XGBClassifier_ConfigurationSpace(random_state,): def get_LGBMClassifier_ConfigurationSpace(random_state,): space = { - 'objective': 'binary', - 'metric': 'binary_logloss', 'boosting_type': Categorical("boosting_type", ['gbdt', 'dart', 'goss']), 'num_leaves': Integer("num_leaves", bounds=(2, 256)), 'max_depth': Integer("max_depth", bounds=(1, 10)), 'n_estimators': Integer("n_estimators", bounds=(10, 100)), + 'class_weight': Categorical("class_weight", [NONE_SPECIAL_STRING, 'balanced']), 'verbose':-1, 'n_jobs': 1, } @@ -213,6 +215,7 @@ def get_ExtraTreesClassifier_ConfigurationSpace(random_state): 'min_samples_split': Integer("min_samples_split", bounds=(2, 20)), 'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 20)), 'bootstrap': Categorical("bootstrap", [True, False]), + 'class_weight': Categorical("class_weight", [NONE_SPECIAL_STRING, 'balanced']), 'n_jobs': 1, } @@ -228,7 +231,7 @@ def get_ExtraTreesClassifier_ConfigurationSpace(random_state): def get_SGDClassifier_ConfigurationSpace(random_state): space = { - 'loss': Categorical("loss", ['squared_hinge', 'modified_huber']), #don't include hinge because we have LinearSVC, don't include log because we have LogisticRegression + 'loss': Categorical("loss", ['modified_huber']), #don't include hinge because we have LinearSVC, don't include log because we have LogisticRegression. TODO 'squared_hinge'? doesn't support predict proba 'penalty': 'elasticnet', 'alpha': Float("alpha", bounds=(1e-5, 0.01), log=True), 'l1_ratio': Float("l1_ratio", bounds=(0.0, 1.0)), diff --git a/tpot2/config/get_configspace.py b/tpot2/config/get_configspace.py index 05187067..70d2317d 100644 --- a/tpot2/config/get_configspace.py +++ b/tpot2/config/get_configspace.py @@ -113,8 +113,8 @@ "selectors": ["SelectFwe", "SelectPercentile", "VarianceThreshold",], "selectors_classification": ["SelectFwe", "SelectPercentile", "VarianceThreshold", "RFE_classification", "SelectFromModel_classification"], "selectors_regression": ["SelectFwe", "SelectPercentile", "VarianceThreshold", "RFE_regression", "SelectFromModel_regression"], - "classifiers" : ["LGBMRegressor", "BaggingClassifier", 'AdaBoostClassifier', 'BernoulliNB', 'DecisionTreeClassifier', 'ExtraTreesClassifier', 'GaussianNB', 'HistGradientBoostingClassifier', 'KNeighborsClassifier','LinearDiscriminantAnalysis', 'LogisticRegression', "LinearSVC", "SVC", 'MLPClassifier', 'MultinomialNB', "QuadraticDiscriminantAnalysis", 'RandomForestClassifier', 'SGDClassifier', 'XGBClassifier'], - "regressors" : ['AdaBoostRegressor', "ARDRegression", 'DecisionTreeRegressor', 'ExtraTreesRegressor', 'HistGradientBoostingRegressor', 'KNeighborsRegressor', 'LinearSVR', "MLPRegressor", 'RandomForestRegressor', 'SGDRegressor', 'SVR', 'XGBRegressor'], + "classifiers" : ["LGBMClassifier", "BaggingClassifier", 'AdaBoostClassifier', 'BernoulliNB', 'DecisionTreeClassifier', 'ExtraTreesClassifier', 'GaussianNB', 'HistGradientBoostingClassifier', 'KNeighborsClassifier','LinearDiscriminantAnalysis', 'LogisticRegression', "LinearSVC", "SVC", 'MLPClassifier', 'MultinomialNB', "QuadraticDiscriminantAnalysis", 'RandomForestClassifier', 'SGDClassifier', 'XGBClassifier'], + "regressors" : ["LGBMRegressor", 'AdaBoostRegressor', "ARDRegression", 'DecisionTreeRegressor', 'ExtraTreesRegressor', 'HistGradientBoostingRegressor', 'KNeighborsRegressor', 'LinearSVR', "MLPRegressor", 'RandomForestRegressor', 'SGDRegressor', 'SVR', 'XGBRegressor'], "transformers": ["Binarizer", "PCA", "ZeroCount", "ColumnOneHotEncoder", "FastICA", "FeatureAgglomeration", "Nystroem", "RBFSampler", "QuantileTransformer", "PowerTransformer"], diff --git a/tpot2/search_spaces/pipelines/dynamic_linear.py b/tpot2/search_spaces/pipelines/dynamic_linear.py index 33c1f670..f657df51 100644 --- a/tpot2/search_spaces/pipelines/dynamic_linear.py +++ b/tpot2/search_spaces/pipelines/dynamic_linear.py @@ -64,16 +64,62 @@ def _mutate_step(self, rng=None): def _crossover(self, other, rng=None): + #swap a random step in the pipeline with the corresponding step in the other pipeline + + rng = np.random.default_rng() + cx_funcs = [self._crossover_swap_random_steps, self._crossover_inner_step] + + rng.shuffle(cx_funcs) + for cx_func in cx_funcs: + if cx_func(other, rng): + return True + + return False + + def _crossover_swap_random_steps(self, other, rng): rng = np.random.default_rng() - if len(self.pipeline) < 2 or len(other.pipeline) < 2: - return False + max_steps = int(min(len(self.pipeline), len(other.pipeline))/2) + max_steps = max(max_steps, 1) + + if max_steps == 1: + n_steps_to_swap = 1 + else: + n_steps_to_swap = rng.integers(1, max_steps) - idx = rng.integers(1,len(self.pipeline)) - idx2 = rng.integers(1,len(other.pipeline)) - self.pipeline[idx:] = copy.deepcopy(other.pipeline[idx2:]) + other_indexes_to_take = rng.choice(len(other.pipeline), n_steps_to_swap, replace=False) + self_indexes_to_replace = rng.choice(len(self.pipeline), n_steps_to_swap, replace=False) + + # self.pipeline[self_indexes_to_replace], other.pipeline[other_indexes_to_take] = other.pipeline[other_indexes_to_take], self.pipeline[self_indexes_to_replace] + + for self_idx, other_idx in zip(self_indexes_to_replace, other_indexes_to_take): + self.pipeline[self_idx], other.pipeline[other_idx] = other.pipeline[other_idx], self.pipeline[self_idx] + + return True + + def _crossover_swap_step(self, other, rng): + if len(self.pipeline) != len(other.pipeline): + return False + if len(self.pipeline) < 2: + return False + + rng = np.random.default_rng() + idx = rng.integers(1,len(self.pipeline)) + + self.pipeline[idx], other.pipeline[idx] = other.pipeline[idx], self.pipeline[idx] return True + + def _crossover_inner_step(self, other, rng): + rng = np.random.default_rng() + + crossover_success = False + for idx in range(len(self.pipeline)): + if rng.random() < 0.5: + if self.pipeline[idx].crossover(other.pipeline[idx], rng): + crossover_success = True + + return crossover_success def export_pipeline(self, **graph_pipeline_args): return [step.export_pipeline(**graph_pipeline_args) for step in self.pipeline] diff --git a/tpot2/search_spaces/pipelines/dynamicunion.py b/tpot2/search_spaces/pipelines/dynamicunion.py index 72b9cb7c..0231ab43 100644 --- a/tpot2/search_spaces/pipelines/dynamicunion.py +++ b/tpot2/search_spaces/pipelines/dynamicunion.py @@ -107,12 +107,19 @@ def _crossover_swap_random_steps(self, other, rng): max_steps = int(min(len(self.pipeline), len(other.pipeline))/2) max_steps = max(max_steps, 1) - n_steps_to_swap = rng.integers(1, max_steps) + if max_steps == 1: + n_steps_to_swap = 1 + else: + n_steps_to_swap = rng.integers(1, max_steps) other_indexes_to_take = rng.choice(len(other.pipeline), n_steps_to_swap, replace=False) self_indexes_to_replace = rng.choice(len(self.pipeline), n_steps_to_swap, replace=False) - self.pipeline[self_indexes_to_replace], other.pipeline[other_indexes_to_take] = other.pipeline[other_indexes_to_take], self.pipeline[self_indexes_to_replace] + # self.pipeline[self_indexes_to_replace], other.pipeline[other_indexes_to_take] = other.pipeline[other_indexes_to_take], self.pipeline[self_indexes_to_replace] + + for self_idx, other_idx in zip(self_indexes_to_replace, other_indexes_to_take): + self.pipeline[self_idx], other.pipeline[other_idx] = other.pipeline[other_idx], self.pipeline[self_idx] + return True diff --git a/tpot2/search_spaces/pipelines/sequential.py b/tpot2/search_spaces/pipelines/sequential.py index 8667cd5f..26b6249b 100644 --- a/tpot2/search_spaces/pipelines/sequential.py +++ b/tpot2/search_spaces/pipelines/sequential.py @@ -33,9 +33,6 @@ def _crossover(self, other, rng=None): #swap a random step in the pipeline with the corresponding step in the other pipeline if len(self.pipeline) != len(other.pipeline): return False - - if len(self.pipeline) < 2: - return False rng = np.random.default_rng() cx_funcs = [self._crossover_swap_random_steps, self._crossover_swap_segment, self._crossover_inner_step] @@ -51,8 +48,6 @@ def _crossover_swap_step(self, other, rng): if len(self.pipeline) != len(other.pipeline): return False - if len(self.pipeline) < 2: - return False rng = np.random.default_rng() idx = rng.integers(1,len(self.pipeline)) @@ -61,12 +56,29 @@ def _crossover_swap_step(self, other, rng): return True def _crossover_swap_random_steps(self, other, rng): + + if len(self.pipeline) != len(other.pipeline): + return False + + if len(self.pipeline) < 2: + return False + rng = np.random.default_rng() - #selet steps idxs with probability 0.5 - idxs = rng.random(len(self.pipeline)) < 0.5 - #swap steps - self.pipeline[idxs], other.pipeline[idxs] = other.pipeline[idxs], self.pipeline[idxs] + max_steps = int(min(len(self.pipeline), len(other.pipeline))/2) + max_steps = max(max_steps, 1) + + if max_steps == 1: + n_steps_to_swap = 1 + else: + n_steps_to_swap = rng.integers(1, max_steps) + + indexes_to_swap = rng.choice(len(other.pipeline), n_steps_to_swap, replace=False) + + for idx in indexes_to_swap: + self.pipeline[idx], other.pipeline[idx] = other.pipeline[idx], self.pipeline[idx] + + return True def _crossover_swap_segment(self, other, rng): @@ -105,6 +117,8 @@ def unique_id(self): l = [step.unique_id() for step in self.pipeline] l = ["SequentialPipeline"] + l return TupleIndex(tuple(l)) + + class SequentialPipeline(SklearnIndividualGenerator): diff --git a/tpot2/search_spaces/pipelines/union.py b/tpot2/search_spaces/pipelines/union.py index c95408fe..43eaa395 100644 --- a/tpot2/search_spaces/pipelines/union.py +++ b/tpot2/search_spaces/pipelines/union.py @@ -51,11 +51,23 @@ def _crossover_swap_step(self, other, rng): def _crossover_swap_random_steps(self, other, rng): rng = np.random.default_rng() - #selet steps idxs with probability 0.5 - idxs = rng.random(len(self.pipeline)) < 0.5 - #swap steps - self.pipeline[idxs], other.pipeline[idxs] = other.pipeline[idxs], self.pipeline[idxs] + max_steps = int(min(len(self.pipeline), len(other.pipeline))/2) + max_steps = max(max_steps, 1) + + if max_steps == 1: + n_steps_to_swap = 1 + else: + n_steps_to_swap = rng.integers(1, max_steps) + + other_indexes_to_take = rng.choice(len(other.pipeline), n_steps_to_swap, replace=False) + self_indexes_to_replace = rng.choice(len(self.pipeline), n_steps_to_swap, replace=False) + + # self.pipeline[self_indexes_to_replace], other.pipeline[other_indexes_to_take] = other.pipeline[other_indexes_to_take], self.pipeline[self_indexes_to_replace] + + for self_idx, other_idx in zip(self_indexes_to_replace, other_indexes_to_take): + self.pipeline[self_idx], other.pipeline[other_idx] = other.pipeline[other_idx], self.pipeline[self_idx] + return True def _crossover_inner_step(self, other, rng):