diff --git a/.gitignore b/.gitignore index bff01e19..aa4eabb7 100644 --- a/.gitignore +++ b/.gitignore @@ -13,4 +13,5 @@ dask-worker-space/ target/ .venv/ build/* -*.egg \ No newline at end of file +*.egg +*.coverage* \ No newline at end of file diff --git a/tpot2/config/classifiers_sklearnex.py b/tpot2/config/classifiers_sklearnex.py index ad581898..e16d2c03 100644 --- a/tpot2/config/classifiers_sklearnex.py +++ b/tpot2/config/classifiers_sklearnex.py @@ -1,6 +1,6 @@ from ConfigSpace import ConfigurationSpace from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal - +from ..search_spaces.nodes.estimator_node import NONE_SPECIAL_STRING, TRUE_SPECIAL_STRING, FALSE_SPECIAL_STRING def get_RandomForestClassifier_ConfigurationSpace(random_state): space = { @@ -66,10 +66,9 @@ def get_NuSVC_ConfigurationSpace(random_state): space = { 'nu': Float("nu", bounds=(0.05, 1.0)), 'kernel': Categorical("kernel", ['poly', 'rbf', 'linear', 'sigmoid']), - 'C': Float("C", bounds=(1e-4, 25), log=True), + #'C': Float("C", bounds=(1e-4, 25), log=True), 'degree': Integer("degree", bounds=(1, 4)), - #TODO work around for None value? - #'class_weight': Categorical("class_weight", [None, 'balanced']), + 'class_weight': Categorical("class_weight", [NONE_SPECIAL_STRING, 'balanced']), 'max_iter': 3000, 'tol': 0.005, 'probability': Categorical("probability", [True]), # configspace doesn't allow bools as a default value? 
but does allow them as a value inside a Categorical diff --git a/tpot2/config/get_configspace.py b/tpot2/config/get_configspace.py index 19dfb531..15b889ac 100644 --- a/tpot2/config/get_configspace.py +++ b/tpot2/config/get_configspace.py @@ -2,6 +2,7 @@ import sys import numpy as np import warnings +import importlib.util from ..search_spaces.nodes import EstimatorNode from ..search_spaces.pipelines import ChoicePipeline, WrapperPipeline @@ -27,7 +28,7 @@ from sklearn.linear_model import SGDClassifier from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier -from sklearn.neural_network import MLPClassifier +from sklearn.neural_network import MLPClassifier, MLPRegressor from sklearn.tree import DecisionTreeClassifier from xgboost import XGBClassifier from sklearn.neighbors import KNeighborsClassifier @@ -101,51 +102,64 @@ from tpot2.builtin_modules import AddTransformer, mul_neg_1_Transformer, MulTransformer, SafeReciprocalTransformer, EQTransformer, NETransformer, GETransformer, GTTransformer, LETransformer, LTTransformer, MinTransformer, MaxTransformer, ZeroTransformer, OneTransformer, NTransformer +from tpot2.builtin_modules.genetic_encoders import DominantEncoder, RecessiveEncoder, HeterosisEncoder, UnderDominanceEncoder, OverDominanceEncoder + #MDR all_methods = [SGDClassifier, RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, MLPClassifier, DecisionTreeClassifier, XGBClassifier, KNeighborsClassifier, SVC, LogisticRegression, LGBMClassifier, LinearSVC, GaussianNB, BernoulliNB, MultinomialNB, ExtraTreesRegressor, RandomForestRegressor, GradientBoostingRegressor, BaggingRegressor, DecisionTreeRegressor, KNeighborsRegressor, XGBRegressor, ZeroCount, OneHotEncoder, ColumnOneHotEncoder, Binarizer, FastICA, FeatureAgglomeration, MaxAbsScaler, MinMaxScaler, Normalizer, Nystroem, PCA, PolynomialFeatures, RBFSampler, RobustScaler, StandardScaler, SelectFwe, SelectPercentile, VarianceThreshold, 
SGDRegressor, Ridge, Lasso, ElasticNet, Lars, LassoLars, LassoLarsCV, RidgeCV, SVR, LinearSVR, AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor, BaggingRegressor, ExtraTreesRegressor, DecisionTreeRegressor, KNeighborsRegressor, ElasticNetCV, - AdaBoostClassifier, + AdaBoostClassifier,MLPRegressor, GaussianProcessRegressor, HistGradientBoostingClassifier, HistGradientBoostingRegressor, AddTransformer, mul_neg_1_Transformer, MulTransformer, SafeReciprocalTransformer, EQTransformer, NETransformer, GETransformer, GTTransformer, LETransformer, LTTransformer, MinTransformer, MaxTransformer, ZeroTransformer, OneTransformer, NTransformer, PowerTransformer, QuantileTransformer,ARDRegression, QuadraticDiscriminantAnalysis, PassiveAggressiveClassifier, LinearDiscriminantAnalysis, + DominantEncoder, RecessiveEncoder, HeterosisEncoder, UnderDominanceEncoder, OverDominanceEncoder, ] #if mdr is installed -if 'mdr' in sys.modules: +if importlib.util.find_spec('mdr') is not None: from mdr import MDR, ContinuousMDR all_methods.append(MDR) all_methods.append(ContinuousMDR) -if 'skrebate' in sys.modules: +if importlib.util.find_spec('skrebate') is not None: from skrebate import ReliefF, SURF, SURFstar, MultiSURF all_methods.append(ReliefF) all_methods.append(SURF) all_methods.append(SURFstar) all_methods.append(MultiSURF) -if 'sklearnex' in sys.modules: +STRING_TO_CLASS = { + t.__name__: t for t in all_methods +} + +if importlib.util.find_spec('sklearnex') is not None: import sklearnex + import sklearnex.linear_model + import sklearnex.svm + import sklearnex.ensemble + import sklearnex.neighbors - all_methods.append(sklearnex.linear_model.LinearRegression) - all_methods.append(sklearnex.linear_model.Ridge) - all_methods.append(sklearnex.linear_model.Lasso) - all_methods.append(sklearnex.linear_model.ElasticNet) - all_methods.append(sklearnex.svm.SVR) - all_methods.append(sklearnex.svm.NuSVR) - all_methods.append(sklearnex.ensemble.RandomForestRegressor) - 
all_methods.append(sklearnex.neighbors.KNeighborsRegressor) - all_methods.append(sklearnex.ensemble.RandomForestClassifier) - all_methods.append(sklearnex.neighbors.KNeighborsClassifier) - all_methods.append(sklearnex.svm.SVC) - all_methods.append(sklearnex.svm.NuSVC) - all_methods.append(sklearnex.linear_model.LogisticRegression) + + sklearnex_methods = [] + + sklearnex_methods.append(sklearnex.linear_model.LinearRegression) + sklearnex_methods.append(sklearnex.linear_model.Ridge) + sklearnex_methods.append(sklearnex.linear_model.Lasso) + sklearnex_methods.append(sklearnex.linear_model.ElasticNet) + sklearnex_methods.append(sklearnex.svm.SVR) + sklearnex_methods.append(sklearnex.svm.NuSVR) + sklearnex_methods.append(sklearnex.ensemble.RandomForestRegressor) + sklearnex_methods.append(sklearnex.neighbors.KNeighborsRegressor) + sklearnex_methods.append(sklearnex.ensemble.RandomForestClassifier) + sklearnex_methods.append(sklearnex.neighbors.KNeighborsClassifier) + sklearnex_methods.append(sklearnex.svm.SVC) + sklearnex_methods.append(sklearnex.svm.NuSVC) + sklearnex_methods.append(sklearnex.linear_model.LogisticRegression) + + STRING_TO_CLASS.update({f"{t.__name__}_sklearnex": t for t in sklearnex_methods}) -STRING_TO_CLASS = { - t.__name__: t for t in all_methods -} @@ -439,15 +453,6 @@ def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_st if name in GROUPNAMES: name_list = GROUPNAMES[name] return get_search_space(name_list, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state) - - if name is None: - warnings.warn(f"name is None") - return None - - if name not in STRING_TO_CLASS: - print("FOOO ", name) - warnings.warn(f"Could not find class for {name}") - return None return get_node(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state) @@ -458,21 +463,21 @@ def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None # TODO Add 
AdaBoostRegressor, AdaBoostClassifier as wrappers? wrap a decision tree with different params? # TODO add other meta-estimators? if name == "RFE_classification": - rfe_sp = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) + rfe_sp = get_configspace(name="RFE", n_classes=n_classes, n_samples=n_samples, random_state=random_state) ext = get_node("ExtraTreesClassifier", n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return WrapperPipeline(nodegen=ext, method=RFE, configspace=rfe_sp) + return WrapperPipeline(nodegen=ext, method=RFE, space=rfe_sp) if name == "RFE_regression": - rfe_sp = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) + rfe_sp = get_configspace(name="RFE", n_classes=n_classes, n_samples=n_samples, random_state=random_state) ext = get_node("ExtraTreesRegressor", n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return WrapperPipeline(nodegen=ext, method=RFE, configspace=rfe_sp) + return WrapperPipeline(nodegen=ext, method=RFE, space=rfe_sp) if name == "SelectFromModel_classification": - sfm_sp = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) + sfm_sp = get_configspace(name="SelectFromModel", n_classes=n_classes, n_samples=n_samples, random_state=random_state) ext = get_node("ExtraTreesClassifier", n_classes=n_classes, n_samples=n_samples, random_state=random_state) - return WrapperPipeline(nodegen=ext, method=SelectFromModel, configspace=sfm_sp) + return WrapperPipeline(nodegen=ext, method=SelectFromModel, space=sfm_sp) if name == "SelectFromModel_regression": - sfm_sp = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state) + sfm_sp = get_configspace(name="SelectFromModel", n_classes=n_classes, n_samples=n_samples, random_state=random_state) ext = get_node("ExtraTreesRegressor", n_classes=n_classes, n_samples=n_samples, random_state=random_state) - 
return WrapperPipeline(nodegen=ext, method=SelectFromModel, configspace=sfm_sp) + return WrapperPipeline(nodegen=ext, method=SelectFromModel, space=sfm_sp) #these are nodes that have special search spaces which require custom parsing of the hyperparameters if name == "RobustScaler": diff --git a/tpot2/config/tests/test_get_configspace.py b/tpot2/config/tests/test_get_configspace.py index bccb349f..bdab516b 100644 --- a/tpot2/config/tests/test_get_configspace.py +++ b/tpot2/config/tests/test_get_configspace.py @@ -6,7 +6,7 @@ import tpot2.config -from ..get_configspace import STRING_TO_CLASS +from ..get_configspace import STRING_TO_CLASS, GROUPNAMES def test_loop_through_all_hyperparameters(): @@ -22,4 +22,20 @@ def test_loop_through_all_hyperparameters(): for i in range(1): estnode = estnode_gen.generate() est = estnode.export_pipeline() - \ No newline at end of file + +def test_loop_through_groupnames(): + + n_classes=3 + n_samples=100 + n_features=100 + random_state=None + + for groupname, group in GROUPNAMES.items(): + for class_name in group: + print(class_name) + estnode_gen = tpot2.config.get_search_space(class_name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state) + + #generate 100 random hyperparameters and make sure they are all valid + for i in range(100): + estnode = estnode_gen.generate() + est = estnode.export_pipeline() \ No newline at end of file diff --git a/tpot2/search_spaces/nodes/estimator_node.py b/tpot2/search_spaces/nodes/estimator_node.py index 0ec71e98..15b79b3e 100644 --- a/tpot2/search_spaces/nodes/estimator_node.py +++ b/tpot2/search_spaces/nodes/estimator_node.py @@ -45,7 +45,7 @@ def __init__(self, method: type, else: rng = np.random.default_rng(rng) self.space.seed(rng.integers(0, 2**32)) - self.hyperparameters = self.space.sample_configuration().get_dictionary() + self.hyperparameters = dict(self.space.sample_configuration()) self.check_hyperparameters_for_None() @@ -55,7 +55,7 @@ def 
mutate(self, rng=None): rng = np.random.default_rng(rng) self.space.seed(rng.integers(0, 2**32)) - self.hyperparameters = self.space.sample_configuration().get_dictionary() + self.hyperparameters = dict(self.space.sample_configuration()) self.check_hyperparameters_for_None() return True diff --git a/tpot2/search_spaces/pipelines/wrapper.py b/tpot2/search_spaces/pipelines/wrapper.py index 3521d8dd..712da75d 100644 --- a/tpot2/search_spaces/pipelines/wrapper.py +++ b/tpot2/search_spaces/pipelines/wrapper.py @@ -21,14 +21,14 @@ def __init__(self, super().__init__() self.nodegen = nodegen - self.node = np.random.default_rng(rng).choice(self.nodegen).generate() + self.node = self.nodegen.generate(rng) self.method = method self.space = space rng = np.random.default_rng(rng) self.space.seed(rng.integers(0, 2**32)) - self.hyperparameters = self.space.sample_configuration().get_dictionary() + self.hyperparameters = dict(self.space.sample_configuration()) @@ -43,7 +43,7 @@ def mutate(self, rng=None): def _mutate_hyperparameters(self, rng=None): rng = np.random.default_rng(rng) self.space.seed(rng.integers(0, 2**32)) - self.hyperparameters = self.space.sample_configuration().get_dictionary() + self.hyperparameters = dict(self.space.sample_configuration()) return True def _mutate_node(self, rng=None):