Skip to content

Commit

Permalink
Merge pull request #126 from perib/new_search_space_def
Browse files Browse the repository at this point in the history
more fixes with search spaces - wrapper, make sure all supported modu…
  • Loading branch information
perib authored Apr 18, 2024
2 parents 450a7e5 + ef42226 commit b2a00ed
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 50 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ dask-worker-space/
target/
.venv/
build/*
*.egg
*.egg
*.coverage*
7 changes: 3 additions & 4 deletions tpot2/config/classifiers_sklearnex.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from ConfigSpace import ConfigurationSpace
from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal

from ..search_spaces.nodes.estimator_node import NONE_SPECIAL_STRING, TRUE_SPECIAL_STRING, FALSE_SPECIAL_STRING

def get_RandomForestClassifier_ConfigurationSpace(random_state):
space = {
Expand Down Expand Up @@ -66,10 +66,9 @@ def get_NuSVC_ConfigurationSpace(random_state):
space = {
'nu': Float("nu", bounds=(0.05, 1.0)),
'kernel': Categorical("kernel", ['poly', 'rbf', 'linear', 'sigmoid']),
'C': Float("C", bounds=(1e-4, 25), log=True),
#'C': Float("C", bounds=(1e-4, 25), log=True),
'degree': Integer("degree", bounds=(1, 4)),
#TODO work around for None value?
#'class_weight': Categorical("class_weight", [None, 'balanced']),
'class_weight': Categorical("class_weight", [NONE_SPECIAL_STRING, 'balanced']),
'max_iter': 3000,
'tol': 0.005,
'probability': Categorical("probability", [True]), # configspace doesn't allow bools as a default value? but does allow them as a value inside a Categorical
Expand Down
81 changes: 43 additions & 38 deletions tpot2/config/get_configspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import sys
import numpy as np
import warnings
import importlib.util

from ..search_spaces.nodes import EstimatorNode
from ..search_spaces.pipelines import ChoicePipeline, WrapperPipeline
Expand All @@ -27,7 +28,7 @@

from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier
Expand Down Expand Up @@ -101,51 +102,64 @@
from tpot2.builtin_modules import AddTransformer, mul_neg_1_Transformer, MulTransformer, SafeReciprocalTransformer, EQTransformer, NETransformer, GETransformer, GTTransformer, LETransformer, LTTransformer, MinTransformer, MaxTransformer, ZeroTransformer, OneTransformer, NTransformer


from tpot2.builtin_modules.genetic_encoders import DominantEncoder, RecessiveEncoder, HeterosisEncoder, UnderDominanceEncoder, OverDominanceEncoder

#MDR


all_methods = [SGDClassifier, RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, MLPClassifier, DecisionTreeClassifier, XGBClassifier, KNeighborsClassifier, SVC, LogisticRegression, LGBMClassifier, LinearSVC, GaussianNB, BernoulliNB, MultinomialNB, ExtraTreesRegressor, RandomForestRegressor, GradientBoostingRegressor, BaggingRegressor, DecisionTreeRegressor, KNeighborsRegressor, XGBRegressor, ZeroCount, OneHotEncoder, ColumnOneHotEncoder, Binarizer, FastICA, FeatureAgglomeration, MaxAbsScaler, MinMaxScaler, Normalizer, Nystroem, PCA, PolynomialFeatures, RBFSampler, RobustScaler, StandardScaler, SelectFwe, SelectPercentile, VarianceThreshold, SGDRegressor, Ridge, Lasso, ElasticNet, Lars, LassoLars, LassoLarsCV, RidgeCV, SVR, LinearSVR, AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor, BaggingRegressor, ExtraTreesRegressor, DecisionTreeRegressor, KNeighborsRegressor, ElasticNetCV,
AdaBoostClassifier,
AdaBoostClassifier,MLPRegressor,
GaussianProcessRegressor, HistGradientBoostingClassifier, HistGradientBoostingRegressor,
AddTransformer, mul_neg_1_Transformer, MulTransformer, SafeReciprocalTransformer, EQTransformer, NETransformer, GETransformer, GTTransformer, LETransformer, LTTransformer, MinTransformer, MaxTransformer, ZeroTransformer, OneTransformer, NTransformer,
PowerTransformer, QuantileTransformer,ARDRegression, QuadraticDiscriminantAnalysis, PassiveAggressiveClassifier, LinearDiscriminantAnalysis,
DominantEncoder, RecessiveEncoder, HeterosisEncoder, UnderDominanceEncoder, OverDominanceEncoder,
]


#if mdr is installed
if 'mdr' in sys.modules:
if importlib.util.find_spec('mdr') is not None:
from mdr import MDR, ContinuousMDR
all_methods.append(MDR)
all_methods.append(ContinuousMDR)

if 'skrebate' in sys.modules:
if importlib.util.find_spec('skrebate') is not None:
from skrebate import ReliefF, SURF, SURFstar, MultiSURF
all_methods.append(ReliefF)
all_methods.append(SURF)
all_methods.append(SURFstar)
all_methods.append(MultiSURF)

if 'sklearnex' in sys.modules:
STRING_TO_CLASS = {
t.__name__: t for t in all_methods
}

if importlib.util.find_spec('sklearnex') is not None:
import sklearnex
import sklearnex.linear_model
import sklearnex.svm
import sklearnex.ensemble
import sklearnex.neighbors

all_methods.append(sklearnex.linear_model.LinearRegression)
all_methods.append(sklearnex.linear_model.Ridge)
all_methods.append(sklearnex.linear_model.Lasso)
all_methods.append(sklearnex.linear_model.ElasticNet)
all_methods.append(sklearnex.svm.SVR)
all_methods.append(sklearnex.svm.NuSVR)
all_methods.append(sklearnex.ensemble.RandomForestRegressor)
all_methods.append(sklearnex.neighbors.KNeighborsRegressor)
all_methods.append(sklearnex.ensemble.RandomForestClassifier)
all_methods.append(sklearnex.neighbors.KNeighborsClassifier)
all_methods.append(sklearnex.svm.SVC)
all_methods.append(sklearnex.svm.NuSVC)
all_methods.append(sklearnex.linear_model.LogisticRegression)

sklearnex_methods = []

sklearnex_methods.append(sklearnex.linear_model.LinearRegression)
sklearnex_methods.append(sklearnex.linear_model.Ridge)
sklearnex_methods.append(sklearnex.linear_model.Lasso)
sklearnex_methods.append(sklearnex.linear_model.ElasticNet)
sklearnex_methods.append(sklearnex.svm.SVR)
sklearnex_methods.append(sklearnex.svm.NuSVR)
sklearnex_methods.append(sklearnex.ensemble.RandomForestRegressor)
sklearnex_methods.append(sklearnex.neighbors.KNeighborsRegressor)
sklearnex_methods.append(sklearnex.ensemble.RandomForestClassifier)
sklearnex_methods.append(sklearnex.neighbors.KNeighborsClassifier)
sklearnex_methods.append(sklearnex.svm.SVC)
sklearnex_methods.append(sklearnex.svm.NuSVC)
sklearnex_methods.append(sklearnex.linear_model.LogisticRegression)

STRING_TO_CLASS.update({f"{t.__name__}_sklearnex": t for t in sklearnex_methods})


STRING_TO_CLASS = {
t.__name__: t for t in all_methods
}



Expand Down Expand Up @@ -439,15 +453,6 @@ def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_st
if name in GROUPNAMES:
name_list = GROUPNAMES[name]
return get_search_space(name_list, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state)

if name is None:
warnings.warn(f"name is None")
return None

if name not in STRING_TO_CLASS:
print("FOOO ", name)
warnings.warn(f"Could not find class for {name}")
return None

return get_node(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state)

Expand All @@ -458,21 +463,21 @@ def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None
# TODO Add AdaBoostRegressor, AdaBoostClassifier as wrappers? wrap a decision tree with different params?
# TODO add other meta-estimators?
if name == "RFE_classification":
rfe_sp = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
rfe_sp = get_configspace(name="RFE", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
ext = get_node("ExtraTreesClassifier", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return WrapperPipeline(nodegen=ext, method=RFE, configspace=rfe_sp)
return WrapperPipeline(nodegen=ext, method=RFE, space=rfe_sp)
if name == "RFE_regression":
rfe_sp = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
rfe_sp = get_configspace(name="RFE", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
ext = get_node("ExtraTreesRegressor", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return WrapperPipeline(nodegen=ext, method=RFE, configspace=rfe_sp)
return WrapperPipeline(nodegen=ext, method=RFE, space=rfe_sp)
if name == "SelectFromModel_classification":
sfm_sp = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
sfm_sp = get_configspace(name="SelectFromModel", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
ext = get_node("ExtraTreesClassifier", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return WrapperPipeline(nodegen=ext, method=SelectFromModel, configspace=sfm_sp)
return WrapperPipeline(nodegen=ext, method=SelectFromModel, space=sfm_sp)
if name == "SelectFromModel_regression":
sfm_sp = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
sfm_sp = get_configspace(name="SelectFromModel", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
ext = get_node("ExtraTreesRegressor", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return WrapperPipeline(nodegen=ext, method=SelectFromModel, configspace=sfm_sp)
return WrapperPipeline(nodegen=ext, method=SelectFromModel, space=sfm_sp)

#these are nodes that have special search spaces which require custom parsing of the hyperparameters
if name == "RobustScaler":
Expand Down
20 changes: 18 additions & 2 deletions tpot2/config/tests/test_get_configspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import tpot2.config

from ..get_configspace import STRING_TO_CLASS
from ..get_configspace import STRING_TO_CLASS, GROUPNAMES

def test_loop_through_all_hyperparameters():

Expand All @@ -22,4 +22,20 @@ def test_loop_through_all_hyperparameters():
for i in range(1):
estnode = estnode_gen.generate()
est = estnode.export_pipeline()


# Smoke test over GROUPNAMES: for every entry of every group, build its search
# space with tpot2.config.get_search_space, then repeatedly generate a node and
# export it as a pipeline. There are no explicit assertions; the test fails
# only if one of these calls raises.
def test_loop_through_groupnames():

# fixed arguments forwarded to get_search_space for every entry
n_classes=3
n_samples=100
n_features=100
random_state=None

# groupname itself is unused; only the entries of each group are exercised
for groupname, group in GROUPNAMES.items():
for class_name in group:
# presumably printed so a failing entry can be identified in captured output
print(class_name)
estnode_gen = tpot2.config.get_search_space(class_name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state)

#generate 100 random hyperparameter samples and make sure each one can be exported as a pipeline
for i in range(100):
estnode = estnode_gen.generate()
est = estnode.export_pipeline()
4 changes: 2 additions & 2 deletions tpot2/search_spaces/nodes/estimator_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def __init__(self, method: type,
else:
rng = np.random.default_rng(rng)
self.space.seed(rng.integers(0, 2**32))
self.hyperparameters = self.space.sample_configuration().get_dictionary()
self.hyperparameters = dict(self.space.sample_configuration())

self.check_hyperparameters_for_None()

Expand All @@ -55,7 +55,7 @@ def mutate(self, rng=None):

rng = np.random.default_rng(rng)
self.space.seed(rng.integers(0, 2**32))
self.hyperparameters = self.space.sample_configuration().get_dictionary()
self.hyperparameters = dict(self.space.sample_configuration())

self.check_hyperparameters_for_None()
return True
Expand Down
6 changes: 3 additions & 3 deletions tpot2/search_spaces/pipelines/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,14 @@ def __init__(self,
super().__init__()

self.nodegen = nodegen
self.node = np.random.default_rng(rng).choice(self.nodegen).generate()
self.node = self.nodegen.generate(rng)


self.method = method
self.space = space
rng = np.random.default_rng(rng)
self.space.seed(rng.integers(0, 2**32))
self.hyperparameters = self.space.sample_configuration().get_dictionary()
self.hyperparameters = dict(self.space.sample_configuration())



Expand All @@ -43,7 +43,7 @@ def mutate(self, rng=None):
def _mutate_hyperparameters(self, rng=None):
rng = np.random.default_rng(rng)
self.space.seed(rng.integers(0, 2**32))
self.hyperparameters = self.space.sample_configuration().get_dictionary()
self.hyperparameters = dict(self.space.sample_configuration())
return True

def _mutate_node(self, rng=None):
Expand Down

0 comments on commit b2a00ed

Please sign in to comment.