Skip to content

Commit

Permalink
Merge pull request #148 from EpistasisLab/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
perib authored Sep 17, 2024
2 parents 96ef8bb + 46f42bb commit 944699a
Show file tree
Hide file tree
Showing 13 changed files with 2,293 additions and 66 deletions.
1,897 changes: 1,897 additions & 0 deletions Tutorial/amltk_search_space_parser_example.ipynb

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ def calculate_version():
extras_require={
'skrebate': ['skrebate>=0.3.4'],
'mdr': ['scikit-mdr>=0.4.4'],
'sklearnex' : ['scikit-learn-intelex>=2023.2.1']
'sklearnex' : ['scikit-learn-intelex>=2023.2.1'],
'amltk' : ['amltk>=1.12.1'],
},
classifiers=[
'Intended Audience :: Science/Research',
Expand Down
2 changes: 1 addition & 1 deletion tpot2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
from .population import Population

from . import builtin_modules
from . import utils
from . import config
from . import search_spaces
from . import utils
from . import evolvers
from . import objectives
from . import selectors
Expand Down
2 changes: 1 addition & 1 deletion tpot2/config/classifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ def MLPClassifier_hyperparameter_parser(params):
def get_GaussianProcessClassifier_ConfigurationSpace(n_features, random_state):
space = {
'n_features': n_features,
'alpha': Float("alpha", bounds=(1e-14, 1.0), log=True),
'alpha': Float("alpha", bounds=(1e-10, 1.0), log=True),
'thetaL': Float("thetaL", bounds=(1e-10, 1e-3), log=True),
'thetaU': Float("thetaU", bounds=(1.0, 100000), log=True),
}
Expand Down
58 changes: 36 additions & 22 deletions tpot2/config/get_configspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@
from sklearn.feature_selection import f_classif, f_regression #TODO create a selectomixin using these?
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.gaussian_process import GaussianProcessRegressor, GaussianProcessClassifier
from sklearn.impute import SimpleImputer
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import SimpleImputer, IterativeImputer, KNNImputer

all_methods = [SGDClassifier, RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, MLPClassifier, DecisionTreeClassifier, XGBClassifier, KNeighborsClassifier, SVC, LogisticRegression, LGBMClassifier, LinearSVC, GaussianNB, BernoulliNB, MultinomialNB, ExtraTreesRegressor, RandomForestRegressor, GradientBoostingRegressor, BaggingRegressor, DecisionTreeRegressor, KNeighborsRegressor, XGBRegressor, ZeroCount, ColumnOneHotEncoder, Binarizer, FastICA, FeatureAgglomeration, MaxAbsScaler, MinMaxScaler, Normalizer, Nystroem, PCA, PolynomialFeatures, RBFSampler, RobustScaler, StandardScaler, SelectFwe, SelectPercentile, VarianceThreshold, SGDRegressor, Ridge, Lasso, ElasticNet, Lars, LassoLars, LassoLarsCV, RidgeCV, SVR, LinearSVR, AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor, BaggingRegressor, ExtraTreesRegressor, DecisionTreeRegressor, KNeighborsRegressor, ElasticNetCV,
AdaBoostClassifier,MLPRegressor,
Expand All @@ -56,7 +57,7 @@
GaussianProcessClassifier, BaggingClassifier,LGBMRegressor,
Passthrough,SkipTransformer,
PassKBinsDiscretizer,
SimpleImputer,
SimpleImputer, IterativeImputer, KNNImputer
]


Expand Down Expand Up @@ -124,7 +125,7 @@
"all_transformers" : ["transformers", "scalers"],

"arithmatic": ["AddTransformer", "mul_neg_1_Transformer", "MulTransformer", "SafeReciprocalTransformer", "EQTransformer", "NETransformer", "GETransformer", "GTTransformer", "LETransformer", "LTTransformer", "MinTransformer", "MaxTransformer"],
"imputers": ["SimpleImputer"],
"imputers": ["SimpleImputer", "IterativeImputer", "KNNImputer"],
"skrebate": ["ReliefF", "SURF", "SURFstar", "MultiSURF"],
"genetic_encoders": ["DominantEncoder", "RecessiveEncoder", "HeterosisEncoder", "UnderDominanceEncoder", "OverDominanceEncoder"],

Expand All @@ -136,8 +137,6 @@

def get_configspace(name, n_classes=3, n_samples=1000, n_features=100, random_state=None):
match name:
case "SimpleImputer":
return imputers.simple_imputer_cs

#autoqtl_builtins.py
case "FeatureEncodingFrequencySelector":
Expand Down Expand Up @@ -352,6 +351,12 @@ def get_configspace(name, n_classes=3, n_samples=1000, n_features=100, random_st
)

#imputers.py
case "SimpleImputer":
return imputers.simple_imputer_cs
case "IterativeImputer":
return imputers.get_IterativeImputer_config_space(n_features=n_features, random_state=random_state)
case "KNNImputer":
return imputers.get_KNNImputer_config_space(n_samples=n_samples)

#mdr_configs.py
case "MDR":
Expand Down Expand Up @@ -401,12 +406,12 @@ def get_configspace(name, n_classes=3, n_samples=1000, n_features=100, random_st
raise ValueError(f"Could not find configspace for {name}")


def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_state=None, return_choice_pipeline=True):
def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_state=None, return_choice_pipeline=True, base_node=EstimatorNode):


#if list of names, return a list of EstimatorNodes
if isinstance(name, list) or isinstance(name, np.ndarray):
search_spaces = [get_search_space(n, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, return_choice_pipeline=False) for n in name]
search_spaces = [get_search_space(n, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, return_choice_pipeline=False, base_node=base_node) for n in name]
#remove Nones
search_spaces = [s for s in search_spaces if s is not None]

Expand All @@ -417,12 +422,12 @@ def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_st

if name in GROUPNAMES:
name_list = GROUPNAMES[name]
return get_search_space(name_list, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, return_choice_pipeline=return_choice_pipeline)
return get_search_space(name_list, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, return_choice_pipeline=return_choice_pipeline, base_node=base_node)

return get_node(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state)
return get_node(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, base_node=base_node)


def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None):
def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None, base_node=EstimatorNode):

#these are wrappers that take in another estimator as a parameter
# TODO Add AdaBoostRegressor, AdaBoostClassifier as wrappers? wrap a decision tree with different params?
Expand All @@ -443,43 +448,52 @@ def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None
sfm_sp = get_configspace(name="SelectFromModel", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
ext = get_node("ExtraTreesRegressor", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return WrapperPipeline(estimator_search_space=ext, method=SelectFromModel, space=sfm_sp)

# TODO Add IterativeImputer with more estimator methods
'''
if name == "IterativeImputer_learnedestimators":
iterative_sp = get_configspace(name="IterativeImputer", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
regressor_searchspace = get_search_space(["LinearRegression", ...], n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return WrapperPipeline(estimator_search_space=regressor_searchspace, method=IterativeImputer, space=iterative_sp)
'''
#these are nodes that have special search spaces which require custom parsing of the hyperparameters
if name == "IterativeImputer":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=imputers.IterativeImputer_hyperparameter_parser)
if name == "RobustScaler":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=transformers.robust_scaler_hyperparameter_parser)
return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=transformers.robust_scaler_hyperparameter_parser)
if name == "GradientBoostingClassifier":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.GradientBoostingClassifier_hyperparameter_parser)
return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.GradientBoostingClassifier_hyperparameter_parser)
if name == "HistGradientBoostingClassifier":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.HistGradientBoostingClassifier_hyperparameter_parser)
return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.HistGradientBoostingClassifier_hyperparameter_parser)
if name == "GradientBoostingRegressor":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.GradientBoostingRegressor_hyperparameter_parser)
return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.GradientBoostingRegressor_hyperparameter_parser)
if name == "HistGradientBoostingRegressor":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.HistGradientBoostingRegressor_hyperparameter_parser)
return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.HistGradientBoostingRegressor_hyperparameter_parser)
if name == "MLPClassifier":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.MLPClassifier_hyperparameter_parser)
return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.MLPClassifier_hyperparameter_parser)
if name == "MLPRegressor":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.MLPRegressor_hyperparameter_parser)
return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.MLPRegressor_hyperparameter_parser)
if name == "GaussianProcessRegressor":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.GaussianProcessRegressor_hyperparameter_parser)
return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.GaussianProcessRegressor_hyperparameter_parser)
if name == "GaussianProcessClassifier":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.GaussianProcessClassifier_hyperparameter_parser)
return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.GaussianProcessClassifier_hyperparameter_parser)
if name == "FeatureAgglomeration":
configspace = get_configspace(name, n_features=n_features)
return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=transformers.FeatureAgglomeration_hyperparameter_parser)
return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=transformers.FeatureAgglomeration_hyperparameter_parser)

configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state)
if configspace is None:
#raise warning
warnings.warn(f"Could not find configspace for {name}")
return None

return EstimatorNode(STRING_TO_CLASS[name], configspace)
return base_node(STRING_TO_CLASS[name], configspace)
77 changes: 74 additions & 3 deletions tpot2/config/imputers.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,80 @@
import sklearn
import sklearn.ensemble
import sklearn.linear_model
import sklearn.neighbors
from ConfigSpace import ConfigurationSpace
from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal
from ConfigSpace import EqualsCondition


# Search space for SimpleImputer: only the imputation strategy is tuned.
#
# 'add_indicator' is deliberately NOT part of the space: it appends a
# missing-value mask next to the rest of the data and can cause errors. (gk)
simple_imputer_cs = ConfigurationSpace(
    space={
        'strategy': Categorical(
            'strategy',
            ['mean', 'median', 'most_frequent', 'constant'],
        ),
    }
)

def get_IterativeImputer_config_space(n_features, random_state):
    """Build the ConfigurationSpace used to search IterativeImputer settings.

    Parameters
    ----------
    n_features
        Upper bound (inclusive) for ``n_nearest_features``.
    random_state
        When not ``None``, stored in the space as a fixed ``random_state``
        entry; when ``None`` the key is left out entirely.

    Returns
    -------
    ConfigurationSpace
        Space over ``initial_strategy``, ``n_nearest_features``,
        ``imputation_order``, ``estimator`` and ``sample_posterior``, where
        ``sample_posterior`` is only active when ``estimator`` is
        ``'Bayesian'``.
    """
    space = {
        'initial_strategy': Categorical(
            'initial_strategy',
            ['mean', 'median', 'most_frequent', 'constant'],
        ),
        'n_nearest_features': Integer(
            'n_nearest_features',
            bounds=(1, n_features),
        ),
        'imputation_order': Categorical(
            'imputation_order',
            ['ascending', 'descending', 'roman', 'arabic', 'random'],
        ),
    }

    if random_state is not None:
        # This is required because configspace doesn't allow None as a value
        space['random_state'] = random_state

    # The estimator is chosen by a short string label rather than an object.
    estimator = Categorical('estimator', ['Bayesian', 'RFR', 'Ridge', 'KNN'])
    sample_posterior = Categorical('sample_posterior', [True, False])
    # sample_posterior is a child of estimator, enabled only for 'Bayesian'
    # (presumably because the other estimators cannot return a predictive
    # std -- confirm against the sklearn IterativeImputer docs).
    sampling_condition = EqualsCondition(sample_posterior, estimator, 'Bayesian')

    cs = ConfigurationSpace(space=space)
    cs.add_hyperparameters([estimator, sample_posterior])
    cs.add_conditions([sampling_condition])
    return cs

def get_KNNImputer_config_space(n_samples):
    """Build the ConfigurationSpace used to search KNNImputer settings.

    Parameters
    ----------
    n_samples
        Number of samples in the data the imputer will be fitted on; used to
        limit the largest ``n_neighbors`` value that is searched.

    Returns
    -------
    ConfigurationSpace
        Space over ``n_neighbors`` and ``weights``.
    """
    # Cap the neighbour count at 100 and do not search beyond the number of
    # samples. The previous ``max(n_samples, 100)`` did the opposite: it made
    # 100 a floor, so the range grew without limit on large datasets and
    # exceeded n_samples on small ones.
    # The outer max(2, ...) keeps the range non-degenerate for tiny inputs
    # (NOTE(review): ConfigSpace is expected to reject lower == upper).
    max_neighbors = max(2, min(n_samples, 100))

    space = {
        'n_neighbors': Integer('n_neighbors', bounds=(1, max_neighbors)),
        'weights': Categorical('weights', ['uniform', 'distance']),
    }

    return ConfigurationSpace(space=space)

def IterativeImputer_hyperparameter_parser(params):
est = params['estimator']
match est:
case 'Bayesian':
estimator = sklearn.linear_model.BayesianRidge()
case 'RFR':
estimator = sklearn.ensemble.RandomForestRegressor()
case 'Ridge':
estimator = sklearn.linear_model.Ridge()
case 'KNN':
estimator = sklearn.neighbors.KNeighborsRegressor()

final_params = {
'estimator' : estimator,
'initial_strategy' : params['initial_strategy'],
'n_nearest_features' : params['n_nearest_features'],
'imputation_order' : params['imputation_order'],
}

if 'sample_posterior' in params:
final_params['sample_posterior'] = params['sample_posterior']

if 'random_state' in params:
final_params['random_state'] = params['random_state']

return final_params
2 changes: 1 addition & 1 deletion tpot2/config/regressors.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ def get_ExtraTreesRegressor_ConfigurationSpace(random_state):
def get_GaussianProcessRegressor_ConfigurationSpace(n_features, random_state):
space = {
'n_features': n_features,
'alpha': Float("alpha", bounds=(1e-14, 1.0), log=True),
'alpha': Float("alpha", bounds=(1e-10, 1.0), log=True),
'thetaL': Float("thetaL", bounds=(1e-10, 1e-3), log=True),
'thetaU': Float("thetaU", bounds=(1.0, 100000), log=True),
}
Expand Down
8 changes: 0 additions & 8 deletions tpot2/search_spaces/base.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,10 @@
import tpot2
import numpy as np
import pandas as pd
import sklearn
from tpot2 import config
from typing import Generator, List, Tuple, Union
import random
from sklearn.base import BaseEstimator
import sklearn
import networkx as nx
from . import graph_utils
from typing import final
from abc import ABC, abstractmethod





Expand Down
Loading

0 comments on commit 944699a

Please sign in to comment.