Skip to content

Commit

Permalink
misc reproducibility bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
perib committed Sep 19, 2024
1 parent 370afcc commit f6d797b
Show file tree
Hide file tree
Showing 7 changed files with 27 additions and 26 deletions.
6 changes: 4 additions & 2 deletions tpot2/evolvers/base_evolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,7 @@ def optimize(self, generations=None):
attempts = 2
while len(self.population.population) == 0 and attempts > 0:
new_initial_population = [next(self.individual_generator) for _ in range(self.cur_population_size)]
self.population.add_to_population(new_initial_population)
self.population.add_to_population(new_initial_population, rng=self.rng)
attempts -= 1
self.evaluate_population()

Expand Down Expand Up @@ -773,7 +773,9 @@ def evaluate_population_selection_early_stop(self,survival_counts, thresholds=No
max_to_remove = min(len(cur_individuals) - self.n_jobs, len(invalids))

if max_to_remove < len(invalids):
invalids = np.random.choice(invalids, max_to_remove, replace=False)
# invalids = np.random.choice(invalids, max_to_remove, replace=False)
invalids = self.rng.choice(invalids, max_to_remove, replace=False)


cur_individuals = remove_items(cur_individuals,invalids)
offspring_scores = remove_items(offspring_scores,invalids)
Expand Down
2 changes: 1 addition & 1 deletion tpot2/evolvers/steady_state_evolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,7 @@ def optimize(self):

n_individuals_to_create = self.max_queue_size - len(submitted_futures)
initial_population = [next(self.individual_generator) for _ in range(n_individuals_to_create)]
self.population.add_to_population(initial_population)
self.population.add_to_population(initial_population, rng=self.rng)



Expand Down
30 changes: 15 additions & 15 deletions tpot2/population.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import pickle
import dask

def mutate(individual, rng=None):
def mutate(individual, rng):
rng = np.random.default_rng(rng)
if isinstance(individual, collections.abc.Iterable):
for ind in individual:
Expand All @@ -21,19 +21,19 @@ def mutate(individual, rng=None):
individual.mutate(rng=rng)
return individual

def crossover(parents, rng=None):
def crossover(parents, rng):
rng = np.random.default_rng(rng)
parents[0].crossover(parents[1], rng=rng)
return parents[0]

def mutate_and_crossover(parents, rng=None):
def mutate_and_crossover(parents, rng):
rng = np.random.default_rng(rng)
parents[0].crossover(parents[1], rng=rng)
parents[0].mutate(rng=rng)
parents[1].mutate(rng=rng)
return parents

def crossover_and_mutate(parents, rng=None):
def crossover_and_mutate(parents, rng):
rng = np.random.default_rng(rng)
for p in parents:
p.mutate(rng=rng)
Expand Down Expand Up @@ -91,7 +91,7 @@ def __init__( self,
self.callback=callback
self.population = []

def survival_select(self, selector, weights, columns_names, n_survivors, rng=None, inplace=True):
def survival_select(self, selector, weights, columns_names, n_survivors, rng, inplace=True):
rng = np.random.default_rng(rng)
weighted_scores = self.get_column(self.population, column_names=columns_names) * weights
new_population_index = np.ravel(selector(weighted_scores, k=n_survivors, rng=rng)) #TODO make it clear that we are concatenating scores...
Expand All @@ -100,7 +100,7 @@ def survival_select(self, selector, weights, columns_names, n_survivors, rng=Non
self.set_population(new_population, rng=rng)
return new_population

def parent_select(self, selector, weights, columns_names, k, n_parents, rng=None):
def parent_select(self, selector, weights, columns_names, k, n_parents, rng):
rng = np.random.default_rng(rng)
weighted_scores = self.get_column(self.population, column_names=columns_names) * weights
parents_index = selector(weighted_scores, k=k, n_parents=n_parents, rng=rng)
Expand Down Expand Up @@ -136,7 +136,7 @@ def remove_invalid_from_population(self, column_names, invalid_value = "INVALID"
# returns a list of individuals added to the live population
#TODO make keep repeats allow for previously evaluated individuals,
#but make sure that the live population only includes one of each, no repeats
def add_to_population(self, individuals: typing.List[BaseIndividual], rng=None, keep_repeats=False, mutate_until_unique=True):
def add_to_population(self, individuals: typing.List[BaseIndividual], rng, keep_repeats=False, mutate_until_unique=True):
'''
Add individuals to the live population. Add individuals to the evaluated_individuals if they are not already there.
Expand Down Expand Up @@ -252,7 +252,7 @@ def get_unevaluated_individuals(self, column_names, individual_list=None):
# return self.evaluated_individuals[~self.evaluated_individuals[column_names_to_check].isin(invalid_values).any(axis=1)]

#the live population is emptied and is set to new_population
def set_population(self, new_population, rng=None, keep_repeats=True):
def set_population(self, new_population, rng, keep_repeats=True):
'''
sets population to new population
for selection?
Expand All @@ -262,7 +262,7 @@ def set_population(self, new_population, rng=None, keep_repeats=True):
self.add_to_population(new_population, rng=rng, keep_repeats=keep_repeats)

#TODO should we just generate one offspring per crossover?
def create_offspring(self, parents_list, var_op_list, rng=None, add_to_population=True, keep_repeats=False, mutate_until_unique=True, n_jobs=1):
def create_offspring(self, parents_list, var_op_list, rng, add_to_population=True, keep_repeats=False, mutate_until_unique=True, n_jobs=1):
'''
parents_list: a list of lists of parents.
var_op_list: a list of var_ops to apply to each list of parents. Should be the same length as parents_list.
Expand Down Expand Up @@ -322,7 +322,7 @@ def create_offspring(self, parents_list, var_op_list, rng=None, add_to_populatio


#TODO should we just generate one offspring per crossover?
def create_offspring2(self, parents_list, var_op_list, mutation_functions,mutation_function_weights, crossover_functions,crossover_function_weights, rng=None, add_to_population=True, keep_repeats=False, mutate_until_unique=True):
def create_offspring2(self, parents_list, var_op_list, mutation_functions,mutation_function_weights, crossover_functions,crossover_function_weights, rng, add_to_population=True, keep_repeats=False, mutate_until_unique=True):

rng = np.random.default_rng(rng)
new_offspring = []
Expand Down Expand Up @@ -405,7 +405,7 @@ def create_offspring2(self, parents_list, var_op_list, mutation_functions,mutati
def get_id(individual):
return individual.unique_id()

def parallel_create_offspring(parents_list, var_op_list, rng=None, n_jobs=1):
def parallel_create_offspring(parents_list, var_op_list, rng, n_jobs=1):
rng = np.random.default_rng(rng)
if n_jobs == 1:
return nonparallel_create_offpring(parents_list, var_op_list, rng=rng)
Expand All @@ -421,7 +421,7 @@ def parallel_create_offspring(parents_list, var_op_list, rng=None, n_jobs=1):
num_workers=n_jobs, threads_per_worker=1)
return offspring

def nonparallel_create_offpring(parents_list, var_op_list, rng=None, n_jobs=1):
def nonparallel_create_offpring(parents_list, var_op_list, rng, n_jobs=1):
rng = np.random.default_rng(rng)
offspring = []
for parents, var_op in zip(parents_list,var_op_list):
Expand All @@ -435,23 +435,23 @@ def nonparallel_create_offpring(parents_list, var_op_list, rng=None, n_jobs=1):



def copy_and_change(parents, var_op, rng=None):
def copy_and_change(parents, var_op, rng):
rng = np.random.default_rng(rng)
offspring = copy.deepcopy(parents)
offspring = var_op(offspring, rng=rng)
if isinstance(offspring, collections.abc.Iterable):
offspring = offspring[0]
return offspring

def copy_and_mutate(parents, var_op, rng=None):
def copy_and_mutate(parents, var_op, rng):
rng = np.random.default_rng(rng)
offspring = copy.deepcopy(parents)
var_op(offspring, rng=rng)
if isinstance(offspring, collections.abc.Iterable):
offspring = offspring[0]
return offspring

def copy_and_crossover(parents, var_op, rng=None):
def copy_and_crossover(parents, var_op, rng):
rng = np.random.default_rng(rng)
offspring = copy.deepcopy(parents)
var_op(offspring[0],offspring[1], rng=rng)
Expand Down
2 changes: 1 addition & 1 deletion tpot2/search_spaces/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def validate_same_type(func):
def wrapper(self, other, rng=None, **kwargs):
if not isinstance(other, type(self)):
return False
return func(self, other, rng=None, **kwargs)
return func(self, other, rng=rng, **kwargs)

return wrapper

Expand Down
2 changes: 1 addition & 1 deletion tpot2/search_spaces/nodes/estimator_node_gradual.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def gradual_hyperparameter_update(params:dict, configspace:ConfigurationSpace, r
new_params[param] = configspace[param].upper
#if parameter is integer, add normal distribution
elif issubclass(type(configspace[param]), ConfigSpace.hyperparameters.hyperparameter.IntegerHyperparameter):
new_params[param] = params[param] * np.random.normal(0, 1)
new_params[param] = params[param] * rng.normal(0, 1)
# check if above or below min and cap
if new_params[param] < configspace[param].lower:
new_params[param] = configspace[param].lower
Expand Down
7 changes: 4 additions & 3 deletions tpot2/search_spaces/pipelines/choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
class ChoicePipelineIndividual(SklearnIndividual):
def __init__(self, search_spaces : List[SklearnIndividualGenerator], rng=None) -> None:
super().__init__()

rng = np.random.default_rng(rng)
self.search_spaces = search_spaces
self.node = np.random.default_rng(rng).choice(self.search_spaces).generate(rng=rng)
self.node = rng.choice(self.search_spaces).generate(rng=rng)


def mutate(self, rng=None):
Expand All @@ -23,7 +23,8 @@ def mutate(self, rng=None):
return self._mutate_node(rng)

def _mutate_select_new_node(self, rng=None):
self.node = random.choice(self.search_spaces).generate(rng=rng)
rng = np.random.default_rng(rng)
self.node = rng.choice(self.search_spaces).generate(rng=rng)
return True

def _mutate_node(self, rng=None):
Expand Down
4 changes: 1 addition & 3 deletions tpot2/search_spaces/pipelines/sequential.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,13 @@ def __init__(self, search_spaces : List[SklearnIndividualGenerator], memory=None

#TODO, mutate all steps or just one?
def mutate(self, rng=None):
rng = np.random.default_rng(rng)

# mutated = False
# for step in self.pipeline:
# if rng.random() < 0.5:
# if step.mutate(rng):
# mutated = True
# return mutated

rng = np.random.default_rng(rng)
step = rng.choice(self.pipeline)
return step.mutate(rng)

Expand Down

0 comments on commit f6d797b

Please sign in to comment.