misc reproducibility bugs

EpistasisLab · Sep 19, 2024 · f6d797b · f6d797b
1 parent 370afcc
commit f6d797b
Show file tree

Hide file tree

Showing 7 changed files with 27 additions and 26 deletions.
diff --git a/tpot2/evolvers/base_evolver.py b/tpot2/evolvers/base_evolver.py
@@ -423,7 +423,7 @@ def optimize(self, generations=None):
                     attempts = 2
                     while len(self.population.population) == 0 and attempts > 0:
                         new_initial_population = [next(self.individual_generator) for _ in range(self.cur_population_size)]
-                        self.population.add_to_population(new_initial_population)
+                        self.population.add_to_population(new_initial_population, rng=self.rng)
                         attempts -= 1
                         self.evaluate_population()
 
@@ -773,7 +773,9 @@ def evaluate_population_selection_early_stop(self,survival_counts, thresholds=No
                         max_to_remove = min(len(cur_individuals) - self.n_jobs, len(invalids))
 
                         if max_to_remove < len(invalids):
-                            invalids = np.random.choice(invalids, max_to_remove, replace=False)
+                            # invalids = np.random.choice(invalids, max_to_remove, replace=False)
+                            invalids = self.rng.choice(invalids, max_to_remove, replace=False)
+
 
                         cur_individuals = remove_items(cur_individuals,invalids)
                         offspring_scores = remove_items(offspring_scores,invalids)

diff --git a/tpot2/evolvers/steady_state_evolver.py b/tpot2/evolvers/steady_state_evolver.py
@@ -506,7 +506,7 @@ def optimize(self):
 
                         n_individuals_to_create = self.max_queue_size - len(submitted_futures)
                         initial_population = [next(self.individual_generator) for _ in range(n_individuals_to_create)]
-                        self.population.add_to_population(initial_population)
+                        self.population.add_to_population(initial_population, rng=self.rng)
 
 
 

diff --git a/tpot2/population.py b/tpot2/population.py
@@ -12,7 +12,7 @@
 import pickle
 import dask
 
-def mutate(individual, rng=None):
+def mutate(individual, rng):
     rng = np.random.default_rng(rng)
     if isinstance(individual, collections.abc.Iterable):
         for ind in individual:
@@ -21,19 +21,19 @@ def mutate(individual, rng=None):
         individual.mutate(rng=rng)
     return individual
 
-def crossover(parents, rng=None):
+def crossover(parents, rng):
     rng = np.random.default_rng(rng)
     parents[0].crossover(parents[1], rng=rng)
     return parents[0]
 
-def mutate_and_crossover(parents, rng=None):
+def mutate_and_crossover(parents, rng):
     rng = np.random.default_rng(rng)
     parents[0].crossover(parents[1], rng=rng)
     parents[0].mutate(rng=rng)
     parents[1].mutate(rng=rng)
     return parents
 
-def crossover_and_mutate(parents, rng=None):
+def crossover_and_mutate(parents, rng):
     rng = np.random.default_rng(rng)
     for p in parents:
         p.mutate(rng=rng)
@@ -91,7 +91,7 @@ def __init__(   self,
         self.callback=callback
         self.population = []
 
-    def survival_select(self, selector, weights, columns_names, n_survivors, rng=None, inplace=True):
+    def survival_select(self, selector, weights, columns_names, n_survivors, rng, inplace=True):
         rng = np.random.default_rng(rng)
         weighted_scores = self.get_column(self.population, column_names=columns_names) * weights
         new_population_index = np.ravel(selector(weighted_scores, k=n_survivors, rng=rng)) #TODO make it clear that we are concatenating scores...
@@ -100,7 +100,7 @@ def survival_select(self, selector, weights, columns_names, n_survivors, rng=Non
             self.set_population(new_population, rng=rng)
         return new_population
 
-    def parent_select(self, selector, weights, columns_names, k, n_parents, rng=None):
+    def parent_select(self, selector, weights, columns_names, k, n_parents, rng):
         rng = np.random.default_rng(rng)
         weighted_scores = self.get_column(self.population, column_names=columns_names) * weights
         parents_index = selector(weighted_scores, k=k, n_parents=n_parents, rng=rng)
@@ -136,7 +136,7 @@ def remove_invalid_from_population(self, column_names, invalid_value = "INVALID"
     # returns a list of individuals added to the live population
     #TODO make keep repeats allow for previously evaluated individuals,
     #but make sure that the live population only includes one of each, no repeats
-    def add_to_population(self, individuals: typing.List[BaseIndividual], rng=None, keep_repeats=False, mutate_until_unique=True):
+    def add_to_population(self, individuals: typing.List[BaseIndividual], rng, keep_repeats=False, mutate_until_unique=True):
         '''
         Add individuals to the live population. Add individuals to the evaluated_individuals if they are not already there.
 
@@ -252,7 +252,7 @@ def get_unevaluated_individuals(self, column_names, individual_list=None):
     #     return self.evaluated_individuals[~self.evaluated_individuals[column_names_to_check].isin(invalid_values).any(axis=1)]
 
     #the live population empied and is set to new_population
-    def set_population(self,  new_population, rng=None, keep_repeats=True):
+    def set_population(self,  new_population, rng, keep_repeats=True):
         '''
         sets population to new population
         for selection?
@@ -262,7 +262,7 @@ def set_population(self,  new_population, rng=None, keep_repeats=True):
         self.add_to_population(new_population, rng=rng, keep_repeats=keep_repeats)
 
     #TODO should we just generate one offspring per crossover?
-    def create_offspring(self, parents_list, var_op_list, rng=None, add_to_population=True, keep_repeats=False, mutate_until_unique=True, n_jobs=1):
+    def create_offspring(self, parents_list, var_op_list, rng, add_to_population=True, keep_repeats=False, mutate_until_unique=True, n_jobs=1):
         '''
         parents_list: a list of lists of parents.
         var_op_list: a list of var_ops to apply to each list of parents. Should be the same length as parents_list.
@@ -322,7 +322,7 @@ def create_offspring(self, parents_list, var_op_list, rng=None, add_to_populatio
 
 
     #TODO should we just generate one offspring per crossover?
-    def create_offspring2(self, parents_list, var_op_list, mutation_functions,mutation_function_weights, crossover_functions,crossover_function_weights, rng=None, add_to_population=True, keep_repeats=False, mutate_until_unique=True):
+    def create_offspring2(self, parents_list, var_op_list, mutation_functions,mutation_function_weights, crossover_functions,crossover_function_weights, rng, add_to_population=True, keep_repeats=False, mutate_until_unique=True):
 
         rng = np.random.default_rng(rng)
         new_offspring = []
@@ -405,7 +405,7 @@ def create_offspring2(self, parents_list, var_op_list, mutation_functions,mutati
 def get_id(individual):
     return individual.unique_id()
 
-def parallel_create_offspring(parents_list, var_op_list, rng=None, n_jobs=1):
+def parallel_create_offspring(parents_list, var_op_list, rng, n_jobs=1):
     rng = np.random.default_rng(rng)
     if n_jobs == 1:
         return nonparallel_create_offpring(parents_list, var_op_list, rng=rng)
@@ -421,7 +421,7 @@ def parallel_create_offspring(parents_list, var_op_list, rng=None, n_jobs=1):
                                 num_workers=n_jobs, threads_per_worker=1)
         return offspring
 
-def nonparallel_create_offpring(parents_list, var_op_list, rng=None, n_jobs=1):
+def nonparallel_create_offpring(parents_list, var_op_list, rng, n_jobs=1):
     rng = np.random.default_rng(rng)
     offspring = []
     for parents, var_op in zip(parents_list,var_op_list):
@@ -435,23 +435,23 @@ def nonparallel_create_offpring(parents_list, var_op_list, rng=None, n_jobs=1):
 
 
 
-def copy_and_change(parents, var_op, rng=None):
+def copy_and_change(parents, var_op, rng):
     rng = np.random.default_rng(rng)
     offspring = copy.deepcopy(parents)
     offspring = var_op(offspring, rng=rng)
     if isinstance(offspring, collections.abc.Iterable):
         offspring = offspring[0]
     return offspring
 
-def copy_and_mutate(parents, var_op, rng=None):
+def copy_and_mutate(parents, var_op, rng):
     rng = np.random.default_rng(rng)
     offspring = copy.deepcopy(parents)
     var_op(offspring, rng=rng)
     if isinstance(offspring, collections.abc.Iterable):
         offspring = offspring[0]
     return offspring
 
-def copy_and_crossover(parents, var_op, rng=None):
+def copy_and_crossover(parents, var_op, rng):
     rng = np.random.default_rng(rng)
     offspring = copy.deepcopy(parents)
     var_op(offspring[0],offspring[1], rng=rng)

diff --git a/tpot2/search_spaces/base.py b/tpot2/search_spaces/base.py
@@ -29,7 +29,7 @@ def validate_same_type(func):
         def wrapper(self, other, rng=None, **kwargs):
             if not isinstance(other, type(self)):
                 return False
-            return func(self, other, rng=None, **kwargs)
+            return func(self, other, rng=rng, **kwargs)
 
         return wrapper
 

diff --git a/tpot2/search_spaces/nodes/estimator_node_gradual.py b/tpot2/search_spaces/nodes/estimator_node_gradual.py
@@ -124,7 +124,7 @@ def gradual_hyperparameter_update(params:dict, configspace:ConfigurationSpace, r
                     new_params[param] = configspace[param].upper
             #if parameter is integer, add normal distribution
             elif issubclass(type(configspace[param]), ConfigSpace.hyperparameters.hyperparameter.IntegerHyperparameter):
-                new_params[param] = params[param] * np.random.normal(0, 1)
+                new_params[param] = params[param] * rng.normal(0, 1)
                 # if check if above or below min and cap
                 if new_params[param] < configspace[param].lower:
                     new_params[param] = configspace[param].lower

diff --git a/tpot2/search_spaces/pipelines/choice.py b/tpot2/search_spaces/pipelines/choice.py
@@ -10,9 +10,9 @@
 class ChoicePipelineIndividual(SklearnIndividual):
     def __init__(self, search_spaces : List[SklearnIndividualGenerator], rng=None) -> None:
         super().__init__()
-
+        rng = np.random.default_rng(rng)
         self.search_spaces = search_spaces
-        self.node = np.random.default_rng(rng).choice(self.search_spaces).generate(rng=rng)
+        self.node = rng.choice(self.search_spaces).generate(rng=rng)
 
 
     def mutate(self, rng=None):
@@ -23,7 +23,8 @@ def mutate(self, rng=None):
             return self._mutate_node(rng)
 
     def _mutate_select_new_node(self, rng=None):
-        self.node = random.choice(self.search_spaces).generate(rng=rng)
+        rng = np.random.default_rng(rng)
+        self.node = rng.choice(self.search_spaces).generate(rng=rng)
         return True
 
     def _mutate_node(self, rng=None):

diff --git a/tpot2/search_spaces/pipelines/sequential.py b/tpot2/search_spaces/pipelines/sequential.py
@@ -25,15 +25,13 @@ def __init__(self, search_spaces : List[SklearnIndividualGenerator], memory=None
 
     #TODO, mutate all steps or just one?
     def mutate(self, rng=None):
-        rng = np.random.default_rng(rng)
-
         # mutated = False
         # for step in self.pipeline:
         #     if rng.random() < 0.5:
         #         if step.mutate(rng):
         #             mutated = True
         # return mutated
-
+        rng = np.random.default_rng(rng)
         step = rng.choice(self.pipeline)
         return step.mutate(rng)