From 102cdd537cbf8d57d25ee65cf65cd6c832ac7fce Mon Sep 17 00:00:00 2001
From: perib <pedroribeiroh@gmail.com>
Date: Mon, 8 Jul 2024 17:07:10 -0700
Subject: [PATCH 01/12] removed unused files, passed rng into default_rng

---
 .../nodes/estimator_node_custom_sampler.py    |  59 -------
 .../nodes/estimator_node_simple.py            |  70 --------
 .../nodes/genetic_feature_selection.py        |   8 +-
 .../search_spaces/pipelines/dynamic_linear.py |  18 +-
 .../pipelines/dynamicunion copy.py            | 165 ++++++++++++++++++
 tpot2/search_spaces/pipelines/dynamicunion.py |  18 +-
 .../pipelines/genetic_sample_weight.py        |   1 -
 .../pipelines/hierarchical_individual.py      |   1 -
 tpot2/search_spaces/pipelines/sequential.py   |  12 +-
 tpot2/search_spaces/pipelines/union.py        |  10 +-
 10 files changed, 198 insertions(+), 164 deletions(-)
 delete mode 100644 tpot2/search_spaces/nodes/estimator_node_custom_sampler.py
 delete mode 100644 tpot2/search_spaces/nodes/estimator_node_simple.py
 create mode 100644 tpot2/search_spaces/pipelines/dynamicunion copy.py
 delete mode 100644 tpot2/search_spaces/pipelines/genetic_sample_weight.py
 delete mode 100644 tpot2/search_spaces/pipelines/hierarchical_individual.py

diff --git a/tpot2/search_spaces/nodes/estimator_node_custom_sampler.py b/tpot2/search_spaces/nodes/estimator_node_custom_sampler.py
deleted file mode 100644
index 93a55a4e..00000000
--- a/tpot2/search_spaces/nodes/estimator_node_custom_sampler.py
+++ /dev/null
@@ -1,59 +0,0 @@
-# try https://automl.github.io/ConfigSpace/main/api/hyperparameters.html
-import tpot2
-import numpy as np
-import pandas as pd
-import sklearn
-from tpot2 import config
-from typing import Generator, List, Tuple, Union
-import random
-from ..base import SklearnIndividual, SklearnIndividualGenerator, check_same_subclass
-from ConfigSpace import ConfigurationSpace
-
-
-class EstimatorNodeCustomIndividual(SklearnIndividual):
-    def __init__(self, method: type, 
-                        sample_func : callable, 
-                        rng=None) -> None:
-        super().__init__()
-        self.method = method
-        self.sample_func = sample_func
-        
-        self.hyperparameters = self.sample_func(rng)
-
-    def mutate(self, rng=None):    
-        rng = np.random.default_rng(rng)
-        self.hyperparameters = self.sample_func(rng)
-        return True
-
-    def _crossover(self, other, rng=None):
-        rng = np.random.default_rng(rng)
-        if self.method != other.method:
-            return False
-
-        #loop through hyperparameters, randomly swap items in self.hyperparameters with items in other.hyperparameters
-        for hyperparameter in self.space:
-            if rng.choice([True, False]):
-                if hyperparameter in other.hyperparameters:
-                    self.hyperparameters[hyperparameter] = other.hyperparameters[hyperparameter]
-
-    def export_pipeline(self, **kwargs):
-        return self.method(**self.hyperparameters)
-    
-    def unique_id(self):
-        #return a dictionary of the method and the hyperparameters
-        method_str = self.method.__name__
-        params = list(self.hyperparameters.keys())
-        params = sorted(params)
-
-        id_str = f"{method_str}({', '.join([f'{param}={self.hyperparameters[param]}' for param in params])})"
-        
-        return id_str
-
-class EstimatorNodeCustom(SklearnIndividualGenerator):
-    def __init__(self,  method : type, 
-                        sample_func: callable):
-        self.method = method
-        self.sample_func = sample_func
-
-    def generate(self, rng=None):
-        return EstimatorNodeCustomIndividual(self.method, self.sample_func)
\ No newline at end of file
diff --git a/tpot2/search_spaces/nodes/estimator_node_simple.py b/tpot2/search_spaces/nodes/estimator_node_simple.py
deleted file mode 100644
index 8063526a..00000000
--- a/tpot2/search_spaces/nodes/estimator_node_simple.py
+++ /dev/null
@@ -1,70 +0,0 @@
-# try https://automl.github.io/ConfigSpace/main/api/hyperparameters.html
-import tpot2
-import numpy as np
-import pandas as pd
-import sklearn
-from tpot2 import config
-from typing import Generator, List, Tuple, Union
-import random
-from ..base import SklearnIndividual, SklearnIndividualGenerator
-
-class EstimatorNodeIndividual(SklearnIndividual):
-    def __init__(self, method, space ) -> None:
-        super().__init__()
-        self.method = method
-        self.space = space #a dictionary. keys are hyperparameters, values are the space of the hyperparameter. If list, then hyperparameter is categorical. If tuple, then hyperparameter is continuous. If single value, then hyperparameter is fixed.
-        
-        self._mutate_hyperparameters()
-
-    def mutate(self, rng=None):
-        rng = np.random.default_rng(rng)
-        return self._mutate_hyperparameters(rng)
-    
-    def _mutate_hyperparameters(self, rng=None):
-        rng = np.random.default_rng(rng)
-        self.hyperparameters = {}
-        #sample new hyperparameters from the space
-        for hyperparameter in self.space:
-            hyperparameter_space = self.space[hyperparameter]
-            if isinstance(hyperparameter_space, list):
-                hp = rng.choice(hyperparameter_space)
-            elif isinstance(hyperparameter_space, tuple):
-                hp = rng.uniform(hyperparameter_space[0], hyperparameter_space[1])
-            else:
-                hp = hyperparameter_space
-
-            self.hyperparameters[hyperparameter] = hp
-            
-        return True
-
-    def _crossover(self, other, rng=None):
-        rng = np.random.default_rng(rng)
-        if self.method != other.method:
-            return False
-
-        #loop through hyperparameters, randomly swap items in self.hyperparameters with items in other.hyperparameters
-        for hyperparameter in self.space:
-            if rng.choice([True, False]):
-                if hyperparameter in other.hyperparameters:
-                    self.hyperparameters[hyperparameter] = other.hyperparameters[hyperparameter]
-
-    def export_pipeline(self, **kwargs):
-        return self.method(**self.hyperparameters)
-    
-    def unique_id(self):
-        #return a dictionary of the method and the hyperparameters
-        method_str = self.method.__name__
-        params = list(self.hyperparameters.keys())
-        params = sorted(params)
-
-        id_str = f"{method_str}({', '.join([f'{param}={self.hyperparameters[param]}' for param in params])})"
-        
-        return id_str
-
-class EstimatorNode(SklearnIndividualGenerator):
-    def __init__(self, method, space):
-        self.method = method
-        self.space = space
-
-    def generate(self, rng=None):
-        return EstimatorNodeIndividual(self.method, self.space)
\ No newline at end of file
diff --git a/tpot2/search_spaces/nodes/genetic_feature_selection.py b/tpot2/search_spaces/nodes/genetic_feature_selection.py
index 0fe16586..9e36e666 100644
--- a/tpot2/search_spaces/nodes/genetic_feature_selection.py
+++ b/tpot2/search_spaces/nodes/genetic_feature_selection.py
@@ -31,16 +31,16 @@ def __init__(   self,
                     start_p=0.2,
                     mutation_rate = 0.5,
                     crossover_rate = 0.5,
-                    mutation_rate_rate = 0,
-                    crossover_rate_rate = 0,
                     rng=None,
                 ):
 
         self.start_p = start_p
         self.mutation_rate = mutation_rate
         self.crossover_rate = crossover_rate
-        self.mutation_rate_rate = mutation_rate_rate
-        self.crossover_rate_rate = crossover_rate_rate
+        self.mutation_rate_rate = 0
+        self.crossover_rate_rate = 0
+
+
 
         rng = np.random.default_rng(rng)
 
diff --git a/tpot2/search_spaces/pipelines/dynamic_linear.py b/tpot2/search_spaces/pipelines/dynamic_linear.py
index 6da90d81..79ccedef 100644
--- a/tpot2/search_spaces/pipelines/dynamic_linear.py
+++ b/tpot2/search_spaces/pipelines/dynamic_linear.py
@@ -26,7 +26,7 @@ def __init__(self, search_space : SklearnIndividualGenerator, max_length: int ,
         self.pipeline = self._generate_pipeline(rng)
 
     def _generate_pipeline(self, rng=None):
-        rng = np.random.default_rng()
+        rng = np.random.default_rng(rng)
         pipeline = []
         length = rng.integers(self.min_length, self.max_length)
         length = min(length, 3)
@@ -37,7 +37,7 @@ def _generate_pipeline(self, rng=None):
     
 
     def mutate(self, rng=None):
-        rng = np.random.default_rng()
+        rng = np.random.default_rng(rng)
         options = []
         if len(self.pipeline) > self.min_length:
             options.append(self._mutate_remove_node)
@@ -48,19 +48,19 @@ def mutate(self, rng=None):
         return rng.choice(options)(rng)
     
     def _mutate_add_node(self, rng=None):
-        rng = np.random.default_rng()
+        rng = np.random.default_rng(rng)
         new_node = self.search_space.generate(rng)
         idx = rng.integers(len(self.pipeline))
         self.pipeline.insert(idx, new_node)
 
     def _mutate_remove_node(self, rng=None):
-        rng = np.random.default_rng()
+        rng = np.random.default_rng(rng)
         idx = rng.integers(len(self.pipeline))
         self.pipeline.pop(idx)
 
     def _mutate_step(self, rng=None):
         #choose a random step in the pipeline and mutate it
-        rng = np.random.default_rng()
+        rng = np.random.default_rng(rng)
         step = rng.choice(self.pipeline)
         return step.mutate(rng)
     
@@ -68,7 +68,7 @@ def _mutate_step(self, rng=None):
     def _crossover(self, other, rng=None):
         #swap a random step in the pipeline with the corresponding step in the other pipeline
 
-        rng = np.random.default_rng()
+        rng = np.random.default_rng(rng)
         cx_funcs = [self._crossover_swap_random_steps, self._crossover_inner_step]
 
         rng.shuffle(cx_funcs)
@@ -79,7 +79,7 @@ def _crossover(self, other, rng=None):
         return False
     
     def _crossover_swap_random_steps(self, other, rng):
-        rng = np.random.default_rng()
+        rng = np.random.default_rng(rng)
 
         max_steps = int(min(len(self.pipeline), len(other.pipeline))/2)
         max_steps = max(max_steps, 1)
@@ -106,14 +106,14 @@ def _crossover_swap_step(self, other, rng):
         if len(self.pipeline) < 2:
             return False
         
-        rng = np.random.default_rng()
+        rng = np.random.default_rng(rng)
         idx = rng.integers(1,len(self.pipeline))
 
         self.pipeline[idx], other.pipeline[idx] = other.pipeline[idx], self.pipeline[idx]
         return True
 
     def _crossover_inner_step(self, other, rng):
-        rng = np.random.default_rng()
+        rng = np.random.default_rng(rng)
         
         pipeline1_indexes= list(range(len(self.pipeline)))
         pipeline2_indexes= list(range(len(other.pipeline)))
diff --git a/tpot2/search_spaces/pipelines/dynamicunion copy.py b/tpot2/search_spaces/pipelines/dynamicunion copy.py
new file mode 100644
index 00000000..7951c25c
--- /dev/null
+++ b/tpot2/search_spaces/pipelines/dynamicunion copy.py	
@@ -0,0 +1,165 @@
+import tpot2
+import numpy as np
+import pandas as pd
+import sklearn
+from tpot2 import config
+from typing import Generator, List, Tuple, Union
+import random
+from ..base import SklearnIndividual, SklearnIndividualGenerator
+from ..tuple_index import TupleIndex
+
+class DynamicUnionPipelineIndividual(SklearnIndividual):
+    """
+    Takes in one search space.
+    Will produce a FeatureUnion of up to max_estimators number of steps.
+    The output of the FeatureUnion will the all of the steps concatenated together.
+    
+    """
+
+    def __init__(self, search_space : SklearnIndividualGenerator, max_estimators=None, rng=None) -> None:
+        super().__init__()
+        self.search_space = search_space
+        
+        if max_estimators is None:
+            self.max_estimators = np.inf
+        else:
+            self.max_estimators = max_estimators
+
+        self.pipeline = []
+        
+        if self.max_estimators == np.inf:
+            init_max = 3
+        else:
+            init_max = self.max_estimators
+
+        rng = np.random.default_rng(rng)
+
+        for _ in range(rng.integers(1, init_max)):
+            self.pipeline.append(self.search_space.generate(rng))
+    
+    def mutate(self, rng=None):
+        rng = np.random.default_rng(rng)
+        mutation_funcs = [self._mutate_add_step, self._mutate_remove_step, self._mutate_replace_step, self._mutate_inner_step]
+        rng.shuffle(mutation_funcs)
+        for mutation_func in mutation_funcs:
+            if mutation_func(rng):
+                return True
+    
+    def _mutate_add_step(self, rng):
+        rng = np.random.default_rng(rng)
+        if len(self.pipeline) < self.max_estimators:
+            self.pipeline.append(self.search_space.generate(rng))
+            return True
+        return False
+    
+    def _mutate_remove_step(self, rng):
+        rng = np.random.default_rng(rng)
+        if len(self.pipeline) > 1:
+            self.pipeline.pop(rng.integers(0, len(self.pipeline)))
+            return True
+        return False
+
+    def _mutate_replace_step(self, rng):
+        rng = np.random.default_rng(rng)
+        idx = rng.integers(0, len(self.pipeline))
+        self.pipeline[idx] = self.search_space.generate(rng)
+        return True
+    
+    #TODO mutate one step or multiple?
+    def _mutate_inner_step(self, rng):
+        rng = np.random.default_rng(rng)
+        indexes = rng.random(len(self.pipeline)) < 0.5
+        indexes = np.where(indexes)[0]
+        mutated = False
+        if len(indexes) > 0:
+            for idx in indexes:
+                if self.pipeline[idx].mutate(rng):
+                    mutated = True
+        else:
+            mutated = self.pipeline[rng.integers(0, len(self.pipeline))].mutate(rng)
+
+        return mutated
+
+
+    def _crossover(self, other, rng=None):
+        rng = np.random.default_rng(rng)
+
+        cx_funcs = [self._crossover_swap_random_steps, self._crossover_inner_step]
+        rng.shuffle(cx_funcs)
+        for cx_func in cx_funcs:
+            if cx_func(other, rng):
+                return True
+
+        return False
+    
+    def _crossover_swap_step(self, other, rng):
+        rng = np.random.default_rng(rng)
+        idx = rng.integers(1,len(self.pipeline))
+        idx2 = rng.integers(1,len(other.pipeline))
+
+        self.pipeline[idx], other.pipeline[idx2] = other.pipeline[idx2], self.pipeline[idx]
+        # self.pipeline[idx] = other.pipeline[idx2]
+        return True
+    
+    def _crossover_swap_random_steps(self, other, rng):
+        rng = np.random.default_rng(rng)
+
+        max_steps = int(min(len(self.pipeline), len(other.pipeline))/2)
+        max_steps = max(max_steps, 1)
+        
+        if max_steps == 1:
+            n_steps_to_swap = 1
+        else:
+            n_steps_to_swap = rng.integers(1, max_steps)
+
+        other_indexes_to_take = rng.choice(len(other.pipeline), n_steps_to_swap, replace=False)
+        self_indexes_to_replace = rng.choice(len(self.pipeline), n_steps_to_swap, replace=False)
+
+        # self.pipeline[self_indexes_to_replace], other.pipeline[other_indexes_to_take] = other.pipeline[other_indexes_to_take], self.pipeline[self_indexes_to_replace]
+        
+        for self_idx, other_idx in zip(self_indexes_to_replace, other_indexes_to_take):
+            self.pipeline[self_idx], other.pipeline[other_idx] = other.pipeline[other_idx], self.pipeline[self_idx]
+        
+        return True
+        
+
+
+    def _crossover_inner_step(self, other, rng):
+        rng = np.random.default_rng(rng)
+        
+        #randomly select pairs of steps to crossover
+        indexes = list(range(1, len(self.pipeline)))
+        other_indexes = list(range(1, len(other.pipeline)))
+        #shuffle
+        rng.shuffle(indexes)
+        rng.shuffle(other_indexes)
+
+        crossover_success = False
+        for idx, other_idx in zip(indexes, other_indexes):
+            if self.pipeline[idx].crossover(other.pipeline[other_idx], rng):
+                crossover_success = True
+                
+        return crossover_success
+    
+    def export_pipeline(self):
+        return sklearn.pipeline.make_union(*[step.export_pipeline() for step in self.pipeline])
+    
+    def unique_id(self):
+        l = [step.unique_id() for step in self.pipeline]
+        # if all items are strings, then sort them
+        if all([isinstance(x, str) for x in l]):
+            l.sort()
+        l = ["FeatureUnion"] + l
+        return TupleIndex(tuple(l))
+
+
+class DynamicUnionPipeline(SklearnIndividualGenerator):
+    def __init__(self, search_spaces : List[SklearnIndividualGenerator] ) -> None:
+        """
+        Takes in a list of search spaces. will produce a pipeline of Sequential length. Each step in the pipeline will correspond to the the search space provided in the same index.
+        """
+        
+        self.search_spaces = search_spaces
+
+    def generate(self, rng=None):
+        return DynamicUnionPipelineIndividual(self.search_spaces)
\ No newline at end of file
diff --git a/tpot2/search_spaces/pipelines/dynamicunion.py b/tpot2/search_spaces/pipelines/dynamicunion.py
index 48fa9669..401c16ef 100644
--- a/tpot2/search_spaces/pipelines/dynamicunion.py
+++ b/tpot2/search_spaces/pipelines/dynamicunion.py
@@ -41,7 +41,7 @@ def __init__(self, search_space : SklearnIndividualGenerator, max_estimators=Non
             
     
     def mutate(self, rng=None):
-        rng = np.random.default_rng()
+        rng = np.random.default_rng(rng)
         mutation_funcs = [self._mutate_add_step, self._mutate_remove_step, self._mutate_replace_step, self._mutate_inner_step]
         rng.shuffle(mutation_funcs)
         for mutation_func in mutation_funcs:
@@ -49,7 +49,7 @@ def mutate(self, rng=None):
                 return True
     
     def _mutate_add_step(self, rng):
-        rng = np.random.default_rng()
+        rng = np.random.default_rng(rng)
         max_attempts = 10
         if len(self.union_dict) < self.max_estimators:
             for _ in range(max_attempts):
@@ -60,20 +60,20 @@ def _mutate_add_step(self, rng):
         return False
     
     def _mutate_remove_step(self, rng):
-        rng = np.random.default_rng()
+        rng = np.random.default_rng(rng)
         if len(self.union_dict) > 1:
             self.union_dict.pop( rng.choice(list(self.union_dict.keys())))  
             return True
         return False
 
     def _mutate_replace_step(self, rng):
-        rng = np.random.default_rng()        
+        rng = np.random.default_rng(rng)        
         changed = self._mutate_remove_step(rng) or self._mutate_add_step(rng)
         return changed
     
     #TODO mutate one step or multiple?
     def _mutate_inner_step(self, rng):
-        rng = np.random.default_rng()
+        rng = np.random.default_rng(rng)
         changed = False
         values = list(self.union_dict.values())
         for step in values:
@@ -86,7 +86,7 @@ def _mutate_inner_step(self, rng):
 
 
     def _crossover(self, other, rng=None):
-        rng = np.random.default_rng()
+        rng = np.random.default_rng(rng)
 
         cx_funcs = [self._crossover_swap_random_steps, self._crossover_inner_step]
         rng.shuffle(cx_funcs)
@@ -97,7 +97,7 @@ def _crossover(self, other, rng=None):
         return False
     
     def _crossover_swap_step(self, other, rng):
-        rng = np.random.default_rng()
+        rng = np.random.default_rng(rng)
         changed = False
 
         self_step = rng.choice(list(self.union_dict.values()))
@@ -118,7 +118,7 @@ def _crossover_swap_step(self, other, rng):
 
     
     def _crossover_swap_random_steps(self, other, rng):
-        rng = np.random.default_rng()
+        rng = np.random.default_rng(rng)
         self_values = list(self.union_dict.values())
         other_values = list(other.union_dict.values())
 
@@ -137,7 +137,7 @@ def _crossover_swap_random_steps(self, other, rng):
 
 
     def _crossover_inner_step(self, other, rng):
-        rng = np.random.default_rng()
+        rng = np.random.default_rng(rng)
         
         changed = False
         self_values = list(self.union_dict.values())
diff --git a/tpot2/search_spaces/pipelines/genetic_sample_weight.py b/tpot2/search_spaces/pipelines/genetic_sample_weight.py
deleted file mode 100644
index db731a85..00000000
--- a/tpot2/search_spaces/pipelines/genetic_sample_weight.py
+++ /dev/null
@@ -1 +0,0 @@
-from ..base import SklearnIndividual, SklearnIndividualGenerator
\ No newline at end of file
diff --git a/tpot2/search_spaces/pipelines/hierarchical_individual.py b/tpot2/search_spaces/pipelines/hierarchical_individual.py
deleted file mode 100644
index db731a85..00000000
--- a/tpot2/search_spaces/pipelines/hierarchical_individual.py
+++ /dev/null
@@ -1 +0,0 @@
-from ..base import SklearnIndividual, SklearnIndividualGenerator
\ No newline at end of file
diff --git a/tpot2/search_spaces/pipelines/sequential.py b/tpot2/search_spaces/pipelines/sequential.py
index da52222a..7a7e6a99 100644
--- a/tpot2/search_spaces/pipelines/sequential.py
+++ b/tpot2/search_spaces/pipelines/sequential.py
@@ -25,7 +25,7 @@ def __init__(self, search_spaces : List[SklearnIndividualGenerator], memory=None
         
     #TODO, mutate all steps or just one?
     def mutate(self, rng=None):
-        rng = np.random.default_rng()
+        rng = np.random.default_rng(rng)
 
         # mutated = False
         # for step in self.pipeline:
@@ -43,7 +43,7 @@ def _crossover(self, other, rng=None):
         if len(self.pipeline) != len(other.pipeline):
             return False
 
-        rng = np.random.default_rng()
+        rng = np.random.default_rng(rng)
         cx_funcs = [self._crossover_swap_random_steps, self._crossover_swap_segment, self._crossover_inner_step]
 
         rng.shuffle(cx_funcs)
@@ -58,7 +58,7 @@ def _crossover_swap_step(self, other, rng):
             return False
         
         
-        rng = np.random.default_rng()
+        rng = np.random.default_rng(rng)
         idx = rng.integers(1,len(self.pipeline))
 
         self.pipeline[idx], other.pipeline[idx] = other.pipeline[idx], self.pipeline[idx]
@@ -72,7 +72,7 @@ def _crossover_swap_random_steps(self, other, rng):
         if len(self.pipeline) < 2:
             return False
     
-        rng = np.random.default_rng()
+        rng = np.random.default_rng(rng)
 
         max_steps = int(min(len(self.pipeline), len(other.pipeline))/2)
         max_steps = max(max_steps, 1)
@@ -97,7 +97,7 @@ def _crossover_swap_segment(self, other, rng):
         if len(self.pipeline) < 2:
             return False
         
-        rng = np.random.default_rng()
+        rng = np.random.default_rng(rng)
         idx = rng.integers(1,len(self.pipeline))
 
         left = rng.choice([True, False])
@@ -109,7 +109,7 @@ def _crossover_swap_segment(self, other, rng):
         return True
     
     def _crossover_inner_step(self, other, rng):
-        rng = np.random.default_rng()
+        rng = np.random.default_rng(rng)
         
         # crossover_success = False
         # for idx in range(len(self.pipeline)):
diff --git a/tpot2/search_spaces/pipelines/union.py b/tpot2/search_spaces/pipelines/union.py
index 1e1a58c8..32f988e6 100644
--- a/tpot2/search_spaces/pipelines/union.py
+++ b/tpot2/search_spaces/pipelines/union.py
@@ -25,14 +25,14 @@ def __init__(self, search_spaces : List[SklearnIndividualGenerator], rng=None) -
             self.pipeline.append(space.generate(rng))
     
     def mutate(self, rng=None):
-        rng = np.random.default_rng()
+        rng = np.random.default_rng(rng)
         step = rng.choice(self.pipeline)
         return step.mutate(rng)
      
 
     def _crossover(self, other, rng=None):
         #swap a random step in the pipeline with the corresponding step in the other pipeline
-        rng = np.random.default_rng()
+        rng = np.random.default_rng(rng)
 
         cx_funcs = [self._crossover_inner_step]
         rng.shuffle(cx_funcs)
@@ -43,14 +43,14 @@ def _crossover(self, other, rng=None):
         return False
     
     def _crossover_swap_step(self, other, rng):
-        rng = np.random.default_rng()
+        rng = np.random.default_rng(rng)
         idx = rng.integers(1,len(self.pipeline))
 
         self.pipeline[idx], other.pipeline[idx] = other.pipeline[idx], self.pipeline[idx]
         return True
     
     def _crossover_swap_random_steps(self, other, rng):
-        rng = np.random.default_rng()
+        rng = np.random.default_rng(rng)
 
         max_steps = int(min(len(self.pipeline), len(other.pipeline))/2)
         max_steps = max(max_steps, 1)
@@ -71,7 +71,7 @@ def _crossover_swap_random_steps(self, other, rng):
         return True
 
     def _crossover_inner_step(self, other, rng):
-        rng = np.random.default_rng()
+        rng = np.random.default_rng(rng)
         
         crossover_success = False
         for idx in range(len(self.pipeline)):

From 2b59ec8c783cb87457cbad467bb32f6a98e5fcae Mon Sep 17 00:00:00 2001
From: perib <pedroribeiroh@gmail.com>
Date: Mon, 8 Jul 2024 17:36:36 -0700
Subject: [PATCH 02/12] wrapper now performs crossover over hyperparameters

---
 tpot2/search_spaces/pipelines/wrapper.py | 53 +++++++++++++++++++-----
 1 file changed, 42 insertions(+), 11 deletions(-)

diff --git a/tpot2/search_spaces/pipelines/wrapper.py b/tpot2/search_spaces/pipelines/wrapper.py
index 1cd33bf3..7b49e182 100644
--- a/tpot2/search_spaces/pipelines/wrapper.py
+++ b/tpot2/search_spaces/pipelines/wrapper.py
@@ -9,6 +9,11 @@
 from ConfigSpace import ConfigurationSpace
 from ..tuple_index import TupleIndex
 
+NONE_SPECIAL_STRING = "<NONE>"
+TRUE_SPECIAL_STRING = "<TRUE>"
+FALSE_SPECIAL_STRING = "<FALSE>"
+
+
 class WrapperPipelineIndividual(SklearnIndividual):
     def __init__(
             self, 
@@ -18,14 +23,7 @@ def __init__(
             hyperparameter_parser: callable = None,
             wrapped_param_name: str = None,
             rng=None) -> None:
-
-
-
         super().__init__()
-        
-        
-        
-
 
         self.method = method
         self.space = space
@@ -33,7 +31,6 @@ def __init__(
         self.hyperparameters_parser = hyperparameter_parser
         self.wrapped_param_name = wrapped_param_name
 
-
         rng = np.random.default_rng(rng)
         self.node = self.estimator_search_space.generate(rng)
         
@@ -44,8 +41,7 @@ def __init__(
             self.space.seed(rng.integers(0, 2**32))
             self.hyperparameters = dict(self.space.sample_configuration())
 
-        
-        
+        self.check_hyperparameters_for_None()
 
     def mutate(self, rng=None):
         rng = np.random.default_rng(rng)
@@ -60,14 +56,49 @@ def _mutate_hyperparameters(self, rng=None):
         rng = np.random.default_rng(rng)
         self.space.seed(rng.integers(0, 2**32))
         self.hyperparameters = dict(self.space.sample_configuration())
+        self.check_hyperparameters_for_None()
         return True
     
     def _mutate_node(self, rng=None):
         return self.node.mutate(rng)
 
     def _crossover(self, other, rng=None):
-        return self.node.crossover(other.node, rng)
+        if rng.choice([True, False]):
+            return self._crossover_hyperparameters(other, rng)
+        else:
+            self.estimator_search_space.crossover(other.estimator_search_space, rng)
     
+
+    def _crossover_hyperparameters(self, other, rng=None):
+        if isinstance(self.space, dict):
+            return False
+        
+        rng = np.random.default_rng(rng)
+        if self.method != other.method:
+            return False
+
+        #loop through hyperparameters, randomly swap items in self.hyperparameters with items in other.hyperparameters
+        for hyperparameter in self.space:
+            if rng.choice([True, False]):
+                if hyperparameter in other.hyperparameters:
+                    self.hyperparameters[hyperparameter] = other.hyperparameters[hyperparameter]
+
+        self.check_hyperparameters_for_None()
+
+        return True
+
+    def check_hyperparameters_for_None(self):
+        for key, value in self.hyperparameters.items():
+            #if string
+            if isinstance(value, str):
+                if value == NONE_SPECIAL_STRING:
+                    self.hyperparameters[key] = None
+                elif value == TRUE_SPECIAL_STRING:
+                    self.hyperparameters[key] = True
+                elif value == FALSE_SPECIAL_STRING:
+                    self.hyperparameters[key] = False
+
+
     def export_pipeline(self):
         
         if self.hyperparameters_parser is not None:

From 1b63414fdae8898d837aa582806219e61ea2edaa Mon Sep 17 00:00:00 2001
From: perib <pedroribeiroh@gmail.com>
Date: Mon, 8 Jul 2024 17:36:49 -0700
Subject: [PATCH 03/12] made naming clearer

---
 .../search_spaces/pipelines/dynamic_linear.py |   8 +-
 .../pipelines/dynamicunion copy.py            | 165 ------------------
 tpot2/search_spaces/pipelines/dynamicunion.py |  12 +-
 tpot2/search_spaces/pipelines/sequential.py   |   8 +-
 tpot2/search_spaces/pipelines/union.py        |  27 +--
 5 files changed, 17 insertions(+), 203 deletions(-)
 delete mode 100644 tpot2/search_spaces/pipelines/dynamicunion copy.py

diff --git a/tpot2/search_spaces/pipelines/dynamic_linear.py b/tpot2/search_spaces/pipelines/dynamic_linear.py
index 79ccedef..2ff2bf0b 100644
--- a/tpot2/search_spaces/pipelines/dynamic_linear.py
+++ b/tpot2/search_spaces/pipelines/dynamic_linear.py
@@ -69,7 +69,7 @@ def _crossover(self, other, rng=None):
         #swap a random step in the pipeline with the corresponding step in the other pipeline
 
         rng = np.random.default_rng(rng)
-        cx_funcs = [self._crossover_swap_random_steps, self._crossover_inner_step]
+        cx_funcs = [self._crossover_swap_multiple_nodes, self._crossover_node]
 
         rng.shuffle(cx_funcs)
         for cx_func in cx_funcs:
@@ -78,7 +78,7 @@ def _crossover(self, other, rng=None):
             
         return False
     
-    def _crossover_swap_random_steps(self, other, rng):
+    def _crossover_swap_multiple_nodes(self, other, rng):
         rng = np.random.default_rng(rng)
 
         max_steps = int(min(len(self.pipeline), len(other.pipeline))/2)
@@ -99,7 +99,7 @@ def _crossover_swap_random_steps(self, other, rng):
         
         return True
 
-    def _crossover_swap_step(self, other, rng):
+    def _crossover_swap_node(self, other, rng):
         if len(self.pipeline) != len(other.pipeline):
             return False
         
@@ -112,7 +112,7 @@ def _crossover_swap_step(self, other, rng):
         self.pipeline[idx], other.pipeline[idx] = other.pipeline[idx], self.pipeline[idx]
         return True
 
-    def _crossover_inner_step(self, other, rng):
+    def _crossover_node(self, other, rng):
         rng = np.random.default_rng(rng)
         
         pipeline1_indexes= list(range(len(self.pipeline)))
diff --git a/tpot2/search_spaces/pipelines/dynamicunion copy.py b/tpot2/search_spaces/pipelines/dynamicunion copy.py
deleted file mode 100644
index 7951c25c..00000000
--- a/tpot2/search_spaces/pipelines/dynamicunion copy.py	
+++ /dev/null
@@ -1,165 +0,0 @@
-import tpot2
-import numpy as np
-import pandas as pd
-import sklearn
-from tpot2 import config
-from typing import Generator, List, Tuple, Union
-import random
-from ..base import SklearnIndividual, SklearnIndividualGenerator
-from ..tuple_index import TupleIndex
-
-class DynamicUnionPipelineIndividual(SklearnIndividual):
-    """
-    Takes in one search space.
-    Will produce a FeatureUnion of up to max_estimators number of steps.
-    The output of the FeatureUnion will the all of the steps concatenated together.
-    
-    """
-
-    def __init__(self, search_space : SklearnIndividualGenerator, max_estimators=None, rng=None) -> None:
-        super().__init__()
-        self.search_space = search_space
-        
-        if max_estimators is None:
-            self.max_estimators = np.inf
-        else:
-            self.max_estimators = max_estimators
-
-        self.pipeline = []
-        
-        if self.max_estimators == np.inf:
-            init_max = 3
-        else:
-            init_max = self.max_estimators
-
-        rng = np.random.default_rng(rng)
-
-        for _ in range(rng.integers(1, init_max)):
-            self.pipeline.append(self.search_space.generate(rng))
-    
-    def mutate(self, rng=None):
-        rng = np.random.default_rng(rng)
-        mutation_funcs = [self._mutate_add_step, self._mutate_remove_step, self._mutate_replace_step, self._mutate_inner_step]
-        rng.shuffle(mutation_funcs)
-        for mutation_func in mutation_funcs:
-            if mutation_func(rng):
-                return True
-    
-    def _mutate_add_step(self, rng):
-        rng = np.random.default_rng(rng)
-        if len(self.pipeline) < self.max_estimators:
-            self.pipeline.append(self.search_space.generate(rng))
-            return True
-        return False
-    
-    def _mutate_remove_step(self, rng):
-        rng = np.random.default_rng(rng)
-        if len(self.pipeline) > 1:
-            self.pipeline.pop(rng.integers(0, len(self.pipeline)))
-            return True
-        return False
-
-    def _mutate_replace_step(self, rng):
-        rng = np.random.default_rng(rng)
-        idx = rng.integers(0, len(self.pipeline))
-        self.pipeline[idx] = self.search_space.generate(rng)
-        return True
-    
-    #TODO mutate one step or multiple?
-    def _mutate_inner_step(self, rng):
-        rng = np.random.default_rng(rng)
-        indexes = rng.random(len(self.pipeline)) < 0.5
-        indexes = np.where(indexes)[0]
-        mutated = False
-        if len(indexes) > 0:
-            for idx in indexes:
-                if self.pipeline[idx].mutate(rng):
-                    mutated = True
-        else:
-            mutated = self.pipeline[rng.integers(0, len(self.pipeline))].mutate(rng)
-
-        return mutated
-
-
-    def _crossover(self, other, rng=None):
-        rng = np.random.default_rng(rng)
-
-        cx_funcs = [self._crossover_swap_random_steps, self._crossover_inner_step]
-        rng.shuffle(cx_funcs)
-        for cx_func in cx_funcs:
-            if cx_func(other, rng):
-                return True
-
-        return False
-    
-    def _crossover_swap_step(self, other, rng):
-        rng = np.random.default_rng(rng)
-        idx = rng.integers(1,len(self.pipeline))
-        idx2 = rng.integers(1,len(other.pipeline))
-
-        self.pipeline[idx], other.pipeline[idx2] = other.pipeline[idx2], self.pipeline[idx]
-        # self.pipeline[idx] = other.pipeline[idx2]
-        return True
-    
-    def _crossover_swap_random_steps(self, other, rng):
-        rng = np.random.default_rng(rng)
-
-        max_steps = int(min(len(self.pipeline), len(other.pipeline))/2)
-        max_steps = max(max_steps, 1)
-        
-        if max_steps == 1:
-            n_steps_to_swap = 1
-        else:
-            n_steps_to_swap = rng.integers(1, max_steps)
-
-        other_indexes_to_take = rng.choice(len(other.pipeline), n_steps_to_swap, replace=False)
-        self_indexes_to_replace = rng.choice(len(self.pipeline), n_steps_to_swap, replace=False)
-
-        # self.pipeline[self_indexes_to_replace], other.pipeline[other_indexes_to_take] = other.pipeline[other_indexes_to_take], self.pipeline[self_indexes_to_replace]
-        
-        for self_idx, other_idx in zip(self_indexes_to_replace, other_indexes_to_take):
-            self.pipeline[self_idx], other.pipeline[other_idx] = other.pipeline[other_idx], self.pipeline[self_idx]
-        
-        return True
-        
-
-
-    def _crossover_inner_step(self, other, rng):
-        rng = np.random.default_rng(rng)
-        
-        #randomly select pairs of steps to crossover
-        indexes = list(range(1, len(self.pipeline)))
-        other_indexes = list(range(1, len(other.pipeline)))
-        #shuffle
-        rng.shuffle(indexes)
-        rng.shuffle(other_indexes)
-
-        crossover_success = False
-        for idx, other_idx in zip(indexes, other_indexes):
-            if self.pipeline[idx].crossover(other.pipeline[other_idx], rng):
-                crossover_success = True
-                
-        return crossover_success
-    
-    def export_pipeline(self):
-        return sklearn.pipeline.make_union(*[step.export_pipeline() for step in self.pipeline])
-    
-    def unique_id(self):
-        l = [step.unique_id() for step in self.pipeline]
-        # if all items are strings, then sort them
-        if all([isinstance(x, str) for x in l]):
-            l.sort()
-        l = ["FeatureUnion"] + l
-        return TupleIndex(tuple(l))
-
-
-class DynamicUnionPipeline(SklearnIndividualGenerator):
-    def __init__(self, search_spaces : List[SklearnIndividualGenerator] ) -> None:
-        """
-        Takes in a list of search spaces. will produce a pipeline of Sequential length. Each step in the pipeline will correspond to the the search space provided in the same index.
-        """
-        
-        self.search_spaces = search_spaces
-
-    def generate(self, rng=None):
-        return DynamicUnionPipelineIndividual(self.search_spaces)
\ No newline at end of file
diff --git a/tpot2/search_spaces/pipelines/dynamicunion.py b/tpot2/search_spaces/pipelines/dynamicunion.py
index 401c16ef..01651c29 100644
--- a/tpot2/search_spaces/pipelines/dynamicunion.py
+++ b/tpot2/search_spaces/pipelines/dynamicunion.py
@@ -42,7 +42,7 @@ def __init__(self, search_space : SklearnIndividualGenerator, max_estimators=Non
     
     def mutate(self, rng=None):
         rng = np.random.default_rng(rng)
-        mutation_funcs = [self._mutate_add_step, self._mutate_remove_step, self._mutate_replace_step, self._mutate_inner_step]
+        mutation_funcs = [self._mutate_add_step, self._mutate_remove_step, self._mutate_replace_step, self._mutate_note]
         rng.shuffle(mutation_funcs)
         for mutation_func in mutation_funcs:
             if mutation_func(rng):
@@ -72,7 +72,7 @@ def _mutate_replace_step(self, rng):
         return changed
     
     #TODO mutate one step or multiple?
-    def _mutate_inner_step(self, rng):
+    def _mutate_note(self, rng):
         rng = np.random.default_rng(rng)
         changed = False
         values = list(self.union_dict.values())
@@ -88,7 +88,7 @@ def _mutate_inner_step(self, rng):
     def _crossover(self, other, rng=None):
         rng = np.random.default_rng(rng)
 
-        cx_funcs = [self._crossover_swap_random_steps, self._crossover_inner_step]
+        cx_funcs = [self._crossover_swap_multiple_nodes, self._crossover_node]
         rng.shuffle(cx_funcs)
         for cx_func in cx_funcs:
             if cx_func(other, rng):
@@ -96,7 +96,7 @@ def _crossover(self, other, rng=None):
 
         return False
     
-    def _crossover_swap_step(self, other, rng):
+    def _crossover_swap_node(self, other, rng):
         rng = np.random.default_rng(rng)
         changed = False
 
@@ -117,7 +117,7 @@ def _crossover_swap_step(self, other, rng):
 
 
     
-    def _crossover_swap_random_steps(self, other, rng):
+    def _crossover_swap_multiple_nodes(self, other, rng):
         rng = np.random.default_rng(rng)
         self_values = list(self.union_dict.values())
         other_values = list(other.union_dict.values())
@@ -136,7 +136,7 @@ def _crossover_swap_random_steps(self, other, rng):
         return True
 
 
-    def _crossover_inner_step(self, other, rng):
+    def _crossover_node(self, other, rng):
         rng = np.random.default_rng(rng)
         
         changed = False
diff --git a/tpot2/search_spaces/pipelines/sequential.py b/tpot2/search_spaces/pipelines/sequential.py
index 7a7e6a99..2fa15f9a 100644
--- a/tpot2/search_spaces/pipelines/sequential.py
+++ b/tpot2/search_spaces/pipelines/sequential.py
@@ -44,7 +44,7 @@ def _crossover(self, other, rng=None):
             return False
 
         rng = np.random.default_rng(rng)
-        cx_funcs = [self._crossover_swap_random_steps, self._crossover_swap_segment, self._crossover_inner_step]
+        cx_funcs = [self._crossover_swap_multiple_nodes, self._crossover_swap_segment, self._crossover_node]
 
         rng.shuffle(cx_funcs)
         for cx_func in cx_funcs:
@@ -53,7 +53,7 @@ def _crossover(self, other, rng=None):
             
         return False
 
-    def _crossover_swap_step(self, other, rng):
+    def _crossover_swap_node(self, other, rng):
         if len(self.pipeline) != len(other.pipeline):
             return False
         
@@ -64,7 +64,7 @@ def _crossover_swap_step(self, other, rng):
         self.pipeline[idx], other.pipeline[idx] = other.pipeline[idx], self.pipeline[idx]
         return True
     
-    def _crossover_swap_random_steps(self, other, rng):
+    def _crossover_swap_multiple_nodes(self, other, rng):
 
         if len(self.pipeline) != len(other.pipeline):
             return False
@@ -108,7 +108,7 @@ def _crossover_swap_segment(self, other, rng):
 
         return True
     
-    def _crossover_inner_step(self, other, rng):
+    def _crossover_node(self, other, rng):
         rng = np.random.default_rng(rng)
         
         # crossover_success = False
diff --git a/tpot2/search_spaces/pipelines/union.py b/tpot2/search_spaces/pipelines/union.py
index 32f988e6..a9f8215a 100644
--- a/tpot2/search_spaces/pipelines/union.py
+++ b/tpot2/search_spaces/pipelines/union.py
@@ -34,7 +34,7 @@ def _crossover(self, other, rng=None):
         #swap a random step in the pipeline with the corresponding step in the other pipeline
         rng = np.random.default_rng(rng)
 
-        cx_funcs = [self._crossover_inner_step]
+        cx_funcs = [self._crossover_node, self._crossover_swap_node]
         rng.shuffle(cx_funcs)
         for cx_func in cx_funcs:
             if cx_func(other, rng):
@@ -42,35 +42,14 @@ def _crossover(self, other, rng=None):
 
         return False
     
-    def _crossover_swap_step(self, other, rng):
+    def _crossover_swap_node(self, other, rng):
         rng = np.random.default_rng(rng)
         idx = rng.integers(1,len(self.pipeline))
 
         self.pipeline[idx], other.pipeline[idx] = other.pipeline[idx], self.pipeline[idx]
         return True
-    
-    def _crossover_swap_random_steps(self, other, rng):
-        rng = np.random.default_rng(rng)
-
-        max_steps = int(min(len(self.pipeline), len(other.pipeline))/2)
-        max_steps = max(max_steps, 1)
-        
-        if max_steps == 1:
-            n_steps_to_swap = 1
-        else:
-            n_steps_to_swap = rng.integers(1, max_steps)
-
-        other_indexes_to_take = rng.choice(len(other.pipeline), n_steps_to_swap, replace=False)
-        self_indexes_to_replace = rng.choice(len(self.pipeline), n_steps_to_swap, replace=False)
-
-        # self.pipeline[self_indexes_to_replace], other.pipeline[other_indexes_to_take] = other.pipeline[other_indexes_to_take], self.pipeline[self_indexes_to_replace]
-        
-        for self_idx, other_idx in zip(self_indexes_to_replace, other_indexes_to_take):
-            self.pipeline[self_idx], other.pipeline[other_idx] = other.pipeline[other_idx], self.pipeline[self_idx]
-        
-        return True
 
-    def _crossover_inner_step(self, other, rng):
+    def _crossover_node(self, other, rng):
         rng = np.random.default_rng(rng)
         
         crossover_success = False

From e3886513b627ce882ba0ad1fcf6d78f062752c77 Mon Sep 17 00:00:00 2001
From: perib <pedroribeiroh@gmail.com>
Date: Mon, 8 Jul 2024 18:21:48 -0700
Subject: [PATCH 04/12] wrapper for crossover functions so that subclasses can
 use crossover instead of _crossover

---
 tpot2/search_spaces/base.py                   | 22 ++++++---
 tpot2/search_spaces/nodes/estimator_node.py   |  2 +-
 tpot2/search_spaces/nodes/fss_node.py         |  2 +-
 .../nodes/genetic_feature_selection.py        |  2 +-
 tpot2/search_spaces/pipelines/choice.py       |  2 +-
 .../search_spaces/pipelines/dynamic_linear.py |  2 +-
 tpot2/search_spaces/pipelines/dynamicunion.py | 25 ++--------
 tpot2/search_spaces/pipelines/graph.py        | 46 +++++++++----------
 tpot2/search_spaces/pipelines/sequential.py   |  2 +-
 tpot2/search_spaces/pipelines/union.py        |  2 +-
 tpot2/search_spaces/pipelines/wrapper.py      |  2 +-
 11 files changed, 48 insertions(+), 61 deletions(-)

diff --git a/tpot2/search_spaces/base.py b/tpot2/search_spaces/base.py
index 2977d491..3133057e 100644
--- a/tpot2/search_spaces/base.py
+++ b/tpot2/search_spaces/base.py
@@ -15,23 +15,31 @@
 
 
 
+
 class SklearnIndividual(tpot2.BaseIndividual):
 
+    def __init_subclass__(cls):
+        cls.crossover = cls.validate_same_type(cls.crossover)
+
+
     def __init__(self,) -> None:
         super().__init__()
 
     def mutate(self, rng=None):
         return
 
-    @final
     def crossover(self, other, rng=None, **kwargs):
-        if not isinstance(other, type(self)):
-            return False
-        return self._crossover(other, rng=rng, **kwargs)
+        return 
     
-    @abstractmethod
-    def _crossover(self, other, rng=None):
-        return
+    @final
+    def validate_same_type(func):
+        """Wrap crossover `func` so it returns False unless `other` is an instance of type(self)."""
+        def wrapper(self, other, rng=None, **kwargs):
+            if not isinstance(other, type(self)):
+                return False
+            return func(self, other, rng=rng, **kwargs)  # forward the caller's rng; rng=None discarded the seed
+
+        return wrapper
 
     def export_pipeline(self) -> BaseEstimator:
         return
diff --git a/tpot2/search_spaces/nodes/estimator_node.py b/tpot2/search_spaces/nodes/estimator_node.py
index 4724405e..50d698f3 100644
--- a/tpot2/search_spaces/nodes/estimator_node.py
+++ b/tpot2/search_spaces/nodes/estimator_node.py
@@ -60,7 +60,7 @@ def mutate(self, rng=None):
         self.check_hyperparameters_for_None()
         return True
 
-    def _crossover(self, other, rng=None):
+    def crossover(self, other, rng=None):
         if isinstance(self.space, dict):
             return False
         
diff --git a/tpot2/search_spaces/nodes/fss_node.py b/tpot2/search_spaces/nodes/fss_node.py
index 46aef024..4dda0d92 100644
--- a/tpot2/search_spaces/nodes/fss_node.py
+++ b/tpot2/search_spaces/nodes/fss_node.py
@@ -51,7 +51,7 @@ def mutate(self, rng=None):
         self.sel_subset = self.subset_dict[self.selected_subset_name]
         
     
-    def _crossover(self, other, rng=None):
+    def crossover(self, other, rng=None):
         self.selected_subset_name = other.selected_subset_name
         self.sel_subset = other.sel_subset
 
diff --git a/tpot2/search_spaces/nodes/genetic_feature_selection.py b/tpot2/search_spaces/nodes/genetic_feature_selection.py
index 9e36e666..f9c4892a 100644
--- a/tpot2/search_spaces/nodes/genetic_feature_selection.py
+++ b/tpot2/search_spaces/nodes/genetic_feature_selection.py
@@ -69,7 +69,7 @@ def mutate(self, rng=None):
         
         return rng.choice(self.mutation_list)(rng)
     
-    def _crossover(self, other, rng=None):
+    def crossover(self, other, rng=None):
         rng = np.random.default_rng(rng)
         
         if rng.uniform() < self.crossover_rate_rate:
diff --git a/tpot2/search_spaces/pipelines/choice.py b/tpot2/search_spaces/pipelines/choice.py
index ebe9c51c..25051aa0 100644
--- a/tpot2/search_spaces/pipelines/choice.py
+++ b/tpot2/search_spaces/pipelines/choice.py
@@ -29,7 +29,7 @@ def _mutate_select_new_node(self, rng=None):
     def _mutate_node(self, rng=None):
         return self.node.mutate(rng)
 
-    def _crossover(self, other, rng=None):
+    def crossover(self, other, rng=None):
         return self.node.crossover(other.node, rng)
     
     def export_pipeline(self):
diff --git a/tpot2/search_spaces/pipelines/dynamic_linear.py b/tpot2/search_spaces/pipelines/dynamic_linear.py
index 2ff2bf0b..528ec7c4 100644
--- a/tpot2/search_spaces/pipelines/dynamic_linear.py
+++ b/tpot2/search_spaces/pipelines/dynamic_linear.py
@@ -65,7 +65,7 @@ def _mutate_step(self, rng=None):
         return step.mutate(rng)
     
 
-    def _crossover(self, other, rng=None):
+    def crossover(self, other, rng=None):
         #swap a random step in the pipeline with the corresponding step in the other pipeline
 
         rng = np.random.default_rng(rng)
diff --git a/tpot2/search_spaces/pipelines/dynamicunion.py b/tpot2/search_spaces/pipelines/dynamicunion.py
index 01651c29..8d8772eb 100644
--- a/tpot2/search_spaces/pipelines/dynamicunion.py
+++ b/tpot2/search_spaces/pipelines/dynamicunion.py
@@ -85,7 +85,7 @@ def _mutate_note(self, rng):
         return changed
 
 
-    def _crossover(self, other, rng=None):
+    def crossover(self, other, rng=None):
         rng = np.random.default_rng(rng)
 
         cx_funcs = [self._crossover_swap_multiple_nodes, self._crossover_node]
@@ -95,28 +95,8 @@ def _crossover(self, other, rng=None):
                 return True
 
         return False
-    
-    def _crossover_swap_node(self, other, rng):
-        rng = np.random.default_rng(rng)
-        changed = False
-
-        self_step = rng.choice(list(self.union_dict.values()))
-        other_step = rng.choice(list(other.union_dict.values()))
-
-        if other_step.unique_id() in self.union_dict:
-            self.union_dict[other_step.unique_id()] = other_step
-            self.union_dict.pop(self_step.unique_id())
-            changed = True
-
-        if self_step.unique_id() in other.union_dict:
-            other.union_dict[self_step.unique_id()] = self_step
-            other.union_dict.pop(other_step.unique_id())
 
-        return changed
-        
-
-
-    
+            
     def _crossover_swap_multiple_nodes(self, other, rng):
         rng = np.random.default_rng(rng)
         self_values = list(self.union_dict.values())
@@ -128,6 +108,7 @@ def _crossover_swap_multiple_nodes(self, other, rng):
         self_idx = rng.integers(0,len(self_values))
         other_idx = rng.integers(0,len(other_values))
 
+        #Note that this is not one-point-crossover since the sequence doesn't matter. this is just a quick way to swap multiple random items
         self_values[:self_idx], other_values[:other_idx] = other_values[:other_idx], self_values[:self_idx]
         
         self.union_dict = {step.unique_id(): step for step in self_values}
diff --git a/tpot2/search_spaces/pipelines/graph.py b/tpot2/search_spaces/pipelines/graph.py
index e3e49a1b..fc769b1c 100644
--- a/tpot2/search_spaces/pipelines/graph.py
+++ b/tpot2/search_spaces/pipelines/graph.py
@@ -111,35 +111,33 @@ def __init__(
 
     def mutate(self, rng=None):
         rng = np.random.default_rng(rng)
+        rng.shuffle(self.mutate_methods_list)
+        for mutate_method in self.mutate_methods_list:
+            if mutate_method(rng=rng):
+                
+                if self.merge_duplicated_nodes_toggle:
+                    self._merge_duplicated_nodes()
 
-        for i in range(0,random.randint(1,15)):
-            rng.shuffle(self.mutate_methods_list)
-            for mutate_method in self.mutate_methods_list:
-                if mutate_method(rng=rng):
-                    
-                    if self.merge_duplicated_nodes_toggle:
-                        self._merge_duplicated_nodes()
-
-                    if self.__debug:
-                        print(mutate_method)
+                if self.__debug:
+                    print(mutate_method)
 
-                        if self.root not in self.graph.nodes:
-                            print('lost root something went wrong with ', mutate_method)
+                    if self.root not in self.graph.nodes:
+                        print('lost root something went wrong with ', mutate_method)
 
-                        if len(self.graph.predecessors(self.root)) > 0:
-                            print('root has parents ', mutate_method)
+                    if len(self.graph.predecessors(self.root)) > 0:
+                        print('root has parents ', mutate_method)
 
-                        if any([n in nx.ancestors(self.graph,n) for n in self.graph.nodes]):
-                            print('a node is connecting to itself...')
+                    if any([n in nx.ancestors(self.graph,n) for n in self.graph.nodes]):
+                        print('a node is connecting to itself...')
 
-                        if self.__debug:
-                            try:
-                                nx.find_cycle(self.graph)
-                                print('something went wrong with ', mutate_method)
-                            except:
-                                pass
+                    if self.__debug:
+                        try:
+                            nx.find_cycle(self.graph)
+                            print('something went wrong with ', mutate_method)
+                        except:
+                            pass
 
-                    self.graphkey = None
+                self.graphkey = None
 
         return False
 
@@ -323,7 +321,7 @@ def _mutate_insert_bypass_node(self, rng=None):
         return False
 
 
-    def _crossover(self, ind2, rng=None):
+    def crossover(self, ind2, rng=None):
         '''
         self is the first individual, ind2 is the second individual
         If crossover_same_depth, it will select graphindividuals at the same recursive depth.
diff --git a/tpot2/search_spaces/pipelines/sequential.py b/tpot2/search_spaces/pipelines/sequential.py
index 2fa15f9a..75bad8d2 100644
--- a/tpot2/search_spaces/pipelines/sequential.py
+++ b/tpot2/search_spaces/pipelines/sequential.py
@@ -38,7 +38,7 @@ def mutate(self, rng=None):
         return step.mutate(rng)
      
 
-    def _crossover(self, other, rng=None):
+    def crossover(self, other, rng=None):
         #swap a random step in the pipeline with the corresponding step in the other pipeline
         if len(self.pipeline) != len(other.pipeline):
             return False
diff --git a/tpot2/search_spaces/pipelines/union.py b/tpot2/search_spaces/pipelines/union.py
index a9f8215a..811ef38b 100644
--- a/tpot2/search_spaces/pipelines/union.py
+++ b/tpot2/search_spaces/pipelines/union.py
@@ -30,7 +30,7 @@ def mutate(self, rng=None):
         return step.mutate(rng)
      
 
-    def _crossover(self, other, rng=None):
+    def crossover(self, other, rng=None):
         #swap a random step in the pipeline with the corresponding step in the other pipeline
         rng = np.random.default_rng(rng)
 
diff --git a/tpot2/search_spaces/pipelines/wrapper.py b/tpot2/search_spaces/pipelines/wrapper.py
index 7b49e182..df504a89 100644
--- a/tpot2/search_spaces/pipelines/wrapper.py
+++ b/tpot2/search_spaces/pipelines/wrapper.py
@@ -62,7 +62,7 @@ def _mutate_hyperparameters(self, rng=None):
     def _mutate_node(self, rng=None):
         return self.node.mutate(rng)
 
-    def _crossover(self, other, rng=None):
+    def crossover(self, other, rng=None):
         if rng.choice([True, False]):
             return self._crossover_hyperparameters(other, rng)
         else:

From a250ed2efc2ef671f63401baf61ca652bce273d4 Mon Sep 17 00:00:00 2001
From: perib <pedroribeiroh@gmail.com>
Date: Mon, 8 Jul 2024 18:25:16 -0700
Subject: [PATCH 05/12] tpot estimator documentation edit

---
 tpot2/tpot_estimator/estimator.py | 35 +++++++++++++++++--------------
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/tpot2/tpot_estimator/estimator.py b/tpot2/tpot_estimator/estimator.py
index f4c0e954..9bc33e8a 100644
--- a/tpot2/tpot_estimator/estimator.py
+++ b/tpot2/tpot_estimator/estimator.py
@@ -112,7 +112,7 @@ def __init__(self,
 
         Parameters
         ----------
-        default_search_space : (String, tpot2.search_spaces.SklearnIndividualGenerator)
+        search_space : (String, tpot2.search_spaces.SklearnIndividualGenerator)
             - String : The default search space to use for the optimization. This can be either "linear" or "graph". If "linear", will use the default linear pipeline search space. If "graph", will use the default graph pipeline search space.
             - SklearnIndividualGenerator : The search space to use for the optimization. This should be an instance of a SklearnIndividualGenerator.
                 The search space to use for the optimization. This should be an instance of a SklearnIndividualGenerator.
@@ -145,6 +145,7 @@ def __init__(self,
 
         bigger_is_better : bool, default=True
             If True, the objective function is maximized. If False, the objective function is minimized. Use negative weights to reverse the direction.
+        
         cross_val_predict_cv : int, default=0
             Number of folds to use for the cross_val_predict function for inner classifiers and regressors. Estimators will still be fit on the full dataset, but the following node will get the outputs from cross_val_predict.
 
@@ -152,20 +153,6 @@ def __init__(self,
             - >=2 : When fitting pipelines with inner classifiers or regressors, they will still be fit on the full dataset.
                     However, the output to the next node will come from cross_val_predict with the specified number of folds.
 
-        categorical_features: list or None
-            Categorical columns to inpute and/or one hot encode during the preprocessing step. Used only if preprocessing is not False.
-            - None : If None, TPOT2 will automatically use object columns in pandas dataframes as objects for one hot encoding in preprocessing.
-            - List of categorical features. If X is a dataframe, this should be a list of column names. If X is a numpy array, this should be a list of column indices
-
-        subsets : str or list, default=None
-            Sets the subsets that the FeatureSetSeletor will select from if set as an option in one of the configuration dictionaries.
-            - str : If a string, it is assumed to be a path to a csv file with the subsets.
-                The first column is assumed to be the name of the subset and the remaining columns are the features in the subset.
-            - list or np.ndarray : If a list or np.ndarray, it is assumed to be a list of subsets.
-            - None : If None, each column will be treated as a subset. One column will be selected per subset.
-            If subsets is None, each column will be treated as a subset. One column will be selected per subset.
-
-
         memory: Memory object or string, default=None
             If supplied, pipeline will cache each transformer after calling fit. This feature
             is used to avoid computing the fit transformers within a pipeline if the parameters
@@ -180,7 +167,20 @@ def __init__(self,
                 TPOT uses the instance of joblib.Memory for memory caching,
                 and TPOT does NOT clean the caching directory up upon shutdown.
             - None:
-                TPOT does not use memory caching.
+                TPOT does not use memory caching.              
+
+        categorical_features: list or None
+            Categorical columns to impute and/or one hot encode during the preprocessing step. Used only if preprocessing is not False.
+            - None : If None, TPOT2 will automatically use object columns in pandas dataframes as objects for one hot encoding in preprocessing.
+            - List of categorical features. If X is a dataframe, this should be a list of column names. If X is a numpy array, this should be a list of column indices
+
+        subsets : str or list, default=None
+            Sets the subsets that the FeatureSetSelector will select from if set as an option in one of the configuration dictionaries.
+            - str : If a string, it is assumed to be a path to a csv file with the subsets.
+                The first column is assumed to be the name of the subset and the remaining columns are the features in the subset.
+            - list or np.ndarray : If a list or np.ndarray, it is assumed to be a list of subsets.
+            - None : If None, each column will be treated as a subset. One column will be selected per subset.
+            If subsets is None, each column will be treated as a subset. One column will be selected per subset.
 
         preprocessing : bool or BaseEstimator/Pipeline,
             EXPERIMENTAL
@@ -329,6 +329,9 @@ def __init__(self,
             >=5. full warnings trace
             6. evaluations progress bar. (Temporary: This used to be 2. Currently, using evaluation progress bar may prevent some instances were we terminate a generation early due to it reaching max_time_seconds in the middle of a generation OR a pipeline failed to be terminated normally and we need to manually terminate it.)
 
+        scatter : bool, default=True
+            If True, will scatter the data to the dask workers. If False, will not scatter the data. This can be useful for debugging.
+
         random_state : int, None, default=None
             A seed for reproducability of experiments. This value will be passed to numpy.random.default_rng() to create an instnce of the genrator to pass to other classes
 

From 6905e6f91b6b86d52a0334a32c06d9462e23ada6 Mon Sep 17 00:00:00 2001
From: perib <pedroribeiroh@gmail.com>
Date: Mon, 8 Jul 2024 19:12:00 -0700
Subject: [PATCH 06/12] update documentation

---
 tpot2/tpot_estimator/estimator.py             | 48 ++++++++++---------
 .../tpot_estimator/templates/tpottemplates.py |  8 ++--
 2 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/tpot2/tpot_estimator/estimator.py b/tpot2/tpot_estimator/estimator.py
index 9bc33e8a..bb1f3b3f 100644
--- a/tpot2/tpot_estimator/estimator.py
+++ b/tpot2/tpot_estimator/estimator.py
@@ -184,8 +184,8 @@ def __init__(self,
 
         preprocessing : bool or BaseEstimator/Pipeline,
             EXPERIMENTAL
-            A pipeline that will be used to preprocess the data before CV.
-            - bool : If True, will use a default preprocessing pipeline.
+            A pipeline that will be used to preprocess the data before CV. Note that the parameters for these steps are not optimized. Add them to the search space to be optimized.
+            - bool : If True, will use a default preprocessing pipeline which includes imputation followed by one hot encoding.
             - Pipeline : If an instance of a pipeline is given, will use that pipeline as the preprocessing pipeline.
 
         population_size : int, default=50
@@ -562,7 +562,7 @@ def fit(self, X, y):
                 if self.categorical_features is not None: #if categorical features are specified, use those
                     pipeline_steps.append(("impute_categorical", tpot2.builtin_modules.ColumnSimpleImputer(self.categorical_features, strategy='most_frequent')))
                     pipeline_steps.append(("impute_numeric", tpot2.builtin_modules.ColumnSimpleImputer("numeric", strategy='mean')))
-                    pipeline_steps.append(("impute_categorical", tpot2.builtin_modules.ColumnOneHotEncoder(self.categorical_features, strategy='most_frequent')))
+                    pipeline_steps.append(("ColumnOneHotEncoder", tpot2.builtin_modules.ColumnOneHotEncoder(self.categorical_features, strategy='most_frequent')))
 
                 else:
                     if isinstance(X, pd.DataFrame):
@@ -570,7 +570,7 @@ def fit(self, X, y):
                         if len(categorical_columns) > 0:
                             pipeline_steps.append(("impute_categorical", tpot2.builtin_modules.ColumnSimpleImputer("categorical", strategy='most_frequent')))
                             pipeline_steps.append(("impute_numeric", tpot2.builtin_modules.ColumnSimpleImputer("numeric", strategy='mean')))
-                            pipeline_steps.append(("impute_categorical", tpot2.builtin_modules.ColumnOneHotEncoder("categorical", strategy='most_frequent')))
+                            pipeline_steps.append(("ColumnOneHotEncoder", tpot2.builtin_modules.ColumnOneHotEncoder("categorical", strategy='most_frequent')))
                         else:
                             pipeline_steps.append(("impute_numeric", tpot2.builtin_modules.ColumnSimpleImputer("all", strategy='mean')))
                     else:
@@ -661,30 +661,34 @@ def objective_function(pipeline_individual,
         self._search_space = get_default_search_space(self.search_space, classification=True, inner_predictors=True, **get_search_space_params)
 
 
-        if check_empty_values(X):
-            from sklearn.experimental import enable_iterative_imputer
+        # TODO : Add check for empty values in X and if so, add imputation to the search space
+        # make this depend on self.preprocessing
+        # if check_empty_values(X):
+        #     from sklearn.experimental import enable_iterative_imputer
 
-            from ConfigSpace import ConfigurationSpace
-            from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal
-            iterative_imputer_cs = ConfigurationSpace(
-                space = {
-                    'n_nearest_features' : Categorical('n_nearest_features', [100]),
-                    'initial_strategy' : Categorical('initial_strategy', ['mean','median', 'most_frequent', ]),
-                    'add_indicator' : Categorical('add_indicator', [True, False]),
-                }
-            )
+        #     from ConfigSpace import ConfigurationSpace
+        #     from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal
+        #     iterative_imputer_cs = ConfigurationSpace(
+        #         space = {
+        #             'n_nearest_features' : Categorical('n_nearest_features', [100]),
+        #             'initial_strategy' : Categorical('initial_strategy', ['mean','median', 'most_frequent', ]),
+        #             'add_indicator' : Categorical('add_indicator', [True, False]),
+        #         }
+        #     )
 
-            imputation_search = tpot2.search_spaces.pipelines.ChoicePipeline([
-                tpot2.config.get_search_space("SimpleImputer"),
-                tpot2.search_spaces.nodes.EstimatorNode(sklearn.impute.IterativeImputer, iterative_imputer_cs)
-            ])
+        #     imputation_search = tpot2.search_spaces.pipelines.ChoicePipeline([
+        #         tpot2.config.get_search_space("SimpleImputer"),
+        #         tpot2.search_spaces.nodes.EstimatorNode(sklearn.impute.IterativeImputer, iterative_imputer_cs)
+        #     ])
 
 
 
 
-            self.search_space_final = tpot2.search_spaces.pipelines.SequentialPipeline(search_spaces=[ imputation_search, self._search_space], memory="sklearn_pipeline_memory")
-        else:
-            self.search_space_final = self._search_space
+        #     self.search_space_final = tpot2.search_spaces.pipelines.SequentialPipeline(search_spaces=[ imputation_search, self._search_space], memory="sklearn_pipeline_memory")
+        # else:
+        #     self.search_space_final = self._search_space
+
+        self.search_space_final = self._search_space
 
         def ind_generator(rng):
             rng = np.random.default_rng(rng)
diff --git a/tpot2/tpot_estimator/templates/tpottemplates.py b/tpot2/tpot_estimator/templates/tpottemplates.py
index d31dcc88..3871e6e1 100644
--- a/tpot2/tpot_estimator/templates/tpottemplates.py
+++ b/tpot2/tpot_estimator/templates/tpottemplates.py
@@ -104,8 +104,8 @@ def __init__(       self,
 
         preprocessing : bool or BaseEstimator/Pipeline,
             EXPERIMENTAL
-            A pipeline that will be used to preprocess the data before CV.
-            - bool : If True, will use a default preprocessing pipeline.
+            A pipeline that will be used to preprocess the data before CV. Note that the parameters of these preprocessing steps are not optimized; to optimize them, add the steps to the search space instead.
+            - bool : If True, will use a default preprocessing pipeline that performs imputation followed by one-hot encoding.
             - Pipeline : If an instance of a pipeline is given, will use that pipeline as the preprocessing pipeline.
 
         max_time_seconds : float, default=float("inf")
@@ -358,8 +358,8 @@ def __init__(       self,
 
         preprocessing : bool or BaseEstimator/Pipeline,
             EXPERIMENTAL
-            A pipeline that will be used to preprocess the data before CV.
-            - bool : If True, will use a default preprocessing pipeline.
+            A pipeline that will be used to preprocess the data before CV. Note that the parameters of these preprocessing steps are not optimized; to optimize them, add the steps to the search space instead.
+            - bool : If True, will use a default preprocessing pipeline that performs imputation followed by one-hot encoding.
             - Pipeline : If an instance of a pipeline is given, will use that pipeline as the preprocessing pipeline.
 
         max_time_seconds : float, default=float("inf")

From f7b4b270e72e8ac77fd6caa815a5d1ee3ed3266d Mon Sep 17 00:00:00 2001
From: perib <pedroribeiroh@gmail.com>
Date: Tue, 9 Jul 2024 13:45:26 -0700
Subject: [PATCH 07/12] fixed imputation tutorial

---
 .../Example_Search_Spaces/imputation.ipynb    | 507 +++++++++++++++++-
 1 file changed, 481 insertions(+), 26 deletions(-)

diff --git a/Tutorial/Example_Search_Spaces/imputation.ipynb b/Tutorial/Example_Search_Spaces/imputation.ipynb
index 07532532..b6de7ef8 100644
--- a/Tutorial/Example_Search_Spaces/imputation.ipynb
+++ b/Tutorial/Example_Search_Spaces/imputation.ipynb
@@ -2,16 +2,32 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Configuration(values={\n",
+       "  'add_indicator': False,\n",
+       "  'strategy': 'most_frequent',\n",
+       "})"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "from ConfigSpace import ConfigurationSpace\n",
     "from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal\n",
+    "import tpot2\n",
+    "from sklearn.impute import SimpleImputer\n",
     "\n",
     "simple_imputer = ConfigurationSpace(\n",
     "    space = {\n",
-    "        'strategy' : Categorical('strategy', [['mean','median',], ['most_frequent'] ]),\n",
+    "        'strategy' : Categorical('strategy', ['mean','median','most_frequent']),\n",
     "        'add_indicator' : Categorical('add_indicator', [True, False]), \n",
     "    }\n",
     ")\n",
@@ -21,43 +37,482 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
      "data": {
+      "text/html": [
+       "<style>#sk-container-id-1 {\n",
+       "  /* Definition of color scheme common for light and dark mode */\n",
+       "  --sklearn-color-text: black;\n",
+       "  --sklearn-color-line: gray;\n",
+       "  /* Definition of color scheme for unfitted estimators */\n",
+       "  --sklearn-color-unfitted-level-0: #fff5e6;\n",
+       "  --sklearn-color-unfitted-level-1: #f6e4d2;\n",
+       "  --sklearn-color-unfitted-level-2: #ffe0b3;\n",
+       "  --sklearn-color-unfitted-level-3: chocolate;\n",
+       "  /* Definition of color scheme for fitted estimators */\n",
+       "  --sklearn-color-fitted-level-0: #f0f8ff;\n",
+       "  --sklearn-color-fitted-level-1: #d4ebff;\n",
+       "  --sklearn-color-fitted-level-2: #b3dbfd;\n",
+       "  --sklearn-color-fitted-level-3: cornflowerblue;\n",
+       "\n",
+       "  /* Specific color for light theme */\n",
+       "  --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
+       "  --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
+       "  --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
+       "  --sklearn-color-icon: #696969;\n",
+       "\n",
+       "  @media (prefers-color-scheme: dark) {\n",
+       "    /* Redefinition of color scheme for dark theme */\n",
+       "    --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
+       "    --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
+       "    --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
+       "    --sklearn-color-icon: #878787;\n",
+       "  }\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-1 {\n",
+       "  color: var(--sklearn-color-text);\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-1 pre {\n",
+       "  padding: 0;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-1 input.sk-hidden--visually {\n",
+       "  border: 0;\n",
+       "  clip: rect(1px 1px 1px 1px);\n",
+       "  clip: rect(1px, 1px, 1px, 1px);\n",
+       "  height: 1px;\n",
+       "  margin: -1px;\n",
+       "  overflow: hidden;\n",
+       "  padding: 0;\n",
+       "  position: absolute;\n",
+       "  width: 1px;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-1 div.sk-dashed-wrapped {\n",
+       "  border: 1px dashed var(--sklearn-color-line);\n",
+       "  margin: 0 0.4em 0.5em 0.4em;\n",
+       "  box-sizing: border-box;\n",
+       "  padding-bottom: 0.4em;\n",
+       "  background-color: var(--sklearn-color-background);\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-1 div.sk-container {\n",
+       "  /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
+       "     but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
+       "     so we also need the `!important` here to be able to override the\n",
+       "     default hidden behavior on the sphinx rendered scikit-learn.org.\n",
+       "     See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
+       "  display: inline-block !important;\n",
+       "  position: relative;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-1 div.sk-text-repr-fallback {\n",
+       "  display: none;\n",
+       "}\n",
+       "\n",
+       "div.sk-parallel-item,\n",
+       "div.sk-serial,\n",
+       "div.sk-item {\n",
+       "  /* draw centered vertical line to link estimators */\n",
+       "  background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
+       "  background-size: 2px 100%;\n",
+       "  background-repeat: no-repeat;\n",
+       "  background-position: center center;\n",
+       "}\n",
+       "\n",
+       "/* Parallel-specific style estimator block */\n",
+       "\n",
+       "#sk-container-id-1 div.sk-parallel-item::after {\n",
+       "  content: \"\";\n",
+       "  width: 100%;\n",
+       "  border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
+       "  flex-grow: 1;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-1 div.sk-parallel {\n",
+       "  display: flex;\n",
+       "  align-items: stretch;\n",
+       "  justify-content: center;\n",
+       "  background-color: var(--sklearn-color-background);\n",
+       "  position: relative;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-1 div.sk-parallel-item {\n",
+       "  display: flex;\n",
+       "  flex-direction: column;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-1 div.sk-parallel-item:first-child::after {\n",
+       "  align-self: flex-end;\n",
+       "  width: 50%;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-1 div.sk-parallel-item:last-child::after {\n",
+       "  align-self: flex-start;\n",
+       "  width: 50%;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-1 div.sk-parallel-item:only-child::after {\n",
+       "  width: 0;\n",
+       "}\n",
+       "\n",
+       "/* Serial-specific style estimator block */\n",
+       "\n",
+       "#sk-container-id-1 div.sk-serial {\n",
+       "  display: flex;\n",
+       "  flex-direction: column;\n",
+       "  align-items: center;\n",
+       "  background-color: var(--sklearn-color-background);\n",
+       "  padding-right: 1em;\n",
+       "  padding-left: 1em;\n",
+       "}\n",
+       "\n",
+       "\n",
+       "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
+       "clickable and can be expanded/collapsed.\n",
+       "- Pipeline and ColumnTransformer use this feature and define the default style\n",
+       "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
+       "*/\n",
+       "\n",
+       "/* Pipeline and ColumnTransformer style (default) */\n",
+       "\n",
+       "#sk-container-id-1 div.sk-toggleable {\n",
+       "  /* Default theme specific background. It is overwritten whether we have a\n",
+       "  specific estimator or a Pipeline/ColumnTransformer */\n",
+       "  background-color: var(--sklearn-color-background);\n",
+       "}\n",
+       "\n",
+       "/* Toggleable label */\n",
+       "#sk-container-id-1 label.sk-toggleable__label {\n",
+       "  cursor: pointer;\n",
+       "  display: block;\n",
+       "  width: 100%;\n",
+       "  margin-bottom: 0;\n",
+       "  padding: 0.5em;\n",
+       "  box-sizing: border-box;\n",
+       "  text-align: center;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n",
+       "  /* Arrow on the left of the label */\n",
+       "  content: \"▸\";\n",
+       "  float: left;\n",
+       "  margin-right: 0.25em;\n",
+       "  color: var(--sklearn-color-icon);\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n",
+       "  color: var(--sklearn-color-text);\n",
+       "}\n",
+       "\n",
+       "/* Toggleable content - dropdown */\n",
+       "\n",
+       "#sk-container-id-1 div.sk-toggleable__content {\n",
+       "  max-height: 0;\n",
+       "  max-width: 0;\n",
+       "  overflow: hidden;\n",
+       "  text-align: left;\n",
+       "  /* unfitted */\n",
+       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-1 div.sk-toggleable__content.fitted {\n",
+       "  /* fitted */\n",
+       "  background-color: var(--sklearn-color-fitted-level-0);\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-1 div.sk-toggleable__content pre {\n",
+       "  margin: 0.2em;\n",
+       "  border-radius: 0.25em;\n",
+       "  color: var(--sklearn-color-text);\n",
+       "  /* unfitted */\n",
+       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n",
+       "  /* unfitted */\n",
+       "  background-color: var(--sklearn-color-fitted-level-0);\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
+       "  /* Expand drop-down */\n",
+       "  max-height: 200px;\n",
+       "  max-width: 100%;\n",
+       "  overflow: auto;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
+       "  content: \"▾\";\n",
+       "}\n",
+       "\n",
+       "/* Pipeline/ColumnTransformer-specific style */\n",
+       "\n",
+       "#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
+       "  color: var(--sklearn-color-text);\n",
+       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
+       "  background-color: var(--sklearn-color-fitted-level-2);\n",
+       "}\n",
+       "\n",
+       "/* Estimator-specific style */\n",
+       "\n",
+       "/* Colorize estimator box */\n",
+       "#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
+       "  /* unfitted */\n",
+       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
+       "  /* fitted */\n",
+       "  background-color: var(--sklearn-color-fitted-level-2);\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n",
+       "#sk-container-id-1 div.sk-label label {\n",
+       "  /* The background is the default theme color */\n",
+       "  color: var(--sklearn-color-text-on-default-background);\n",
+       "}\n",
+       "\n",
+       "/* On hover, darken the color of the background */\n",
+       "#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n",
+       "  color: var(--sklearn-color-text);\n",
+       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
+       "}\n",
+       "\n",
+       "/* Label box, darken color on hover, fitted */\n",
+       "#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
+       "  color: var(--sklearn-color-text);\n",
+       "  background-color: var(--sklearn-color-fitted-level-2);\n",
+       "}\n",
+       "\n",
+       "/* Estimator label */\n",
+       "\n",
+       "#sk-container-id-1 div.sk-label label {\n",
+       "  font-family: monospace;\n",
+       "  font-weight: bold;\n",
+       "  display: inline-block;\n",
+       "  line-height: 1.2em;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-1 div.sk-label-container {\n",
+       "  text-align: center;\n",
+       "}\n",
+       "\n",
+       "/* Estimator-specific */\n",
+       "#sk-container-id-1 div.sk-estimator {\n",
+       "  font-family: monospace;\n",
+       "  border: 1px dotted var(--sklearn-color-border-box);\n",
+       "  border-radius: 0.25em;\n",
+       "  box-sizing: border-box;\n",
+       "  margin-bottom: 0.5em;\n",
+       "  /* unfitted */\n",
+       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-1 div.sk-estimator.fitted {\n",
+       "  /* fitted */\n",
+       "  background-color: var(--sklearn-color-fitted-level-0);\n",
+       "}\n",
+       "\n",
+       "/* on hover */\n",
+       "#sk-container-id-1 div.sk-estimator:hover {\n",
+       "  /* unfitted */\n",
+       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-1 div.sk-estimator.fitted:hover {\n",
+       "  /* fitted */\n",
+       "  background-color: var(--sklearn-color-fitted-level-2);\n",
+       "}\n",
+       "\n",
+       "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
+       "\n",
+       "/* Common style for \"i\" and \"?\" */\n",
+       "\n",
+       ".sk-estimator-doc-link,\n",
+       "a:link.sk-estimator-doc-link,\n",
+       "a:visited.sk-estimator-doc-link {\n",
+       "  float: right;\n",
+       "  font-size: smaller;\n",
+       "  line-height: 1em;\n",
+       "  font-family: monospace;\n",
+       "  background-color: var(--sklearn-color-background);\n",
+       "  border-radius: 1em;\n",
+       "  height: 1em;\n",
+       "  width: 1em;\n",
+       "  text-decoration: none !important;\n",
+       "  margin-left: 1ex;\n",
+       "  /* unfitted */\n",
+       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
+       "  color: var(--sklearn-color-unfitted-level-1);\n",
+       "}\n",
+       "\n",
+       ".sk-estimator-doc-link.fitted,\n",
+       "a:link.sk-estimator-doc-link.fitted,\n",
+       "a:visited.sk-estimator-doc-link.fitted {\n",
+       "  /* fitted */\n",
+       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
+       "  color: var(--sklearn-color-fitted-level-1);\n",
+       "}\n",
+       "\n",
+       "/* On hover */\n",
+       "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
+       ".sk-estimator-doc-link:hover,\n",
+       "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
+       ".sk-estimator-doc-link:hover {\n",
+       "  /* unfitted */\n",
+       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
+       "  color: var(--sklearn-color-background);\n",
+       "  text-decoration: none;\n",
+       "}\n",
+       "\n",
+       "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
+       ".sk-estimator-doc-link.fitted:hover,\n",
+       "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
+       ".sk-estimator-doc-link.fitted:hover {\n",
+       "  /* fitted */\n",
+       "  background-color: var(--sklearn-color-fitted-level-3);\n",
+       "  color: var(--sklearn-color-background);\n",
+       "  text-decoration: none;\n",
+       "}\n",
+       "\n",
+       "/* Span, style for the box shown on hovering the info icon */\n",
+       ".sk-estimator-doc-link span {\n",
+       "  display: none;\n",
+       "  z-index: 9999;\n",
+       "  position: relative;\n",
+       "  font-weight: normal;\n",
+       "  right: .2ex;\n",
+       "  padding: .5ex;\n",
+       "  margin: .5ex;\n",
+       "  width: min-content;\n",
+       "  min-width: 20ex;\n",
+       "  max-width: 50ex;\n",
+       "  color: var(--sklearn-color-text);\n",
+       "  box-shadow: 2pt 2pt 4pt #999;\n",
+       "  /* unfitted */\n",
+       "  background: var(--sklearn-color-unfitted-level-0);\n",
+       "  border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
+       "}\n",
+       "\n",
+       ".sk-estimator-doc-link.fitted span {\n",
+       "  /* fitted */\n",
+       "  background: var(--sklearn-color-fitted-level-0);\n",
+       "  border: var(--sklearn-color-fitted-level-3);\n",
+       "}\n",
+       "\n",
+       ".sk-estimator-doc-link:hover span {\n",
+       "  display: block;\n",
+       "}\n",
+       "\n",
+       "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
+       "\n",
+       "#sk-container-id-1 a.estimator_doc_link {\n",
+       "  float: right;\n",
+       "  font-size: 1rem;\n",
+       "  line-height: 1em;\n",
+       "  font-family: monospace;\n",
+       "  background-color: var(--sklearn-color-background);\n",
+       "  border-radius: 1rem;\n",
+       "  height: 1rem;\n",
+       "  width: 1rem;\n",
+       "  text-decoration: none;\n",
+       "  /* unfitted */\n",
+       "  color: var(--sklearn-color-unfitted-level-1);\n",
+       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-1 a.estimator_doc_link.fitted {\n",
+       "  /* fitted */\n",
+       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
+       "  color: var(--sklearn-color-fitted-level-1);\n",
+       "}\n",
+       "\n",
+       "/* On hover */\n",
+       "#sk-container-id-1 a.estimator_doc_link:hover {\n",
+       "  /* unfitted */\n",
+       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
+       "  color: var(--sklearn-color-background);\n",
+       "  text-decoration: none;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n",
+       "  /* fitted */\n",
+       "  background-color: var(--sklearn-color-fitted-level-3);\n",
+       "}\n",
+       "</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;simpleimputer&#x27;,\n",
+       "                 SimpleImputer(add_indicator=True, strategy=&#x27;median&#x27;)),\n",
+       "                (&#x27;selectpercentile&#x27;,\n",
+       "                 SelectPercentile(percentile=44.546578384975824)),\n",
+       "                (&#x27;featureagglomeration&#x27;,\n",
+       "                 FeatureAgglomeration(linkage=&#x27;complete&#x27;, metric=&#x27;cosine&#x27;,\n",
+       "                                      n_clusters=102,\n",
+       "                                      pooling_func=&lt;function median at 0x711a67539830&gt;)),\n",
+       "                (&#x27;extratreesclassifier&#x27;,\n",
+       "                 ExtraTreesClassifier(bootstrap=True, class_weight=&#x27;balanced&#x27;,\n",
+       "                                      max_features=0.9974817877523433,\n",
+       "                                      min_samples_leaf=8, min_samples_split=20,\n",
+       "                                      n_jobs=1))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label  sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" ><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label  sk-toggleable__label-arrow \">&nbsp;&nbsp;Pipeline<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.pipeline.Pipeline.html\">?<span>Documentation for Pipeline</span></a><span class=\"sk-estimator-doc-link \">i<span>Not fitted</span></span></label><div class=\"sk-toggleable__content \"><pre>Pipeline(steps=[(&#x27;simpleimputer&#x27;,\n",
+       "                 SimpleImputer(add_indicator=True, strategy=&#x27;median&#x27;)),\n",
+       "                (&#x27;selectpercentile&#x27;,\n",
+       "                 SelectPercentile(percentile=44.546578384975824)),\n",
+       "                (&#x27;featureagglomeration&#x27;,\n",
+       "                 FeatureAgglomeration(linkage=&#x27;complete&#x27;, metric=&#x27;cosine&#x27;,\n",
+       "                                      n_clusters=102,\n",
+       "                                      pooling_func=&lt;function median at 0x711a67539830&gt;)),\n",
+       "                (&#x27;extratreesclassifier&#x27;,\n",
+       "                 ExtraTreesClassifier(bootstrap=True, class_weight=&#x27;balanced&#x27;,\n",
+       "                                      max_features=0.9974817877523433,\n",
+       "                                      min_samples_leaf=8, min_samples_split=20,\n",
+       "                                      n_jobs=1))])</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator  sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" ><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label  sk-toggleable__label-arrow \">&nbsp;SimpleImputer<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.impute.SimpleImputer.html\">?<span>Documentation for SimpleImputer</span></a></label><div class=\"sk-toggleable__content \"><pre>SimpleImputer(add_indicator=True, strategy=&#x27;median&#x27;)</pre></div> </div></div><div class=\"sk-item\"><div class=\"sk-estimator  sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label  sk-toggleable__label-arrow \">&nbsp;SelectPercentile<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.feature_selection.SelectPercentile.html\">?<span>Documentation for SelectPercentile</span></a></label><div class=\"sk-toggleable__content \"><pre>SelectPercentile(percentile=44.546578384975824)</pre></div> </div></div><div class=\"sk-item\"><div class=\"sk-estimator  sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label  sk-toggleable__label-arrow \">&nbsp;FeatureAgglomeration<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.cluster.FeatureAgglomeration.html\">?<span>Documentation for FeatureAgglomeration</span></a></label><div class=\"sk-toggleable__content \"><pre>FeatureAgglomeration(linkage=&#x27;complete&#x27;, 
metric=&#x27;cosine&#x27;, n_clusters=102,\n",
+       "                     pooling_func=&lt;function median at 0x711a67539830&gt;)</pre></div> </div></div><div class=\"sk-item\"><div class=\"sk-estimator  sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label  sk-toggleable__label-arrow \">&nbsp;ExtraTreesClassifier<a class=\"sk-estimator-doc-link \" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.ensemble.ExtraTreesClassifier.html\">?<span>Documentation for ExtraTreesClassifier</span></a></label><div class=\"sk-toggleable__content \"><pre>ExtraTreesClassifier(bootstrap=True, class_weight=&#x27;balanced&#x27;,\n",
+       "                     max_features=0.9974817877523433, min_samples_leaf=8,\n",
+       "                     min_samples_split=20, n_jobs=1)</pre></div> </div></div></div></div></div></div>"
+      ],
       "text/plain": [
-       "Configuration(values={\n",
-       "  '2': 2,\n",
-       "  'a': 2,\n",
-       "})"
+       "Pipeline(steps=[('simpleimputer',\n",
+       "                 SimpleImputer(add_indicator=True, strategy='median')),\n",
+       "                ('selectpercentile',\n",
+       "                 SelectPercentile(percentile=44.546578384975824)),\n",
+       "                ('featureagglomeration',\n",
+       "                 FeatureAgglomeration(linkage='complete', metric='cosine',\n",
+       "                                      n_clusters=102,\n",
+       "                                      pooling_func=<function median at 0x711a67539830>)),\n",
+       "                ('extratreesclassifier',\n",
+       "                 ExtraTreesClassifier(bootstrap=True, class_weight='balanced',\n",
+       "                                      max_features=0.9974817877523433,\n",
+       "                                      min_samples_leaf=8, min_samples_split=20,\n",
+       "                                      n_jobs=1))])"
       ]
      },
-     "execution_count": 11,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "from ConfigSpace import ConfigurationSpace, EqualsCondition\n",
-    "import ConfigSpace\n",
-    "\n",
-    "cs = ConfigurationSpace({\n",
-    "\n",
-    "    \"1\": [1,2,3],\n",
-    "    \"2\": ConfigSpace.Constant(\"2\", 2),\n",
-    "\n",
-    "    \"a\": [1, 2, 3],\n",
-    "\n",
-    "})\n",
+    "imputation_node =tpot2.search_spaces.nodes.EstimatorNode(\n",
+    "    method = SimpleImputer,\n",
+    "    space = simple_imputer,\n",
+    ")\n",
     "\n",
-    "cond = EqualsCondition(cs['1'], cs['a'], 1)\n",
-    "cond2 = EqualsCondition(cs['2'], cs['a'], 2)\n",
+    "impute_classifier_space  = tpot2.search_spaces.pipelines.SequentialPipeline([\n",
+    "    imputation_node,\n",
+    "    tpot2.config.get_search_space(\"selectors\"), \n",
+    "    tpot2.config.get_search_space(\"transformers\"),\n",
+    "    tpot2.config.get_search_space(\"classifiers\"),\n",
+    "    \n",
+    "])\n",
     "\n",
-    "cs.add_condition(cond)\n",
-    "cs.add_condition(cond2)\n",
     "\n",
-    "cs.sample_configuration()"
+    "impute_classifier_space.generate().export_pipeline()"
    ]
   }
  ],
@@ -77,7 +532,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.13"
+   "version": "3.10.14"
   }
  },
  "nbformat": 4,

From 76f76faf4a59181813d361ef42aa8ae0be391d36 Mon Sep 17 00:00:00 2001
From: perib <pedroribeiroh@gmail.com>
Date: Tue, 9 Jul 2024 14:37:39 -0700
Subject: [PATCH 08/12] fix: coerce rng with np.random.default_rng in wrapper crossover

---
 tpot2/search_spaces/pipelines/wrapper.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tpot2/search_spaces/pipelines/wrapper.py b/tpot2/search_spaces/pipelines/wrapper.py
index df504a89..2c1ad138 100644
--- a/tpot2/search_spaces/pipelines/wrapper.py
+++ b/tpot2/search_spaces/pipelines/wrapper.py
@@ -63,6 +63,7 @@ def _mutate_node(self, rng=None):
         return self.node.mutate(rng)
 
     def crossover(self, other, rng=None):
+        rng = np.random.default_rng(rng)
         if rng.choice([True, False]):
             return self._crossover_hyperparameters(other, rng)
         else:

From 334ca58319d767aecd097232c09ddc6596ca659a Mon Sep 17 00:00:00 2001
From: perib <pedroribeiroh@gmail.com>
Date: Tue, 9 Jul 2024 14:41:36 -0700
Subject: [PATCH 09/12] wrapper fix: delegate crossover to self.node instead of self.estimator_search_space

---
 tpot2/search_spaces/pipelines/wrapper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tpot2/search_spaces/pipelines/wrapper.py b/tpot2/search_spaces/pipelines/wrapper.py
index 2c1ad138..d61bc5f3 100644
--- a/tpot2/search_spaces/pipelines/wrapper.py
+++ b/tpot2/search_spaces/pipelines/wrapper.py
@@ -67,7 +67,7 @@ def crossover(self, other, rng=None):
         if rng.choice([True, False]):
             return self._crossover_hyperparameters(other, rng)
         else:
-            self.estimator_search_space.crossover(other.estimator_search_space, rng)
+            self.node.crossover(other.estimator_search_space, rng)
     
 
     def _crossover_hyperparameters(self, other, rng=None):

From 8c0379c4c2cfca3bf414cd51174505ce5bde8561 Mon Sep 17 00:00:00 2001
From: perib <pedroribeiroh@gmail.com>
Date: Tue, 9 Jul 2024 17:46:10 -0700
Subject: [PATCH 10/12] skip the fallback timeout check when max_eval_time_seconds is None

---
 tpot2/evolvers/steady_state_evolver.py | 19 ++++++++++---------
 tpot2/utils/eval_utils.py              |  2 +-
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/tpot2/evolvers/steady_state_evolver.py b/tpot2/evolvers/steady_state_evolver.py
index eecc2b29..e0b9b593 100644
--- a/tpot2/evolvers/steady_state_evolver.py
+++ b/tpot2/evolvers/steady_state_evolver.py
@@ -299,17 +299,18 @@ def optimize(self):
                                 eval_error = "INVALID"
                     else: #if future is not done
 
-                        #check if the future has been running for too long, cancel the future
-                        if time.time() - submitted_futures[completed_future]["time"] > self.max_eval_time_seconds*1.25:
-                            completed_future.cancel()
+                        if self.max_eval_time_seconds is not None:
+                            #check if the future has been running for too long, cancel the future
+                            if time.time() - submitted_futures[completed_future]["time"] > self.max_eval_time_seconds*1.25:
+                                completed_future.cancel()
 
-                            if self.verbose >= 4:
-                                print(f'WARNING AN INDIVIDUAL TIMED OUT (Fallback): \n {submitted_futures[completed_future]} \n')
+                                if self.verbose >= 4:
+                                    print(f'WARNING AN INDIVIDUAL TIMED OUT (Fallback): \n {submitted_futures[completed_future]} \n')
 
-                            scores = [np.nan for _ in range(len(self.objective_names))]
-                            eval_error = "TIMEOUT"
-                        else:
-                            continue #otherwise, continue to next future
+                                scores = [np.nan for _ in range(len(self.objective_names))]
+                                eval_error = "TIMEOUT"
+                            else:
+                                continue #otherwise, continue to next future
 
 
 
diff --git a/tpot2/utils/eval_utils.py b/tpot2/utils/eval_utils.py
index f37cb823..f8d4bd7f 100644
--- a/tpot2/utils/eval_utils.py
+++ b/tpot2/utils/eval_utils.py
@@ -218,7 +218,7 @@ def parallel_eval_objective_list2(individual_list,
                 
 
                 #check if the future has been running for too long, cancel the future
-                if time.time() - submitted_futures[completed_future]["time"] > max_eval_time_seconds*1.25:
+                if max_eval_time_seconds is not None and time.time() - submitted_futures[completed_future]["time"] > max_eval_time_seconds*1.25:
                     completed_future.cancel()
                     
                     if verbose >= 4:

From dc1fb8aa03c705c0110af292b4b8d473fef9559b Mon Sep 17 00:00:00 2001
From: Jay Moran <jay-moran@outlook.com>
Date: Wed, 10 Jul 2024 15:37:39 -0700
Subject: [PATCH 11/12] Print out package versions and make tests verbose

---
 tox.ini | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tox.ini b/tox.ini
index 4e250aef..f215d6d0 100644
--- a/tox.ini
+++ b/tox.ini
@@ -16,7 +16,8 @@ setenv =
 deps =
     -r{toxinidir}/requirements_dev.txt
 commands =
-    pytest --basetemp={envtmpdir}
+    pip freeze
+    pytest --basetemp={envtmpdir} -v
 
 [testenv:flake8]
 basepython = python3.10
@@ -27,4 +28,4 @@ commands = flake8 tpot2
 basepython = python3.10
 deps =
     -r{toxinidir}/requirements_dev.txt
-commands = mypy tpot2
\ No newline at end of file
+commands = mypy tpot2

From 76d3989e8cd6bc93144ef79a49fe6e6837be425e Mon Sep 17 00:00:00 2001
From: Jay Moran <jay-moran@outlook.com>
Date: Wed, 10 Jul 2024 16:59:13 -0700
Subject: [PATCH 12/12] Pin numpy to 1.26.4 and revert tox pip freeze / verbose pytest

---
 setup.py | 2 +-
 tox.ini  | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/setup.py b/setup.py
index 27b4a474..0a404280 100644
--- a/setup.py
+++ b/setup.py
@@ -28,7 +28,7 @@ def calculate_version():
 
 ''',
     zip_safe=True,
-    install_requires=['numpy>=1.26.4',
+    install_requires=['numpy==1.26.4',
                       'scipy>=1.3.1',
                       'scikit-learn>=1.3.0',
                       'update_checker>=0.16',
diff --git a/tox.ini b/tox.ini
index f215d6d0..7177d0a7 100644
--- a/tox.ini
+++ b/tox.ini
@@ -16,8 +16,7 @@ setenv =
 deps =
     -r{toxinidir}/requirements_dev.txt
 commands =
-    pip freeze
-    pytest --basetemp={envtmpdir} -v
+    pytest --basetemp={envtmpdir}
 
 [testenv:flake8]
 basepython = python3.10