From aea5c9c0e0ccabd9c50b94ffa9f45fa019633a94 Mon Sep 17 00:00:00 2001 From: "Sergey E. Koposov" Date: Thu, 29 Aug 2024 18:10:48 +0100 Subject: [PATCH] further refactor merging. Now I create a unique batch list. I.e. if one run used [-inf, inf], [-5,5], [-4,4] and another [-inf, inf], [-5,5], [-3,2] the merged runs will be [-inf,inf] [-5,5] [-4,4] [-3,2] I also add a test that tests for an issue uncovered in #481 --- py/dynesty/utils.py | 24 +++++++++++++++--------- tests/test_gau.py | 28 +++++++++++++++++++++++----- 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/py/dynesty/utils.py b/py/dynesty/utils.py index 037d8ece..06344f25 100644 --- a/py/dynesty/utils.py +++ b/py/dynesty/utils.py @@ -1946,12 +1946,17 @@ def _merge_two(res1, res2, compute_aux=False): combined_info[curk] = [] # Check if batch info is the same and modify counters accordingly. - if np.all(base_info['bounds'] == new_info['bounds']): - bounds = base_info['bounds'] - boffset = 0 - else: - bounds = np.concatenate((base_info['bounds'], new_info['bounds'])) - boffset = len(base_info['bounds']) + ubounds = np.unique(np.concatenate( + (base_info['bounds'], new_info['bounds'])), + axis=0) + new_bound_map = {} + base_bound_map = {} + for i in range(len(new_info['bounds'])): + new_bound_map[i] = np.where( + np.all(new_info['bounds'][i] == ubounds, axis=1))[0][0] + for i in range(len(base_info['bounds'])): + base_bound_map[i] = np.where( + np.all(base_info['bounds'][i] == ubounds, axis=1))[0][0] # Start our counters at the beginning of each set of dead points. 
idx_base, idx_new = 0, 0 @@ -1999,13 +2004,14 @@ def _merge_two(res1, res2, compute_aux=False): if logl_b <= logl_n: add_idx = idx_base from_run = base_info + from_map = base_bound_map idx_base += 1 - combined_info['batch'].append(from_run['batch'][add_idx]) else: add_idx = idx_new from_run = new_info + from_map = new_bound_map idx_new += 1 - combined_info['batch'].append(from_run['batch'][add_idx] + boffset) + combined_info['batch'].append(from_map[from_run['batch'][add_idx]]) for curk in ['id', 'u', 'v', 'logl', 'nc', 'it', 'blob']: combined_info[curk].append(from_run[curk][add_idx]) @@ -2051,7 +2057,7 @@ def _merge_two(res1, res2, compute_aux=False): samples=np.asarray(combined_info['v']), logl=np.asarray(combined_info['logl']), logvol=np.asarray(combined_info['logvol']), - batch_bounds=np.asarray(bounds), + batch_bounds=np.asarray(ubounds), blob=np.asarray(combined_info['blob'])) for curk in ['id', 'it', 'n', 'u', 'batch']: diff --git a/tests/test_gau.py b/tests/test_gau.py index 36ad75e8..65c1ca67 100644 --- a/tests/test_gau.py +++ b/tests/test_gau.py @@ -164,13 +164,31 @@ def test_gaussian(): assert (np.abs(logz - g.logz_truth) < sig * results.logzerr[-1]) res_comb = dyfunc.merge_runs([result_list[0]]) res_comb = dyfunc.merge_runs(result_list) - assert (np.abs(res_comb['logz'][-1] - g.logz_truth) < - sig * results['logzerr'][-1]) + assert (np.abs(res_comb['logz'][-1] - g.logz_truth) + < sig * results['logzerr'][-1]) # check summary res = sampler.results res.summary() +def test_merge(): + rstate = get_rstate() + g = Gaussian() + sampler1 = dynesty.DynamicNestedSampler(g.loglikelihood, + g.prior_transform, + g.ndim, + nlive=nlive, + rstate=rstate) + sampler1.run_nested(print_progress=printing, maxbatch=1) + sampler2 = dynesty.DynamicNestedSampler(g.loglikelihood, + g.prior_transform, + g.ndim, + nlive=nlive, + rstate=rstate) + sampler2.run_nested(print_progress=printing, maxbatch=2) + dyfunc.merge_runs((sampler1.results, sampler2.results)) + + def 
test_generator(): # Test that we can use the sampler as a generator rstate = get_rstate() @@ -239,9 +257,9 @@ def test_bounding_sample(bound, sample): print(sampler.citations) -@pytest.mark.parametrize("bound,sample", - itertools.product( - ['single', 'multi', 'balls', 'cubes'], ['unif'])) +@pytest.mark.parametrize( + "bound,sample", + itertools.product(['single', 'multi', 'balls', 'cubes'], ['unif'])) def test_bounding_bootstrap(bound, sample): # check various bounding methods with bootstrap