better load balancer defaults

PHAREHUB · May 23, 2024 · 9e669d1 · 9e669d1
1 parent d24d498
commit 9e669d1
Show file tree

Hide file tree

Showing 5 changed files with 47 additions and 40 deletions.
diff --git a/pyphare/pyphare/core/__init__.py b/pyphare/pyphare/core/__init__.py
@@ -10,6 +10,7 @@
   -d, --dry-run  Validate but do not run simulations
 """
 
+import os
 import sys
 import dataclasses
 
@@ -18,6 +19,9 @@ def disabled_for_testing():
     # check if any module is loaded from PHARE tests directory
     from pathlib import Path
 
+    if "PHARE_SKIP_CLI" in os.environ:
+        return os.environ["PHARE_SKIP_CLI"] == "1"
+
     test_dir = Path(__file__).resolve().parent.parent.parent.parent / "tests"
     if test_dir.exists():
         test_dir = str(test_dir)
@@ -34,8 +38,7 @@ class CliArgs:
 
 
 def parse_cli_args():
-    default_off = len(sys.argv) == 1 and disabled_for_testing()
-    if default_off:
+    if disabled_for_testing():
         return CliArgs()
 
     import argparse

diff --git a/pyphare/pyphare/pharein/load_balancer.py b/pyphare/pyphare/pharein/load_balancer.py
@@ -20,18 +20,25 @@ class LoadBalancer:
     on_init: bool = field(default_factory=lambda: True)
 
     # if auto, other values are not used if active
-    auto: bool = field(default_factory=lambda: True)
+    auto: bool = field(default_factory=lambda: False)
     next_rebalance_backoff_multiplier: int = field(default_factory=lambda: 2)
     next_rebalance: int = field(default_factory=lambda: 200)
     max_next_rebalance: int = field(default_factory=lambda: 1000)
 
     # if !auto these values are used if active
-    every: int = field(default_factory=lambda: 1)
+    every: int = field(default_factory=lambda: None)
 
     # internal, allows not registering object for default init
     _register: bool = field(default_factory=lambda: True)
 
     def __post_init__(self):
+        if self.auto and self.every:
+            raise RuntimeError(f"LoadBalancer cannot work with both 'every' and 'auto'")
+
+        if self.every is None:
+            self.auto = True
+            self.every = 0  # python3 -> c++ doesn't understand 'None'
+
         allowed_modes = [
             "nppc",  # count particles per rank
             "homogeneous",  # count cells per rank

diff --git a/src/hdf5/detail/h5/h5_file.hpp b/src/hdf5/detail/h5/h5_file.hpp
@@ -69,10 +69,7 @@ class HighFiveFile
 
     ~HighFiveFile() {}
 
-    NO_DISCARD HiFile& file()
-    {
-        return h5file_;
-    }
+    NO_DISCARD HiFile& file() { return h5file_; }
 
 
     template<typename T, std::size_t dim = 1>
@@ -149,8 +146,6 @@ class HighFiveFile
         // clang-format off
         PHARE_DEBUG_DO(
             auto const paths = core::mpi::collect(keyPath, core::mpi::size());
-            for (auto const& path : paths)
-                PHARE_LOG_LINE_STR(std::to_string(core::mpi::size()) << " " << path)
             if (!core::all(paths, [&](auto const& path) { return path == paths[0]; }))
                 throw std::runtime_error("Function does not support different paths per mpi core");
         )

diff --git a/tests/simulator/test_load_balancing.py b/tests/simulator/test_load_balancing.py
@@ -3,11 +3,11 @@
 # basically a harris run
 
 import unittest
+from ddt import data, ddt, unpack
 import pyphare.pharein as ph
 from pyphare.pharesee.hierarchy import hierarchy_compare
 from pyphare.pharesee.particles import single_patch_per_level_per_pop_from
 
-
 from pyphare.simulator.simulator import Simulator, startMPI
 from tests.simulator import SimulatorTest
 
@@ -24,7 +24,7 @@
 ndim = 2
 interp = 1
 mpi_size = cpp.mpi_size()
-time_step_nbr = 2
+time_step_nbr = 3
 time_step = 0.001
 cells = (100, 100)
 dl = (0.2, 0.2)
@@ -197,6 +197,7 @@ def _parse_rank(patch_id):
     return per_rank
 
 
+@ddt
 class LoadBalancingTest(SimulatorTest):
     def tearDown(self):
         ph.global_vars.sim = None
@@ -207,37 +208,40 @@ def run_sim(self, diags_dir, dic={}):
         Simulator(config(diags_dir, dic)).run()
         return diags_dir
 
-    def test_has_balanced_auto(self):
+    @data(dict(auto=True, every=1))
+    @unpack
+    def test_raises(self, **lbkwargs):
         if mpi_size == 1:  # doesn't make sense
             return
 
-        diag_dir = self.run_sim(
-            self.unique_diag_dir_for_test_case(diag_outputs, ndim, interp),
-            dict(active=True, auto=True, mode="nppc", tol=0.05),
-        )
-
-        if cpp.mpi_rank() > 0:
-            return
-
-        t0_sdev = np.std(list(time_info(diag_dir).values()))
-        tend_sdev = np.std(list(time_info(diag_dir, timestamps[-1]).values()))
-        self.assertLess(tend_sdev, t0_sdev * 0.1)  # empirical
-
-    def test_has_balanced_manual(self):
+        with self.assertRaises(RuntimeError):
+            diag_dir = self.run_sim(
+                self.unique_diag_dir_for_test_case(diag_outputs, ndim, interp),
+                dict(active=True, mode="nppc", tol=0.01, **lbkwargs),
+            )
+            # does not get here
+
+    @data(
+        dict(auto=True),  # tolerance checks
+        dict(on_init=True, every=0),  # on init only
+        dict(on_init=True, every=1),
+        dict(on_init=False, auto=True, next_rebalance=1),
+        dict(on_init=False, every=1),
+    )
+    @unpack
+    def test_has_balanced(self, **lbkwargs):
         if mpi_size == 1:  # doesn't make sense
             return
 
         diag_dir = self.run_sim(
             self.unique_diag_dir_for_test_case(diag_outputs, ndim, interp),
-            dict(active=True, on_init=True, every=1, mode="nppc", tol=0.05),
+            dict(active=True, mode="nppc", tol=0.01, **lbkwargs),
         )
 
-        if cpp.mpi_rank() > 0:
-            return
-
-        t0_sdev = np.std(list(time_info(diag_dir).values()))
-        tend_sdev = np.std(list(time_info(diag_dir, timestamps[-1]).values()))
-        self.assertLess(tend_sdev, t0_sdev * 0.1)  # empirical
+        if cpp.mpi_rank() == 0:
+            t0_sdev = np.std(list(time_info(diag_dir).values()))
+            tend_sdev = np.std(list(time_info(diag_dir, timestamps[-1]).values()))
+            self.assertLess(tend_sdev, t0_sdev * 0.1)  # empirical
 
     def test_has_not_balanced_as_defaults(self):
         if mpi_size == 1:  # doesn't make sense
@@ -247,12 +251,10 @@ def test_has_not_balanced_as_defaults(self):
             self.unique_diag_dir_for_test_case(diag_outputs, ndim, interp)
         )
 
-        if cpp.mpi_rank() > 0:
-            return
-
-        t0_sdev = np.std(list(time_info(diag_dir).values()))
-        tend_sdev = np.std(list(time_info(diag_dir, timestamps[-1]).values()))
-        self.assertGreater(tend_sdev, t0_sdev * 0.1)  # empirical
+        if cpp.mpi_rank() == 0:
+            t0_sdev = np.std(list(time_info(diag_dir).values()))
+            tend_sdev = np.std(list(time_info(diag_dir, timestamps[-1]).values()))
+            self.assertGreater(tend_sdev, t0_sdev * 0.1)  # empirical
 
     def test_compare_is_and_is_not_balanced(self):
         if mpi_size == 1:  # doesn't make sense

diff --git a/tools/config/config.py b/tools/config/config.py
@@ -245,7 +245,7 @@ def write_local_cmake_file(mpi_results):
         if mpi_type == "OMPI":
             # work around for https://github.com/open-mpi/ompi/issues/10761#issuecomment-1236909802
             file.write(
-                """set (PHARE_MPIRUN_POSTFIX "${PHARE_MPIRUN_POSTFIX} -q --bind-to none")
+                """set (PHARE_MPIRUN_POSTFIX "${PHARE_MPIRUN_POSTFIX} --bind-to none")
                 """
             )