better load balancer defaults
PhilipDeegan committed May 29, 2024
1 parent b6d7319, commit 4f501ca
Showing 5 changed files with 44 additions and 40 deletions.
pyphare/pyphare/pharein/__init__.py (2 changes: 1 addition & 1 deletion)
@@ -226,7 +226,7 @@ def as_paths(rb):
add_double("simulation/algo/ohm/hyper_resistivity", simulation.hyper_resistivity)

# load balancer block start
-lb = simulation.load_balancer or LoadBalancer(_register=False)
+lb = simulation.load_balancer or LoadBalancer(active=False, _register=False)
base = "simulation/AMR/loadbalancing"
add_bool(f"{base}/active", lb.active)
add_string(f"{base}/mode", lb.mode)
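
With this change, a run that never configures a load balancer falls back to an explicitly inactive instance, so every `simulation/AMR/loadbalancing/*` key is still written for the C++ side. A minimal sketch of that fallback, assuming pyphare is importable; the `resolve_load_balancer` helper and `sim` argument are hypothetical, for illustration only:

    from pyphare.pharein.load_balancer import LoadBalancer

    def resolve_load_balancer(sim):
        # Fall back to an inactive, unregistered instance when the user set nothing,
        # so the config block is always complete.
        return sim.load_balancer or LoadBalancer(active=False, _register=False)
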
pyphare/pyphare/pharein/load_balancer.py (13 changes: 10 additions & 3 deletions)
@@ -8,7 +8,7 @@
@dataclass
class LoadBalancer:
# whether or not load balancing is performed
-active: bool = field(default_factory=lambda: False)
+active: bool = field(default_factory=lambda: True)

# which way load is assessed
mode: str = field(default_factory=lambda: "nppc")
@@ -20,18 +20,25 @@ class LoadBalancer:
on_init: bool = field(default_factory=lambda: True)

# if auto, other values are not used if active
-auto: bool = field(default_factory=lambda: True)
+auto: bool = field(default_factory=lambda: False)
next_rebalance_backoff_multiplier: int = field(default_factory=lambda: 2)
next_rebalance: int = field(default_factory=lambda: 200)
max_next_rebalance: int = field(default_factory=lambda: 1000)

# if !auto these values are used if active
-every: int = field(default_factory=lambda: 1)
+every: int = field(default_factory=lambda: None)

# internal, allows not registering object for default init
_register: bool = field(default_factory=lambda: True)

+def __post_init__(self):
+    if self.auto and self.every:
+        raise RuntimeError(f"LoadBalancer cannot work with both 'every' and 'auto'")
+
+    if self.every is None:
+        self.auto = True
+        self.every = 0  # python3 -> c++ doesn't understand 'None'

allowed_modes = [
"nppc", # count particles per rank
"homogeneous", # count cells per rank
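
How the new defaults and the `__post_init__` guard interact, as a hedged sketch; it assumes only the dataclass shown above, and `_register=False` is passed to avoid touching a global simulation:

    from pyphare.pharein.load_balancer import LoadBalancer

    lb = LoadBalancer(_register=False)
    # 'every' is None by default, so __post_init__ switches to auto mode
    assert lb.active and lb.auto and lb.every == 0

    manual = LoadBalancer(every=10, _register=False)
    # an explicit 'every' keeps auto=False and the given cadence
    assert manual.auto is False and manual.every == 10

    try:
        LoadBalancer(auto=True, every=1, _register=False)  # mutually exclusive
    except RuntimeError:
        pass
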
src/hdf5/detail/h5/h5_file.hpp (7 changes: 1 addition & 6 deletions)
@@ -69,10 +69,7 @@ class HighFiveFile

~HighFiveFile() {}

-NO_DISCARD HiFile& file()
-{
-    return h5file_;
-}
+NO_DISCARD HiFile& file() { return h5file_; }


template<typename T, std::size_t dim = 1>
@@ -149,8 +146,6 @@ class HighFiveFile
// clang-format off
PHARE_DEBUG_DO(
auto const paths = core::mpi::collect(keyPath, core::mpi::size());
-for (auto const& path : paths)
-    PHARE_LOG_LINE_STR(std::to_string(core::mpi::size()) << " " << path)
if (!core::all(paths, [&](auto const& path) { return path == paths[0]; }))
throw std::runtime_error("Function does not support different paths per mpi core");
)
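
The surviving debug block still gathers `keyPath` from every rank and refuses to proceed if ranks disagree; only the per-rank logging loop was dropped. The same check expressed in Python with mpi4py, purely as an illustration (mpi4py is not part of this change):

    from mpi4py import MPI

    def assert_same_path_on_all_ranks(key_path: str):
        paths = MPI.COMM_WORLD.allgather(key_path)  # every rank sees every rank's path
        if any(p != paths[0] for p in paths):
            raise RuntimeError("Function does not support different paths per mpi core")
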
tests/simulator/test_load_balancing.py (60 changes: 31 additions & 29 deletions)
@@ -3,11 +3,11 @@
# basically a harris run

import unittest
+from ddt import data, ddt, unpack
import pyphare.pharein as ph
from pyphare.pharesee.hierarchy import hierarchy_compare
from pyphare.pharesee.particles import single_patch_per_level_per_pop_from


from pyphare.simulator.simulator import Simulator, startMPI
from tests.simulator import SimulatorTest

@@ -24,7 +24,7 @@
ndim = 2
interp = 1
mpi_size = cpp.mpi_size()
-time_step_nbr = 2
+time_step_nbr = 3
time_step = 0.001
cells = (100, 100)
dl = (0.2, 0.2)
@@ -197,6 +197,7 @@ def _parse_rank(patch_id):
return per_rank


+@ddt
class LoadBalancingTest(SimulatorTest):
def tearDown(self):
ph.global_vars.sim = None
@@ -207,37 +208,40 @@ def run_sim(self, diags_dir, dic={}):
Simulator(config(diags_dir, dic)).run()
return diags_dir

-def test_has_balanced_auto(self):
+@data(dict(auto=True, every=1))
+@unpack
+def test_raises(self, **lbkwargs):
    if mpi_size == 1: # doesn't make sense
        return

-    diag_dir = self.run_sim(
-        self.unique_diag_dir_for_test_case(diag_outputs, ndim, interp),
-        dict(active=True, auto=True, mode="nppc", tol=0.05),
-    )
-
-    if cpp.mpi_rank() > 0:
-        return
-
-    t0_sdev = np.std(list(time_info(diag_dir).values()))
-    tend_sdev = np.std(list(time_info(diag_dir, timestamps[-1]).values()))
-    self.assertLess(tend_sdev, t0_sdev * 0.1) # empirical
-
-def test_has_balanced_manual(self):
+    with self.assertRaises(RuntimeError):
+        diag_dir = self.run_sim(
+            self.unique_diag_dir_for_test_case(diag_outputs, ndim, interp),
+            dict(active=True, mode="nppc", tol=0.01, **lbkwargs),
+        )
+        # does not get here
+
+@data(
+    dict(auto=True),  # tolerance checks
+    dict(on_init=True, every=0),  # on init only
+    dict(on_init=True, every=1),
+    dict(on_init=False, auto=True, next_rebalance=1),
+    dict(on_init=False, every=1),
+)
+@unpack
+def test_has_balanced(self, **lbkwargs):
if mpi_size == 1: # doesn't make sense
return

diag_dir = self.run_sim(
self.unique_diag_dir_for_test_case(diag_outputs, ndim, interp),
-dict(active=True, on_init=True, every=1, mode="nppc", tol=0.05),
+dict(active=True, mode="nppc", tol=0.01, **lbkwargs),
)

-if cpp.mpi_rank() > 0:
-    return
-
-t0_sdev = np.std(list(time_info(diag_dir).values()))
-tend_sdev = np.std(list(time_info(diag_dir, timestamps[-1]).values()))
-self.assertLess(tend_sdev, t0_sdev * 0.1) # empirical
+if cpp.mpi_rank() == 0:
+    t0_sdev = np.std(list(time_info(diag_dir).values()))
+    tend_sdev = np.std(list(time_info(diag_dir, timestamps[-1]).values()))
+    self.assertLess(tend_sdev, t0_sdev * 0.1) # empirical

def test_has_not_balanced_as_defaults(self):
if mpi_size == 1: # doesn't make sense
@@ -247,12 +251,10 @@ def test_has_not_balanced_as_defaults(self):
self.unique_diag_dir_for_test_case(diag_outputs, ndim, interp)
)

-if cpp.mpi_rank() > 0:
-    return
-
-t0_sdev = np.std(list(time_info(diag_dir).values()))
-tend_sdev = np.std(list(time_info(diag_dir, timestamps[-1]).values()))
-self.assertGreater(tend_sdev, t0_sdev * 0.1) # empirical
+if cpp.mpi_rank() == 0:
+    t0_sdev = np.std(list(time_info(diag_dir).values()))
+    tend_sdev = np.std(list(time_info(diag_dir, timestamps[-1]).values()))
+    self.assertGreater(tend_sdev, t0_sdev * 0.1) # empirical

def test_compare_is_and_is_not_balanced(self):
if mpi_size == 1: # doesn't make sense
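
The test rewrite leans on the `ddt` package: `@ddt` marks the class, `@data(...)` supplies one dict per generated test, and `@unpack` spreads each dict into keyword arguments, which is how `test_has_balanced` receives its five load-balancer configurations. A standalone sketch of that pattern (the `Example` class is hypothetical, not part of the diff):

    import unittest
    from ddt import data, ddt, unpack

    @ddt
    class Example(unittest.TestCase):
        @data(dict(auto=True), dict(on_init=False, every=1))
        @unpack
        def test_case(self, **kwargs):
            # ddt generates one test method per @data entry
            self.assertIsInstance(kwargs, dict)

    if __name__ == "__main__":
        unittest.main()
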
tools/config/config.py (2 changes: 1 addition & 1 deletion)
@@ -244,7 +244,7 @@ def write_local_cmake_file(mpi_results):
if mpi_type == "OMPI":
# work around for https://github.com/open-mpi/ompi/issues/10761#issuecomment-1236909802
file.write(
"""set (PHARE_MPIRUN_POSTFIX "${PHARE_MPIRUN_POSTFIX} -q --bind-to none")
"""set (PHARE_MPIRUN_POSTFIX "${PHARE_MPIRUN_POSTFIX} --bind-to none")
"""
)

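
The edited line only drops `-q` from the Open MPI work-around; `--bind-to none` is kept. A rough sketch of what the generated CMake fragment ends up containing, simplified and with a hypothetical helper name:

    def mpirun_postfix_line(mpi_type: str) -> str:
        postfix = ""
        if mpi_type == "OMPI":
            # open-mpi/ompi#10761 work-around: disable process binding
            postfix = " --bind-to none"
        return 'set (PHARE_MPIRUN_POSTFIX "${PHARE_MPIRUN_POSTFIX}%s")' % postfix
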
