Skip to content

Commit

Permalink
better load balancer defaults
Browse files Browse the repository at this point in the history
  • Loading branch information
PhilipDeegan committed May 23, 2024
1 parent d24d498 commit 9e669d1
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 40 deletions.
7 changes: 5 additions & 2 deletions pyphare/pyphare/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
-d, --dry-run Validate but do not run simulations
"""

import os
import sys
import dataclasses

Expand All @@ -18,6 +19,9 @@ def disabled_for_testing():
# check if any module is loaded from PHARE tests directory
from pathlib import Path

if "PHARE_SKIP_CLI" in os.environ:
return os.environ["PHARE_SKIP_CLI"] == "1"

test_dir = Path(__file__).resolve().parent.parent.parent.parent / "tests"
if test_dir.exists():
test_dir = str(test_dir)
Expand All @@ -34,8 +38,7 @@ class CliArgs:


def parse_cli_args():
default_off = len(sys.argv) == 1 and disabled_for_testing()
if default_off:
if disabled_for_testing():
return CliArgs()

import argparse
Expand Down
11 changes: 9 additions & 2 deletions pyphare/pyphare/pharein/load_balancer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,25 @@ class LoadBalancer:
on_init: bool = field(default_factory=lambda: True)

# if auto, other values are not used if active
auto: bool = field(default_factory=lambda: True)
auto: bool = field(default_factory=lambda: False)
next_rebalance_backoff_multiplier: int = field(default_factory=lambda: 2)
next_rebalance: int = field(default_factory=lambda: 200)
max_next_rebalance: int = field(default_factory=lambda: 1000)

# if !auto these values are used if active
every: int = field(default_factory=lambda: 1)
every: int = field(default_factory=lambda: None)

# internal, allows not registering object for default init
_register: bool = field(default_factory=lambda: True)

def __post_init__(self):
if self.auto and self.every:
raise RuntimeError(f"LoadBalancer cannot work with both 'every' and 'auto'")

if self.every is None:
self.auto = True
self.every = 0 # python3 -> c++ doesn't understand 'None'

allowed_modes = [
"nppc", # count particles per rank
"homogeneous", # count cells per rank
Expand Down
7 changes: 1 addition & 6 deletions src/hdf5/detail/h5/h5_file.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,7 @@ class HighFiveFile

~HighFiveFile() {}

NO_DISCARD HiFile& file()
{
return h5file_;
}
NO_DISCARD HiFile& file() { return h5file_; }


template<typename T, std::size_t dim = 1>
Expand Down Expand Up @@ -149,8 +146,6 @@ class HighFiveFile
// clang-format off
PHARE_DEBUG_DO(
auto const paths = core::mpi::collect(keyPath, core::mpi::size());
for (auto const& path : paths)
PHARE_LOG_LINE_STR(std::to_string(core::mpi::size()) << " " << path)
if (!core::all(paths, [&](auto const& path) { return path == paths[0]; }))
throw std::runtime_error("Function does not support different paths per mpi core");
)
Expand Down
60 changes: 31 additions & 29 deletions tests/simulator/test_load_balancing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
# basically a harris run

import unittest
from ddt import data, ddt, unpack
import pyphare.pharein as ph
from pyphare.pharesee.hierarchy import hierarchy_compare
from pyphare.pharesee.particles import single_patch_per_level_per_pop_from


from pyphare.simulator.simulator import Simulator, startMPI
from tests.simulator import SimulatorTest

Expand All @@ -24,7 +24,7 @@
ndim = 2
interp = 1
mpi_size = cpp.mpi_size()
time_step_nbr = 2
time_step_nbr = 3
time_step = 0.001
cells = (100, 100)
dl = (0.2, 0.2)
Expand Down Expand Up @@ -197,6 +197,7 @@ def _parse_rank(patch_id):
return per_rank


@ddt
class LoadBalancingTest(SimulatorTest):
def tearDown(self):
ph.global_vars.sim = None
Expand All @@ -207,37 +208,40 @@ def run_sim(self, diags_dir, dic={}):
Simulator(config(diags_dir, dic)).run()
return diags_dir

def test_has_balanced_auto(self):
@data(dict(auto=True, every=1))
@unpack
def test_raises(self, **lbkwargs):
if mpi_size == 1: # doesn't make sense
return

diag_dir = self.run_sim(
self.unique_diag_dir_for_test_case(diag_outputs, ndim, interp),
dict(active=True, auto=True, mode="nppc", tol=0.05),
)

if cpp.mpi_rank() > 0:
return

t0_sdev = np.std(list(time_info(diag_dir).values()))
tend_sdev = np.std(list(time_info(diag_dir, timestamps[-1]).values()))
self.assertLess(tend_sdev, t0_sdev * 0.1) # empirical

def test_has_balanced_manual(self):
with self.assertRaises(RuntimeError):
diag_dir = self.run_sim(
self.unique_diag_dir_for_test_case(diag_outputs, ndim, interp),
dict(active=True, mode="nppc", tol=0.01, **lbkwargs),
)
# does not get here

@data(
dict(auto=True), # tolerance checks
dict(on_init=True, every=0), # on init only
dict(on_init=True, every=1),
dict(on_init=False, auto=True, next_rebalance=1),
dict(on_init=False, every=1),
)
@unpack
def test_has_balanced(self, **lbkwargs):
if mpi_size == 1: # doesn't make sense
return

diag_dir = self.run_sim(
self.unique_diag_dir_for_test_case(diag_outputs, ndim, interp),
dict(active=True, on_init=True, every=1, mode="nppc", tol=0.05),
dict(active=True, mode="nppc", tol=0.01, **lbkwargs),
)

if cpp.mpi_rank() > 0:
return

t0_sdev = np.std(list(time_info(diag_dir).values()))
tend_sdev = np.std(list(time_info(diag_dir, timestamps[-1]).values()))
self.assertLess(tend_sdev, t0_sdev * 0.1) # empirical
if cpp.mpi_rank() == 0:
t0_sdev = np.std(list(time_info(diag_dir).values()))
tend_sdev = np.std(list(time_info(diag_dir, timestamps[-1]).values()))
self.assertLess(tend_sdev, t0_sdev * 0.1) # empirical

def test_has_not_balanced_as_defaults(self):
if mpi_size == 1: # doesn't make sense
Expand All @@ -247,12 +251,10 @@ def test_has_not_balanced_as_defaults(self):
self.unique_diag_dir_for_test_case(diag_outputs, ndim, interp)
)

if cpp.mpi_rank() > 0:
return

t0_sdev = np.std(list(time_info(diag_dir).values()))
tend_sdev = np.std(list(time_info(diag_dir, timestamps[-1]).values()))
self.assertGreater(tend_sdev, t0_sdev * 0.1) # empirical
if cpp.mpi_rank() == 0:
t0_sdev = np.std(list(time_info(diag_dir).values()))
tend_sdev = np.std(list(time_info(diag_dir, timestamps[-1]).values()))
self.assertGreater(tend_sdev, t0_sdev * 0.1) # empirical

def test_compare_is_and_is_not_balanced(self):
if mpi_size == 1: # doesn't make sense
Expand Down
2 changes: 1 addition & 1 deletion tools/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ def write_local_cmake_file(mpi_results):
if mpi_type == "OMPI":
# work around for https://github.com/open-mpi/ompi/issues/10761#issuecomment-1236909802
file.write(
"""set (PHARE_MPIRUN_POSTFIX "${PHARE_MPIRUN_POSTFIX} -q --bind-to none")
"""set (PHARE_MPIRUN_POSTFIX "${PHARE_MPIRUN_POSTFIX} --bind-to none")
"""
)

Expand Down

0 comments on commit 9e669d1

Please sign in to comment.