Skip to content

Commit

Permalink
Add logging to the load balancing functions
Browse files Browse the repository at this point in the history
  • Loading branch information
IshaanDesai committed Feb 11, 2025
1 parent a685a29 commit 3a41a60
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 11 deletions.
60 changes: 52 additions & 8 deletions micro_manager/adaptivity/global_adaptivity_lb.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def __init__(
configurator,
global_number_of_sims: int,
global_ids: list,
logger,
rank: int,
comm,
) -> None:
Expand All @@ -35,6 +36,8 @@ def __init__(
Total number of simulations in the macro-micro coupled problem.
global_ids : list
List of global IDs of simulations living on this rank.
logger : object of class Logger
Logger to log to terminal.
rank : int
MPI rank.
comm : MPI.COMM_WORLD
Expand All @@ -49,13 +52,21 @@ def __init__(
"MicroSimulation",
)

self._base_logger = logger

self._local_number_of_sims = len(global_ids)

self._threshold = configurator.get_load_balancing_threshold()

self._is_load_balancing_done_in_two_steps = (
configurator.is_load_balancing_two_step()
)

self._threshold = configurator.get_load_balancing_threshold()
if self._threshold > 0 and self._is_load_balancing_done_in_two_steps:
self._is_load_balancing_done_in_two_steps = False
self._base_logger.log_warning_one_rank(
"Threshold is not zero, so two step load balancing is disabled."
)

self._balance_inactive_sims = configurator.balance_inactive_sims()

Expand Down Expand Up @@ -126,7 +137,9 @@ def _redistribute_active_sims(self, micro_sims: list) -> None:

if n_global_send_sims == 0 or n_global_recv_sims == 0:
self._nothing_to_balance = True
# TODO: Add a warning log before returning
self._base_logger.log_warning_one_rank(
"It appears that the micro simulations are already fairly balanced. No load balancing will be done. Try changing the threshold value to provoke load balancing."
)
return

if n_global_send_sims < n_global_recv_sims:
Expand Down Expand Up @@ -174,7 +187,10 @@ def _redistribute_active_sims(self, micro_sims: list) -> None:
)

self._communicate_micro_sims(micro_sims, send_map, recv_map)
# TODO: Add a warning if the probable send or receive requests are zero, because then two step load balancing does not happen
else:
self._base_logger.log_warning_one_rank(
"No load balancing was done in the second step because the micro simulations are already almost perfectly balanced."
)

def _redistribute_inactive_sims(self, micro_sims: list) -> None:
"""
Expand Down Expand Up @@ -218,9 +234,26 @@ def _redistribute_inactive_sims(self, micro_sims: list) -> None:

self._communicate_micro_sims(micro_sims, send_map, recv_map)

def _get_communication_maps(self, global_send_sims, global_recv_sims):
def _get_communication_maps(
self, global_send_sims: list, global_recv_sims: list
) -> tuple:
"""
...
Create dictionaries which map global IDs of simulations to ranks for sending and receiving.
Parameters
----------
global_send_sims : list
Number of simulations that each rank sends.
global_recv_sims : list
Number of simulations that each rank receives.
Returns
-------
tuple of dicts
send_map : dict
keys are global IDs of sim states to send, values are ranks to send the sims to
recv_map : dict
keys are global IDs of sim states to receive, values are ranks to receive from
"""
global_ids_of_active_sims_local = []
for global_id in self._global_ids:
Expand Down Expand Up @@ -290,7 +323,7 @@ def _get_communication_maps(self, global_send_sims, global_recv_sims):
global_recv_sims[recv_rank] -= sims

# Remove the global IDs which are already mapped for moving
del self._rank_wise_global_ids_of_active_sims[send_rank][0:sims]
del rank_wise_global_ids_of_active_sims[send_rank][0:sims]

sims = 0

Expand All @@ -314,9 +347,20 @@ def _get_communication_maps(self, global_send_sims, global_recv_sims):

return send_map, recv_map

def _communicate_micro_sims(self, micro_sims, send_map, recv_map):
def _communicate_micro_sims(
self, micro_sims: list, send_map: dict, recv_map: dict
) -> None:
"""
...
Communicate micro simulation states between ranks to balance the load of solving micro simulations.
Parameters
----------
micro_sims : list
List of objects of class MicroProblem, which are the micro simulations
send_map : dict
keys are global IDs of sim states to send, values are ranks to send the sims to
recv_map : dict
keys are global IDs of sim states to receive, values are ranks to receive from
"""
# Asynchronous send operations
send_reqs = []
Expand Down
3 changes: 2 additions & 1 deletion micro_manager/micro_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,12 +481,13 @@ def initialize(self) -> None:
)
)
elif self._config.get_adaptivity_type() == "global":
if self._config.is_adaptivity_with_load_balancing():
if self._is_adaptivity_with_load_balancing:
self._adaptivity_controller: GlobalAdaptivityLBCalculator = (
GlobalAdaptivityLBCalculator(
self._config,
self._global_number_of_sims,
self._global_ids_of_local_sims,
self._logger,
self._rank,
self._comm,
)
Expand Down
23 changes: 23 additions & 0 deletions micro_manager/tools/logging_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,26 @@ def log_error_any_rank(self, message):
Message to log.
"""
self._logger.error(message)

def log_warning_one_rank(self, message: str) -> None:
    """
    Log a warning message. Only rank 0 logs the message.

    Parameters
    ----------
    message : string
        Message to log.
    """
    # Restrict output to rank 0 so the warning is emitted once instead of
    # once per rank.
    if self._rank == 0:
        self._logger.warning(message)

def log_warning_any_rank(self, message: str) -> None:
    """
    Log a warning message. All ranks log the message.

    Parameters
    ----------
    message : string
        Message to log.
    """
    # No rank check here: every rank that calls this method emits the warning.
    self._logger.warning(message)
2 changes: 1 addition & 1 deletion tests/integration/test_unit_cube/precice-config.xml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
</participant>

<participant name="Micro-Manager">
<receive-mesh name="macro-cube-mesh" from="macro-cube" direct-access="true" safety-factor="0" />
<receive-mesh name="macro-cube-mesh" from="macro-cube" direct-access="true" />
<read-data name="macro-data-1" mesh="macro-cube-mesh" />
<write-data name="micro-data-1" mesh="macro-cube-mesh" />
<write-data name="micro-data-2" mesh="macro-cube-mesh" />
Expand Down
8 changes: 7 additions & 1 deletion tests/unit/test_global_adaptivity_lb.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def test_redistribute_active_sims_two_ranks(self):
self._configurator,
global_number_of_sims,
global_ids,
logger=MagicMock(),
rank=self._rank,
comm=self._comm,
)
Expand Down Expand Up @@ -111,6 +112,7 @@ def test_redistribute_inactive_sims_two_ranks(self):
self._configurator,
global_number_of_sims,
global_ids,
logger=MagicMock(),
rank=self._rank,
comm=self._comm,
)
Expand Down Expand Up @@ -165,6 +167,7 @@ def test_redistribute_active_sims_four_ranks_one_step(self):
self._configurator,
global_number_of_sims,
global_ids,
logger=MagicMock(),
rank=self._rank,
comm=self._comm,
)
Expand Down Expand Up @@ -236,6 +239,7 @@ def test_redistribute_active_sims_four_ranks_two_steps(self):
self._configurator,
global_number_of_sims,
global_ids,
logger=MagicMock(),
rank=self._rank,
comm=self._comm,
)
Expand Down Expand Up @@ -281,7 +285,8 @@ def test_redistribute_active_sims_four_ranks_two_steps(self):
)
def test_redistribute_inactive_sims_four_ranks(self):
"""
...
Test load balancing functionality to redistribute inactive simulations.
Run this test in parallel using MPI with 4 ranks.
"""
global_number_of_sims = 15

Expand All @@ -304,6 +309,7 @@ def test_redistribute_inactive_sims_four_ranks(self):
self._configurator,
global_number_of_sims,
global_ids,
logger=MagicMock(),
rank=self._rank,
comm=self._comm,
)
Expand Down

0 comments on commit 3a41a60

Please sign in to comment.