From b4a5ba25e20874f95e895d59df958af81c99cbd1 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Wed, 28 Aug 2024 15:35:37 -0500 Subject: [PATCH] warpx.roundrobin_sfc: A runtime parameter to control distribution mapping (#4909) * warpx.roundrobin_sfc: A runtime parameter to control distribution mapping The default is false. If it's true, AMReX's RRSFC strategy will be used to override the default SFC strategy used by amrex::AmrCore. The motivation for this is that this might mitigate the load imbalance issue during initialization by avoiding putting neighboring boxes on the same process. * Update parameters.rst * Add control in PICMI * Remove WarpX:roundrobin_sfc * add const * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- Docs/source/usage/parameters.rst | 8 ++++++++ Python/pywarpx/picmi.py | 6 ++++++ Source/WarpX.H | 4 ++++ Source/WarpX.cpp | 26 ++++++++++++++++++++++++++ 4 files changed, 44 insertions(+) diff --git a/Docs/source/usage/parameters.rst b/Docs/source/usage/parameters.rst index a09e57f10a1..8d140ab82ce 100644 --- a/Docs/source/usage/parameters.rst +++ b/Docs/source/usage/parameters.rst @@ -733,6 +733,14 @@ Distribution across MPI ranks and parallelization * ``warpx.do_dynamic_scheduling`` (`0` or `1`) optional (default `1`) Whether to activate OpenMP dynamic scheduling. +* ``warpx.roundrobin_sfc`` (`0` or `1`) optional (default `0`) + Whether to use AMReX's RRSFC strategy for making DistributionMapping to + override the default space filling curve (SFC) strategy. If this is + enabled, the round robin method is used to distribute Boxes ordered by + SFC. This could potentially mitigate the load imbalance issue during + initialization by avoiding putting neighboring boxes on the same + process. + .. 
_running-cpp-parameters-parser: Math parser and user-defined constants diff --git a/Python/pywarpx/picmi.py b/Python/pywarpx/picmi.py index fbaa7043fbc..854a168bcec 100644 --- a/Python/pywarpx/picmi.py +++ b/Python/pywarpx/picmi.py @@ -2594,6 +2594,9 @@ class Simulation(picmistandard.PICMI_Simulation): warpx_do_dynamic_scheduling: bool, default=True Whether to do dynamic scheduling with OpenMP + warpx_roundrobin_sfc: bool, default=False + Whether to use the RRSFC strategy for making DistributionMapping + warpx_load_balance_intervals: string, default='0' The intervals for doing load balancing @@ -2710,6 +2713,7 @@ def init(self, kw): ) self.random_seed = kw.pop("warpx_random_seed", None) self.do_dynamic_scheduling = kw.pop("warpx_do_dynamic_scheduling", None) + self.roundrobin_sfc = kw.pop("warpx_roundrobin_sfc", None) self.load_balance_intervals = kw.pop("warpx_load_balance_intervals", None) self.load_balance_efficiency_ratio_threshold = kw.pop( "warpx_load_balance_efficiency_ratio_threshold", None @@ -2805,6 +2809,8 @@ def initialize_inputs(self): pywarpx.warpx.do_dynamic_scheduling = self.do_dynamic_scheduling + pywarpx.warpx.roundrobin_sfc = self.roundrobin_sfc + pywarpx.particles.use_fdtd_nci_corr = self.use_fdtd_nci_corr pywarpx.amr.check_input = self.amr_check_input diff --git a/Source/WarpX.H b/Source/WarpX.H index 2caea95e926..943bc24eb2a 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -1242,6 +1242,10 @@ protected: //! This function is called in amrex::AmrCore::InitFromScratch. void PostProcessBaseGrids (amrex::BoxArray& ba0) const final; + //! Use this function to override how DistributionMapping is made. + [[nodiscard]] amrex::DistributionMapping + MakeDistributionMap (int lev, amrex::BoxArray const& ba) final; + //! Make a new level from scratch using provided BoxArray and //! DistributionMapping. Only used during initialization. Called //! by AmrCoreInitFromScratch. 
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index f9e86ab171f..9504cb949a3 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -3587,6 +3587,32 @@ WarpX::getField(FieldType field_type, const int lev, const int direction) const return *getFieldPointer(field_type, lev, direction); } +amrex::DistributionMapping +WarpX::MakeDistributionMap (int lev, amrex::BoxArray const& ba) +{ + bool roundrobin_sfc = false; + const ParmParse pp("warpx"); + pp.query("roundrobin_sfc", roundrobin_sfc); + + // If this is true, AMReX's RRSFC strategy is used to make + // DistributionMapping. Note that the DistributionMapping made + // here could still be overridden by load balancing. In the RRSFC + // strategy, the round robin method is used to distribute Boxes ordered + // by the space filling curve. This might help avoid some processes + // running out of memory due to having too many particles during + // initialization. + + if (roundrobin_sfc) { + auto old_strategy = amrex::DistributionMapping::strategy(); + amrex::DistributionMapping::strategy(amrex::DistributionMapping::RRSFC); + amrex::DistributionMapping dm(ba); + amrex::DistributionMapping::strategy(old_strategy); + return dm; + } else { + return amrex::AmrCore::MakeDistributionMap(lev, ba); + } +} + const amrex::Vector,3>>& WarpX::getMultiLevelField(warpx::fields::FieldType field_type) const {