Skip to content

Commit

Permalink
Use amrex logic for device synchronization in tiny profiler instead o…
Browse files Browse the repository at this point in the history
…f custom solution
  • Loading branch information
lucafedeli88 committed Apr 24, 2024
1 parent 486e793 commit 76a6dd2
Show file tree
Hide file tree
Showing 13 changed files with 51 additions and 80 deletions.
1 change: 1 addition & 0 deletions Source/Diagnostics/ParticleIO.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "Utils/TextMsg.H"
#include "Utils/WarpXConst.H"
#include "Utils/WarpXProfilerWrapper.H"
#include "WarpX.H"

#include <ablastr/utils/text/StreamUtils.H>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
#include <AMReX_Array.H>
#include <AMReX_REAL.H>

#include <optional>

/**
* \brief This class contains the parameters needed to evaluate hybrid field
* solutions (kinetic ions with fluid electrons).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

#include "HybridPICModel.H"

#include "WarpX.H"

using namespace amrex;

HybridPICModel::HybridPICModel ( int nlevs_max )
Expand Down
1 change: 1 addition & 0 deletions Source/Filter/Filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include "Utils/TextMsg.H"
#include "Utils/WarpXProfilerWrapper.H"
#include "WarpX.H"

#include <AMReX_Array4.H>
#include <AMReX_Box.H>
Expand Down
19 changes: 19 additions & 0 deletions Source/Initialization/WarpXAMReXInit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,12 @@

#include "Initialization/WarpXAMReXInit.H"

#include "Utils/TextMsg.H"

#include <AMReX.H>
#include <AMReX_ccse-mpi.H>
#include <AMReX_ParmParse.H>
#include <AMReX_TinyProfiler.H>

namespace {
/** Overwrite defaults in AMReX Inputs
Expand Down Expand Up @@ -44,6 +47,22 @@ namespace {
pp_amr.add("blocking_factor", 1);
}

//https://github.com/AMReX-Codes/amrex/pull/3763
#ifdef AMREX_USE_GPU
bool warpx_do_device_synchronize = true;
#else
bool warpx_do_device_synchronize = false;
#endif
pp_warpx.query("do_device_synchronize", warpx_do_device_synchronize);
bool do_device_synchronize = warpx_do_device_synchronize;
amrex::ParmParse pp_tiny_profiler("tiny_profiler");
if (pp_tiny_profiler.queryAdd("device_synchronize_around_region", do_device_synchronize) )
{
WARPX_ALWAYS_ASSERT_WITH_MESSAGE(
do_device_synchronize == warpx_do_device_synchronize,
"tiny_profiler.device_synchronize_around_region overrides warpx.do_device_synchronize.");
}

// Here we override the default tiling option for particles, which is always
// "false" in AMReX, to "false" if compiling for GPU execution and "true"
// if compiling for CPU.
Expand Down
1 change: 1 addition & 0 deletions Source/Particles/LaserParticleContainer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "Utils/WarpXAlgorithmSelection.H"
#include "Utils/WarpXConst.H"
#include "Utils/WarpXProfilerWrapper.H"
#include "WarpX.H"

#include <ablastr/warn_manager/WarnManager.H>

Expand Down
4 changes: 1 addition & 3 deletions Source/Particles/WarpXParticleContainer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1161,9 +1161,7 @@ WarpXParticleContainer::DepositCharge (WarpXParIter& pti, RealVector const& wp,
WarpX::noz, dx, xyzmin, WarpX::n_rz_azimuthal_modes,
ng_rho, depos_lev, ref_ratio,
offset, np_to_deposit,
icomp, nc,
WarpX::do_device_synchronize
);
icomp, nc);
}
}

Expand Down
16 changes: 7 additions & 9 deletions Source/Utils/WarpXProfilerWrapper.H
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,15 @@
#ifndef WARPX_PROFILERWRAPPER_H_
#define WARPX_PROFILERWRAPPER_H_

#include "WarpX.H"
#include "ablastr/profiler/ProfilerWrapper.H"

#include <AMReX_BLProfiler.H>

// `BL_PROFILE_PASTE(SYNC_SCOPE_, __COUNTER__)` and `SYNC_V_##vname` used to make unique names for
// synchronizeOnDestruct objects, like `SYNC_SCOPE_0` and `SYNC_V_pmain`
#define WARPX_PROFILE(fname) ABLASTR_PROFILE(fname, WarpX::do_device_synchronize)
#define WARPX_PROFILE_VAR(fname, vname) ABLASTR_PROFILE_VAR(fname, vname, WarpX::do_device_synchronize)
#define WARPX_PROFILE_VAR_NS(fname, vname) ABLASTR_PROFILE_VAR_NS(fname, vname, WarpX::do_device_synchronize)
#define WARPX_PROFILE_VAR_START(vname) ABLASTR_PROFILE_VAR_START(vname, WarpX::do_device_synchronize)
#define WARPX_PROFILE_VAR_STOP(vname) ABLASTR_PROFILE_VAR_STOP(vname, WarpX::do_device_synchronize)
#define WARPX_PROFILE_REGION(rname) ABLASTR_PROFILE_REGION(rname, WarpX::do_device_synchronize)
#define WARPX_PROFILE(fname) BL_PROFILE(fname)
#define WARPX_PROFILE_VAR(fname, vname) BL_PROFILE_VAR(fname, vname)
#define WARPX_PROFILE_VAR_NS(fname, vname) BL_PROFILE_VAR_NS(fname, vname)
#define WARPX_PROFILE_VAR_START(vname) BL_PROFILE_VAR_START(vname)
#define WARPX_PROFILE_VAR_STOP(vname) BL_PROFILE_VAR_STOP(vname)
#define WARPX_PROFILE_REGION(rname) BL_PROFILE_REGION(rname)

#endif // WARPX_PROFILERWRAPPER_H_
8 changes: 0 additions & 8 deletions Source/WarpX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,12 +212,6 @@ int WarpX::n_current_deposition_buffer = -1;
short WarpX::grid_type;
amrex::IntVect m_rho_nodal_flag;

#ifdef AMREX_USE_GPU
bool WarpX::do_device_synchronize = true;
#else
bool WarpX::do_device_synchronize = false;
#endif

WarpX* WarpX::m_instance = nullptr;

void WarpX::MakeWarpX ()
Expand Down Expand Up @@ -684,8 +678,6 @@ WarpX::ReadParameters ()

ReadBoostedFrameParameters(gamma_boost, beta_boost, boost_direction);

pp_warpx.query("do_device_synchronize", do_device_synchronize);

// queryWithParser returns 1 if argument zmax_plasma_to_compute_max_step is
// specified by the user, 0 otherwise.
do_compute_max_step_from_zmax = utils::parser::queryWithParser(
Expand Down
2 changes: 1 addition & 1 deletion Source/ablastr/fields/PoissonSolver.H
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ computePhi (amrex::Vector<amrex::MultiFab*> const & rho,
{
using namespace amrex::literals;

ABLASTR_PROFILE("computePhi", false);
ABLASTR_PROFILE("computePhi");

if (!rel_ref_ratio.has_value()) {
ABLASTR_ALWAYS_ASSERT_WITH_MESSAGE(rho.size() == 1u,
Expand Down
6 changes: 3 additions & 3 deletions Source/ablastr/math/fft/WrapCuFFT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ namespace ablastr::math::anyfft
Complex * const complex_array, const direction dir, const int dim)
{
FFTplan fft_plan;
ABLASTR_PROFILE("ablastr::math::anyfft::CreatePlan", false);
ABLASTR_PROFILE("ablastr::math::anyfft::CreatePlan");

// Initialize fft_plan.m_plan with the vendor fft plan.
cufftResult result;
Expand Down Expand Up @@ -71,12 +71,12 @@ namespace ablastr::math::anyfft

void DestroyPlan(FFTplan& fft_plan)
{
ABLASTR_PROFILE("ablastr::math::anyfft::DestroyPlan", false);
ABLASTR_PROFILE("ablastr::math::anyfft::DestroyPlan");
cufftDestroy( fft_plan.m_plan );
}

void Execute(FFTplan& fft_plan){
ABLASTR_PROFILE("ablastr::math::anyfft::Execute", false);
ABLASTR_PROFILE("ablastr::math::anyfft::Execute");
// make sure that this is done on the same GPU stream as the above copy
cudaStream_t stream = amrex::Gpu::Device::cudaStream();
cufftSetStream ( fft_plan.m_plan, stream);
Expand Down
16 changes: 7 additions & 9 deletions Source/ablastr/particles/DepositCharge.H
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ namespace ablastr::particles
* \param np_to_deposit number of particles to deposit (default: pti.numParticles())
* \param icomp component in MultiFab to start depositing to
* \param nc number of components to deposit
* \param do_device_synchronize call amrex::Gpu::synchronize() for tiny profiler regions (default: true)
*/
template< typename T_PC >
static void
Expand All @@ -63,8 +62,7 @@ deposit_charge (typename T_PC::ParIterType& pti,
std::optional<amrex::IntVect> rel_ref_ratio = std::nullopt,
long const offset = 0,
std::optional<long> np_to_deposit = std::nullopt,
int const icomp = 0, int const nc = 1,
bool const do_device_synchronize = true)
int const icomp = 0, int const nc = 1)
{
// deposition guards
amrex::IntVect ng_rho = rho->nGrowVect();
Expand Down Expand Up @@ -131,8 +129,8 @@ deposit_charge (typename T_PC::ParIterType& pti,
amrex::numParticlesOutOfRange(pti, range) == 0,
"Particles shape does not fit within tile (CPU) or guard cells (GPU) used for charge deposition");

ABLASTR_PROFILE_VAR_NS("ablastr::particles::deposit_charge::ChargeDeposition", blp_ppc_chd, do_device_synchronize);
ABLASTR_PROFILE_VAR_NS("ablastr::particles::deposit_charge::Accumulate", blp_accumulate, do_device_synchronize);
ABLASTR_PROFILE_VAR_NS("ablastr::particles::deposit_charge::ChargeDeposition", blp_ppc_chd);
ABLASTR_PROFILE_VAR_NS("ablastr::particles::deposit_charge::Accumulate", blp_accumulate);

// Get tile box where charge is deposited.
// The tile box is different when depositing in the buffers (depos_lev<lev)
Expand Down Expand Up @@ -173,7 +171,7 @@ deposit_charge (typename T_PC::ParIterType& pti,
// Indices of the lower bound
const amrex::Dim3 lo = lbound(tilebox);

ABLASTR_PROFILE_VAR_START(blp_ppc_chd, do_device_synchronize);
ABLASTR_PROFILE_VAR_START(blp_ppc_chd);

if (nox == 1){
doChargeDepositionShapeN<1>(GetPosition, wp.dataPtr()+offset, ion_lev,
Expand All @@ -192,13 +190,13 @@ deposit_charge (typename T_PC::ParIterType& pti,
rho_fab, np_to_deposit.value(), dx, xyzmin, lo, charge,
n_rz_azimuthal_modes);
}
ABLASTR_PROFILE_VAR_STOP(blp_ppc_chd, do_device_synchronize);
ABLASTR_PROFILE_VAR_STOP(blp_ppc_chd);

#ifndef AMREX_USE_GPU
// CPU, tiling: atomicAdd local_rho into rho
ABLASTR_PROFILE_VAR_START(blp_accumulate, do_device_synchronize);
ABLASTR_PROFILE_VAR_START(blp_accumulate);
(*rho)[pti].lockAdd(local_rho, tb, tb, 0, icomp*nc, nc);
ABLASTR_PROFILE_VAR_STOP(blp_accumulate, do_device_synchronize);
ABLASTR_PROFILE_VAR_STOP(blp_accumulate);
#endif
}

Expand Down
53 changes: 6 additions & 47 deletions Source/ablastr/profiler/ProfilerWrapper.H
Original file line number Diff line number Diff line change
Expand Up @@ -9,54 +9,13 @@
#define ABLASTR_PROFILERWRAPPER_H_

#include <AMReX_BLProfiler.H>
#include <AMReX_GpuDevice.H>


namespace ablastr::profiler
{
/** Conditionally synchronizes active GPU operations
*
* @param do_device_synchronize perform amrex::Gpu::synchronize() if true
*/
AMREX_FORCE_INLINE
void
device_synchronize(bool const do_device_synchronize = false) {
if (do_device_synchronize) {
amrex::Gpu::synchronize();
}
}

/** An object that conditionally calls device_synchronize() on destruction
*
* Note that objects are destructed in the reverse order of declaration
*/
struct SynchronizeOnDestruct {
SynchronizeOnDestruct(bool const do_device_synchronize = false)
: m_do_device_synchronize(do_device_synchronize) {}

AMREX_FORCE_INLINE
~SynchronizeOnDestruct() {
device_synchronize(m_do_device_synchronize);
}

// default move and copy operations
SynchronizeOnDestruct(const SynchronizeOnDestruct&) = default;
SynchronizeOnDestruct& operator=(const SynchronizeOnDestruct&) = default;
SynchronizeOnDestruct(SynchronizeOnDestruct&&) = default;
SynchronizeOnDestruct& operator=(SynchronizeOnDestruct&& field_data) = default;

bool m_do_device_synchronize = false;
};

} // namespace ablastr::profiler

// `BL_PROFILE_PASTE(SYNC_SCOPE_, __COUNTER__)` and `SYNC_V_##vname` used to make unique names for
// synchronizeOnDestruct objects, like `SYNC_SCOPE_0` and `SYNC_V_pmain`
#define ABLASTR_PROFILE(fname, sync) ablastr::profiler::device_synchronize(sync); BL_PROFILE(fname); const ablastr::profiler::SynchronizeOnDestruct BL_PROFILE_PASTE(SYNC_SCOPE_, __COUNTER__){sync}
#define ABLASTR_PROFILE_VAR(fname, vname, sync) ablastr::profiler::device_synchronize(sync); BL_PROFILE_VAR(fname, vname); const ablastr::profiler::SynchronizeOnDestruct SYNC_V_##vname{sync}
#define ABLASTR_PROFILE_VAR_NS(fname, vname, sync) BL_PROFILE_VAR_NS(fname, vname); const ablastr::profiler::SynchronizeOnDestruct SYNC_V_##vname{sync}
#define ABLASTR_PROFILE_VAR_START(vname, sync) ablastr::profiler::device_synchronize(sync); BL_PROFILE_VAR_START(vname)
#define ABLASTR_PROFILE_VAR_STOP(vname, sync) ablastr::profiler::device_synchronize(sync); BL_PROFILE_VAR_STOP(vname)
#define ABLASTR_PROFILE_REGION(rname, sync) ablastr::profiler::device_synchronize(sync); BL_PROFILE_REGION(rname); const ablastr::profiler::SynchronizeOnDestruct BL_PROFILE_PASTE(SYNC_R_, __COUNTER__){sync}
#define ABLASTR_PROFILE(fname) BL_PROFILE(fname)
#define ABLASTR_PROFILE_VAR(fname, vname) BL_PROFILE_VAR(fname, vname)
#define ABLASTR_PROFILE_VAR_NS(fname, vname) BL_PROFILE_VAR_NS(fname, vname)
#define ABLASTR_PROFILE_VAR_START(vname) BL_PROFILE_VAR_START(vname)
#define ABLASTR_PROFILE_VAR_STOP(vname) BL_PROFILE_VAR_STOP(vname)
#define ABLASTR_PROFILE_REGION(rname) BL_PROFILE_REGION(rname)

#endif // ABLASTR_PROFILERWRAPPER_H_

0 comments on commit 76a6dd2

Please sign in to comment.