Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrate LLAMA #4003

Draft
wants to merge 2 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "thirdParty/llama"]
path = thirdParty/llama
url = https://github.com/alpaka-group/llama
18 changes: 9 additions & 9 deletions include/picongpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -130,15 +130,15 @@ set(CMAKE_CXX_STANDARD 17)
get_filename_component(SOURCE_DIR_ROOT ${PIConGPUapp_SOURCE_DIR}/../.. ABSOLUTE)
string(FIND "${PIConGPUapp_BINARY_DIR}"
"${SOURCE_DIR_ROOT}/" IN_SRC_POS)
if(IN_SRC_POS GREATER -1)
message(FATAL_ERROR
"PIConGPU requires an out of source build. "
"Please remove \n"
" - CMakeCache.txt\n"
" - CMakeFiles/\n"
"and create a separate build directory. "
"See: INSTALL.rst")
endif()
#if(IN_SRC_POS GREATER -1)
# message(FATAL_ERROR
# "PIConGPU requires an out of source build. "
# "Please remove \n"
# " - CMakeCache.txt\n"
# " - CMakeFiles/\n"
# "and create a separate build directory. "
# "See: INSTALL.rst")
#endif()
Comment on lines -133 to +141
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should not change this; it is fine for your pull request, but not for PIConGPU in general.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, this should not get merged here. However, some IDEs like Visual Studio and CLion, when they spot a CMakeLists.txt, start to configure the project in a subfolder of the working copy so that they can offer better help to the developer. This is still an out-of-source build, by the way, and you can build PIConGPU fine this way as well. It is just not the intended workflow when working on a cluster.


unset(IN_SRC_POS)

Expand Down
4 changes: 2 additions & 2 deletions include/picongpu/algorithms/Set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ namespace picongpu
}

template<typename Dst, typename T_Worker>
HDINLINE void operator()(T_Worker const&, Dst& dst) const
HDINLINE void operator()(T_Worker const&, Dst&& dst) const
{
dst = value;
std::forward<Dst>(dst) = value;
}

private:
Expand Down
2 changes: 1 addition & 1 deletion include/picongpu/algorithms/Velocity.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ namespace picongpu
struct Velocity
{
template<typename MomType, typename MassType>
HDINLINE MomType operator()(const MomType mom, const MassType mass0)
HDINLINE auto operator()(const MomType mom, const MassType mass0)
{
const float_X rc2 = MUE0_EPS0;
const float_X m0_2 = mass0 * mass0;
Expand Down
3 changes: 2 additions & 1 deletion include/picongpu/fields/FieldJ.kernel
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,8 @@ namespace picongpu
// The rest uses normal weighting
const float_X weighting = particle[weighting_];
Velocity velocity;
const float3_X vel = velocity(particle[momentum_], attribute::getMass(weighting, particle));
const float3_X vel
= velocity(static_cast<float3_X>(particle[momentum_]), attribute::getMass(weighting, particle));
auto fieldJShiftToParticle = jBox.shift(localCell);
ParticleAlgo perParticle;
perParticle(worker, fieldJShiftToParticle, pos, vel, charge, m_deltaTime);
Expand Down
4 changes: 3 additions & 1 deletion include/picongpu/fields/FieldTmp.kernel
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,9 @@ namespace picongpu
if(!forEachParticle.hasParticles())
return;

auto cachedVal = CachedBox::create<0, typename T_TmpBox::ValueType>(worker, T_BlockDescription{});
auto cachedVal = CachedBox::create<0, SharedDataBoxMemoryLayout, typename T_TmpBox::ValueType>(
worker,
T_BlockDescription{});
Set<typename T_TmpBox::ValueType> set(float_X(0.0));

auto collective = makeThreadCollective<T_BlockDescription>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@

#include "picongpu/simulation_defines.hpp"

#include "picongpu/param/memory.param"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this a good idea?


#include <pmacc/dimensions/SuperCellDescription.hpp>
#include <pmacc/lockstep.hpp>
#include <pmacc/mappings/threads/ThreadCollective.hpp>
Expand Down Expand Up @@ -68,7 +70,9 @@ namespace picongpu::fields::maxwellSolver

constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume<SuperCellSize>::type::value;

auto cachedJ = CachedBox::create<0, typename FieldJ::DataBoxType::ValueType>(worker, BlockArea());
auto cachedJ = CachedBox::create<0, SharedDataBoxMemoryLayout, typename FieldJ::DataBoxType::ValueType>(
worker,
BlockArea());

pmacc::math::operation::Assign assign;
DataSpace<simDim> const block(
Expand Down
4 changes: 3 additions & 1 deletion include/picongpu/fields/MaxwellSolver/FDTD/FDTDBase.kernel
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,9 @@ namespace picongpu
auto srcFieldBlock = srcField.shift(beginCellIdx);
auto cacheStencilArea = makeThreadCollective<StencilCfg>();
auto cachedSrcField
= CachedBox::create<0u, typename T_SrcBox::ValueType>(worker, StencilCfg{});
= CachedBox::create<0u, SharedDataBoxMemoryLayout, typename T_SrcBox::ValueType>(
worker,
StencilCfg{});
cacheStencilArea(worker, assign, cachedSrcField, srcFieldBlock);

worker.sync();
Expand Down
11 changes: 2 additions & 9 deletions include/picongpu/fields/currentDeposition/Cache.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,11 @@ namespace picongpu
*/
template<typename T_BlockDescription, typename T_Worker, typename T_FieldBox>
DINLINE static auto create(T_Worker const& worker, T_FieldBox const& fieldBox)
#if(!BOOST_COMP_CLANG)
-> decltype(CachedBox::create<0u, typename T_FieldBox::ValueType>(
worker,
std::declval<T_BlockDescription>()))
#endif
{
using ValueType = typename T_FieldBox::ValueType;
/* this memory is used by all virtual blocks */
auto cache = CachedBox::create<0u, ValueType>(worker, T_BlockDescription{});
auto cache
= CachedBox::create<0u, SharedDataBoxMemoryLayout, ValueType>(worker, T_BlockDescription{});

Set<ValueType> set(ValueType::create(0.0_X));
auto collectiveFill = makeThreadCollective<T_BlockDescription>();
Expand Down Expand Up @@ -90,9 +86,6 @@ namespace picongpu
*/
template<typename T_BlockDescription, typename T_Worker, typename T_FieldBox>
DINLINE static auto create([[maybe_unused]] T_Worker const& worker, T_FieldBox const& fieldBox)
#if(!BOOST_COMP_CLANG)
-> T_FieldBox
#endif
{
return fieldBox;
}
Expand Down
5 changes: 3 additions & 2 deletions include/picongpu/fields/incidentField/Solver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -283,8 +283,9 @@ namespace picongpu
using IntVector = pmacc::math::Vector<int, simDim>;
auto const beginLocalUserIdx
= Index{math::max(IntVector{beginUserIdx - totalCellOffset}, IntVector::create(0))};
auto const endLocalUserIdx
= Index{math::min(IntVector{endUserIdx - totalCellOffset}, IntVector{localDomain.size})};
auto const endLocalUserIdx = Index{math::min(
IntVector{endUserIdx - totalCellOffset},
static_cast<const IntVector&>(localDomain.size))};

// Check if we have any active cells in the local domain
bool areAnyCellsInLocalDomain = true;
Expand Down
15 changes: 15 additions & 0 deletions include/picongpu/param/memory.param
Original file line number Diff line number Diff line change
Expand Up @@ -114,4 +114,19 @@ namespace picongpu
*/
constexpr bool fieldTmpSupportGatherCommunication = true;

/** Memory layout tag for particle frames.
 *
 * Derives from LLAMA's bound struct-of-arrays mapping, configured with a
 * single blob and aligned sub-arrays.
 */
struct ParticleFrameMemoryLayout
    : llama::mapping::BindSoA<llama::mapping::Blobs::Single, llama::mapping::SubArrayAlignment::Align>
{
    // NOTE(review): presumably tells the consumer whether vector-valued attributes are
    // split into per-component fields -- confirm against the code that reads this flag.
    static constexpr bool splitVector{false};
};

/** Memory layout tag for particle frames, OpenPMD variant.
 *
 * Derives from LLAMA's bound struct-of-arrays mapping, configured with one
 * blob per field.
 * NOTE(review): going by the name, this is meant for the openPMD output path --
 * confirm where it is consumed.
 */
struct ParticleFrameMemoryLayoutOpenPMD : llama::mapping::BindSoA<llama::mapping::Blobs::OnePerField>
{
    // NOTE(review): presumably tells the consumer whether vector-valued attributes are
    // split into per-component fields -- confirm against the code that reads this flag.
    static constexpr bool splitVector{false};
};

/** Memory layout tag for cached/shared data boxes.
 *
 * Derives from LLAMA's bound array-of-structs mapping with its default
 * template arguments.
 */
struct SharedDataBoxMemoryLayout : llama::mapping::BindAoS<>
{
    // NOTE(review): presumably tells the consumer whether vector-valued attributes are
    // split into per-component fields -- confirm against the code that reads this flag.
    static constexpr bool splitVector{false};
};
} // namespace picongpu
5 changes: 4 additions & 1 deletion include/picongpu/particles/Particles.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#include "picongpu/fields/Fields.def"
#include "picongpu/fields/Fields.hpp"
#include "picongpu/param/memory.param"
#include "picongpu/particles/boundary/Description.hpp"
#include "picongpu/particles/boundary/Utility.hpp"
#include "picongpu/particles/manipulators/manipulators.def"
Expand Down Expand Up @@ -89,6 +90,7 @@ namespace picongpu
pmacc::HandleGuardRegion<
pmacc::particles::policies::ExchangeParticles,
pmacc::particles::policies::DoNothing>>>,
ParticleFrameMemoryLayout,
MappingDesc,
DeviceHeap>
, public ISimulationData
Expand All @@ -108,7 +110,8 @@ namespace picongpu
pmacc::HandleGuardRegion<
pmacc::particles::policies::ExchangeParticles,
pmacc::particles::policies::DoNothing>>>;
using ParticlesBaseType = ParticlesBase<SpeciesParticleDescription, picongpu::MappingDesc, DeviceHeap>;
using ParticlesBaseType
= ParticlesBase<SpeciesParticleDescription, ParticleFrameMemoryLayout, picongpu::MappingDesc, DeviceHeap>;
using FrameType = typename ParticlesBaseType::FrameType;
using FrameTypeBorder = typename ParticlesBaseType::FrameTypeBorder;
using ParticlesBoxType = typename ParticlesBaseType::ParticlesBoxType;
Expand Down
6 changes: 4 additions & 2 deletions include/picongpu/particles/Particles.kernel
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,10 @@ namespace picongpu

onlyMaster([&]() { mustShiftSupercell = 0; });

auto cachedB = CachedBox::create<0, typename T_BBox::ValueType>(worker, T_DataDomain());
auto cachedE = CachedBox::create<1, typename T_EBox::ValueType>(worker, T_DataDomain());
auto cachedB
= CachedBox::create<0, SharedDataBoxMemoryLayout, typename T_BBox::ValueType>(worker, T_DataDomain());
auto cachedE
= CachedBox::create<1, SharedDataBoxMemoryLayout, typename T_EBox::ValueType>(worker, T_DataDomain());

worker.sync();

Expand Down
20 changes: 19 additions & 1 deletion include/picongpu/particles/Particles.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include <pmacc/traits/Resolve.hpp>

#include <algorithm>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
Expand Down Expand Up @@ -197,7 +198,9 @@ namespace picongpu
const std::shared_ptr<DeviceHeap>& heap,
picongpu::MappingDesc cellDescription,
SimulationDataId datasetID)
: ParticlesBase<SpeciesParticleDescription, picongpu::MappingDesc, DeviceHeap>(heap, cellDescription)
: ParticlesBase<SpeciesParticleDescription, ParticleFrameMemoryLayout, picongpu::MappingDesc, DeviceHeap>(
heap,
cellDescription)
, m_datasetID(datasetID)
{
constexpr bool particleHasShape = pmacc::traits::HasIdentifier<FrameType, shape<>>::type::value;
Expand All @@ -212,6 +215,21 @@ namespace picongpu

size_t sizeOfExchanges = 0u;

#if __has_include(<fmt/format.h>)
// Dump the data layout of the particle frames (regular and border frame) as HTML and SVG
// files into the current working directory. Only compiled in when the MEMORY log level
// is enabled.
// NOTE(review): the <fmt/format.h> guard is presumably there because LLAMA's dumping
// helpers depend on fmt -- confirm.
if constexpr(PIConGPUVerbose::log_level & picLog::MEMORY::lvl)
{
    // The message must name the files actually written below (llama_border_frame.*).
    log<picLog::MEMORY>(
        "Dumping LLAMA memory layout for frame and border into llama_frame.* and llama_border_frame.*");
    // Mapping of the regular particle frame.
    auto fm = typename decltype(FrameType::view)::Mapping{};
    std::ofstream{"llama_frame.html"} << llama::toHtml(fm);
    std::ofstream{"llama_frame.svg"} << llama::toSvg(fm);
    // Mapping of the border (exchange) frame.
    auto bfm = typename decltype(FrameTypeBorder::view)::Mapping{};
    std::ofstream{"llama_border_frame.html"} << llama::toHtml(bfm);
    std::ofstream{"llama_border_frame.svg"} << llama::toSvg(bfm);
}
#endif

const uint32_t commTag = pmacc::traits::getUniqueId();
log<picLog::MEMORY>("communication tag for species %1%: %2%") % FrameType::getName() % commTag;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

#include "picongpu/fields/CellType.hpp"
#include "picongpu/fields/FieldTmp.hpp"
#include "picongpu/param/memory.param"
#include "picongpu/particles/atomicPhysics/SetChargeState.hpp"
#include "picongpu/particles/ionization/byCollision/ThomasFermi/AlgorithmThomasFermi.hpp"
#include "picongpu/particles/ionization/byCollision/ThomasFermi/ThomasFermi.def"
Expand Down Expand Up @@ -104,8 +105,20 @@ namespace picongpu
PMACC_ALIGN(eneBox, FieldTmp::DataBoxType);

/* shared memory EM-field device databoxes */
PMACC_ALIGN(cachedRho, DataBox<SharedBox<ValueType_Rho, typename BlockArea::FullSuperCellSize, 0>>);
PMACC_ALIGN(cachedEne, DataBox<SharedBox<ValueType_Ene, typename BlockArea::FullSuperCellSize, 1>>);
PMACC_ALIGN(
cachedRho,
DataBox<SharedBox<
ValueType_Rho,
typename BlockArea::FullSuperCellSize,
0,
SharedDataBoxMemoryLayout>>);
PMACC_ALIGN(
cachedEne,
DataBox<SharedBox<
ValueType_Ene,
typename BlockArea::FullSuperCellSize,
1,
SharedDataBoxMemoryLayout>>);

public:
/* host constructor initializing member : random number generator */
Expand Down Expand Up @@ -185,8 +198,8 @@ namespace picongpu
DINLINE void collectiveInit(const T_Worker& worker, const DataSpace<simDim>& blockCell)
{
/* caching of density and "temperature" fields */
cachedRho = CachedBox::create<0, ValueType_Rho>(worker, BlockArea());
cachedEne = CachedBox::create<1, ValueType_Ene>(worker, BlockArea());
cachedRho = CachedBox::create<0, SharedDataBoxMemoryLayout, ValueType_Rho>(worker, BlockArea());
cachedEne = CachedBox::create<1, SharedDataBoxMemoryLayout, ValueType_Ene>(worker, BlockArea());

/* instance of nvidia assignment operator */
pmacc::math::operation::Assign assign;
Expand Down
14 changes: 10 additions & 4 deletions include/picongpu/particles/ionization/byField/ADK/ADK_Impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,14 @@ namespace picongpu
PMACC_ALIGN(bBox, FieldB::DataBoxType);
PMACC_ALIGN(jBox, FieldJ::DataBoxType);
/* shared memory EM-field device databoxes */
PMACC_ALIGN(cachedE, DataBox<SharedBox<ValueType_E, typename BlockArea::FullSuperCellSize, 1>>);
PMACC_ALIGN(cachedB, DataBox<SharedBox<ValueType_B, typename BlockArea::FullSuperCellSize, 0>>);
PMACC_ALIGN(
cachedE,
DataBox<
SharedBox<ValueType_E, typename BlockArea::FullSuperCellSize, 1, SharedDataBoxMemoryLayout>>);
PMACC_ALIGN(
cachedB,
DataBox<
SharedBox<ValueType_B, typename BlockArea::FullSuperCellSize, 0, SharedDataBoxMemoryLayout>>);

public:
/* host constructor initializing member : random number generator */
Expand Down Expand Up @@ -137,8 +143,8 @@ namespace picongpu
jBox = jBox.shift(blockCell);

/* caching of E and B fields */
cachedB = CachedBox::create<0, ValueType_B>(worker, BlockArea());
cachedE = CachedBox::create<1, ValueType_E>(worker, BlockArea());
cachedB = CachedBox::create<0, SharedDataBoxMemoryLayout, ValueType_B>(worker, BlockArea());
cachedE = CachedBox::create<1, SharedDataBoxMemoryLayout, ValueType_E>(worker, BlockArea());

/* instance of nvidia assignment operator */
pmacc::math::operation::Assign assign;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "picongpu/fields/FieldB.hpp"
#include "picongpu/fields/FieldE.hpp"
#include "picongpu/fields/FieldJ.hpp"
#include "picongpu/param/memory.param"
#include "picongpu/particles/ParticlesFunctors.hpp"
#include "picongpu/particles/atomicPhysics/SetChargeState.hpp"
#include "picongpu/particles/ionization/byField/BSI/AlgorithmBSI.hpp"
Expand Down Expand Up @@ -93,7 +94,10 @@ namespace picongpu
FieldE::DataBoxType eBox;
FieldJ::DataBoxType jBox;
/* shared memory EM-field device databoxes */
PMACC_ALIGN(cachedE, DataBox<SharedBox<ValueType_E, typename BlockArea::FullSuperCellSize, 1>>);
PMACC_ALIGN(
cachedE,
DataBox<
SharedBox<ValueType_E, typename BlockArea::FullSuperCellSize, 1, SharedDataBoxMemoryLayout>>);

public:
/* host constructor */
Expand Down Expand Up @@ -125,7 +129,7 @@ namespace picongpu
jBox = jBox.shift(blockCell);

/* caching of E field */
cachedE = CachedBox::create<1, ValueType_E>(worker, BlockArea());
cachedE = CachedBox::create<1, SharedDataBoxMemoryLayout, ValueType_E>(worker, BlockArea());

/* instance of nvidia assignment operator */
pmacc::math::operation::Assign assign;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,14 @@ namespace picongpu
PMACC_ALIGN(bBox, FieldB::DataBoxType);
PMACC_ALIGN(jBox, FieldJ::DataBoxType);
/* shared memory EM-field device databoxes */
PMACC_ALIGN(cachedE, DataBox<SharedBox<ValueType_E, typename BlockArea::FullSuperCellSize, 1>>);
PMACC_ALIGN(cachedB, DataBox<SharedBox<ValueType_B, typename BlockArea::FullSuperCellSize, 0>>);
PMACC_ALIGN(
cachedE,
DataBox<
SharedBox<ValueType_E, typename BlockArea::FullSuperCellSize, 1, SharedDataBoxMemoryLayout>>);
PMACC_ALIGN(
cachedB,
DataBox<
SharedBox<ValueType_B, typename BlockArea::FullSuperCellSize, 0, SharedDataBoxMemoryLayout>>);

public:
/* host constructor initializing member : random number generator */
Expand Down Expand Up @@ -137,8 +143,8 @@ namespace picongpu
jBox = jBox.shift(blockCell);

/* caching of E and B fields */
cachedB = CachedBox::create<0, ValueType_B>(worker, BlockArea());
cachedE = CachedBox::create<1, ValueType_E>(worker, BlockArea());
cachedB = CachedBox::create<0, SharedDataBoxMemoryLayout, ValueType_B>(worker, BlockArea());
cachedE = CachedBox::create<1, SharedDataBoxMemoryLayout, ValueType_E>(worker, BlockArea());

/* instance of nvidia assignment operator */
pmacc::math::operation::Assign assign;
Expand Down
2 changes: 1 addition & 1 deletion include/picongpu/plugins/PhaseSpace/PhaseSpaceFunctors.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ namespace picongpu
/* create shared mem */
constexpr int blockCellsInDir = SuperCellSize::template at<r_dir>::type::value;
using SharedMemSize = SuperCellDescription<pmacc::math::CT::Int<num_pbins, blockCellsInDir>>;
auto sharedMemHist = CachedBox::create<0u, float_PS>(worker, SharedMemSize{});
auto sharedMemHist = CachedBox::create<0u, SharedDataBoxMemoryLayout, float_PS>(worker, SharedMemSize{});

Set<float_PS> set(float_PS{0.0});
auto collectiveOnSharedHistogram = makeThreadCollective<SharedMemSize>();
Expand Down
Loading