Skip to content

Commit

Permalink
Merge pull request #340 from lks1248/gpu-observables
Browse files Browse the repository at this point in the history
gpu observables
  • Loading branch information
sekelle authored Jan 9, 2023
2 parents cf4510f + 463dad5 commit 4a587ec
Show file tree
Hide file tree
Showing 12 changed files with 451 additions and 107 deletions.
6 changes: 3 additions & 3 deletions main/src/observables/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
if(CMAKE_HIP_COMPILER)
set_source_files_properties(conserved_gpu.cu PROPERTIES LANGUAGE HIP)
set_source_files_properties(conserved_gpu.cu gpu_reductions.cu PROPERTIES LANGUAGE HIP)
endif()

if(CMAKE_CUDA_COMPILER OR CMAKE_HIP_COMPILER)
add_library(observables_gpu conserved_gpu.cu)
target_include_directories(observables_gpu PRIVATE ${CSTONE_DIR})
add_library(observables_gpu conserved_gpu.cu gpu_reductions.cu)
target_include_directories(observables_gpu PRIVATE ${CSTONE_DIR} ${SPH_DIR})
endif()
34 changes: 0 additions & 34 deletions main/src/observables/conserved_gpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -101,38 +101,4 @@ CONSERVED_Q_GPU(double, double, double, float);
CONSERVED_Q_GPU(double, float, double, float);
CONSERVED_Q_GPU(float, float, float, float);

template<class Tc, class Tv>
struct MachSquareSum
{
HOST_DEVICE_FUN
double operator()(const thrust::tuple<Tv, Tv, Tv, Tc>& p)
{
Vec3<double> V{get<0>(p), get<1>(p), get<2>(p)};
Tc c = get<3>(p);
return norm2(V) / (double(c) * c);
}
};

template<class Tc, class Tv>
double machSquareSumGpu(const Tv* vx, const Tv* vy, const Tv* vz, const Tc* c, size_t first, size_t last)
{
auto it1 = thrust::make_zip_iterator(thrust::make_tuple(vx + first, vy + first, vz + first, c + first));
auto it2 = thrust::make_zip_iterator(thrust::make_tuple(vx + last, vy + last, vz + last, c + last));

auto plus = thrust::plus<double>{};
double localMachSquareSum = 0.0;

localMachSquareSum =
thrust::transform_reduce(thrust::device, it1, it2, MachSquareSum<Tc, Tv>{}, localMachSquareSum, plus);

return localMachSquareSum;
}

#define MACH_GPU(Tc, Tv) \
template double machSquareSumGpu(const Tv* vx, const Tv* vy, const Tv* vz, const Tc* c, size_t, size_t)

MACH_GPU(double, double);
MACH_GPU(double, float);
MACH_GPU(float, float);

} // namespace sphexa
15 changes: 0 additions & 15 deletions main/src/observables/conserved_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,19 +58,4 @@ extern std::tuple<double, double, cstone::Vec3<double>, cstone::Vec3<double>>
conservedQuantitiesGpu(Tt cv, const Tc* x, const Tc* y, const Tc* z, const Tv* vx, const Tv* vy, const Tv* vz,
const Tt* temp, const Tm* m, size_t first, size_t last);

/*! @brief compute square of local Mach number sum
*
* @tparam Tc float or double
* @tparam Tv float or double
* @param[in] vx velocities x-component
* @param[in] vy velocities y-component
* @param[in] vz velocities z-component
* @param[in] c local speed of sound
* @param[in] first first local particle in vx,vy,vz,c arrays
* @param[in] last last local particle
* @return
*/
template<class Tc, class Tv>
extern double machSquareSumGpu(const Tv* vx, const Tv* vy, const Tv* vz, const Tc* c, size_t first, size_t last);

} // namespace sphexa
28 changes: 1 addition & 27 deletions main/src/observables/conserved_quantities.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,26 +84,6 @@ auto localConservedQuantities(size_t startIndex, size_t endIndex, Dataset& d)
return std::make_tuple(0.5 * eKin, eInt, linmom, angmom);
}

template<class Dataset>
double localMachSquareSum(size_t first, size_t last, Dataset& d)
{
const auto* vx = d.vx.data();
const auto* vy = d.vy.data();
const auto* vz = d.vz.data();
const auto* c = d.c.data();

double localMachSquareSum = 0.0;

#pragma omp parallel for reduction(+ : localMachSquareSum)
for (size_t i = first; i < last; ++i)
{
util::array<double, 3> V{vx[i], vy[i], vz[i]};
localMachSquareSum += norm2(V) / (c[i] * c[i]);
}

return localMachSquareSum;
}

/*! @brief Computation of globally conserved quantities
*
* @tparam T float or double
Expand All @@ -126,9 +106,6 @@ void computeConservedQuantities(size_t startIndex, size_t endIndex, Dataset& d,
sph::idealGasCv(d.muiConst, d.gamma), rawPtr(d.devData.x), rawPtr(d.devData.y), rawPtr(d.devData.z),
rawPtr(d.devData.vx), rawPtr(d.devData.vy), rawPtr(d.devData.vz), rawPtr(d.devData.temp),
rawPtr(d.devData.m), startIndex, endIndex);

machSqSum = machSquareSumGpu(rawPtr(d.devData.vx), rawPtr(d.devData.vy), rawPtr(d.devData.vz),
rawPtr(d.devData.c), startIndex, endIndex);
}
else
{
Expand All @@ -139,10 +116,9 @@ void computeConservedQuantities(size_t startIndex, size_t endIndex, Dataset& d,
}

std::tie(eKin, eInt, linmom, angmom) = localConservedQuantities(startIndex, endIndex, d);
machSqSum = localMachSquareSum(startIndex, endIndex, d);
}

util::array<double, 11> quantities, globalQuantities;
util::array<double, 10> quantities, globalQuantities;
std::fill(globalQuantities.begin(), globalQuantities.end(), double(0));

quantities[0] = eKin;
Expand All @@ -155,7 +131,6 @@ void computeConservedQuantities(size_t startIndex, size_t endIndex, Dataset& d,
quantities[7] = angmom[1];
quantities[8] = angmom[2];
quantities[9] = double(ncsum);
quantities[10] = machSqSum;

int rootRank = 0;
MPI_Reduce(quantities.data(), globalQuantities.data(), quantities.size(), MpiType<double>{}, MPI_SUM, rootRank,
Expand All @@ -171,7 +146,6 @@ void computeConservedQuantities(size_t startIndex, size_t endIndex, Dataset& d,
d.linmom = std::sqrt(norm2(globalLinmom));
d.angmom = std::sqrt(norm2(globalAngmom));
d.totalNeighbors = size_t(globalQuantities[9]);
d.machRMS = std::sqrt(globalQuantities[10] / d.numParticlesGlobal);
}

} // namespace sphexa
15 changes: 12 additions & 3 deletions main/src/observables/factory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include "time_energies.hpp"
#include "gravitational_waves.hpp"
#include "wind_bubble_fraction.hpp"
#include "turbulence_mach_rms.hpp"

namespace sphexa
{
Expand Down Expand Up @@ -98,15 +99,18 @@ std::unique_ptr<IObservables<Dataset>> observablesFactory(const std::string& tes
std::string khGrowthRate = "KelvinHelmholtzGrowthRate";
std::string gravWaves = "observeGravWaves";

if (haveH5Attribute(testCase, khGrowthRate, H5PART_INT64))
if (haveH5Attribute(testCase, khGrowthRate, H5PART_INT64) || testCase == "kelvin-helmholtz")
{
h5part_int64_t attrValue;
H5PartFile* h5_file = nullptr;
h5_file = H5PartOpenFile(testCase.c_str(), H5PART_READ);
H5PartReadFileAttrib(h5_file, khGrowthRate.c_str(), &attrValue);
H5PartCloseFile(h5_file);

if (attrValue) { return std::make_unique<TimeEnergyGrowth<Dataset>>(constantsFile); }
if (attrValue || testCase == "kelvin-helmholtz")
{
return std::make_unique<TimeEnergyGrowth<Dataset>>(constantsFile);
}
}

if (haveH5Attribute(testCase, gravWaves, H5PART_FLOAT64))
Expand All @@ -121,6 +125,7 @@ std::unique_ptr<IObservables<Dataset>> observablesFactory(const std::string& tes
return std::make_unique<GravWaves<Dataset>>(constantsFile, attrValue[1], attrValue[2]);
}
}
#endif

if (testCase == "wind-shock")
{
Expand All @@ -130,7 +135,11 @@ std::unique_ptr<IObservables<Dataset>> observablesFactory(const std::string& tes
double bubbleMass = bubbleVolume * rhoInt;
return std::make_unique<WindBubble<Dataset>>(constantsFile, rhoInt, uExt, bubbleMass);
}
#endif

if (testCase == "turbulence")
{
return std::make_unique<TurbulenceMachRMS<Dataset>>(constantsFile);
}

if (testCase == "nbody") { return std::make_unique<IObservables<Dataset>>(); }

Expand Down
183 changes: 183 additions & 0 deletions main/src/observables/gpu_reductions.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
/*
* MIT License
*
* Copyright (c) 2022 CSCS, ETH Zurich
* 2021 University of Basel
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

/*! @file
* @brief reductions for different observables on the GPU
*
* @author Lukas Schmidt
*/

#include <thrust/execution_policy.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/transform_reduce.h>

#include "cstone/util/tuple.hpp"
#include "gpu_reductions.h"

namespace sphexa
{

using cstone::Box;
using cstone::Vec3;
using thrust::get;

//!@brief functor for Kelvin-Helmholtz growth rate
template<class Tc, class Tv, class Tm>
struct GrowthRate
{
HOST_DEVICE_FUN
thrust::tuple<double, double, double> operator()(const thrust::tuple<Tc, Tc, Tv, Tm, Tm>& p)
{
Tc xi = get<0>(p);
Tc yi = get<1>(p);
Tv vyi = get<2>(p);
Tm xmi = get<3>(p);
Tm kxi = get<4>(p);
Tc voli = xmi / kxi;
Tc aux;

if (yi < ybox * Tc(0.5)) { aux = std::exp(-4.0 * PI * std::abs(yi - 0.25)); }
else { aux = std::exp(-4.0 * PI * std::abs(ybox - yi - 0.25)); }
Tc si = vyi * voli * std::sin(4.0 * PI * xi) * aux;
Tc ci = vyi * voli * std::cos(4.0 * PI * xi) * aux;
Tc di = voli * aux;

return thrust::make_tuple(si, ci, di);
}

Tc ybox;
};

template<class Tc, class Tv, class Tm>
std::tuple<double, double, double> gpuGrowthRate(const Tc* x, const Tc* y, const Tv* vy, const Tm* xm, const Tm* kx,
const cstone::Box<Tc>& box, size_t startIndex, size_t endIndex)
{
auto it1 = thrust::make_zip_iterator(
thrust::make_tuple(x + startIndex, y + startIndex, vy + startIndex, xm + startIndex, kx + startIndex));

auto it2 = thrust::make_zip_iterator(
thrust::make_tuple(x + endIndex, y + endIndex, vy + endIndex, xm + endIndex, kx + endIndex));

auto plus = util::TuplePlus<thrust::tuple<double, double, double>>{};
auto init = thrust::make_tuple(0.0, 0.0, 0.0);

auto result = thrust::transform_reduce(thrust::device, it1, it2, GrowthRate<Tc, Tm, Tv>{box.ly()}, init, plus);

return {get<0>(result), get<1>(result), get<2>(result)};
}

#define GROWTH_GPU(Tc, Tv, Tm) \
template std::tuple<double, double, double> gpuGrowthRate(const Tc* x, const Tc* y, const Tv* vy, const Tm* xm, \
const Tm* kx, const cstone::Box<Tc>& box, \
size_t startIndex, size_t endIndex)

GROWTH_GPU(double, double, double);
GROWTH_GPU(double, double, float);
GROWTH_GPU(double, float, float);
GROWTH_GPU(float, float, float);

//!@brief functor for the machSquare summing
template<class Tc, class Tv>
struct MachSquareSum
{
HOST_DEVICE_FUN
double operator()(const thrust::tuple<Tv, Tv, Tv, Tc>& p)
{
Vec3<double> V{get<0>(p), get<1>(p), get<2>(p)};
Tc c = get<3>(p);
return norm2(V) / (double(c) * c);
}
};

template<class Tc, class Tv>
double machSquareSumGpu(const Tv* vx, const Tv* vy, const Tv* vz, const Tc* c, size_t first, size_t last)
{
auto it1 = thrust::make_zip_iterator(thrust::make_tuple(vx + first, vy + first, vz + first, c + first));
auto it2 = thrust::make_zip_iterator(thrust::make_tuple(vx + last, vy + last, vz + last, c + last));

auto plus = thrust::plus<double>{};
double localMachSquareSum = 0.0;

localMachSquareSum =
thrust::transform_reduce(thrust::device, it1, it2, MachSquareSum<Tc, Tv>{}, localMachSquareSum, plus);

return localMachSquareSum;
}

#define MACH_GPU(Tc, Tv) \
template double machSquareSumGpu(const Tv* vx, const Tv* vy, const Tv* vz, const Tc* c, size_t, size_t)

MACH_GPU(double, double);
MACH_GPU(double, float);
MACH_GPU(float, float);

//!@brief functor to count particles that belong to the cloud
template<class Tc, class Tt, class Tm>
struct Survivors
{
HOST_DEVICE_FUN
double operator()(const thrust::tuple<Tt, Tc, Tc, Tm>& p)
{
size_t isCloud;
Tt temp = get<0>(p);
Tc kx = get<1>(p);
Tc xm = get<2>(p);
Tm m = get<3>(p);
Tc rhoi = kx / xm * m;
if (rhoi >= 0.64 * rhoBubble && temp <= 0.9 * tempWind) { isCloud = 1; }
else { isCloud = 0; }
return isCloud;
}

double rhoBubble;
double tempWind;
};
template<class Tc, class Tt, class Tm>
size_t survivorsGpu(const Tt* temp, const Tt* kx, const Tc* xmass, const Tm* m, double rhoBubble, double tempWind,
size_t first, size_t last)
{
auto it1 = thrust::make_zip_iterator(thrust::make_tuple(temp + first, kx + first, xmass + first, m + first));
auto it2 = thrust::make_zip_iterator(thrust::make_tuple(temp + last, kx + last, xmass + last, m + last));

auto plus = thrust::plus<size_t>{};

size_t localSurvivors = 0;

localSurvivors = thrust::transform_reduce(thrust::device, it1, it2, Survivors<Tc, Tt, Tm>{rhoBubble, tempWind},
localSurvivors, plus);

return localSurvivors;
}

#define SURVIVORS(Tc, Tt, Tm) \
template size_t survivorsGpu(const Tt* temp, const Tt* kx, const Tc* xmass, const Tm* m, double, double, size_t, \
size_t)

SURVIVORS(double, double, double);
SURVIVORS(double, double, float);
SURVIVORS(double, float, float);
SURVIVORS(float, float, float);

} // namespace sphexa
Loading

0 comments on commit 4a587ec

Please sign in to comment.