Merge pull request #340 from lks1248/gpu-observables

gpu observables
sphexa-org · Jan 9, 2023 · 4a587ec · 4a587ec
2 parents cf4510f + 463dad5
commit 4a587ec
Show file tree

Hide file tree

Showing 12 changed files with 451 additions and 107 deletions.
diff --git a/main/src/observables/CMakeLists.txt b/main/src/observables/CMakeLists.txt
@@ -1,8 +1,8 @@
 if(CMAKE_HIP_COMPILER)
-    set_source_files_properties(conserved_gpu.cu PROPERTIES LANGUAGE HIP)
+    set_source_files_properties(conserved_gpu.cu gpu_reductions.cu PROPERTIES LANGUAGE HIP)
 endif()
 
 if(CMAKE_CUDA_COMPILER OR CMAKE_HIP_COMPILER)
-    add_library(observables_gpu conserved_gpu.cu)
-    target_include_directories(observables_gpu PRIVATE ${CSTONE_DIR})
+    add_library(observables_gpu conserved_gpu.cu gpu_reductions.cu)
+    target_include_directories(observables_gpu PRIVATE ${CSTONE_DIR} ${SPH_DIR})
 endif()
diff --git a/main/src/observables/conserved_gpu.cu b/main/src/observables/conserved_gpu.cu
@@ -101,38 +101,4 @@ CONSERVED_Q_GPU(double, double, double, float);
 CONSERVED_Q_GPU(double, float, double, float);
 CONSERVED_Q_GPU(float, float, float, float);
 
-template<class Tc, class Tv>
-struct MachSquareSum
-{
-    HOST_DEVICE_FUN
-    double operator()(const thrust::tuple<Tv, Tv, Tv, Tc>& p)
-    {
-        Vec3<double> V{get<0>(p), get<1>(p), get<2>(p)};
-        Tc           c = get<3>(p);
-        return norm2(V) / (double(c) * c);
-    }
-};
-
-template<class Tc, class Tv>
-double machSquareSumGpu(const Tv* vx, const Tv* vy, const Tv* vz, const Tc* c, size_t first, size_t last)
-{
-    auto it1 = thrust::make_zip_iterator(thrust::make_tuple(vx + first, vy + first, vz + first, c + first));
-    auto it2 = thrust::make_zip_iterator(thrust::make_tuple(vx + last, vy + last, vz + last, c + last));
-
-    auto   plus               = thrust::plus<double>{};
-    double localMachSquareSum = 0.0;
-
-    localMachSquareSum =
-        thrust::transform_reduce(thrust::device, it1, it2, MachSquareSum<Tc, Tv>{}, localMachSquareSum, plus);
-
-    return localMachSquareSum;
-}
-
-#define MACH_GPU(Tc, Tv)                                                                                               \
-    template double machSquareSumGpu(const Tv* vx, const Tv* vy, const Tv* vz, const Tc* c, size_t, size_t)
-
-MACH_GPU(double, double);
-MACH_GPU(double, float);
-MACH_GPU(float, float);
-
 } // namespace sphexa
diff --git a/main/src/observables/conserved_gpu.h b/main/src/observables/conserved_gpu.h
@@ -58,19 +58,4 @@ extern std::tuple<double, double, cstone::Vec3<double>, cstone::Vec3<double>>
 conservedQuantitiesGpu(Tt cv, const Tc* x, const Tc* y, const Tc* z, const Tv* vx, const Tv* vy, const Tv* vz,
                        const Tt* temp, const Tm* m, size_t first, size_t last);
 
-/*! @brief compute square of local Mach number sum
- *
- * @tparam     Tc     float or double
- * @tparam     Tv     float or double
- * @param[in]  vx     velocities x-component
- * @param[in]  vy     velocities y-component
- * @param[in]  vz     velocities z-component
- * @param[in]  c      local speed of sound
- * @param[in]  first  first local particle in vx,vy,vz,c arrays
- * @param[in]  last   last local particle
- * @return
- */
-template<class Tc, class Tv>
-extern double machSquareSumGpu(const Tv* vx, const Tv* vy, const Tv* vz, const Tc* c, size_t first, size_t last);
-
 } // namespace sphexa
diff --git a/main/src/observables/conserved_quantities.hpp b/main/src/observables/conserved_quantities.hpp
@@ -84,26 +84,6 @@ auto localConservedQuantities(size_t startIndex, size_t endIndex, Dataset& d)
     return std::make_tuple(0.5 * eKin, eInt, linmom, angmom);
 }
 
-template<class Dataset>
-double localMachSquareSum(size_t first, size_t last, Dataset& d)
-{
-    const auto* vx = d.vx.data();
-    const auto* vy = d.vy.data();
-    const auto* vz = d.vz.data();
-    const auto* c  = d.c.data();
-
-    double localMachSquareSum = 0.0;
-
-#pragma omp parallel for reduction(+ : localMachSquareSum)
-    for (size_t i = first; i < last; ++i)
-    {
-        util::array<double, 3> V{vx[i], vy[i], vz[i]};
-        localMachSquareSum += norm2(V) / (c[i] * c[i]);
-    }
-
-    return localMachSquareSum;
-}
-
 /*! @brief Computation of globally conserved quantities
  *
  * @tparam        T            float or double
@@ -126,9 +106,6 @@ void computeConservedQuantities(size_t startIndex, size_t endIndex, Dataset& d,
             sph::idealGasCv(d.muiConst, d.gamma), rawPtr(d.devData.x), rawPtr(d.devData.y), rawPtr(d.devData.z),
             rawPtr(d.devData.vx), rawPtr(d.devData.vy), rawPtr(d.devData.vz), rawPtr(d.devData.temp),
             rawPtr(d.devData.m), startIndex, endIndex);
-
-        machSqSum = machSquareSumGpu(rawPtr(d.devData.vx), rawPtr(d.devData.vy), rawPtr(d.devData.vz),
-                                     rawPtr(d.devData.c), startIndex, endIndex);
     }
     else
     {
@@ -139,10 +116,9 @@ void computeConservedQuantities(size_t startIndex, size_t endIndex, Dataset& d,
         }
 
         std::tie(eKin, eInt, linmom, angmom) = localConservedQuantities(startIndex, endIndex, d);
-        machSqSum                            = localMachSquareSum(startIndex, endIndex, d);
     }
 
-    util::array<double, 11> quantities, globalQuantities;
+    util::array<double, 10> quantities, globalQuantities;
     std::fill(globalQuantities.begin(), globalQuantities.end(), double(0));
 
     quantities[0]  = eKin;
@@ -155,7 +131,6 @@ void computeConservedQuantities(size_t startIndex, size_t endIndex, Dataset& d,
     quantities[7]  = angmom[1];
     quantities[8]  = angmom[2];
     quantities[9]  = double(ncsum);
-    quantities[10] = machSqSum;
 
     int rootRank = 0;
     MPI_Reduce(quantities.data(), globalQuantities.data(), quantities.size(), MpiType<double>{}, MPI_SUM, rootRank,
@@ -171,7 +146,6 @@ void computeConservedQuantities(size_t startIndex, size_t endIndex, Dataset& d,
     d.linmom         = std::sqrt(norm2(globalLinmom));
     d.angmom         = std::sqrt(norm2(globalAngmom));
     d.totalNeighbors = size_t(globalQuantities[9]);
-    d.machRMS        = std::sqrt(globalQuantities[10] / d.numParticlesGlobal);
 }
 
 } // namespace sphexa
diff --git a/main/src/observables/factory.hpp b/main/src/observables/factory.hpp
@@ -39,6 +39,7 @@
 #include "time_energies.hpp"
 #include "gravitational_waves.hpp"
 #include "wind_bubble_fraction.hpp"
+#include "turbulence_mach_rms.hpp"
 
 namespace sphexa
 {
@@ -98,15 +99,18 @@ std::unique_ptr<IObservables<Dataset>> observablesFactory(const std::string& tes
     std::string khGrowthRate = "KelvinHelmholtzGrowthRate";
     std::string gravWaves    = "observeGravWaves";
 
-    if (haveH5Attribute(testCase, khGrowthRate, H5PART_INT64))
+    if (haveH5Attribute(testCase, khGrowthRate, H5PART_INT64) || testCase == "kelvin-helmholtz")
     {
         h5part_int64_t attrValue;
         H5PartFile*    h5_file = nullptr;
         h5_file                = H5PartOpenFile(testCase.c_str(), H5PART_READ);
         H5PartReadFileAttrib(h5_file, khGrowthRate.c_str(), &attrValue);
         H5PartCloseFile(h5_file);
 
-        if (attrValue) { return std::make_unique<TimeEnergyGrowth<Dataset>>(constantsFile); }
+        if (attrValue || testCase == "kelvin-helmholtz")
+        {
+            return std::make_unique<TimeEnergyGrowth<Dataset>>(constantsFile);
+        }
     }
 
     if (haveH5Attribute(testCase, gravWaves, H5PART_FLOAT64))
@@ -121,6 +125,7 @@ std::unique_ptr<IObservables<Dataset>> observablesFactory(const std::string& tes
             return std::make_unique<GravWaves<Dataset>>(constantsFile, attrValue[1], attrValue[2]);
         }
     }
+#endif
 
     if (testCase == "wind-shock")
     {
@@ -130,7 +135,11 @@ std::unique_ptr<IObservables<Dataset>> observablesFactory(const std::string& tes
         double bubbleMass   = bubbleVolume * rhoInt;
         return std::make_unique<WindBubble<Dataset>>(constantsFile, rhoInt, uExt, bubbleMass);
     }
-#endif
+
+    if (testCase == "turbulence")
+    {
+        return std::make_unique<TurbulenceMachRMS<Dataset>>(constantsFile);
+    }
 
     if (testCase == "nbody") { return std::make_unique<IObservables<Dataset>>(); }
 

diff --git a/main/src/observables/gpu_reductions.cu b/main/src/observables/gpu_reductions.cu
@@ -0,0 +1,183 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2022 CSCS, ETH Zurich
+ *               2021 University of Basel
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*! @file
+ * @brief reductions for different observables on the GPU
+ *
+ * @author Lukas Schmidt
+ */
+
+#include <thrust/execution_policy.h>
+#include <thrust/iterator/zip_iterator.h>
+#include <thrust/transform_reduce.h>
+
+#include "cstone/util/tuple.hpp"
+#include "gpu_reductions.h"
+
+namespace sphexa
+{
+
+using cstone::Box;
+using cstone::Vec3;
+using thrust::get;
+
+//!@brief functor for Kelvin-Helmholtz growth rate
+template<class Tc, class Tv, class Tm>
+struct GrowthRate
+{
+    HOST_DEVICE_FUN
+    thrust::tuple<double, double, double> operator()(const thrust::tuple<Tc, Tc, Tv, Tm, Tm>& p)
+    {
+        Tc xi   = get<0>(p);
+        Tc yi   = get<1>(p);
+        Tv vyi  = get<2>(p);
+        Tm xmi  = get<3>(p);
+        Tm kxi  = get<4>(p);
+        Tc voli = xmi / kxi;
+        Tc aux;
+
+        if (yi < ybox * Tc(0.5)) { aux = std::exp(-4.0 * PI * std::abs(yi - 0.25)); }
+        else { aux = std::exp(-4.0 * PI * std::abs(ybox - yi - 0.25)); }
+        Tc si = vyi * voli * std::sin(4.0 * PI * xi) * aux;
+        Tc ci = vyi * voli * std::cos(4.0 * PI * xi) * aux;
+        Tc di = voli * aux;
+
+        return thrust::make_tuple(si, ci, di);
+    }
+
+    Tc ybox;
+};
+
+template<class Tc, class Tv, class Tm>
+std::tuple<double, double, double> gpuGrowthRate(const Tc* x, const Tc* y, const Tv* vy, const Tm* xm, const Tm* kx,
+                                                 const cstone::Box<Tc>& box, size_t startIndex, size_t endIndex)
+{
+    auto it1 = thrust::make_zip_iterator(
+        thrust::make_tuple(x + startIndex, y + startIndex, vy + startIndex, xm + startIndex, kx + startIndex));
+
+    auto it2 = thrust::make_zip_iterator(
+        thrust::make_tuple(x + endIndex, y + endIndex, vy + endIndex, xm + endIndex, kx + endIndex));
+
+    auto plus = util::TuplePlus<thrust::tuple<double, double, double>>{};
+    auto init = thrust::make_tuple(0.0, 0.0, 0.0);
+
+    auto result = thrust::transform_reduce(thrust::device, it1, it2, GrowthRate<Tc, Tm, Tv>{box.ly()}, init, plus);
+
+    return {get<0>(result), get<1>(result), get<2>(result)};
+}
+
+#define GROWTH_GPU(Tc, Tv, Tm)                                                                                         \
+    template std::tuple<double, double, double> gpuGrowthRate(const Tc* x, const Tc* y, const Tv* vy, const Tm* xm,    \
+                                                              const Tm* kx, const cstone::Box<Tc>& box,                \
+                                                              size_t startIndex, size_t endIndex)
+
+GROWTH_GPU(double, double, double);
+GROWTH_GPU(double, double, float);
+GROWTH_GPU(double, float, float);
+GROWTH_GPU(float, float, float);
+
+//!@brief functor for the machSquare summing
+template<class Tc, class Tv>
+struct MachSquareSum
+{
+    HOST_DEVICE_FUN
+    double operator()(const thrust::tuple<Tv, Tv, Tv, Tc>& p)
+    {
+        Vec3<double> V{get<0>(p), get<1>(p), get<2>(p)};
+        Tc           c = get<3>(p);
+        return norm2(V) / (double(c) * c);
+    }
+};
+
+template<class Tc, class Tv>
+double machSquareSumGpu(const Tv* vx, const Tv* vy, const Tv* vz, const Tc* c, size_t first, size_t last)
+{
+    auto it1 = thrust::make_zip_iterator(thrust::make_tuple(vx + first, vy + first, vz + first, c + first));
+    auto it2 = thrust::make_zip_iterator(thrust::make_tuple(vx + last, vy + last, vz + last, c + last));
+
+    auto   plus               = thrust::plus<double>{};
+    double localMachSquareSum = 0.0;
+
+    localMachSquareSum =
+        thrust::transform_reduce(thrust::device, it1, it2, MachSquareSum<Tc, Tv>{}, localMachSquareSum, plus);
+
+    return localMachSquareSum;
+}
+
+#define MACH_GPU(Tc, Tv)                                                                                               \
+    template double machSquareSumGpu(const Tv* vx, const Tv* vy, const Tv* vz, const Tc* c, size_t, size_t)
+
+MACH_GPU(double, double);
+MACH_GPU(double, float);
+MACH_GPU(float, float);
+
+//!@brief functor to count particles that belong to the cloud
+template<class Tc, class Tt, class Tm>
+struct Survivors
+{
+    HOST_DEVICE_FUN
+    double operator()(const thrust::tuple<Tt, Tc, Tc, Tm>& p)
+    {
+        size_t isCloud;
+        Tt     temp = get<0>(p);
+        Tc     kx   = get<1>(p);
+        Tc     xm   = get<2>(p);
+        Tm     m    = get<3>(p);
+        Tc     rhoi = kx / xm * m;
+        if (rhoi >= 0.64 * rhoBubble && temp <= 0.9 * tempWind) { isCloud = 1; }
+        else { isCloud = 0; }
+        return isCloud;
+    }
+
+    double rhoBubble;
+    double tempWind;
+};
+template<class Tc, class Tt, class Tm>
+size_t survivorsGpu(const Tt* temp, const Tt* kx, const Tc* xmass, const Tm* m, double rhoBubble, double tempWind,
+                    size_t first, size_t last)
+{
+    auto it1 = thrust::make_zip_iterator(thrust::make_tuple(temp + first, kx + first, xmass + first, m + first));
+    auto it2 = thrust::make_zip_iterator(thrust::make_tuple(temp + last, kx + last, xmass + last, m + last));
+
+    auto plus = thrust::plus<size_t>{};
+
+    size_t localSurvivors = 0;
+
+    localSurvivors = thrust::transform_reduce(thrust::device, it1, it2, Survivors<Tc, Tt, Tm>{rhoBubble, tempWind},
+                                              localSurvivors, plus);
+
+    return localSurvivors;
+}
+
+#define SURVIVORS(Tc, Tt, Tm)                                                                                          \
+    template size_t survivorsGpu(const Tt* temp, const Tt* kx, const Tc* xmass, const Tm* m, double, double, size_t,   \
+                                 size_t)
+
+SURVIVORS(double, double, double);
+SURVIVORS(double, double, float);
+SURVIVORS(double, float, float);
+SURVIVORS(float, float, float);
+
+} // namespace sphexa