diff --git a/src/grid/grid3D.cpp b/src/grid/grid3D.cpp index 9e7e9d5cc..c77bf2952 100644 --- a/src/grid/grid3D.cpp +++ b/src/grid/grid3D.cpp @@ -8,8 +8,9 @@ #endif #include "../global/global.h" #include "../grid/grid3D.h" -#include "../grid/grid_enum.h" // provides grid_enum -#include "../hydro/hydro_cuda.h" // provides Calc_dt_GPU +#include "../grid/grid_enum.h" // provides grid_enum +#include "../hydro/average_cells.h" // provides Average_Slow_Cells and SlowCellConditionChecker +#include "../hydro/hydro_cuda.h" // provides Calc_dt_GPU #include "../integrators/VL_1D_cuda.h" #include "../integrators/VL_2D_cuda.h" #include "../integrators/VL_3D_cuda.h" @@ -152,7 +153,9 @@ void Grid3D::Initialize(struct Parameters *P) #ifdef AVERAGE_SLOW_CELLS H.min_dt_slow = 1e-100; // Initialize the minumum dt to a tiny number -#endif // AVERAGE_SLOW_CELLS +#else + H.min_dt_slow = -1.0; +#endif // AVERAGE_SLOW_CELLS #ifndef MPI_CHOLLA @@ -446,12 +449,12 @@ void Grid3D::Execute_Hydro_Integrator(void) #ifdef VL VL_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, H.custom_grav, H.density_floor, - C.Grav_potential); + C.Grav_potential, SlowCellConditionChecker(1.0 / H.min_dt_slow, H.dx, H.dy, H.dz)); #endif // VL #ifdef SIMPLE Simple_Algorithm_3D_CUDA(C.device, C.d_Grav_potential, H.nx, H.ny, H.nz, x_off, y_off, z_off, H.n_ghost, H.dx, H.dy, H.dz, H.xbound, H.ybound, H.zbound, H.dt, H.n_fields, H.custom_grav, H.density_floor, - C.Grav_potential); + C.Grav_potential, SlowCellConditionChecker(1.0 / H.min_dt_slow, H.dx, H.dy, H.dz)); #endif // SIMPLE } else { chprintf("Error: Grid dimensions nx: %d ny: %d nz: %d not supported.\n", H.nx, H.ny, H.nz); @@ -578,8 +581,9 @@ Real Grid3D::Update_Hydro_Grid() ny_off = ny_local_start; nz_off = nz_local_start; #endif - Average_Slow_Cells(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, H.dx, H.dy, H.dz, gama, max_dti_slow, H.xbound, - H.ybound, H.zbound, nx_off, ny_off, nz_off); + Average_Slow_Cells(C.device, H.nx, H.ny, H.nz, H.n_ghost, H.n_fields, gama, + SlowCellConditionChecker(max_dti_slow, H.dx, H.dy, H.dz), H.xbound, H.ybound, H.zbound, nx_off, + ny_off, nz_off); #endif // AVERAGE_SLOW_CELLS // ==Calculate the next time step using Calc_dt_GPU from hydro/hydro_cuda.h== diff --git a/src/grid/grid3D.h b/src/grid/grid3D.h index cb4c0dbbb..1b417afa8 100644 --- a/src/grid/grid3D.h +++ b/src/grid/grid3D.h @@ -210,9 +210,10 @@ struct Header { * \brief Length of the current timestep */ Real dt; -#ifdef AVERAGE_SLOW_CELLS + /*! \brief Cells that introduce timesteps shorter than will be averaged with + * neighboring cells. Should be a negative value when the + * AVERAGE_SLOW_CELLS macro isn't defined. */ Real min_dt_slow; -#endif /*! \var t_wall * \brief Wall time */ diff --git a/src/hydro/average_cells.h b/src/hydro/average_cells.h new file mode 100644 index 000000000..2ffac225e --- /dev/null +++ b/src/hydro/average_cells.h @@ -0,0 +1,54 @@ +/*! \file average_cells.h + * \brief Definitions of functions and classes that implement logic related to averaging cells with + * neighbors. */ + +#ifndef AVERAGE_CELLS_H +#define AVERAGE_CELLS_H + +#include + +#include "../global/global.h" + +/*! \brief Object that checks whether a given cell meets the conditions for slow-cell averaging. + * The main motivation for creating this class is reducing ifdef statements (and allow to modify the + * actual slow-cell-condition. */ +struct SlowCellConditionChecker { +// omit data-members if they aren't used for anything +#ifdef AVERAGE_SLOW_CELLS + Real max_dti_slow, dx, dy, dz; +#endif + + /*! \brief Construct a new object. */ + __host__ __device__ SlowCellConditionChecker(Real max_dti_slow, Real dx, Real dy, Real dz) +#ifdef AVERAGE_SLOW_CELLS + : max_dti_slow{max_dti_slow}, dx{dx}, dy{dy}, dz{dz} +#endif + { + } + + /*! \brief Returns whether the cell meets the condition for being considered a slow cell that must + * be averaged. */ + template + __device__ bool is_slow(Real E, Real d, Real d_inv, Real vx, Real vy, Real vz, Real gamma) const + { + return this->max_dti_if_slow(E, d, d_inv, vx, vy, vz, gamma) >= 0.0; + } + + /*! \brief Returns the max inverse timestep of the specified cell, if it meets the criteria for being + * a slow cell. If it doesn't, return a negative value instead. + */ + __device__ Real max_dti_if_slow(Real E, Real d, Real d_inv, Real vx, Real vy, Real vz, Real gamma) const; +}; + +#ifdef AVERAGE_SLOW_CELLS + +void Average_Slow_Cells(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real gamma, + SlowCellConditionChecker slow_check, Real xbound, Real ybound, Real zbound, int nx_offset, + int ny_offset, int nz_offset); + +__global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, + Real gamma, SlowCellConditionChecker slow_check, Real xbound, Real ybound, + Real zbound, int nx_offset, int ny_offset, int nz_offset); +#endif + +#endif /* AVERAGE_CELLS_H */ \ No newline at end of file diff --git a/src/hydro/hydro_cuda.cu b/src/hydro/hydro_cuda.cu index 1b223708b..1db81ba2d 100644 --- a/src/hydro/hydro_cuda.cu +++ b/src/hydro/hydro_cuda.cu @@ -10,6 +10,7 @@ #include "../global/global.h" #include "../global/global_cuda.h" #include "../gravity/static_grav.h" +#include "../hydro/average_cells.h" #include "../hydro/hydro_cuda.h" #include "../utils/DeviceVector.h" #include "../utils/cuda_utilities.h" @@ -377,14 +378,30 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, R 0.5 * dt * (gx * (d * vx + d_n * vx_n) + gy * (d * vy + d_n * vy_n) + gz * (d * vz + d_n * vz_n)); #endif // GRAVITY + } +} + +__global__ void PostUpdate_Conserved_Correct_Crashed_3D(Real *dev_conserved, int nx, int ny, int nz, int x_off, + int y_off, int z_off, int n_ghost, Real gamma, int n_fields, + SlowCellConditionChecker slow_check) +{ + int n_cells = nx * ny * nz; + // get a global thread ID + int id = threadIdx.x + blockIdx.x * blockDim.x; + int zid = id / (nx * ny); + int yid = (id - zid * nx * ny) / nx; + int xid = id - zid * nx * ny - yid * nx; + + if (xid > n_ghost - 1 && xid < nx - n_ghost && yid > n_ghost - 1 && yid < ny - n_ghost && zid > n_ghost - 1 && + zid < nz - n_ghost) { #if !(defined(DENSITY_FLOOR) && defined(TEMPERATURE_FLOOR)) + // threads corresponding to real cells do the calculation if (dev_conserved[id] < 0.0 || dev_conserved[id] != dev_conserved[id] || dev_conserved[4 * n_cells + id] < 0.0 || dev_conserved[4 * n_cells + id] != dev_conserved[4 * n_cells + id]) { - printf("%3d %3d %3d Thread crashed in final update. %e %e %e %e %e\n", xid + x_off, yid + y_off, zid + z_off, - dev_conserved[id], dtodx * (dev_F_x[imo] - dev_F_x[id]), dtody * (dev_F_y[jmo] - dev_F_y[id]), - dtodz * (dev_F_z[kmo] - dev_F_z[id]), dev_conserved[4 * n_cells + id]); - Average_Cell_All_Fields(xid, yid, zid, nx, ny, nz, n_cells, n_fields, gamma, dev_conserved); + printf("%3d %3d %3d Thread crashed in final update. %e - - - %e\n", xid + x_off, yid + y_off, zid + z_off, + dev_conserved[id], dev_conserved[4 * n_cells + id]); + Average_Cell_All_Fields(xid, yid, zid, nx, ny, nz, n_cells, n_fields, gamma, dev_conserved, n_ghost, slow_check); } #endif // DENSITY_FLOOR /* @@ -400,7 +417,6 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, R */ } } - __device__ __host__ Real hydroInverseCrossingTime(Real const &E, Real const &d, Real const &d_inv, Real const &vx, Real const &vy, Real const &vz, Real const &dx, Real const &dy, Real const &dz, Real const &gamma) @@ -667,10 +683,21 @@ void Temperature_Ceiling(Real *dev_conserved, int nx, int ny, int nz, int n_ghos } } +__device__ Real SlowCellConditionChecker::max_dti_if_slow(Real E, Real d, Real d_inv, Real vx, Real vy, Real vz, + Real gamma) const +{ +#ifndef AVERAGE_SLOW_CELLS + return -1.0; +#else + Real max_dti = hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, dx, dy, dz, gamma); + return (max_dti > max_dti_slow) ? max_dti : -1.0; +#endif +} + #ifdef AVERAGE_SLOW_CELLS -void Average_Slow_Cells(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, - Real dz, Real gamma, Real max_dti_slow, Real xbound, Real ybound, Real zbound, int nx_offset, +void Average_Slow_Cells(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real gamma, + SlowCellConditionChecker slow_check, Real xbound, Real ybound, Real zbound, int nx_offset, int ny_offset, int nz_offset) { // set values for GPU kernels @@ -683,12 +710,12 @@ void Average_Slow_Cells(Real *dev_conserved, int nx, int ny, int nz, int n_ghost if (nx > 1 && ny > 1 && nz > 1) { // 3D hipLaunchKernelGGL(Average_Slow_Cells_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields, - dx, dy, dz, gamma, max_dti_slow, xbound, ybound, zbound, nx_offset, ny_offset, nz_offset); + gamma, slow_check, xbound, ybound, zbound, nx_offset, ny_offset, nz_offset); } } -__global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, - Real dy, Real dz, Real gamma, Real max_dti_slow, Real xbound, Real ybound, +__global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, + Real gamma, SlowCellConditionChecker slow_check, Real xbound, Real ybound, Real zbound, int nx_offset, int ny_offset, int nz_offset) { int id, xid, yid, zid, n_cells; @@ -711,25 +738,26 @@ __global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int n vz = dev_conserved[3 * n_cells + id] * d_inv; E = dev_conserved[4 * n_cells + id]; - // Compute the maximum inverse crossing time in the cell - max_dti = hydroInverseCrossingTime(E, d, d_inv, vx, vy, vz, dx, dy, dz, gamma); + // retrieve the max inverse crossing time in the cell if the cell meets the threshold for being a slow-cell. + // (if the cell doesn't meet the threshold, a negative value is returned instead) + max_dti = slow_check.max_dti_if_slow(E, d, d_inv, vx, vy, vz, gamma); - if (max_dti > max_dti_slow) { + if (max_dti >= 0) { speed = sqrt(vx * vx + vy * vy + vz * vz); temp = (gamma - 1) * (E - 0.5 * (speed * speed) * d) * ENERGY_UNIT / (d * DENSITY_UNIT / 0.6 / MP) / KB; P = (E - 0.5 * d * (vx * vx + vy * vy + vz * vz)) * (gamma - 1.0); cs = sqrt(d_inv * gamma * P) * VELOCITY_UNIT * 1e-5; - Real x = xbound + (nx_offset + xid - n_ghost + 0.5) * dx; - Real y = ybound + (ny_offset + yid - n_ghost + 0.5) * dy; - Real z = zbound + (nz_offset + zid - n_ghost + 0.5) * dz; + Real x = xbound + (nx_offset + xid - n_ghost + 0.5) * slow_check.dx; + Real y = ybound + (ny_offset + yid - n_ghost + 0.5) * slow_check.dy; + Real z = zbound + (nz_offset + zid - n_ghost + 0.5) * slow_check.dz; // Average this cell kernel_printf( " Average Slow Cell [ %.5e %.5e %.5e ] -> dt_cell=%f dt_min=%f, n=%.3e, " "T=%.3e, v=%.3e (%.3e, %.3e, %.3e), cs=%.3e\n", - x, y, z, 1. / max_dti, 1. / max_dti_slow, dev_conserved[id] * DENSITY_UNIT / 0.6 / MP, temp, + x, y, z, 1. / max_dti, 1. / slow_check.max_dti_slow, dev_conserved[id] * DENSITY_UNIT / 0.6 / MP, temp, speed * VELOCITY_UNIT * 1e-5, vx * VELOCITY_UNIT * 1e-5, vy * VELOCITY_UNIT * 1e-5, vz * VELOCITY_UNIT * 1e-5, cs); - Average_Cell_All_Fields(xid, yid, zid, nx, ny, nz, n_cells, n_fields, gamma, dev_conserved); + Average_Cell_All_Fields(xid, yid, zid, nx, ny, nz, n_cells, n_fields, gamma, dev_conserved, n_ghost, slow_check); } } } @@ -1253,7 +1281,8 @@ __device__ Real Average_Cell_Single_Field(int field_indx, int i, int j, int k, i } __device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int nz, int ncells, int n_fields, - Real gamma, Real *conserved) + Real gamma, Real *conserved, int stale_depth, + SlowCellConditionChecker slow_check) { int id = i + (j)*nx + (k)*nx * ny; @@ -1281,18 +1310,25 @@ __device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int for (int kk = k - 1; kk <= k + 1; kk++) { for (int jj = j - 1; jj <= j + 1; jj++) { for (int ii = i - 1; ii <= i + 1; ii++) { + if (ii <= stale_depth - 1 || ii >= nx - stale_depth || jj <= stale_depth - 1 || jj >= ny - stale_depth || + kk <= stale_depth - 1 || kk >= nz - stale_depth) { + continue; + } + idn = ii + jj * nx + kk * nx * ny; d = conserved[grid_enum::density * ncells + idn]; mx = conserved[grid_enum::momentum_x * ncells + idn]; my = conserved[grid_enum::momentum_y * ncells + idn]; mz = conserved[grid_enum::momentum_z * ncells + idn]; - P = (conserved[grid_enum::Energy * ncells + idn] - (0.5 / d) * (mx * mx + my * my + mz * mz)) * (gamma - 1.0); + E = conserved[grid_enum::Energy * ncells + idn]; + P = (E - (0.5 / d) * (mx * mx + my * my + mz * mz)) * (gamma - 1.0); #ifdef SCALAR for (int n = 0; n < NSCALARS; n++) { // NOLINT scalar[n] = conserved[grid_enum::scalar * ncells + idn]; } #endif - if (d > 0.0 && P > 0.0) { + Real d_inv = 1.0 / d; + if (d > 0.0 && P > 0.0 && not slow_check.is_slow(E, d, d_inv, mx * d_inv, my * d_inv, mz * d_inv, gamma)) { d_av += d; vx_av += mx; vy_av += my; diff --git a/src/hydro/hydro_cuda.h b/src/hydro/hydro_cuda.h index 61b1073de..8deed8ad9 100644 --- a/src/hydro/hydro_cuda.h +++ b/src/hydro/hydro_cuda.h @@ -5,6 +5,7 @@ #define HYDRO_CUDA_H #include "../global/global.h" +#include "../hydro/average_cells.h" #include "../utils/mhd_utilities.h" __global__ void Update_Conserved_Variables_1D(Real *dev_conserved, Real *dev_F, int n_cells, int x_off, int n_ghost, @@ -21,6 +22,10 @@ __global__ void Update_Conserved_Variables_3D(Real *dev_conserved, Real *Q_Lx, R Real gamma, int n_fields, int custom_grav, Real density_floor, Real *dev_potential); +__global__ void PostUpdate_Conserved_Correct_Crashed_3D(Real *dev_conserved, int nx, int ny, int nz, int x_off, + int y_off, int z_off, int n_ghost, Real gamma, int n_fields, + SlowCellConditionChecker slow_check); + /*! * \brief Determine the maximum inverse crossing time in a specific cell * @@ -80,17 +85,6 @@ void Temperature_Ceiling(Real *dev_conserved, int nx, int ny, int nz, int n_ghos Real T_ceiling); #endif // TEMPERATURE CEILING -#ifdef AVERAGE_SLOW_CELLS - -void Average_Slow_Cells(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, Real dy, - Real dz, Real gamma, Real max_dti_slow, Real xbound, Real ybound, Real zbound, int nx_offset, - int ny_offset, int nz_offset); - -__global__ void Average_Slow_Cells_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real dx, - Real dy, Real dz, Real gamma, Real max_dti_slow, Real xbound, Real ybound, - Real zbound, int nx_offset, int ny_offset, int nz_offset); -#endif - void Apply_Temperature_Floor(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, Real U_floor); __global__ void Temperature_Floor_Kernel(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields, @@ -119,8 +113,38 @@ __global__ void Select_Internal_Energy_2D(Real *dev_conserved, int nx, int ny, i __global__ void Select_Internal_Energy_3D(Real *dev_conserved, int nx, int ny, int nz, int n_ghost, int n_fields); +/*! \brief Overwrites the values in the specified cell with the average of all the values from the (up to) 26 + * neighboring cells. + * + * Care is taken when applying this logic to a cell near the edge of a block (where the entire simulation domain + * is decomposed into 1 or more blocks). + * * Recall that the entire reason we have ghost zones is that the stencil for computing flux-divergence can't + * be applied uniformly to all cells -- the cells in the ghost zone can't be properly updated with the rest + * the local block when applying the flux-divergence. We might refer to these cells that aren't properly + * updated as being "stale". We refer to the width of the outer ring of stale values as the ``stale-depth`` + * * For concreteness, consider a pure hydro/mhd simulation using the VL integrator: + * - Right after refreshing the ghost-zones, the stale_depth is 0 + * - After the partial time-step, the stale_depth is 1. + * - After the full timestep, the stale depth depends on the choice of reconstruction. (e.g. it is 2 for + * for nearest neighbor and 3 for plmp). + * - The ghost-depth should always be equal to the max stale-depth at the end of a simulation cycle (if + * ghost-depth is bigger, extra work is done. If it's smaller, then your simulation is wrong) + * * To respect the simulations boundaries, values in "stale" cells are excluded from the averages. If + * stale-depth is 0, then values from beyond the edge of the simulation are excluded from averages + * + * \note + * From a perfectionist's perspective, one could argue that we really should increment the stale-depth whenever + * we call this function (in other words, we should have an extra layer of ghost zones for each time we call + * this function). + * * rationale: if we don't, then the the number of neighbors considered results of the simulation can vary + * based on how close a cell is to a block-edge (the number of cells varies from 7 to 26). + * * more pragmatically: this probably doesn't matter a whole lot given that this piece of machinery is a + * band-aid solution to begin with. + * * Aside: a similar argument could be made for the energy-synchronization step of the dual-energy formalism. + */ __device__ void Average_Cell_All_Fields(int i, int j, int k, int nx, int ny, int nz, int ncells, int n_fields, - Real gamma, Real *conserved); + Real gamma, Real *conserved, int stale_depth, + SlowCellConditionChecker slow_check); __device__ Real Average_Cell_Single_Field(int field_indx, int i, int j, int k, int nx, int ny, int nz, int ncells, Real *conserved); diff --git a/src/integrators/VL_3D_cuda.cu b/src/integrators/VL_3D_cuda.cu index 0f9ccc013..202ee7b4a 100644 --- a/src/integrators/VL_3D_cuda.cu +++ b/src/integrators/VL_3D_cuda.cu @@ -35,7 +35,8 @@ __global__ void Update_Conserved_Variables_3D_half(Real *dev_conserved, Real *de void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, - Real dt, int n_fields, int custom_grav, Real density_floor, Real *host_grav_potential) + Real dt, int n_fields, int custom_grav, Real density_floor, Real *host_grav_potential, + const SlowCellConditionChecker &slow_check) { // Here, *dev_conserved contains the entire // set of conserved variables on the grid @@ -373,6 +374,11 @@ void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int dt, gama, n_fields, custom_grav, density_floor, dev_grav_potential); GPU_Error_Check(); + // Step 6b: Address any crashed threads + hipLaunchKernelGGL(PostUpdate_Conserved_Correct_Crashed_3D, update_full_launch_params.get_numBlocks(), + update_full_launch_params.get_threadsPerBlock(), 0, 0, dev_conserved, nx, ny, nz, x_off, y_off, + z_off, n_ghost, gama, n_fields, slow_check); + #ifdef MHD // Update the magnetic fields hipLaunchKernelGGL(mhd::Update_Magnetic_Field_3D, update_magnetic_launch_params.get_numBlocks(), diff --git a/src/integrators/VL_3D_cuda.h b/src/integrators/VL_3D_cuda.h index 4b80a4604..e9d3d70fb 100644 --- a/src/integrators/VL_3D_cuda.h +++ b/src/integrators/VL_3D_cuda.h @@ -5,10 +5,12 @@ #define VL_3D_CUDA_H #include "../global/global.h" +#include "../hydro/average_cells.h" void VL_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, - Real dt, int n_fields, int custom_grav, Real density_floor, Real *host_grav_potential); + Real dt, int n_fields, int custom_grav, Real density_floor, Real *host_grav_potential, + const SlowCellConditionChecker &slow_check); void Free_Memory_VL_3D(); diff --git a/src/integrators/simple_3D_cuda.cu b/src/integrators/simple_3D_cuda.cu index 2e834e370..6f8eb0182 100644 --- a/src/integrators/simple_3D_cuda.cu +++ b/src/integrators/simple_3D_cuda.cu @@ -24,7 +24,8 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, - Real dt, int n_fields, int custom_grav, Real density_floor, Real *host_grav_potential) + Real dt, int n_fields, int custom_grav, Real density_floor, Real *host_grav_potential, + const SlowCellConditionChecker &slow_check) { // Here, *dev_conserved contains the entire // set of conserved variables on the grid @@ -154,6 +155,11 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, zbound, dt, gama, n_fields, custom_grav, density_floor, dev_grav_potential); GPU_Error_Check(); + // Step 3b: Address any crashed threads + hipLaunchKernelGGL(PostUpdate_Conserved_Correct_Crashed_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, + x_off, y_off, z_off, n_ghost, gama, n_fields, slow_check); + GPU_Error_Check(); + #ifdef DE hipLaunchKernelGGL(Select_Internal_Energy_3D, dim1dGrid, dim1dBlock, 0, 0, dev_conserved, nx, ny, nz, n_ghost, n_fields); diff --git a/src/integrators/simple_3D_cuda.h b/src/integrators/simple_3D_cuda.h index 847b93c61..b505c64eb 100644 --- a/src/integrators/simple_3D_cuda.h +++ b/src/integrators/simple_3D_cuda.h @@ -9,7 +9,8 @@ void Simple_Algorithm_3D_CUDA(Real *d_conserved, Real *d_grav_potential, int nx, int ny, int nz, int x_off, int y_off, int z_off, int n_ghost, Real dx, Real dy, Real dz, Real xbound, Real ybound, Real zbound, - Real dt, int n_fields, int custom_grav, Real density_floor, Real *host_grav_potential); + Real dt, int n_fields, int custom_grav, Real density_floor, Real *host_grav_potential, + const SlowCellConditionChecker &slow_check); void Free_Memory_Simple_3D();